/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "symbol-summary.h"
#include "cilk.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
#include "debug.h"


/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).
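   With the simd modifier the chunk size is rounded up to a multiple of the
   maximum vectorization factor, computed as (CHUNK_SIZE + VF - 1) & -VF;
   e.g. with a VF of 8 a requested chunk of 10 becomes 16, so every chunk
   covers whole SIMD lanes.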
*/ 201 202 static tree 203 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) 204 { 205 if (!simd_schedule) 206 return chunk_size; 207 208 int vf = omp_max_vf (); 209 if (vf == 1) 210 return chunk_size; 211 212 tree type = TREE_TYPE (chunk_size); 213 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, 214 build_int_cst (type, vf - 1)); 215 return fold_build2 (BIT_AND_EXPR, type, chunk_size, 216 build_int_cst (type, -vf)); 217 } 218 219 /* Collect additional arguments needed to emit a combined 220 parallel+workshare call. WS_STMT is the workshare directive being 221 expanded. */ 222 223 static vec<tree, va_gc> * 224 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt) 225 { 226 tree t; 227 location_t loc = gimple_location (ws_stmt); 228 vec<tree, va_gc> *ws_args; 229 230 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt)) 231 { 232 struct omp_for_data fd; 233 tree n1, n2; 234 235 omp_extract_for_data (for_stmt, &fd, NULL); 236 n1 = fd.loop.n1; 237 n2 = fd.loop.n2; 238 239 if (gimple_omp_for_combined_into_p (for_stmt)) 240 { 241 tree innerc 242 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt), 243 OMP_CLAUSE__LOOPTEMP_); 244 gcc_assert (innerc); 245 n1 = OMP_CLAUSE_DECL (innerc); 246 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 247 OMP_CLAUSE__LOOPTEMP_); 248 gcc_assert (innerc); 249 n2 = OMP_CLAUSE_DECL (innerc); 250 } 251 252 vec_alloc (ws_args, 3 + (fd.chunk_size != 0)); 253 254 t = fold_convert_loc (loc, long_integer_type_node, n1); 255 ws_args->quick_push (t); 256 257 t = fold_convert_loc (loc, long_integer_type_node, n2); 258 ws_args->quick_push (t); 259 260 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step); 261 ws_args->quick_push (t); 262 263 if (fd.chunk_size) 264 { 265 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size); 266 t = omp_adjust_chunk_size (t, fd.simd_schedule); 267 ws_args->quick_push (t); 268 } 269 270 return ws_args; 271 } 272 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) 273 { 274 /* Number of sections is equal to the number of edges from the 275 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to 276 the exit of the sections region. */ 277 basic_block bb = single_succ (gimple_bb (ws_stmt)); 278 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1); 279 vec_alloc (ws_args, 1); 280 ws_args->quick_push (t); 281 return ws_args; 282 } 283 284 gcc_unreachable (); 285 } 286 287 /* Discover whether REGION is a combined parallel+workshare region. */ 288 289 static void 290 determine_parallel_type (struct omp_region *region) 291 { 292 basic_block par_entry_bb, par_exit_bb; 293 basic_block ws_entry_bb, ws_exit_bb; 294 295 if (region == NULL || region->inner == NULL 296 || region->exit == NULL || region->inner->exit == NULL 297 || region->inner->cont == NULL) 298 return; 299 300 /* We only support parallel+for and parallel+sections. */ 301 if (region->type != GIMPLE_OMP_PARALLEL 302 || (region->inner->type != GIMPLE_OMP_FOR 303 && region->inner->type != GIMPLE_OMP_SECTIONS)) 304 return; 305 306 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and 307 WS_EXIT_BB -> PAR_EXIT_BB. 
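   That is, the parallel entry block must have the workshare entry block as
   its single successor, and the workshare exit block must have the parallel
   exit block as its single successor.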
*/ 308 par_entry_bb = region->entry; 309 par_exit_bb = region->exit; 310 ws_entry_bb = region->inner->entry; 311 ws_exit_bb = region->inner->exit; 312 313 if (single_succ (par_entry_bb) == ws_entry_bb 314 && single_succ (ws_exit_bb) == par_exit_bb 315 && workshare_safe_to_combine_p (ws_entry_bb) 316 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb)) 317 || (last_and_only_stmt (ws_entry_bb) 318 && last_and_only_stmt (par_exit_bb)))) 319 { 320 gimple *par_stmt = last_stmt (par_entry_bb); 321 gimple *ws_stmt = last_stmt (ws_entry_bb); 322 323 if (region->inner->type == GIMPLE_OMP_FOR) 324 { 325 /* If this is a combined parallel loop, we need to determine 326 whether or not to use the combined library calls. There 327 are two cases where we do not apply the transformation: 328 static loops and any kind of ordered loop. In the first 329 case, we already open code the loop so there is no need 330 to do anything else. In the latter case, the combined 331 parallel loop call would still need extra synchronization 332 to implement ordered semantics, so there would not be any 333 gain in using the combined call. */ 334 tree clauses = gimple_omp_for_clauses (ws_stmt); 335 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE); 336 if (c == NULL 337 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK) 338 == OMP_CLAUSE_SCHEDULE_STATIC) 339 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)) 340 { 341 region->is_combined_parallel = false; 342 region->inner->is_combined_parallel = false; 343 return; 344 } 345 } 346 347 region->is_combined_parallel = true; 348 region->inner->is_combined_parallel = true; 349 region->ws_args = get_ws_args_for (par_stmt, ws_stmt); 350 } 351 } 352 353 /* Debugging dumps for parallel regions. */ 354 void dump_omp_region (FILE *, struct omp_region *, int); 355 void debug_omp_region (struct omp_region *); 356 void debug_all_omp_regions (void); 357 358 /* Dump the parallel region tree rooted at REGION. */ 359 360 void 361 dump_omp_region (FILE *file, struct omp_region *region, int indent) 362 { 363 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index, 364 gimple_code_name[region->type]); 365 366 if (region->inner) 367 dump_omp_region (file, region->inner, indent + 4); 368 369 if (region->cont) 370 { 371 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "", 372 region->cont->index); 373 } 374 375 if (region->exit) 376 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "", 377 region->exit->index); 378 else 379 fprintf (file, "%*s[no exit marker]\n", indent, ""); 380 381 if (region->next) 382 dump_omp_region (file, region->next, indent); 383 } 384 385 DEBUG_FUNCTION void 386 debug_omp_region (struct omp_region *region) 387 { 388 dump_omp_region (stderr, region, 0); 389 } 390 391 DEBUG_FUNCTION void 392 debug_all_omp_regions (void) 393 { 394 dump_omp_region (stderr, root_omp_region, 0); 395 } 396 397 /* Create a new parallel region starting at STMT inside region PARENT. */ 398 399 static struct omp_region * 400 new_omp_region (basic_block bb, enum gimple_code type, 401 struct omp_region *parent) 402 { 403 struct omp_region *region = XCNEW (struct omp_region); 404 405 region->outer = parent; 406 region->entry = bb; 407 region->type = type; 408 409 if (parent) 410 { 411 /* This is a nested region. Add it to the list of inner 412 regions in PARENT. */ 413 region->next = parent->inner; 414 parent->inner = region; 415 } 416 else 417 { 418 /* This is a toplevel region. Add it to the list of toplevel 419 regions in ROOT_OMP_REGION. 
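   New regions are pushed at the head of the list, so sibling regions end up
   linked in reverse order of their discovery.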
*/ 420 region->next = root_omp_region; 421 root_omp_region = region; 422 } 423 424 return region; 425 } 426 427 /* Release the memory associated with the region tree rooted at REGION. */ 428 429 static void 430 free_omp_region_1 (struct omp_region *region) 431 { 432 struct omp_region *i, *n; 433 434 for (i = region->inner; i ; i = n) 435 { 436 n = i->next; 437 free_omp_region_1 (i); 438 } 439 440 free (region); 441 } 442 443 /* Release the memory for the entire omp region tree. */ 444 445 void 446 omp_free_regions (void) 447 { 448 struct omp_region *r, *n; 449 for (r = root_omp_region; r ; r = n) 450 { 451 n = r->next; 452 free_omp_region_1 (r); 453 } 454 root_omp_region = NULL; 455 } 456 457 /* A convenience function to build an empty GIMPLE_COND with just the 458 condition. */ 459 460 static gcond * 461 gimple_build_cond_empty (tree cond) 462 { 463 enum tree_code pred_code; 464 tree lhs, rhs; 465 466 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs); 467 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE); 468 } 469 470 /* Return true if a parallel REGION is within a declare target function or 471 within a target region and is not a part of a gridified target. */ 472 473 static bool 474 parallel_needs_hsa_kernel_p (struct omp_region *region) 475 { 476 bool indirect = false; 477 for (region = region->outer; region; region = region->outer) 478 { 479 if (region->type == GIMPLE_OMP_PARALLEL) 480 indirect = true; 481 else if (region->type == GIMPLE_OMP_TARGET) 482 { 483 gomp_target *tgt_stmt 484 = as_a <gomp_target *> (last_stmt (region->entry)); 485 486 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), 487 OMP_CLAUSE__GRIDDIM_)) 488 return indirect; 489 else 490 return true; 491 } 492 } 493 494 if (lookup_attribute ("omp declare target", 495 DECL_ATTRIBUTES (current_function_decl))) 496 return true; 497 498 return false; 499 } 500 501 /* Build the function calls to GOMP_parallel_start etc to actually 502 generate the parallel operation. REGION is the parallel region 503 being expanded. BB is the block where to insert the code. WS_ARGS 504 will be set if this is a call to a combined parallel+workshare 505 construct, it contains the list of additional arguments needed by 506 the workshare construct. */ 507 508 static void 509 expand_parallel_call (struct omp_region *region, basic_block bb, 510 gomp_parallel *entry_stmt, 511 vec<tree, va_gc> *ws_args) 512 { 513 tree t, t1, t2, val, cond, c, clauses, flags; 514 gimple_stmt_iterator gsi; 515 gimple *stmt; 516 enum built_in_function start_ix; 517 int start_ix2; 518 location_t clause_loc; 519 vec<tree, va_gc> *args; 520 521 clauses = gimple_omp_parallel_clauses (entry_stmt); 522 523 /* Determine what flavor of GOMP_parallel we will be 524 emitting. 
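   For a combined parallel+loop region this selects one of the
   BUILT_IN_GOMP_PARALLEL_LOOP_* entry points according to the schedule kind
   (runtime, dynamic, guided, or their nonmonotonic variants); a combined
   parallel+sections region uses BUILT_IN_GOMP_PARALLEL_SECTIONS; everything
   else uses plain BUILT_IN_GOMP_PARALLEL.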
*/ 525 start_ix = BUILT_IN_GOMP_PARALLEL; 526 if (is_combined_parallel (region)) 527 { 528 switch (region->inner->type) 529 { 530 case GIMPLE_OMP_FOR: 531 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); 532 switch (region->inner->sched_kind) 533 { 534 case OMP_CLAUSE_SCHEDULE_RUNTIME: 535 start_ix2 = 3; 536 break; 537 case OMP_CLAUSE_SCHEDULE_DYNAMIC: 538 case OMP_CLAUSE_SCHEDULE_GUIDED: 539 if (region->inner->sched_modifiers 540 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) 541 { 542 start_ix2 = 3 + region->inner->sched_kind; 543 break; 544 } 545 /* FALLTHRU */ 546 default: 547 start_ix2 = region->inner->sched_kind; 548 break; 549 } 550 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC; 551 start_ix = (enum built_in_function) start_ix2; 552 break; 553 case GIMPLE_OMP_SECTIONS: 554 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS; 555 break; 556 default: 557 gcc_unreachable (); 558 } 559 } 560 561 /* By default, the value of NUM_THREADS is zero (selected at run time) 562 and there is no conditional. */ 563 cond = NULL_TREE; 564 val = build_int_cst (unsigned_type_node, 0); 565 flags = build_int_cst (unsigned_type_node, 0); 566 567 c = omp_find_clause (clauses, OMP_CLAUSE_IF); 568 if (c) 569 cond = OMP_CLAUSE_IF_EXPR (c); 570 571 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS); 572 if (c) 573 { 574 val = OMP_CLAUSE_NUM_THREADS_EXPR (c); 575 clause_loc = OMP_CLAUSE_LOCATION (c); 576 } 577 else 578 clause_loc = gimple_location (entry_stmt); 579 580 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND); 581 if (c) 582 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c)); 583 584 /* Ensure 'val' is of the correct type. */ 585 val = fold_convert_loc (clause_loc, unsigned_type_node, val); 586 587 /* If we found the clause 'if (cond)', build either 588 (cond != 0) or (cond ? val : 1u). 
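   Either way, a false COND must force a team of exactly one thread, while a
   true COND keeps the requested number of threads (or 0, i.e. let the
   runtime decide, when no num_threads clause was given).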
*/ 589 if (cond) 590 { 591 cond = gimple_boolify (cond); 592 593 if (integer_zerop (val)) 594 val = fold_build2_loc (clause_loc, 595 EQ_EXPR, unsigned_type_node, cond, 596 build_int_cst (TREE_TYPE (cond), 0)); 597 else 598 { 599 basic_block cond_bb, then_bb, else_bb; 600 edge e, e_then, e_else; 601 tree tmp_then, tmp_else, tmp_join, tmp_var; 602 603 tmp_var = create_tmp_var (TREE_TYPE (val)); 604 if (gimple_in_ssa_p (cfun)) 605 { 606 tmp_then = make_ssa_name (tmp_var); 607 tmp_else = make_ssa_name (tmp_var); 608 tmp_join = make_ssa_name (tmp_var); 609 } 610 else 611 { 612 tmp_then = tmp_var; 613 tmp_else = tmp_var; 614 tmp_join = tmp_var; 615 } 616 617 e = split_block_after_labels (bb); 618 cond_bb = e->src; 619 bb = e->dest; 620 remove_edge (e); 621 622 then_bb = create_empty_bb (cond_bb); 623 else_bb = create_empty_bb (then_bb); 624 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); 625 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); 626 627 stmt = gimple_build_cond_empty (cond); 628 gsi = gsi_start_bb (cond_bb); 629 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 630 631 gsi = gsi_start_bb (then_bb); 632 expand_omp_build_assign (&gsi, tmp_then, val, true); 633 634 gsi = gsi_start_bb (else_bb); 635 expand_omp_build_assign (&gsi, tmp_else, 636 build_int_cst (unsigned_type_node, 1), 637 true); 638 639 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); 640 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); 641 add_bb_to_loop (then_bb, cond_bb->loop_father); 642 add_bb_to_loop (else_bb, cond_bb->loop_father); 643 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU); 644 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU); 645 646 if (gimple_in_ssa_p (cfun)) 647 { 648 gphi *phi = create_phi_node (tmp_join, bb); 649 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION); 650 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION); 651 } 652 653 val = tmp_join; 654 } 655 656 gsi = gsi_start_bb (bb); 657 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE, 658 false, GSI_CONTINUE_LINKING); 659 } 660 661 gsi = gsi_last_bb (bb); 662 t = gimple_omp_parallel_data_arg (entry_stmt); 663 if (t == NULL) 664 t1 = null_pointer_node; 665 else 666 t1 = build_fold_addr_expr (t); 667 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt); 668 t2 = build_fold_addr_expr (child_fndecl); 669 670 vec_alloc (args, 4 + vec_safe_length (ws_args)); 671 args->quick_push (t2); 672 args->quick_push (t1); 673 args->quick_push (val); 674 if (ws_args) 675 args->splice (*ws_args); 676 args->quick_push (flags); 677 678 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, 679 builtin_decl_explicit (start_ix), args); 680 681 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 682 false, GSI_CONTINUE_LINKING); 683 684 if (hsa_gen_requested_p () 685 && parallel_needs_hsa_kernel_p (region)) 686 { 687 cgraph_node *child_cnode = cgraph_node::get (child_fndecl); 688 hsa_register_kernel (child_cnode); 689 } 690 } 691 692 /* Insert a function call whose name is FUNC_NAME with the information from 693 ENTRY_STMT into the basic_block BB. 
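   WS_ARGS must hold exactly two elements, the function name and the grain;
   the emitted call passes the outlined child function, the shared data
   block, the _Cilk_for iteration count and that grain.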
*/ 694 695 static void 696 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt, 697 vec <tree, va_gc> *ws_args) 698 { 699 tree t, t1, t2; 700 gimple_stmt_iterator gsi; 701 vec <tree, va_gc> *args; 702 703 gcc_assert (vec_safe_length (ws_args) == 2); 704 tree func_name = (*ws_args)[0]; 705 tree grain = (*ws_args)[1]; 706 707 tree clauses = gimple_omp_parallel_clauses (entry_stmt); 708 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_); 709 gcc_assert (count != NULL_TREE); 710 count = OMP_CLAUSE_OPERAND (count, 0); 711 712 gsi = gsi_last_bb (bb); 713 t = gimple_omp_parallel_data_arg (entry_stmt); 714 if (t == NULL) 715 t1 = null_pointer_node; 716 else 717 t1 = build_fold_addr_expr (t); 718 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt)); 719 720 vec_alloc (args, 4); 721 args->quick_push (t2); 722 args->quick_push (t1); 723 args->quick_push (count); 724 args->quick_push (grain); 725 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args); 726 727 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, 728 GSI_CONTINUE_LINKING); 729 } 730 731 /* Build the function call to GOMP_task to actually 732 generate the task operation. BB is the block where to insert the code. */ 733 734 static void 735 expand_task_call (struct omp_region *region, basic_block bb, 736 gomp_task *entry_stmt) 737 { 738 tree t1, t2, t3; 739 gimple_stmt_iterator gsi; 740 location_t loc = gimple_location (entry_stmt); 741 742 tree clauses = gimple_omp_task_clauses (entry_stmt); 743 744 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF); 745 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED); 746 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE); 747 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); 748 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL); 749 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY); 750 751 unsigned int iflags 752 = (untied ? GOMP_TASK_FLAG_UNTIED : 0) 753 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0) 754 | (depend ? 
GOMP_TASK_FLAG_DEPEND : 0); 755 756 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt); 757 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE; 758 tree num_tasks = NULL_TREE; 759 bool ull = false; 760 if (taskloop_p) 761 { 762 gimple *g = last_stmt (region->outer->entry); 763 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR 764 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP); 765 struct omp_for_data fd; 766 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL); 767 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 768 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar), 769 OMP_CLAUSE__LOOPTEMP_); 770 startvar = OMP_CLAUSE_DECL (startvar); 771 endvar = OMP_CLAUSE_DECL (endvar); 772 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step); 773 if (fd.loop.cond_code == LT_EXPR) 774 iflags |= GOMP_TASK_FLAG_UP; 775 tree tclauses = gimple_omp_for_clauses (g); 776 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS); 777 if (num_tasks) 778 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks); 779 else 780 { 781 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE); 782 if (num_tasks) 783 { 784 iflags |= GOMP_TASK_FLAG_GRAINSIZE; 785 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks); 786 } 787 else 788 num_tasks = integer_zero_node; 789 } 790 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks); 791 if (ifc == NULL_TREE) 792 iflags |= GOMP_TASK_FLAG_IF; 793 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP)) 794 iflags |= GOMP_TASK_FLAG_NOGROUP; 795 ull = fd.iter_type == long_long_unsigned_type_node; 796 } 797 else if (priority) 798 iflags |= GOMP_TASK_FLAG_PRIORITY; 799 800 tree flags = build_int_cst (unsigned_type_node, iflags); 801 802 tree cond = boolean_true_node; 803 if (ifc) 804 { 805 if (taskloop_p) 806 { 807 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); 808 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, 809 build_int_cst (unsigned_type_node, 810 GOMP_TASK_FLAG_IF), 811 build_int_cst (unsigned_type_node, 0)); 812 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, 813 flags, t); 814 } 815 else 816 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); 817 } 818 819 if (finalc) 820 { 821 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc)); 822 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, 823 build_int_cst (unsigned_type_node, 824 GOMP_TASK_FLAG_FINAL), 825 build_int_cst (unsigned_type_node, 0)); 826 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t); 827 } 828 if (depend) 829 depend = OMP_CLAUSE_DECL (depend); 830 else 831 depend = build_int_cst (ptr_type_node, 0); 832 if (priority) 833 priority = fold_convert (integer_type_node, 834 OMP_CLAUSE_PRIORITY_EXPR (priority)); 835 else 836 priority = integer_zero_node; 837 838 gsi = gsi_last_bb (bb); 839 tree t = gimple_omp_task_data_arg (entry_stmt); 840 if (t == NULL) 841 t2 = null_pointer_node; 842 else 843 t2 = build_fold_addr_expr_loc (loc, t); 844 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt)); 845 t = gimple_omp_task_copy_fn (entry_stmt); 846 if (t == NULL) 847 t3 = null_pointer_node; 848 else 849 t3 = build_fold_addr_expr_loc (loc, t); 850 851 if (taskloop_p) 852 t = build_call_expr (ull 853 ? 
builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL) 854 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP), 855 11, t1, t2, t3, 856 gimple_omp_task_arg_size (entry_stmt), 857 gimple_omp_task_arg_align (entry_stmt), flags, 858 num_tasks, priority, startvar, endvar, step); 859 else 860 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK), 861 9, t1, t2, t3, 862 gimple_omp_task_arg_size (entry_stmt), 863 gimple_omp_task_arg_align (entry_stmt), cond, flags, 864 depend, priority); 865 866 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 867 false, GSI_CONTINUE_LINKING); 868 } 869 870 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */ 871 872 static tree 873 vec2chain (vec<tree, va_gc> *v) 874 { 875 tree chain = NULL_TREE, t; 876 unsigned ix; 877 878 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t) 879 { 880 DECL_CHAIN (t) = chain; 881 chain = t; 882 } 883 884 return chain; 885 } 886 887 /* Remove barriers in REGION->EXIT's block. Note that this is only 888 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region 889 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that 890 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be 891 removed. */ 892 893 static void 894 remove_exit_barrier (struct omp_region *region) 895 { 896 gimple_stmt_iterator gsi; 897 basic_block exit_bb; 898 edge_iterator ei; 899 edge e; 900 gimple *stmt; 901 int any_addressable_vars = -1; 902 903 exit_bb = region->exit; 904 905 /* If the parallel region doesn't return, we don't have REGION->EXIT 906 block at all. */ 907 if (! exit_bb) 908 return; 909 910 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The 911 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of 912 statements that can appear in between are extremely limited -- no 913 memory operations at all. Here, we allow nothing at all, so the 914 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */ 915 gsi = gsi_last_bb (exit_bb); 916 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 917 gsi_prev (&gsi); 918 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL) 919 return; 920 921 FOR_EACH_EDGE (e, ei, exit_bb->preds) 922 { 923 gsi = gsi_last_bb (e->src); 924 if (gsi_end_p (gsi)) 925 continue; 926 stmt = gsi_stmt (gsi); 927 if (gimple_code (stmt) == GIMPLE_OMP_RETURN 928 && !gimple_omp_return_nowait_p (stmt)) 929 { 930 /* OpenMP 3.0 tasks unfortunately prevent this optimization 931 in many cases. If there could be tasks queued, the barrier 932 might be needed to let the tasks run before some local 933 variable of the parallel that the task uses as shared 934 runs out of scope. The task can be spawned either 935 from within current function (this would be easy to check) 936 or from some function it calls and gets passed an address 937 of such a variable. 
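   So we are conservative: the barrier is dropped (the GIMPLE_OMP_RETURN
   marked nowait) only when neither the child function's locals nor any
   BLOCK scope up to the parallel's own block contains an addressable
   variable.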
*/ 938 if (any_addressable_vars < 0) 939 { 940 gomp_parallel *parallel_stmt 941 = as_a <gomp_parallel *> (last_stmt (region->entry)); 942 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt); 943 tree local_decls, block, decl; 944 unsigned ix; 945 946 any_addressable_vars = 0; 947 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl) 948 if (TREE_ADDRESSABLE (decl)) 949 { 950 any_addressable_vars = 1; 951 break; 952 } 953 for (block = gimple_block (stmt); 954 !any_addressable_vars 955 && block 956 && TREE_CODE (block) == BLOCK; 957 block = BLOCK_SUPERCONTEXT (block)) 958 { 959 for (local_decls = BLOCK_VARS (block); 960 local_decls; 961 local_decls = DECL_CHAIN (local_decls)) 962 if (TREE_ADDRESSABLE (local_decls)) 963 { 964 any_addressable_vars = 1; 965 break; 966 } 967 if (block == gimple_block (parallel_stmt)) 968 break; 969 } 970 } 971 if (!any_addressable_vars) 972 gimple_omp_return_set_nowait (stmt); 973 } 974 } 975 } 976 977 static void 978 remove_exit_barriers (struct omp_region *region) 979 { 980 if (region->type == GIMPLE_OMP_PARALLEL) 981 remove_exit_barrier (region); 982 983 if (region->inner) 984 { 985 region = region->inner; 986 remove_exit_barriers (region); 987 while (region->next) 988 { 989 region = region->next; 990 remove_exit_barriers (region); 991 } 992 } 993 } 994 995 /* Optimize omp_get_thread_num () and omp_get_num_threads () 996 calls. These can't be declared as const functions, but 997 within one parallel body they are constant, so they can be 998 transformed there into __builtin_omp_get_{thread_num,num_threads} () 999 which are declared const. Similarly for task body, except 1000 that in untied task omp_get_thread_num () can change at any task 1001 scheduling point. */ 1002 1003 static void 1004 optimize_omp_library_calls (gimple *entry_stmt) 1005 { 1006 basic_block bb; 1007 gimple_stmt_iterator gsi; 1008 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 1009 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree); 1010 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 1011 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree); 1012 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK 1013 && omp_find_clause (gimple_omp_task_clauses (entry_stmt), 1014 OMP_CLAUSE_UNTIED) != NULL); 1015 1016 FOR_EACH_BB_FN (bb, cfun) 1017 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1018 { 1019 gimple *call = gsi_stmt (gsi); 1020 tree decl; 1021 1022 if (is_gimple_call (call) 1023 && (decl = gimple_call_fndecl (call)) 1024 && DECL_EXTERNAL (decl) 1025 && TREE_PUBLIC (decl) 1026 && DECL_INITIAL (decl) == NULL) 1027 { 1028 tree built_in; 1029 1030 if (DECL_NAME (decl) == thr_num_id) 1031 { 1032 /* In #pragma omp task untied omp_get_thread_num () can change 1033 during the execution of the task region. 
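   An untied task may be resumed by a different thread after any task
   scheduling point, so the result is not invariant within the task body and
   the call must not be folded to the const builtin.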
*/ 1034 if (untied_task) 1035 continue; 1036 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 1037 } 1038 else if (DECL_NAME (decl) == num_thr_id) 1039 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 1040 else 1041 continue; 1042 1043 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in) 1044 || gimple_call_num_args (call) != 0) 1045 continue; 1046 1047 if (flag_exceptions && !TREE_NOTHROW (decl)) 1048 continue; 1049 1050 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE 1051 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)), 1052 TREE_TYPE (TREE_TYPE (built_in)))) 1053 continue; 1054 1055 gimple_call_set_fndecl (call, built_in); 1056 } 1057 } 1058 } 1059 1060 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be 1061 regimplified. */ 1062 1063 static tree 1064 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *) 1065 { 1066 tree t = *tp; 1067 1068 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */ 1069 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)) 1070 return t; 1071 1072 if (TREE_CODE (t) == ADDR_EXPR) 1073 recompute_tree_invariant_for_addr_expr (t); 1074 1075 *walk_subtrees = !TYPE_P (t) && !DECL_P (t); 1076 return NULL_TREE; 1077 } 1078 1079 /* Prepend or append TO = FROM assignment before or after *GSI_P. */ 1080 1081 static void 1082 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from, 1083 bool after) 1084 { 1085 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to); 1086 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE, 1087 !after, after ? GSI_CONTINUE_LINKING 1088 : GSI_SAME_STMT); 1089 gimple *stmt = gimple_build_assign (to, from); 1090 if (after) 1091 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING); 1092 else 1093 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT); 1094 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL) 1095 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL)) 1096 { 1097 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 1098 gimple_regimplify_operands (stmt, &gsi); 1099 } 1100 } 1101 1102 /* Expand the OpenMP parallel or task directive starting at REGION. */ 1103 1104 static void 1105 expand_omp_taskreg (struct omp_region *region) 1106 { 1107 basic_block entry_bb, exit_bb, new_bb; 1108 struct function *child_cfun; 1109 tree child_fn, block, t; 1110 gimple_stmt_iterator gsi; 1111 gimple *entry_stmt, *stmt; 1112 edge e; 1113 vec<tree, va_gc> *ws_args; 1114 1115 entry_stmt = last_stmt (region->entry); 1116 child_fn = gimple_omp_taskreg_child_fn (entry_stmt); 1117 child_cfun = DECL_STRUCT_FUNCTION (child_fn); 1118 1119 entry_bb = region->entry; 1120 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK) 1121 exit_bb = region->cont; 1122 else 1123 exit_bb = region->exit; 1124 1125 bool is_cilk_for 1126 = (flag_cilkplus 1127 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL 1128 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt), 1129 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE); 1130 1131 if (is_cilk_for) 1132 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for, 1133 and the inner statement contains the name of the built-in function 1134 and grain. 
  */
    ws_args = region->inner->ws_args;
  else if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable, in which case we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0)
			 == gimple_omp_taskreg_data_arg (entry_stmt))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.
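	 They are moved from CHILD_CFUN's local_decls vector onto the
	 BLOCK_VARS chain of the child function's DECL_INITIAL block, and any
	 statics found there are finalized with the varpool so cgraph does
	 not miss them.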
*/ 1244 block = DECL_INITIAL (child_fn); 1245 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); 1246 /* The gimplifier could record temporaries in parallel/task block 1247 rather than in containing function's local_decls chain, 1248 which would mean cgraph missed finalizing them. Do it now. */ 1249 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) 1250 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) 1251 varpool_node::finalize_decl (t); 1252 DECL_SAVED_TREE (child_fn) = NULL; 1253 /* We'll create a CFG for child_fn, so no gimple body is needed. */ 1254 gimple_set_body (child_fn, NULL); 1255 TREE_USED (block) = 1; 1256 1257 /* Reset DECL_CONTEXT on function arguments. */ 1258 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) 1259 DECL_CONTEXT (t) = child_fn; 1260 1261 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK, 1262 so that it can be moved to the child function. */ 1263 gsi = gsi_last_bb (entry_bb); 1264 stmt = gsi_stmt (gsi); 1265 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL 1266 || gimple_code (stmt) == GIMPLE_OMP_TASK)); 1267 e = split_block (entry_bb, stmt); 1268 gsi_remove (&gsi, true); 1269 entry_bb = e->dest; 1270 edge e2 = NULL; 1271 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) 1272 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 1273 else 1274 { 1275 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL); 1276 gcc_assert (e2->dest == region->exit); 1277 remove_edge (BRANCH_EDGE (entry_bb)); 1278 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src); 1279 gsi = gsi_last_bb (region->exit); 1280 gcc_assert (!gsi_end_p (gsi) 1281 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 1282 gsi_remove (&gsi, true); 1283 } 1284 1285 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */ 1286 if (exit_bb) 1287 { 1288 gsi = gsi_last_bb (exit_bb); 1289 gcc_assert (!gsi_end_p (gsi) 1290 && (gimple_code (gsi_stmt (gsi)) 1291 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN))); 1292 stmt = gimple_build_return (NULL); 1293 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); 1294 gsi_remove (&gsi, true); 1295 } 1296 1297 /* Move the parallel region into CHILD_CFUN. */ 1298 1299 if (gimple_in_ssa_p (cfun)) 1300 { 1301 init_tree_ssa (child_cfun); 1302 init_ssa_operands (child_cfun); 1303 child_cfun->gimple_df->in_ssa_p = true; 1304 block = NULL_TREE; 1305 } 1306 else 1307 block = gimple_block (entry_stmt); 1308 1309 /* Make sure to generate early debug for the function before 1310 outlining anything. */ 1311 if (! gimple_in_ssa_p (cfun)) 1312 (*debug_hooks->early_global_decl) (cfun->decl); 1313 1314 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); 1315 if (exit_bb) 1316 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; 1317 if (e2) 1318 { 1319 basic_block dest_bb = e2->dest; 1320 if (!exit_bb) 1321 make_edge (new_bb, dest_bb, EDGE_FALLTHRU); 1322 remove_edge (e2); 1323 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb); 1324 } 1325 /* When the OMP expansion process cannot guarantee an up-to-date 1326 loop tree arrange for the child function to fixup loops. */ 1327 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) 1328 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; 1329 1330 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. 
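	 That is, drop every decl whose DECL_CONTEXT is still the parent
	 function; the surviving entries are compacted in place and the
	 vector truncated to the new length.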
*/ 1331 num = vec_safe_length (child_cfun->local_decls); 1332 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) 1333 { 1334 t = (*child_cfun->local_decls)[srcidx]; 1335 if (DECL_CONTEXT (t) == cfun->decl) 1336 continue; 1337 if (srcidx != dstidx) 1338 (*child_cfun->local_decls)[dstidx] = t; 1339 dstidx++; 1340 } 1341 if (dstidx != num) 1342 vec_safe_truncate (child_cfun->local_decls, dstidx); 1343 1344 /* Inform the callgraph about the new function. */ 1345 child_cfun->curr_properties = cfun->curr_properties; 1346 child_cfun->has_simduid_loops |= cfun->has_simduid_loops; 1347 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; 1348 cgraph_node *node = cgraph_node::get_create (child_fn); 1349 node->parallelized_function = 1; 1350 cgraph_node::add_new_function (child_fn, true); 1351 1352 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) 1353 && !DECL_ASSEMBLER_NAME_SET_P (child_fn); 1354 1355 /* Fix the callgraph edges for child_cfun. Those for cfun will be 1356 fixed in a following pass. */ 1357 push_cfun (child_cfun); 1358 if (need_asm) 1359 assign_assembler_name_if_needed (child_fn); 1360 1361 if (optimize) 1362 optimize_omp_library_calls (entry_stmt); 1363 cgraph_edge::rebuild_edges (); 1364 1365 /* Some EH regions might become dead, see PR34608. If 1366 pass_cleanup_cfg isn't the first pass to happen with the 1367 new child, these dead EH edges might cause problems. 1368 Clean them up now. */ 1369 if (flag_exceptions) 1370 { 1371 basic_block bb; 1372 bool changed = false; 1373 1374 FOR_EACH_BB_FN (bb, cfun) 1375 changed |= gimple_purge_dead_eh_edges (bb); 1376 if (changed) 1377 cleanup_tree_cfg (); 1378 } 1379 if (gimple_in_ssa_p (cfun)) 1380 update_ssa (TODO_update_ssa); 1381 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) 1382 verify_loop_structure (); 1383 pop_cfun (); 1384 1385 if (dump_file && !gimple_in_ssa_p (cfun)) 1386 { 1387 omp_any_child_fn_dumped = true; 1388 dump_function_header (dump_file, child_fn, dump_flags); 1389 dump_function_to_file (child_fn, dump_file, dump_flags); 1390 } 1391 } 1392 1393 /* Emit a library call to launch the children threads. */ 1394 if (is_cilk_for) 1395 expand_cilk_for_call (new_bb, 1396 as_a <gomp_parallel *> (entry_stmt), ws_args); 1397 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) 1398 expand_parallel_call (region, new_bb, 1399 as_a <gomp_parallel *> (entry_stmt), ws_args); 1400 else 1401 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt)); 1402 if (gimple_in_ssa_p (cfun)) 1403 update_ssa (TODO_update_ssa_only_virtuals); 1404 } 1405 1406 /* Information about members of an OpenACC collapsed loop nest. */ 1407 1408 struct oacc_collapse 1409 { 1410 tree base; /* Base value. */ 1411 tree iters; /* Number of steps. */ 1412 tree step; /* Step size. */ 1413 tree tile; /* Tile increment (if tiled). */ 1414 tree outer; /* Tile iterator var. */ 1415 }; 1416 1417 /* Helper for expand_oacc_for. Determine collapsed loop information. 1418 Fill in COUNTS array. Emit any initialization code before GSI. 1419 Return the calculated outer loop bound of BOUND_TYPE. 
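   The bound is the product of the individual iteration counts (e.g.
   collapsing loops of 4 and 10 iterations yields a single loop of 40), and
   each COUNTS element records the base, iteration count and step later used
   to recompute the original iteration variables.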
*/ 1420 1421 static tree 1422 expand_oacc_collapse_init (const struct omp_for_data *fd, 1423 gimple_stmt_iterator *gsi, 1424 oacc_collapse *counts, tree bound_type, 1425 location_t loc) 1426 { 1427 tree tiling = fd->tiling; 1428 tree total = build_int_cst (bound_type, 1); 1429 int ix; 1430 1431 gcc_assert (integer_onep (fd->loop.step)); 1432 gcc_assert (integer_zerop (fd->loop.n1)); 1433 1434 /* When tiling, the first operand of the tile clause applies to the 1435 innermost loop, and we work outwards from there. Seems 1436 backwards, but whatever. */ 1437 for (ix = fd->collapse; ix--;) 1438 { 1439 const omp_for_data_loop *loop = &fd->loops[ix]; 1440 1441 tree iter_type = TREE_TYPE (loop->v); 1442 tree diff_type = iter_type; 1443 tree plus_type = iter_type; 1444 1445 gcc_assert (loop->cond_code == fd->loop.cond_code); 1446 1447 if (POINTER_TYPE_P (iter_type)) 1448 plus_type = sizetype; 1449 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) 1450 diff_type = signed_type_for (diff_type); 1451 1452 if (tiling) 1453 { 1454 tree num = build_int_cst (integer_type_node, fd->collapse); 1455 tree loop_no = build_int_cst (integer_type_node, ix); 1456 tree tile = TREE_VALUE (tiling); 1457 gcall *call 1458 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile, 1459 /* gwv-outer=*/integer_zero_node, 1460 /* gwv-inner=*/integer_zero_node); 1461 1462 counts[ix].outer = create_tmp_var (iter_type, ".outer"); 1463 counts[ix].tile = create_tmp_var (diff_type, ".tile"); 1464 gimple_call_set_lhs (call, counts[ix].tile); 1465 gimple_set_location (call, loc); 1466 gsi_insert_before (gsi, call, GSI_SAME_STMT); 1467 1468 tiling = TREE_CHAIN (tiling); 1469 } 1470 else 1471 { 1472 counts[ix].tile = NULL; 1473 counts[ix].outer = loop->v; 1474 } 1475 1476 tree b = loop->n1; 1477 tree e = loop->n2; 1478 tree s = loop->step; 1479 bool up = loop->cond_code == LT_EXPR; 1480 tree dir = build_int_cst (diff_type, up ? +1 : -1); 1481 bool negating; 1482 tree expr; 1483 1484 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE, 1485 true, GSI_SAME_STMT); 1486 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE, 1487 true, GSI_SAME_STMT); 1488 1489 /* Convert the step, avoiding possible unsigned->signed overflow. */ 1490 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); 1491 if (negating) 1492 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); 1493 s = fold_convert (diff_type, s); 1494 if (negating) 1495 s = fold_build1 (NEGATE_EXPR, diff_type, s); 1496 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE, 1497 true, GSI_SAME_STMT); 1498 1499 /* Determine the range, avoiding possible unsigned->signed overflow. */ 1500 negating = !up && TYPE_UNSIGNED (iter_type); 1501 expr = fold_build2 (MINUS_EXPR, plus_type, 1502 fold_convert (plus_type, negating ? b : e), 1503 fold_convert (plus_type, negating ? e : b)); 1504 expr = fold_convert (diff_type, expr); 1505 if (negating) 1506 expr = fold_build1 (NEGATE_EXPR, diff_type, expr); 1507 tree range = force_gimple_operand_gsi 1508 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT); 1509 1510 /* Determine number of iterations. 
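	 This is (RANGE - DIR + STEP) / STEP using truncating division, i.e.
	 the number of steps needed to cover RANGE, rounding up; e.g. a range
	 of 10 with step 3 counting up (DIR == 1) gives (10 - 1 + 3) / 3 == 4.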
*/ 1511 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); 1512 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); 1513 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); 1514 1515 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, 1516 true, GSI_SAME_STMT); 1517 1518 counts[ix].base = b; 1519 counts[ix].iters = iters; 1520 counts[ix].step = s; 1521 1522 total = fold_build2 (MULT_EXPR, bound_type, total, 1523 fold_convert (bound_type, iters)); 1524 } 1525 1526 return total; 1527 } 1528 1529 /* Emit initializers for collapsed loop members. INNER is true if 1530 this is for the element loop of a TILE. IVAR is the outer 1531 loop iteration variable, from which collapsed loop iteration values 1532 are calculated. COUNTS array has been initialized by 1533 expand_oacc_collapse_inits. */ 1534 1535 static void 1536 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner, 1537 gimple_stmt_iterator *gsi, 1538 const oacc_collapse *counts, tree ivar) 1539 { 1540 tree ivar_type = TREE_TYPE (ivar); 1541 1542 /* The most rapidly changing iteration variable is the innermost 1543 one. */ 1544 for (int ix = fd->collapse; ix--;) 1545 { 1546 const omp_for_data_loop *loop = &fd->loops[ix]; 1547 const oacc_collapse *collapse = &counts[ix]; 1548 tree v = inner ? loop->v : collapse->outer; 1549 tree iter_type = TREE_TYPE (v); 1550 tree diff_type = TREE_TYPE (collapse->step); 1551 tree plus_type = iter_type; 1552 enum tree_code plus_code = PLUS_EXPR; 1553 tree expr; 1554 1555 if (POINTER_TYPE_P (iter_type)) 1556 { 1557 plus_code = POINTER_PLUS_EXPR; 1558 plus_type = sizetype; 1559 } 1560 1561 expr = ivar; 1562 if (ix) 1563 { 1564 tree mod = fold_convert (ivar_type, collapse->iters); 1565 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod); 1566 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod); 1567 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE, 1568 true, GSI_SAME_STMT); 1569 } 1570 1571 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr), 1572 collapse->step); 1573 expr = fold_build2 (plus_code, iter_type, 1574 inner ? collapse->outer : collapse->base, 1575 fold_convert (plus_type, expr)); 1576 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE, 1577 true, GSI_SAME_STMT); 1578 gassign *ass = gimple_build_assign (v, expr); 1579 gsi_insert_before (gsi, ass, GSI_SAME_STMT); 1580 } 1581 } 1582 1583 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost 1584 of the combined collapse > 1 loop constructs, generate code like: 1585 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB; 1586 if (cond3 is <) 1587 adj = STEP3 - 1; 1588 else 1589 adj = STEP3 + 1; 1590 count3 = (adj + N32 - N31) / STEP3; 1591 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB; 1592 if (cond2 is <) 1593 adj = STEP2 - 1; 1594 else 1595 adj = STEP2 + 1; 1596 count2 = (adj + N22 - N21) / STEP2; 1597 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB; 1598 if (cond1 is <) 1599 adj = STEP1 - 1; 1600 else 1601 adj = STEP1 + 1; 1602 count1 = (adj + N12 - N11) / STEP1; 1603 count = count1 * count2 * count3; 1604 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does: 1605 count = 0; 1606 and set ZERO_ITER_BB to that bb. If this isn't the outermost 1607 of the combined loop constructs, just initialize COUNTS array 1608 from the _looptemp_ clauses. 
*/ 1609 1610 /* NOTE: It *could* be better to moosh all of the BBs together, 1611 creating one larger BB with all the computation and the unexpected 1612 jump at the end. I.e. 1613 1614 bool zero3, zero2, zero1, zero; 1615 1616 zero3 = N32 c3 N31; 1617 count3 = (N32 - N31) /[cl] STEP3; 1618 zero2 = N22 c2 N21; 1619 count2 = (N22 - N21) /[cl] STEP2; 1620 zero1 = N12 c1 N11; 1621 count1 = (N12 - N11) /[cl] STEP1; 1622 zero = zero3 || zero2 || zero1; 1623 count = count1 * count2 * count3; 1624 if (__builtin_expect(zero, false)) goto zero_iter_bb; 1625 1626 After all, we expect the zero=false, and thus we expect to have to 1627 evaluate all of the comparison expressions, so short-circuiting 1628 oughtn't be a win. Since the condition isn't protecting a 1629 denominator, we're not concerned about divide-by-zero, so we can 1630 fully evaluate count even if a numerator turned out to be wrong. 1631 1632 It seems like putting this all together would create much better 1633 scheduling opportunities, and less pressure on the chip's branch 1634 predictor. */ 1635 1636 static void 1637 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, 1638 basic_block &entry_bb, tree *counts, 1639 basic_block &zero_iter1_bb, int &first_zero_iter1, 1640 basic_block &zero_iter2_bb, int &first_zero_iter2, 1641 basic_block &l2_dom_bb) 1642 { 1643 tree t, type = TREE_TYPE (fd->loop.v); 1644 edge e, ne; 1645 int i; 1646 1647 /* Collapsed loops need work for expansion into SSA form. */ 1648 gcc_assert (!gimple_in_ssa_p (cfun)); 1649 1650 if (gimple_omp_for_combined_into_p (fd->for_stmt) 1651 && TREE_CODE (fd->loop.n2) != INTEGER_CST) 1652 { 1653 gcc_assert (fd->ordered == 0); 1654 /* First two _looptemp_ clauses are for istart/iend, counts[0] 1655 isn't supposed to be handled, as the inner loop doesn't 1656 use it. */ 1657 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 1658 OMP_CLAUSE__LOOPTEMP_); 1659 gcc_assert (innerc); 1660 for (i = 0; i < fd->collapse; i++) 1661 { 1662 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 1663 OMP_CLAUSE__LOOPTEMP_); 1664 gcc_assert (innerc); 1665 if (i) 1666 counts[i] = OMP_CLAUSE_DECL (innerc); 1667 else 1668 counts[0] = NULL_TREE; 1669 } 1670 return; 1671 } 1672 1673 for (i = fd->collapse; i < fd->ordered; i++) 1674 { 1675 tree itype = TREE_TYPE (fd->loops[i].v); 1676 counts[i] = NULL_TREE; 1677 t = fold_binary (fd->loops[i].cond_code, boolean_type_node, 1678 fold_convert (itype, fd->loops[i].n1), 1679 fold_convert (itype, fd->loops[i].n2)); 1680 if (t && integer_zerop (t)) 1681 { 1682 for (i = fd->collapse; i < fd->ordered; i++) 1683 counts[i] = build_int_cst (type, 0); 1684 break; 1685 } 1686 } 1687 for (i = 0; i < (fd->ordered ? 
fd->ordered : fd->collapse); i++) 1688 { 1689 tree itype = TREE_TYPE (fd->loops[i].v); 1690 1691 if (i >= fd->collapse && counts[i]) 1692 continue; 1693 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse) 1694 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node, 1695 fold_convert (itype, fd->loops[i].n1), 1696 fold_convert (itype, fd->loops[i].n2))) 1697 == NULL_TREE || !integer_onep (t))) 1698 { 1699 gcond *cond_stmt; 1700 tree n1, n2; 1701 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1)); 1702 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE, 1703 true, GSI_SAME_STMT); 1704 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2)); 1705 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE, 1706 true, GSI_SAME_STMT); 1707 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2, 1708 NULL_TREE, NULL_TREE); 1709 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT); 1710 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 1711 expand_omp_regimplify_p, NULL, NULL) 1712 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 1713 expand_omp_regimplify_p, NULL, NULL)) 1714 { 1715 *gsi = gsi_for_stmt (cond_stmt); 1716 gimple_regimplify_operands (cond_stmt, gsi); 1717 } 1718 e = split_block (entry_bb, cond_stmt); 1719 basic_block &zero_iter_bb 1720 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb; 1721 int &first_zero_iter 1722 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2; 1723 if (zero_iter_bb == NULL) 1724 { 1725 gassign *assign_stmt; 1726 first_zero_iter = i; 1727 zero_iter_bb = create_empty_bb (entry_bb); 1728 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father); 1729 *gsi = gsi_after_labels (zero_iter_bb); 1730 if (i < fd->collapse) 1731 assign_stmt = gimple_build_assign (fd->loop.n2, 1732 build_zero_cst (type)); 1733 else 1734 { 1735 counts[i] = create_tmp_reg (type, ".count"); 1736 assign_stmt 1737 = gimple_build_assign (counts[i], build_zero_cst (type)); 1738 } 1739 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); 1740 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb, 1741 entry_bb); 1742 } 1743 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE); 1744 ne->probability = REG_BR_PROB_BASE / 2000 - 1; 1745 e->flags = EDGE_TRUE_VALUE; 1746 e->probability = REG_BR_PROB_BASE - ne->probability; 1747 if (l2_dom_bb == NULL) 1748 l2_dom_bb = entry_bb; 1749 entry_bb = e->dest; 1750 *gsi = gsi_last_bb (entry_bb); 1751 } 1752 1753 if (POINTER_TYPE_P (itype)) 1754 itype = signed_type_for (itype); 1755 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR 1756 ? -1 : 1)); 1757 t = fold_build2 (PLUS_EXPR, itype, 1758 fold_convert (itype, fd->loops[i].step), t); 1759 t = fold_build2 (PLUS_EXPR, itype, t, 1760 fold_convert (itype, fd->loops[i].n2)); 1761 t = fold_build2 (MINUS_EXPR, itype, t, 1762 fold_convert (itype, fd->loops[i].n1)); 1763 /* ?? We could probably use CEIL_DIV_EXPR instead of 1764 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't 1765 generate the same code in the end because generically we 1766 don't know that the values involved must be negative for 1767 GT?? 
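     Note that for unsigned ITYPE with a GT condition both the numerator and
     the step are negated before dividing, since both are conceptually
     negative and dividing their wrapped unsigned representations directly
     would compute the wrong count.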
*/ 1768 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) 1769 t = fold_build2 (TRUNC_DIV_EXPR, itype, 1770 fold_build1 (NEGATE_EXPR, itype, t), 1771 fold_build1 (NEGATE_EXPR, itype, 1772 fold_convert (itype, 1773 fd->loops[i].step))); 1774 else 1775 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, 1776 fold_convert (itype, fd->loops[i].step)); 1777 t = fold_convert (type, t); 1778 if (TREE_CODE (t) == INTEGER_CST) 1779 counts[i] = t; 1780 else 1781 { 1782 if (i < fd->collapse || i != first_zero_iter2) 1783 counts[i] = create_tmp_reg (type, ".count"); 1784 expand_omp_build_assign (gsi, counts[i], t); 1785 } 1786 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse) 1787 { 1788 if (i == 0) 1789 t = counts[0]; 1790 else 1791 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]); 1792 expand_omp_build_assign (gsi, fd->loop.n2, t); 1793 } 1794 } 1795 } 1796 1797 /* Helper function for expand_omp_{for_*,simd}. Generate code like: 1798 T = V; 1799 V3 = N31 + (T % count3) * STEP3; 1800 T = T / count3; 1801 V2 = N21 + (T % count2) * STEP2; 1802 T = T / count2; 1803 V1 = N11 + T * STEP1; 1804 if this loop doesn't have an inner loop construct combined with it. 1805 If it does have an inner loop construct combined with it and the 1806 iteration count isn't known constant, store values from counts array 1807 into its _looptemp_ temporaries instead. */ 1808 1809 static void 1810 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, 1811 tree *counts, gimple *inner_stmt, tree startvar) 1812 { 1813 int i; 1814 if (gimple_omp_for_combined_p (fd->for_stmt)) 1815 { 1816 /* If fd->loop.n2 is constant, then no propagation of the counts 1817 is needed, they are constant. */ 1818 if (TREE_CODE (fd->loop.n2) == INTEGER_CST) 1819 return; 1820 1821 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR 1822 ? gimple_omp_taskreg_clauses (inner_stmt) 1823 : gimple_omp_for_clauses (inner_stmt); 1824 /* First two _looptemp_ clauses are for istart/iend, counts[0] 1825 isn't supposed to be handled, as the inner loop doesn't 1826 use it. 
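Here we only copy counts[1] .. counts[collapse-1] into the remaining
     _looptemp_ decls, so that the inner construct can rebuild the individual
     iteration variables from the single collapsed counter itself.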
*/ 1827 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 1828 gcc_assert (innerc); 1829 for (i = 0; i < fd->collapse; i++) 1830 { 1831 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 1832 OMP_CLAUSE__LOOPTEMP_); 1833 gcc_assert (innerc); 1834 if (i) 1835 { 1836 tree tem = OMP_CLAUSE_DECL (innerc); 1837 tree t = fold_convert (TREE_TYPE (tem), counts[i]); 1838 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, 1839 false, GSI_CONTINUE_LINKING); 1840 gassign *stmt = gimple_build_assign (tem, t); 1841 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1842 } 1843 } 1844 return; 1845 } 1846 1847 tree type = TREE_TYPE (fd->loop.v); 1848 tree tem = create_tmp_reg (type, ".tem"); 1849 gassign *stmt = gimple_build_assign (tem, startvar); 1850 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1851 1852 for (i = fd->collapse - 1; i >= 0; i--) 1853 { 1854 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t; 1855 itype = vtype; 1856 if (POINTER_TYPE_P (vtype)) 1857 itype = signed_type_for (vtype); 1858 if (i != 0) 1859 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]); 1860 else 1861 t = tem; 1862 t = fold_convert (itype, t); 1863 t = fold_build2 (MULT_EXPR, itype, t, 1864 fold_convert (itype, fd->loops[i].step)); 1865 if (POINTER_TYPE_P (vtype)) 1866 t = fold_build_pointer_plus (fd->loops[i].n1, t); 1867 else 1868 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); 1869 t = force_gimple_operand_gsi (gsi, t, 1870 DECL_P (fd->loops[i].v) 1871 && TREE_ADDRESSABLE (fd->loops[i].v), 1872 NULL_TREE, false, 1873 GSI_CONTINUE_LINKING); 1874 stmt = gimple_build_assign (fd->loops[i].v, t); 1875 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1876 if (i != 0) 1877 { 1878 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]); 1879 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, 1880 false, GSI_CONTINUE_LINKING); 1881 stmt = gimple_build_assign (tem, t); 1882 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1883 } 1884 } 1885 } 1886 1887 /* Helper function for expand_omp_for_*. 
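At the bottom of a collapsed loop nest, step the innermost iteration
   variable and, whenever it leaves its range, reset it to its lower bound
   and advance the next outer variable instead.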
Generate code like: 1888 L10: 1889 V3 += STEP3; 1890 if (V3 cond3 N32) goto BODY_BB; else goto L11; 1891 L11: 1892 V3 = N31; 1893 V2 += STEP2; 1894 if (V2 cond2 N22) goto BODY_BB; else goto L12; 1895 L12: 1896 V2 = N21; 1897 V1 += STEP1; 1898 goto BODY_BB; */ 1899 1900 static basic_block 1901 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb, 1902 basic_block body_bb) 1903 { 1904 basic_block last_bb, bb, collapse_bb = NULL; 1905 int i; 1906 gimple_stmt_iterator gsi; 1907 edge e; 1908 tree t; 1909 gimple *stmt; 1910 1911 last_bb = cont_bb; 1912 for (i = fd->collapse - 1; i >= 0; i--) 1913 { 1914 tree vtype = TREE_TYPE (fd->loops[i].v); 1915 1916 bb = create_empty_bb (last_bb); 1917 add_bb_to_loop (bb, last_bb->loop_father); 1918 gsi = gsi_start_bb (bb); 1919 1920 if (i < fd->collapse - 1) 1921 { 1922 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE); 1923 e->probability = REG_BR_PROB_BASE / 8; 1924 1925 t = fd->loops[i + 1].n1; 1926 t = force_gimple_operand_gsi (&gsi, t, 1927 DECL_P (fd->loops[i + 1].v) 1928 && TREE_ADDRESSABLE (fd->loops[i 1929 + 1].v), 1930 NULL_TREE, false, 1931 GSI_CONTINUE_LINKING); 1932 stmt = gimple_build_assign (fd->loops[i + 1].v, t); 1933 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 1934 } 1935 else 1936 collapse_bb = bb; 1937 1938 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb); 1939 1940 if (POINTER_TYPE_P (vtype)) 1941 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step); 1942 else 1943 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step); 1944 t = force_gimple_operand_gsi (&gsi, t, 1945 DECL_P (fd->loops[i].v) 1946 && TREE_ADDRESSABLE (fd->loops[i].v), 1947 NULL_TREE, false, GSI_CONTINUE_LINKING); 1948 stmt = gimple_build_assign (fd->loops[i].v, t); 1949 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 1950 1951 if (i > 0) 1952 { 1953 t = fd->loops[i].n2; 1954 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 1955 false, GSI_CONTINUE_LINKING); 1956 tree v = fd->loops[i].v; 1957 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 1958 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 1959 false, GSI_CONTINUE_LINKING); 1960 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t); 1961 stmt = gimple_build_cond_empty (t); 1962 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 1963 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE); 1964 e->probability = REG_BR_PROB_BASE * 7 / 8; 1965 } 1966 else 1967 make_edge (bb, body_bb, EDGE_FALLTHRU); 1968 last_bb = bb; 1969 } 1970 1971 return collapse_bb; 1972 } 1973 1974 /* Expand #pragma omp ordered depend(source). */ 1975 1976 static void 1977 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd, 1978 tree *counts, location_t loc) 1979 { 1980 enum built_in_function source_ix 1981 = fd->iter_type == long_integer_type_node 1982 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST; 1983 gimple *g 1984 = gimple_build_call (builtin_decl_explicit (source_ix), 1, 1985 build_fold_addr_expr (counts[fd->ordered])); 1986 gimple_set_location (g, loc); 1987 gsi_insert_before (gsi, g, GSI_SAME_STMT); 1988 } 1989 1990 /* Expand a single depend from #pragma omp ordered depend(sink:...). */ 1991 1992 static void 1993 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd, 1994 tree *counts, tree c, location_t loc) 1995 { 1996 auto_vec<tree, 10> args; 1997 enum built_in_function sink_ix 1998 = fd->iter_type == long_integer_type_node 1999 ? 
BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT; 2000 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE; 2001 int i; 2002 gimple_stmt_iterator gsi2 = *gsi; 2003 bool warned_step = false; 2004 2005 for (i = 0; i < fd->ordered; i++) 2006 { 2007 tree step = NULL_TREE; 2008 off = TREE_PURPOSE (deps); 2009 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 2010 { 2011 step = TREE_OPERAND (off, 1); 2012 off = TREE_OPERAND (off, 0); 2013 } 2014 if (!integer_zerop (off)) 2015 { 2016 gcc_assert (fd->loops[i].cond_code == LT_EXPR 2017 || fd->loops[i].cond_code == GT_EXPR); 2018 bool forward = fd->loops[i].cond_code == LT_EXPR; 2019 if (step) 2020 { 2021 /* Non-simple Fortran DO loops. If step is variable, 2022 we don't know at compile even the direction, so can't 2023 warn. */ 2024 if (TREE_CODE (step) != INTEGER_CST) 2025 break; 2026 forward = tree_int_cst_sgn (step) != -1; 2027 } 2028 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2029 warning_at (loc, 0, "%<depend(sink)%> clause waiting for " 2030 "lexically later iteration"); 2031 break; 2032 } 2033 deps = TREE_CHAIN (deps); 2034 } 2035 /* If all offsets corresponding to the collapsed loops are zero, 2036 this depend clause can be ignored. FIXME: but there is still a 2037 flush needed. We need to emit one __sync_synchronize () for it 2038 though (perhaps conditionally)? Solve this together with the 2039 conservative dependence folding optimization. 2040 if (i >= fd->collapse) 2041 return; */ 2042 2043 deps = OMP_CLAUSE_DECL (c); 2044 gsi_prev (&gsi2); 2045 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2)); 2046 edge e2 = split_block_after_labels (e1->dest); 2047 2048 gsi2 = gsi_after_labels (e1->dest); 2049 *gsi = gsi_last_bb (e1->src); 2050 for (i = 0; i < fd->ordered; i++) 2051 { 2052 tree itype = TREE_TYPE (fd->loops[i].v); 2053 tree step = NULL_TREE; 2054 tree orig_off = NULL_TREE; 2055 if (POINTER_TYPE_P (itype)) 2056 itype = sizetype; 2057 if (i) 2058 deps = TREE_CHAIN (deps); 2059 off = TREE_PURPOSE (deps); 2060 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 2061 { 2062 step = TREE_OPERAND (off, 1); 2063 off = TREE_OPERAND (off, 0); 2064 gcc_assert (fd->loops[i].cond_code == LT_EXPR 2065 && integer_onep (fd->loops[i].step) 2066 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))); 2067 } 2068 tree s = fold_convert_loc (loc, itype, step ? 
step : fd->loops[i].step); 2069 if (step) 2070 { 2071 off = fold_convert_loc (loc, itype, off); 2072 orig_off = off; 2073 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 2074 } 2075 2076 if (integer_zerop (off)) 2077 t = boolean_true_node; 2078 else 2079 { 2080 tree a; 2081 tree co = fold_convert_loc (loc, itype, off); 2082 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 2083 { 2084 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2085 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co); 2086 a = fold_build2_loc (loc, POINTER_PLUS_EXPR, 2087 TREE_TYPE (fd->loops[i].v), fd->loops[i].v, 2088 co); 2089 } 2090 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2091 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2092 fd->loops[i].v, co); 2093 else 2094 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 2095 fd->loops[i].v, co); 2096 if (step) 2097 { 2098 tree t1, t2; 2099 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2100 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2101 fd->loops[i].n1); 2102 else 2103 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2104 fd->loops[i].n2); 2105 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2106 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2107 fd->loops[i].n2); 2108 else 2109 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2110 fd->loops[i].n1); 2111 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, 2112 step, build_int_cst (TREE_TYPE (step), 0)); 2113 if (TREE_CODE (step) != INTEGER_CST) 2114 { 2115 t1 = unshare_expr (t1); 2116 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE, 2117 false, GSI_CONTINUE_LINKING); 2118 t2 = unshare_expr (t2); 2119 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE, 2120 false, GSI_CONTINUE_LINKING); 2121 } 2122 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node, 2123 t, t2, t1); 2124 } 2125 else if (fd->loops[i].cond_code == LT_EXPR) 2126 { 2127 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2128 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2129 fd->loops[i].n1); 2130 else 2131 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2132 fd->loops[i].n2); 2133 } 2134 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2135 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a, 2136 fd->loops[i].n2); 2137 else 2138 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a, 2139 fd->loops[i].n1); 2140 } 2141 if (cond) 2142 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t); 2143 else 2144 cond = t; 2145 2146 off = fold_convert_loc (loc, itype, off); 2147 2148 if (step 2149 || (fd->loops[i].cond_code == LT_EXPR 2150 ? !integer_onep (fd->loops[i].step) 2151 : !integer_minus_onep (fd->loops[i].step))) 2152 { 2153 if (step == NULL_TREE 2154 && TYPE_UNSIGNED (itype) 2155 && fd->loops[i].cond_code == GT_EXPR) 2156 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off, 2157 fold_build1_loc (loc, NEGATE_EXPR, itype, 2158 s)); 2159 else 2160 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, 2161 orig_off ? 
orig_off : off, s); 2162 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t, 2163 build_int_cst (itype, 0)); 2164 if (integer_zerop (t) && !warned_step) 2165 { 2166 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never " 2167 "in the iteration space"); 2168 warned_step = true; 2169 } 2170 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, 2171 cond, t); 2172 } 2173 2174 if (i <= fd->collapse - 1 && fd->collapse > 1) 2175 t = fd->loop.v; 2176 else if (counts[i]) 2177 t = counts[i]; 2178 else 2179 { 2180 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2181 fd->loops[i].v, fd->loops[i].n1); 2182 t = fold_convert_loc (loc, fd->iter_type, t); 2183 } 2184 if (step) 2185 /* We have divided off by step already earlier. */; 2186 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) 2187 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, 2188 fold_build1_loc (loc, NEGATE_EXPR, itype, 2189 s)); 2190 else 2191 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 2192 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2193 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off); 2194 off = fold_convert_loc (loc, fd->iter_type, off); 2195 if (i <= fd->collapse - 1 && fd->collapse > 1) 2196 { 2197 if (i) 2198 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff, 2199 off); 2200 if (i < fd->collapse - 1) 2201 { 2202 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off, 2203 counts[i]); 2204 continue; 2205 } 2206 } 2207 off = unshare_expr (off); 2208 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off); 2209 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2210 true, GSI_SAME_STMT); 2211 args.safe_push (t); 2212 } 2213 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args); 2214 gimple_set_location (g, loc); 2215 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 2216 2217 cond = unshare_expr (cond); 2218 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false, 2219 GSI_CONTINUE_LINKING); 2220 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT); 2221 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE); 2222 e3->probability = REG_BR_PROB_BASE / 8; 2223 e1->probability = REG_BR_PROB_BASE - e3->probability; 2224 e1->flags = EDGE_TRUE_VALUE; 2225 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src); 2226 2227 *gsi = gsi_after_labels (e2->dest); 2228 } 2229 2230 /* Expand all #pragma omp ordered depend(source) and 2231 #pragma omp ordered depend(sink:...) constructs in the current 2232 #pragma omp for ordered(n) region. 
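Each depend(source) is turned into a GOMP_doacross_post (or
   GOMP_doacross_ull_post) call publishing the current iteration vector, and
   each depend(sink:vec) into a GOMP_doacross_wait (or _ull_wait) call guarded
   by a runtime test that skips the wait when the requested iteration falls
   outside the iteration space.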
*/ 2233 2234 static void 2235 expand_omp_ordered_source_sink (struct omp_region *region, 2236 struct omp_for_data *fd, tree *counts, 2237 basic_block cont_bb) 2238 { 2239 struct omp_region *inner; 2240 int i; 2241 for (i = fd->collapse - 1; i < fd->ordered; i++) 2242 if (i == fd->collapse - 1 && fd->collapse > 1) 2243 counts[i] = NULL_TREE; 2244 else if (i >= fd->collapse && !cont_bb) 2245 counts[i] = build_zero_cst (fd->iter_type); 2246 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) 2247 && integer_onep (fd->loops[i].step)) 2248 counts[i] = NULL_TREE; 2249 else 2250 counts[i] = create_tmp_var (fd->iter_type, ".orditer"); 2251 tree atype 2252 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1); 2253 counts[fd->ordered] = create_tmp_var (atype, ".orditera"); 2254 TREE_ADDRESSABLE (counts[fd->ordered]) = 1; 2255 2256 for (inner = region->inner; inner; inner = inner->next) 2257 if (inner->type == GIMPLE_OMP_ORDERED) 2258 { 2259 gomp_ordered *ord_stmt = inner->ord_stmt; 2260 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt); 2261 location_t loc = gimple_location (ord_stmt); 2262 tree c; 2263 for (c = gimple_omp_ordered_clauses (ord_stmt); 2264 c; c = OMP_CLAUSE_CHAIN (c)) 2265 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE) 2266 break; 2267 if (c) 2268 expand_omp_ordered_source (&gsi, fd, counts, loc); 2269 for (c = gimple_omp_ordered_clauses (ord_stmt); 2270 c; c = OMP_CLAUSE_CHAIN (c)) 2271 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK) 2272 expand_omp_ordered_sink (&gsi, fd, counts, c, loc); 2273 gsi_remove (&gsi, true); 2274 } 2275 } 2276 2277 /* Wrap the body into fd->ordered - fd->collapse loops that aren't 2278 collapsed. */ 2279 2280 static basic_block 2281 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts, 2282 basic_block cont_bb, basic_block body_bb, 2283 bool ordered_lastprivate) 2284 { 2285 if (fd->ordered == fd->collapse) 2286 return cont_bb; 2287 2288 if (!cont_bb) 2289 { 2290 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2291 for (int i = fd->collapse; i < fd->ordered; i++) 2292 { 2293 tree type = TREE_TYPE (fd->loops[i].v); 2294 tree n1 = fold_convert (type, fd->loops[i].n1); 2295 expand_omp_build_assign (&gsi, fd->loops[i].v, n1); 2296 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2297 size_int (i - fd->collapse + 1), 2298 NULL_TREE, NULL_TREE); 2299 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2300 } 2301 return NULL; 2302 } 2303 2304 for (int i = fd->ordered - 1; i >= fd->collapse; i--) 2305 { 2306 tree t, type = TREE_TYPE (fd->loops[i].v); 2307 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2308 expand_omp_build_assign (&gsi, fd->loops[i].v, 2309 fold_convert (type, fd->loops[i].n1)); 2310 if (counts[i]) 2311 expand_omp_build_assign (&gsi, counts[i], 2312 build_zero_cst (fd->iter_type)); 2313 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2314 size_int (i - fd->collapse + 1), 2315 NULL_TREE, NULL_TREE); 2316 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2317 if (!gsi_end_p (gsi)) 2318 gsi_prev (&gsi); 2319 else 2320 gsi = gsi_last_bb (body_bb); 2321 edge e1 = split_block (body_bb, gsi_stmt (gsi)); 2322 basic_block new_body = e1->dest; 2323 if (body_bb == cont_bb) 2324 cont_bb = new_body; 2325 edge e2 = NULL; 2326 basic_block new_header; 2327 if (EDGE_COUNT (cont_bb->preds) > 0) 2328 { 2329 gsi = gsi_last_bb (cont_bb); 2330 if (POINTER_TYPE_P (type)) 2331 t = fold_build_pointer_plus (fd->loops[i].v, 
2332 fold_convert (sizetype, 2333 fd->loops[i].step)); 2334 else 2335 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v, 2336 fold_convert (type, fd->loops[i].step)); 2337 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 2338 if (counts[i]) 2339 { 2340 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i], 2341 build_int_cst (fd->iter_type, 1)); 2342 expand_omp_build_assign (&gsi, counts[i], t); 2343 t = counts[i]; 2344 } 2345 else 2346 { 2347 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2348 fd->loops[i].v, fd->loops[i].n1); 2349 t = fold_convert (fd->iter_type, t); 2350 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2351 true, GSI_SAME_STMT); 2352 } 2353 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2354 size_int (i - fd->collapse + 1), 2355 NULL_TREE, NULL_TREE); 2356 expand_omp_build_assign (&gsi, aref, t); 2357 gsi_prev (&gsi); 2358 e2 = split_block (cont_bb, gsi_stmt (gsi)); 2359 new_header = e2->dest; 2360 } 2361 else 2362 new_header = cont_bb; 2363 gsi = gsi_after_labels (new_header); 2364 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE, 2365 true, GSI_SAME_STMT); 2366 tree n2 2367 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2), 2368 true, NULL_TREE, true, GSI_SAME_STMT); 2369 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2); 2370 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT); 2371 edge e3 = split_block (new_header, gsi_stmt (gsi)); 2372 cont_bb = e3->dest; 2373 remove_edge (e1); 2374 make_edge (body_bb, new_header, EDGE_FALLTHRU); 2375 e3->flags = EDGE_FALSE_VALUE; 2376 e3->probability = REG_BR_PROB_BASE / 8; 2377 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE); 2378 e1->probability = REG_BR_PROB_BASE - e3->probability; 2379 2380 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb); 2381 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); 2382 2383 if (e2) 2384 { 2385 struct loop *loop = alloc_loop (); 2386 loop->header = new_header; 2387 loop->latch = e2->src; 2388 add_loop (loop, body_bb->loop_father); 2389 } 2390 } 2391 2392 /* If there are any lastprivate clauses and it is possible some loops 2393 might have zero iterations, ensure all the decls are initialized, 2394 otherwise we could crash evaluating C++ class iterators with lastprivate 2395 clauses. */ 2396 bool need_inits = false; 2397 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++) 2398 if (need_inits) 2399 { 2400 tree type = TREE_TYPE (fd->loops[i].v); 2401 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2402 expand_omp_build_assign (&gsi, fd->loops[i].v, 2403 fold_convert (type, fd->loops[i].n1)); 2404 } 2405 else 2406 { 2407 tree type = TREE_TYPE (fd->loops[i].v); 2408 tree this_cond = fold_build2 (fd->loops[i].cond_code, 2409 boolean_type_node, 2410 fold_convert (type, fd->loops[i].n1), 2411 fold_convert (type, fd->loops[i].n2)); 2412 if (!integer_onep (this_cond)) 2413 need_inits = true; 2414 } 2415 2416 return cont_bb; 2417 } 2418 2419 /* A subroutine of expand_omp_for. Generate code for a parallel 2420 loop with any schedule. 
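The scheduling policy itself is left to libgomp: START_FN and NEXT_FN name
   the GOMP_loop_*_start and GOMP_loop_*_next builtins that hand out the
   [istart0, iend0) chunks iterated over below.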
Given parameters: 2421 2422 for (V = N1; V cond N2; V += STEP) BODY; 2423 2424 where COND is "<" or ">", we generate pseudocode 2425 2426 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0); 2427 if (more) goto L0; else goto L3; 2428 L0: 2429 V = istart0; 2430 iend = iend0; 2431 L1: 2432 BODY; 2433 V += STEP; 2434 if (V cond iend) goto L1; else goto L2; 2435 L2: 2436 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2437 L3: 2438 2439 If this is a combined omp parallel loop, instead of the call to 2440 GOMP_loop_foo_start, we call GOMP_loop_foo_next. 2441 If this is gimple_omp_for_combined_p loop, then instead of assigning 2442 V and iend in L0 we assign the first two _looptemp_ clause decls of the 2443 inner GIMPLE_OMP_FOR and V += STEP; and 2444 if (V cond iend) goto L1; else goto L2; are removed. 2445 2446 For collapsed loops, given parameters: 2447 collapse(3) 2448 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 2449 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 2450 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 2451 BODY; 2452 2453 we generate pseudocode 2454 2455 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0; 2456 if (cond3 is <) 2457 adj = STEP3 - 1; 2458 else 2459 adj = STEP3 + 1; 2460 count3 = (adj + N32 - N31) / STEP3; 2461 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0; 2462 if (cond2 is <) 2463 adj = STEP2 - 1; 2464 else 2465 adj = STEP2 + 1; 2466 count2 = (adj + N22 - N21) / STEP2; 2467 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0; 2468 if (cond1 is <) 2469 adj = STEP1 - 1; 2470 else 2471 adj = STEP1 + 1; 2472 count1 = (adj + N12 - N11) / STEP1; 2473 count = count1 * count2 * count3; 2474 goto Z1; 2475 Z0: 2476 count = 0; 2477 Z1: 2478 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); 2479 if (more) goto L0; else goto L3; 2480 L0: 2481 V = istart0; 2482 T = V; 2483 V3 = N31 + (T % count3) * STEP3; 2484 T = T / count3; 2485 V2 = N21 + (T % count2) * STEP2; 2486 T = T / count2; 2487 V1 = N11 + T * STEP1; 2488 iend = iend0; 2489 L1: 2490 BODY; 2491 V += 1; 2492 if (V < iend) goto L10; else goto L2; 2493 L10: 2494 V3 += STEP3; 2495 if (V3 cond3 N32) goto L1; else goto L11; 2496 L11: 2497 V3 = N31; 2498 V2 += STEP2; 2499 if (V2 cond2 N22) goto L1; else goto L12; 2500 L12: 2501 V2 = N21; 2502 V1 += STEP1; 2503 goto L1; 2504 L2: 2505 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2506 L3: 2507 2508 */ 2509 2510 static void 2511 expand_omp_for_generic (struct omp_region *region, 2512 struct omp_for_data *fd, 2513 enum built_in_function start_fn, 2514 enum built_in_function next_fn, 2515 gimple *inner_stmt) 2516 { 2517 tree type, istart0, iend0, iend; 2518 tree t, vmain, vback, bias = NULL_TREE; 2519 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; 2520 basic_block l2_bb = NULL, l3_bb = NULL; 2521 gimple_stmt_iterator gsi; 2522 gassign *assign_stmt; 2523 bool in_combined_parallel = is_combined_parallel (region); 2524 bool broken_loop = region->cont == NULL; 2525 edge e, ne; 2526 tree *counts = NULL; 2527 int i; 2528 bool ordered_lastprivate = false; 2529 2530 gcc_assert (!broken_loop || !in_combined_parallel); 2531 gcc_assert (fd->iter_type == long_integer_type_node 2532 || !in_combined_parallel); 2533 2534 entry_bb = region->entry; 2535 cont_bb = region->cont; 2536 collapse_bb = NULL; 2537 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 2538 gcc_assert (broken_loop 2539 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 2540 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb)); 2541 l1_bb = 
single_succ (l0_bb);
2542   if (!broken_loop)
2543     {
2544       l2_bb = create_empty_bb (cont_bb);
2545       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2546                   || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2547                       == l1_bb));
2548       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2549     }
2550   else
2551     l2_bb = NULL;
2552   l3_bb = BRANCH_EDGE (entry_bb)->dest;
2553   exit_bb = region->exit;
2554 
2555   gsi = gsi_last_bb (entry_bb);
2556 
2557   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2558   if (fd->ordered
2559       && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2560                           OMP_CLAUSE_LASTPRIVATE))
2561     ordered_lastprivate = true;
2562   if (fd->collapse > 1 || fd->ordered)
2563     {
2564       int first_zero_iter1 = -1, first_zero_iter2 = -1;
2565       basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2566 
2567       counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2568       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2569                                   zero_iter1_bb, first_zero_iter1,
2570                                   zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2571 
2572       if (zero_iter1_bb)
2573         {
2574           /* Some counts[i] vars might be uninitialized if
2575              some loop has zero iterations.  But the body shouldn't
2576              be executed in that case, so just avoid uninit warnings.  */
2577           for (i = first_zero_iter1;
2578                i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2579             if (SSA_VAR_P (counts[i]))
2580               TREE_NO_WARNING (counts[i]) = 1;
2581           gsi_prev (&gsi);
2582           e = split_block (entry_bb, gsi_stmt (gsi));
2583           entry_bb = e->dest;
2584           make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2585           gsi = gsi_last_bb (entry_bb);
2586           set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2587                                    get_immediate_dominator (CDI_DOMINATORS,
2588                                                             zero_iter1_bb));
2589         }
2590       if (zero_iter2_bb)
2591         {
2592           /* Some counts[i] vars might be uninitialized if
2593              some loop has zero iterations.  But the body shouldn't
2594              be executed in that case, so just avoid uninit warnings.  */
2595           for (i = first_zero_iter2; i < fd->ordered; i++)
2596             if (SSA_VAR_P (counts[i]))
2597               TREE_NO_WARNING (counts[i]) = 1;
2598           if (zero_iter1_bb)
2599             make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2600           else
2601             {
2602               gsi_prev (&gsi);
2603               e = split_block (entry_bb, gsi_stmt (gsi));
2604               entry_bb = e->dest;
2605               make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2606               gsi = gsi_last_bb (entry_bb);
2607               set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2608                                        get_immediate_dominator
2609                                          (CDI_DOMINATORS, zero_iter2_bb));
2610             }
2611         }
2612       if (fd->collapse == 1)
2613         {
2614           counts[0] = fd->loop.n2;
2615           fd->loop = fd->loops[0];
2616         }
2617     }
2618 
2619   type = TREE_TYPE (fd->loop.v);
2620   istart0 = create_tmp_var (fd->iter_type, ".istart0");
2621   iend0 = create_tmp_var (fd->iter_type, ".iend0");
2622   TREE_ADDRESSABLE (istart0) = 1;
2623   TREE_ADDRESSABLE (iend0) = 1;
2624 
2625   /* See if we need to bias by LLONG_MIN.
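The unsigned long long runtime interface cannot directly represent a
     signed range whose bounds are not known to lie on one side of zero, so
     in that case we add LLONG_MIN (TYPE_MIN_VALUE of the IV type) to shift
     the whole range into the unsigned space, and subtract the bias again
     when mapping istart0/iend0 back to V.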
*/ 2626 if (fd->iter_type == long_long_unsigned_type_node 2627 && TREE_CODE (type) == INTEGER_TYPE 2628 && !TYPE_UNSIGNED (type) 2629 && fd->ordered == 0) 2630 { 2631 tree n1, n2; 2632 2633 if (fd->loop.cond_code == LT_EXPR) 2634 { 2635 n1 = fd->loop.n1; 2636 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 2637 } 2638 else 2639 { 2640 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 2641 n2 = fd->loop.n1; 2642 } 2643 if (TREE_CODE (n1) != INTEGER_CST 2644 || TREE_CODE (n2) != INTEGER_CST 2645 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 2646 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 2647 } 2648 2649 gimple_stmt_iterator gsif = gsi; 2650 gsi_prev (&gsif); 2651 2652 tree arr = NULL_TREE; 2653 if (in_combined_parallel) 2654 { 2655 gcc_assert (fd->ordered == 0); 2656 /* In a combined parallel loop, emit a call to 2657 GOMP_loop_foo_next. */ 2658 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 2659 build_fold_addr_expr (istart0), 2660 build_fold_addr_expr (iend0)); 2661 } 2662 else 2663 { 2664 tree t0, t1, t2, t3, t4; 2665 /* If this is not a combined parallel loop, emit a call to 2666 GOMP_loop_foo_start in ENTRY_BB. */ 2667 t4 = build_fold_addr_expr (iend0); 2668 t3 = build_fold_addr_expr (istart0); 2669 if (fd->ordered) 2670 { 2671 t0 = build_int_cst (unsigned_type_node, 2672 fd->ordered - fd->collapse + 1); 2673 arr = create_tmp_var (build_array_type_nelts (fd->iter_type, 2674 fd->ordered 2675 - fd->collapse + 1), 2676 ".omp_counts"); 2677 DECL_NAMELESS (arr) = 1; 2678 TREE_ADDRESSABLE (arr) = 1; 2679 TREE_STATIC (arr) = 1; 2680 vec<constructor_elt, va_gc> *v; 2681 vec_alloc (v, fd->ordered - fd->collapse + 1); 2682 int idx; 2683 2684 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++) 2685 { 2686 tree c; 2687 if (idx == 0 && fd->collapse > 1) 2688 c = fd->loop.n2; 2689 else 2690 c = counts[idx + fd->collapse - 1]; 2691 tree purpose = size_int (idx); 2692 CONSTRUCTOR_APPEND_ELT (v, purpose, c); 2693 if (TREE_CODE (c) != INTEGER_CST) 2694 TREE_STATIC (arr) = 0; 2695 } 2696 2697 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v); 2698 if (!TREE_STATIC (arr)) 2699 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR, 2700 void_type_node, arr), 2701 true, NULL_TREE, true, GSI_SAME_STMT); 2702 t1 = build_fold_addr_expr (arr); 2703 t2 = NULL_TREE; 2704 } 2705 else 2706 { 2707 t2 = fold_convert (fd->iter_type, fd->loop.step); 2708 t1 = fd->loop.n2; 2709 t0 = fd->loop.n1; 2710 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 2711 { 2712 tree innerc 2713 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2714 OMP_CLAUSE__LOOPTEMP_); 2715 gcc_assert (innerc); 2716 t0 = OMP_CLAUSE_DECL (innerc); 2717 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 2718 OMP_CLAUSE__LOOPTEMP_); 2719 gcc_assert (innerc); 2720 t1 = OMP_CLAUSE_DECL (innerc); 2721 } 2722 if (POINTER_TYPE_P (TREE_TYPE (t0)) 2723 && TYPE_PRECISION (TREE_TYPE (t0)) 2724 != TYPE_PRECISION (fd->iter_type)) 2725 { 2726 /* Avoid casting pointers to integer of a different size. 
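Go through the signed integer type of the same precision as the
                 pointer first and only then convert to fd->iter_type.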
*/ 2727 tree itype = signed_type_for (type); 2728 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); 2729 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); 2730 } 2731 else 2732 { 2733 t1 = fold_convert (fd->iter_type, t1); 2734 t0 = fold_convert (fd->iter_type, t0); 2735 } 2736 if (bias) 2737 { 2738 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); 2739 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); 2740 } 2741 } 2742 if (fd->iter_type == long_integer_type_node || fd->ordered) 2743 { 2744 if (fd->chunk_size) 2745 { 2746 t = fold_convert (fd->iter_type, fd->chunk_size); 2747 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2748 if (fd->ordered) 2749 t = build_call_expr (builtin_decl_explicit (start_fn), 2750 5, t0, t1, t, t3, t4); 2751 else 2752 t = build_call_expr (builtin_decl_explicit (start_fn), 2753 6, t0, t1, t2, t, t3, t4); 2754 } 2755 else if (fd->ordered) 2756 t = build_call_expr (builtin_decl_explicit (start_fn), 2757 4, t0, t1, t3, t4); 2758 else 2759 t = build_call_expr (builtin_decl_explicit (start_fn), 2760 5, t0, t1, t2, t3, t4); 2761 } 2762 else 2763 { 2764 tree t5; 2765 tree c_bool_type; 2766 tree bfn_decl; 2767 2768 /* The GOMP_loop_ull_*start functions have additional boolean 2769 argument, true for < loops and false for > loops. 2770 In Fortran, the C bool type can be different from 2771 boolean_type_node. */ 2772 bfn_decl = builtin_decl_explicit (start_fn); 2773 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl)); 2774 t5 = build_int_cst (c_bool_type, 2775 fd->loop.cond_code == LT_EXPR ? 1 : 0); 2776 if (fd->chunk_size) 2777 { 2778 tree bfn_decl = builtin_decl_explicit (start_fn); 2779 t = fold_convert (fd->iter_type, fd->chunk_size); 2780 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2781 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); 2782 } 2783 else 2784 t = build_call_expr (builtin_decl_explicit (start_fn), 2785 6, t5, t0, t1, t2, t3, t4); 2786 } 2787 } 2788 if (TREE_TYPE (t) != boolean_type_node) 2789 t = fold_build2 (NE_EXPR, boolean_type_node, 2790 t, build_int_cst (TREE_TYPE (t), 0)); 2791 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2792 true, GSI_SAME_STMT); 2793 if (arr && !TREE_STATIC (arr)) 2794 { 2795 tree clobber = build_constructor (TREE_TYPE (arr), NULL); 2796 TREE_THIS_VOLATILE (clobber) = 1; 2797 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), 2798 GSI_SAME_STMT); 2799 } 2800 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 2801 2802 /* Remove the GIMPLE_OMP_FOR statement. */ 2803 gsi_remove (&gsi, true); 2804 2805 if (gsi_end_p (gsif)) 2806 gsif = gsi_after_labels (gsi_bb (gsif)); 2807 gsi_next (&gsif); 2808 2809 /* Iteration setup for sequential loop goes in L0_BB. 
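Translate the istart0/iend0 values handed back by the runtime into the
     type of the iteration variable (undoing the bias, or for an ordered
     non-collapsed loop rescaling the logical iteration number by STEP and
     adding N1) and store them into STARTVAR and ENDVAR.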
*/ 2810 tree startvar = fd->loop.v; 2811 tree endvar = NULL_TREE; 2812 2813 if (gimple_omp_for_combined_p (fd->for_stmt)) 2814 { 2815 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR 2816 && gimple_omp_for_kind (inner_stmt) 2817 == GF_OMP_FOR_KIND_SIMD); 2818 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt), 2819 OMP_CLAUSE__LOOPTEMP_); 2820 gcc_assert (innerc); 2821 startvar = OMP_CLAUSE_DECL (innerc); 2822 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 2823 OMP_CLAUSE__LOOPTEMP_); 2824 gcc_assert (innerc); 2825 endvar = OMP_CLAUSE_DECL (innerc); 2826 } 2827 2828 gsi = gsi_start_bb (l0_bb); 2829 t = istart0; 2830 if (fd->ordered && fd->collapse == 1) 2831 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 2832 fold_convert (fd->iter_type, fd->loop.step)); 2833 else if (bias) 2834 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 2835 if (fd->ordered && fd->collapse == 1) 2836 { 2837 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 2838 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 2839 fd->loop.n1, fold_convert (sizetype, t)); 2840 else 2841 { 2842 t = fold_convert (TREE_TYPE (startvar), t); 2843 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 2844 fd->loop.n1, t); 2845 } 2846 } 2847 else 2848 { 2849 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 2850 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 2851 t = fold_convert (TREE_TYPE (startvar), t); 2852 } 2853 t = force_gimple_operand_gsi (&gsi, t, 2854 DECL_P (startvar) 2855 && TREE_ADDRESSABLE (startvar), 2856 NULL_TREE, false, GSI_CONTINUE_LINKING); 2857 assign_stmt = gimple_build_assign (startvar, t); 2858 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2859 2860 t = iend0; 2861 if (fd->ordered && fd->collapse == 1) 2862 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 2863 fold_convert (fd->iter_type, fd->loop.step)); 2864 else if (bias) 2865 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 2866 if (fd->ordered && fd->collapse == 1) 2867 { 2868 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 2869 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 2870 fd->loop.n1, fold_convert (sizetype, t)); 2871 else 2872 { 2873 t = fold_convert (TREE_TYPE (startvar), t); 2874 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 2875 fd->loop.n1, t); 2876 } 2877 } 2878 else 2879 { 2880 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 2881 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 2882 t = fold_convert (TREE_TYPE (startvar), t); 2883 } 2884 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2885 false, GSI_CONTINUE_LINKING); 2886 if (endvar) 2887 { 2888 assign_stmt = gimple_build_assign (endvar, iend); 2889 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2890 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend))) 2891 assign_stmt = gimple_build_assign (fd->loop.v, iend); 2892 else 2893 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend); 2894 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2895 } 2896 /* Handle linear clause adjustments. 
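For each linear clause without OMP_CLAUSE_LINEAR_NO_COPYIN, compute the
     number of logical iterations preceding STARTVAR, roughly
       itercnt = (startvar - N1) / STEP,
     and set the clause's variable to its incoming value plus
     itercnt * OMP_CLAUSE_LINEAR_STEP.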
*/ 2897 tree itercnt = NULL_TREE; 2898 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 2899 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 2900 c; c = OMP_CLAUSE_CHAIN (c)) 2901 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 2902 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 2903 { 2904 tree d = OMP_CLAUSE_DECL (c); 2905 bool is_ref = omp_is_reference (d); 2906 tree t = d, a, dest; 2907 if (is_ref) 2908 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 2909 tree type = TREE_TYPE (t); 2910 if (POINTER_TYPE_P (type)) 2911 type = sizetype; 2912 dest = unshare_expr (t); 2913 tree v = create_tmp_var (TREE_TYPE (t), NULL); 2914 expand_omp_build_assign (&gsif, v, t); 2915 if (itercnt == NULL_TREE) 2916 { 2917 itercnt = startvar; 2918 tree n1 = fd->loop.n1; 2919 if (POINTER_TYPE_P (TREE_TYPE (itercnt))) 2920 { 2921 itercnt 2922 = fold_convert (signed_type_for (TREE_TYPE (itercnt)), 2923 itercnt); 2924 n1 = fold_convert (TREE_TYPE (itercnt), n1); 2925 } 2926 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt), 2927 itercnt, n1); 2928 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt), 2929 itercnt, fd->loop.step); 2930 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 2931 NULL_TREE, false, 2932 GSI_CONTINUE_LINKING); 2933 } 2934 a = fold_build2 (MULT_EXPR, type, 2935 fold_convert (type, itercnt), 2936 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 2937 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 2938 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 2939 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2940 false, GSI_CONTINUE_LINKING); 2941 assign_stmt = gimple_build_assign (dest, t); 2942 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2943 } 2944 if (fd->collapse > 1) 2945 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 2946 2947 if (fd->ordered) 2948 { 2949 /* Until now, counts array contained number of iterations or 2950 variable containing it for ith loop. From now on, we need 2951 those counts only for collapsed loops, and only for the 2nd 2952 till the last collapsed one. Move those one element earlier, 2953 we'll use counts[fd->collapse - 1] for the first source/sink 2954 iteration counter and so on and counts[fd->ordered] 2955 as the array holding the current counter values for 2956 depend(source). 
*/ 2957 if (fd->collapse > 1) 2958 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0])); 2959 if (broken_loop) 2960 { 2961 int i; 2962 for (i = fd->collapse; i < fd->ordered; i++) 2963 { 2964 tree type = TREE_TYPE (fd->loops[i].v); 2965 tree this_cond 2966 = fold_build2 (fd->loops[i].cond_code, boolean_type_node, 2967 fold_convert (type, fd->loops[i].n1), 2968 fold_convert (type, fd->loops[i].n2)); 2969 if (!integer_onep (this_cond)) 2970 break; 2971 } 2972 if (i < fd->ordered) 2973 { 2974 cont_bb 2975 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); 2976 add_bb_to_loop (cont_bb, l1_bb->loop_father); 2977 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb); 2978 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v); 2979 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 2980 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU); 2981 make_edge (cont_bb, l1_bb, 0); 2982 l2_bb = create_empty_bb (cont_bb); 2983 broken_loop = false; 2984 } 2985 } 2986 expand_omp_ordered_source_sink (region, fd, counts, cont_bb); 2987 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb, 2988 ordered_lastprivate); 2989 if (counts[fd->collapse - 1]) 2990 { 2991 gcc_assert (fd->collapse == 1); 2992 gsi = gsi_last_bb (l0_bb); 2993 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], 2994 istart0, true); 2995 gsi = gsi_last_bb (cont_bb); 2996 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1], 2997 build_int_cst (fd->iter_type, 1)); 2998 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); 2999 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3000 size_zero_node, NULL_TREE, NULL_TREE); 3001 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); 3002 t = counts[fd->collapse - 1]; 3003 } 3004 else if (fd->collapse > 1) 3005 t = fd->loop.v; 3006 else 3007 { 3008 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3009 fd->loops[0].v, fd->loops[0].n1); 3010 t = fold_convert (fd->iter_type, t); 3011 } 3012 gsi = gsi_last_bb (l0_bb); 3013 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3014 size_zero_node, NULL_TREE, NULL_TREE); 3015 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3016 false, GSI_CONTINUE_LINKING); 3017 expand_omp_build_assign (&gsi, aref, t, true); 3018 } 3019 3020 if (!broken_loop) 3021 { 3022 /* Code to control the increment and predicate for the sequential 3023 loop goes in the CONT_BB. 
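That is, V += STEP (or the pointer-plus equivalent) followed by the
         V cond IEND test choosing between another iteration of the current
         chunk and falling through to request the next chunk; for doacross
         loops the depend(source) counter slot is refreshed here as well.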
*/ 3024 gsi = gsi_last_bb (cont_bb); 3025 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 3026 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 3027 vmain = gimple_omp_continue_control_use (cont_stmt); 3028 vback = gimple_omp_continue_control_def (cont_stmt); 3029 3030 if (!gimple_omp_for_combined_p (fd->for_stmt)) 3031 { 3032 if (POINTER_TYPE_P (type)) 3033 t = fold_build_pointer_plus (vmain, fd->loop.step); 3034 else 3035 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); 3036 t = force_gimple_operand_gsi (&gsi, t, 3037 DECL_P (vback) 3038 && TREE_ADDRESSABLE (vback), 3039 NULL_TREE, true, GSI_SAME_STMT); 3040 assign_stmt = gimple_build_assign (vback, t); 3041 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3042 3043 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) 3044 { 3045 if (fd->collapse > 1) 3046 t = fd->loop.v; 3047 else 3048 { 3049 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3050 fd->loops[0].v, fd->loops[0].n1); 3051 t = fold_convert (fd->iter_type, t); 3052 } 3053 tree aref = build4 (ARRAY_REF, fd->iter_type, 3054 counts[fd->ordered], size_zero_node, 3055 NULL_TREE, NULL_TREE); 3056 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3057 true, GSI_SAME_STMT); 3058 expand_omp_build_assign (&gsi, aref, t); 3059 } 3060 3061 t = build2 (fd->loop.cond_code, boolean_type_node, 3062 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, 3063 iend); 3064 gcond *cond_stmt = gimple_build_cond_empty (t); 3065 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3066 } 3067 3068 /* Remove GIMPLE_OMP_CONTINUE. */ 3069 gsi_remove (&gsi, true); 3070 3071 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 3072 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); 3073 3074 /* Emit code to get the next parallel iteration in L2_BB. */ 3075 gsi = gsi_start_bb (l2_bb); 3076 3077 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 3078 build_fold_addr_expr (istart0), 3079 build_fold_addr_expr (iend0)); 3080 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3081 false, GSI_CONTINUE_LINKING); 3082 if (TREE_TYPE (t) != boolean_type_node) 3083 t = fold_build2 (NE_EXPR, boolean_type_node, 3084 t, build_int_cst (TREE_TYPE (t), 0)); 3085 gcond *cond_stmt = gimple_build_cond_empty (t); 3086 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 3087 } 3088 3089 /* Add the loop cleanup function. */ 3090 gsi = gsi_last_bb (exit_bb); 3091 if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) 3092 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 3093 else if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3094 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 3095 else 3096 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 3097 gcall *call_stmt = gimple_build_call (t, 0); 3098 if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3099 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); 3100 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); 3101 if (fd->ordered) 3102 { 3103 tree arr = counts[fd->ordered]; 3104 tree clobber = build_constructor (TREE_TYPE (arr), NULL); 3105 TREE_THIS_VOLATILE (clobber) = 1; 3106 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), 3107 GSI_SAME_STMT); 3108 } 3109 gsi_remove (&gsi, true); 3110 3111 /* Connect the new blocks. 
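ENTRY_BB branches to L0_BB when the start call returned work and to
     L3_BB otherwise; CONT_BB loops back to the body (or to the collapse
     bookkeeping block) while the current chunk lasts and otherwise falls
     through to L2_BB, whose GOMP_loop_*_next call either goes back to L0_BB
     for another chunk or exits to L3_BB.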
*/ 3112 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; 3113 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; 3114 3115 if (!broken_loop) 3116 { 3117 gimple_seq phis; 3118 3119 e = find_edge (cont_bb, l3_bb); 3120 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); 3121 3122 phis = phi_nodes (l3_bb); 3123 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) 3124 { 3125 gimple *phi = gsi_stmt (gsi); 3126 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), 3127 PHI_ARG_DEF_FROM_EDGE (phi, e)); 3128 } 3129 remove_edge (e); 3130 3131 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); 3132 e = find_edge (cont_bb, l1_bb); 3133 if (e == NULL) 3134 { 3135 e = BRANCH_EDGE (cont_bb); 3136 gcc_assert (single_succ (e->dest) == l1_bb); 3137 } 3138 if (gimple_omp_for_combined_p (fd->for_stmt)) 3139 { 3140 remove_edge (e); 3141 e = NULL; 3142 } 3143 else if (fd->collapse > 1) 3144 { 3145 remove_edge (e); 3146 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 3147 } 3148 else 3149 e->flags = EDGE_TRUE_VALUE; 3150 if (e) 3151 { 3152 e->probability = REG_BR_PROB_BASE * 7 / 8; 3153 find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8; 3154 } 3155 else 3156 { 3157 e = find_edge (cont_bb, l2_bb); 3158 e->flags = EDGE_FALLTHRU; 3159 } 3160 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); 3161 3162 if (gimple_in_ssa_p (cfun)) 3163 { 3164 /* Add phis to the outer loop that connect to the phis in the inner, 3165 original loop, and move the loop entry value of the inner phi to 3166 the loop entry value of the outer phi. */ 3167 gphi_iterator psi; 3168 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) 3169 { 3170 source_location locus; 3171 gphi *nphi; 3172 gphi *exit_phi = psi.phi (); 3173 3174 edge l2_to_l3 = find_edge (l2_bb, l3_bb); 3175 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3); 3176 3177 basic_block latch = BRANCH_EDGE (cont_bb)->dest; 3178 edge latch_to_l1 = find_edge (latch, l1_bb); 3179 gphi *inner_phi 3180 = find_phi_with_arg_on_edge (exit_res, latch_to_l1); 3181 3182 tree t = gimple_phi_result (exit_phi); 3183 tree new_res = copy_ssa_name (t, NULL); 3184 nphi = create_phi_node (new_res, l0_bb); 3185 3186 edge l0_to_l1 = find_edge (l0_bb, l1_bb); 3187 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1); 3188 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1); 3189 edge entry_to_l0 = find_edge (entry_bb, l0_bb); 3190 add_phi_arg (nphi, t, entry_to_l0, locus); 3191 3192 edge l2_to_l0 = find_edge (l2_bb, l0_bb); 3193 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION); 3194 3195 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION); 3196 }; 3197 } 3198 3199 set_immediate_dominator (CDI_DOMINATORS, l2_bb, 3200 recompute_dominator (CDI_DOMINATORS, l2_bb)); 3201 set_immediate_dominator (CDI_DOMINATORS, l3_bb, 3202 recompute_dominator (CDI_DOMINATORS, l3_bb)); 3203 set_immediate_dominator (CDI_DOMINATORS, l0_bb, 3204 recompute_dominator (CDI_DOMINATORS, l0_bb)); 3205 set_immediate_dominator (CDI_DOMINATORS, l1_bb, 3206 recompute_dominator (CDI_DOMINATORS, l1_bb)); 3207 3208 /* We enter expand_omp_for_generic with a loop. This original loop may 3209 have its own loop struct, or it may be part of an outer loop struct 3210 (which may be the fake loop). */ 3211 struct loop *outer_loop = entry_bb->loop_father; 3212 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; 3213 3214 add_bb_to_loop (l2_bb, outer_loop); 3215 3216 /* We've added a new loop around the original loop. Allocate the 3217 corresponding loop struct. 
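Its header is L0_BB, where each chunk begins, and its latch is L2_BB,
         which requests the next chunk.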
*/ 3218 struct loop *new_loop = alloc_loop (); 3219 new_loop->header = l0_bb; 3220 new_loop->latch = l2_bb; 3221 add_loop (new_loop, outer_loop); 3222 3223 /* Allocate a loop structure for the original loop unless we already 3224 had one. */ 3225 if (!orig_loop_has_loop_struct 3226 && !gimple_omp_for_combined_p (fd->for_stmt)) 3227 { 3228 struct loop *orig_loop = alloc_loop (); 3229 orig_loop->header = l1_bb; 3230 /* The loop may have multiple latches. */ 3231 add_loop (orig_loop, new_loop); 3232 } 3233 } 3234 } 3235 3236 /* A subroutine of expand_omp_for. Generate code for a parallel 3237 loop with static schedule and no specified chunk size. Given 3238 parameters: 3239 3240 for (V = N1; V cond N2; V += STEP) BODY; 3241 3242 where COND is "<" or ">", we generate pseudocode 3243 3244 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; 3245 if (cond is <) 3246 adj = STEP - 1; 3247 else 3248 adj = STEP + 1; 3249 if ((__typeof (V)) -1 > 0 && cond is >) 3250 n = -(adj + N2 - N1) / -STEP; 3251 else 3252 n = (adj + N2 - N1) / STEP; 3253 q = n / nthreads; 3254 tt = n % nthreads; 3255 if (threadid < tt) goto L3; else goto L4; 3256 L3: 3257 tt = 0; 3258 q = q + 1; 3259 L4: 3260 s0 = q * threadid + tt; 3261 e0 = s0 + q; 3262 V = s0 * STEP + N1; 3263 if (s0 >= e0) goto L2; else goto L0; 3264 L0: 3265 e = e0 * STEP + N1; 3266 L1: 3267 BODY; 3268 V += STEP; 3269 if (V cond e) goto L1; 3270 L2: 3271 */ 3272 3273 static void 3274 expand_omp_for_static_nochunk (struct omp_region *region, 3275 struct omp_for_data *fd, 3276 gimple *inner_stmt) 3277 { 3278 tree n, q, s0, e0, e, t, tt, nthreads, threadid; 3279 tree type, itype, vmain, vback; 3280 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; 3281 basic_block body_bb, cont_bb, collapse_bb = NULL; 3282 basic_block fin_bb; 3283 gimple_stmt_iterator gsi; 3284 edge ep; 3285 bool broken_loop = region->cont == NULL; 3286 tree *counts = NULL; 3287 tree n1, n2, step; 3288 3289 itype = type = TREE_TYPE (fd->loop.v); 3290 if (POINTER_TYPE_P (type)) 3291 itype = signed_type_for (type); 3292 3293 entry_bb = region->entry; 3294 cont_bb = region->cont; 3295 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 3296 fin_bb = BRANCH_EDGE (entry_bb)->dest; 3297 gcc_assert (broken_loop 3298 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); 3299 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); 3300 body_bb = single_succ (seq_start_bb); 3301 if (!broken_loop) 3302 { 3303 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb 3304 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); 3305 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 3306 } 3307 exit_bb = region->exit; 3308 3309 /* Iteration space partitioning goes in ENTRY_BB. 
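Every thread evaluates the same expressions: n is the total iteration
     count, q = n / nthreads and tt = n % nthreads; the first tt threads take
     q + 1 iterations each and the remaining ones take q, which yields the
     contiguous range [s0, e0) assigned below.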
*/ 3310 gsi = gsi_last_bb (entry_bb); 3311 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3312 3313 if (fd->collapse > 1) 3314 { 3315 int first_zero_iter = -1, dummy = -1; 3316 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 3317 3318 counts = XALLOCAVEC (tree, fd->collapse); 3319 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 3320 fin_bb, first_zero_iter, 3321 dummy_bb, dummy, l2_dom_bb); 3322 t = NULL_TREE; 3323 } 3324 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3325 t = integer_one_node; 3326 else 3327 t = fold_binary (fd->loop.cond_code, boolean_type_node, 3328 fold_convert (type, fd->loop.n1), 3329 fold_convert (type, fd->loop.n2)); 3330 if (fd->collapse == 1 3331 && TYPE_UNSIGNED (type) 3332 && (t == NULL_TREE || !integer_onep (t))) 3333 { 3334 n1 = fold_convert (type, unshare_expr (fd->loop.n1)); 3335 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, 3336 true, GSI_SAME_STMT); 3337 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); 3338 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, 3339 true, GSI_SAME_STMT); 3340 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, 3341 NULL_TREE, NULL_TREE); 3342 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3343 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 3344 expand_omp_regimplify_p, NULL, NULL) 3345 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 3346 expand_omp_regimplify_p, NULL, NULL)) 3347 { 3348 gsi = gsi_for_stmt (cond_stmt); 3349 gimple_regimplify_operands (cond_stmt, &gsi); 3350 } 3351 ep = split_block (entry_bb, cond_stmt); 3352 ep->flags = EDGE_TRUE_VALUE; 3353 entry_bb = ep->dest; 3354 ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1); 3355 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE); 3356 ep->probability = REG_BR_PROB_BASE / 2000 - 1; 3357 if (gimple_in_ssa_p (cfun)) 3358 { 3359 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx; 3360 for (gphi_iterator gpi = gsi_start_phis (fin_bb); 3361 !gsi_end_p (gpi); gsi_next (&gpi)) 3362 { 3363 gphi *phi = gpi.phi (); 3364 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), 3365 ep, UNKNOWN_LOCATION); 3366 } 3367 } 3368 gsi = gsi_last_bb (entry_bb); 3369 } 3370 3371 switch (gimple_omp_for_kind (fd->for_stmt)) 3372 { 3373 case GF_OMP_FOR_KIND_FOR: 3374 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 3375 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 3376 break; 3377 case GF_OMP_FOR_KIND_DISTRIBUTE: 3378 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); 3379 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); 3380 break; 3381 default: 3382 gcc_unreachable (); 3383 } 3384 nthreads = build_call_expr (nthreads, 0); 3385 nthreads = fold_convert (itype, nthreads); 3386 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, 3387 true, GSI_SAME_STMT); 3388 threadid = build_call_expr (threadid, 0); 3389 threadid = fold_convert (itype, threadid); 3390 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 3391 true, GSI_SAME_STMT); 3392 3393 n1 = fd->loop.n1; 3394 n2 = fd->loop.n2; 3395 step = fd->loop.step; 3396 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3397 { 3398 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 3399 OMP_CLAUSE__LOOPTEMP_); 3400 gcc_assert (innerc); 3401 n1 = OMP_CLAUSE_DECL (innerc); 3402 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3403 OMP_CLAUSE__LOOPTEMP_); 3404 gcc_assert (innerc); 3405 n2 = OMP_CLAUSE_DECL (innerc); 3406 } 3407 n1 = 
force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 3408 true, NULL_TREE, true, GSI_SAME_STMT); 3409 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 3410 true, NULL_TREE, true, GSI_SAME_STMT); 3411 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 3412 true, NULL_TREE, true, GSI_SAME_STMT); 3413 3414 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); 3415 t = fold_build2 (PLUS_EXPR, itype, step, t); 3416 t = fold_build2 (PLUS_EXPR, itype, t, n2); 3417 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); 3418 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 3419 t = fold_build2 (TRUNC_DIV_EXPR, itype, 3420 fold_build1 (NEGATE_EXPR, itype, t), 3421 fold_build1 (NEGATE_EXPR, itype, step)); 3422 else 3423 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 3424 t = fold_convert (itype, t); 3425 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3426 3427 q = create_tmp_reg (itype, "q"); 3428 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads); 3429 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); 3430 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT); 3431 3432 tt = create_tmp_reg (itype, "tt"); 3433 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads); 3434 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); 3435 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT); 3436 3437 t = build2 (LT_EXPR, boolean_type_node, threadid, tt); 3438 gcond *cond_stmt = gimple_build_cond_empty (t); 3439 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3440 3441 second_bb = split_block (entry_bb, cond_stmt)->dest; 3442 gsi = gsi_last_bb (second_bb); 3443 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3444 3445 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)), 3446 GSI_SAME_STMT); 3447 gassign *assign_stmt 3448 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1)); 3449 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3450 3451 third_bb = split_block (second_bb, assign_stmt)->dest; 3452 gsi = gsi_last_bb (third_bb); 3453 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3454 3455 t = build2 (MULT_EXPR, itype, q, threadid); 3456 t = build2 (PLUS_EXPR, itype, t, tt); 3457 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3458 3459 t = fold_build2 (PLUS_EXPR, itype, s0, q); 3460 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3461 3462 t = build2 (GE_EXPR, boolean_type_node, s0, e0); 3463 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 3464 3465 /* Remove the GIMPLE_OMP_FOR statement. */ 3466 gsi_remove (&gsi, true); 3467 3468 /* Setup code for sequential iteration goes in SEQ_START_BB. */ 3469 gsi = gsi_start_bb (seq_start_bb); 3470 3471 tree startvar = fd->loop.v; 3472 tree endvar = NULL_TREE; 3473 3474 if (gimple_omp_for_combined_p (fd->for_stmt)) 3475 { 3476 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL 3477 ? 
gimple_omp_parallel_clauses (inner_stmt) 3478 : gimple_omp_for_clauses (inner_stmt); 3479 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 3480 gcc_assert (innerc); 3481 startvar = OMP_CLAUSE_DECL (innerc); 3482 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3483 OMP_CLAUSE__LOOPTEMP_); 3484 gcc_assert (innerc); 3485 endvar = OMP_CLAUSE_DECL (innerc); 3486 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST 3487 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) 3488 { 3489 int i; 3490 for (i = 1; i < fd->collapse; i++) 3491 { 3492 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3493 OMP_CLAUSE__LOOPTEMP_); 3494 gcc_assert (innerc); 3495 } 3496 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3497 OMP_CLAUSE__LOOPTEMP_); 3498 if (innerc) 3499 { 3500 /* If needed (distribute parallel for with lastprivate), 3501 propagate down the total number of iterations. */ 3502 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), 3503 fd->loop.n2); 3504 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, 3505 GSI_CONTINUE_LINKING); 3506 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 3507 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3508 } 3509 } 3510 } 3511 t = fold_convert (itype, s0); 3512 t = fold_build2 (MULT_EXPR, itype, t, step); 3513 if (POINTER_TYPE_P (type)) 3514 t = fold_build_pointer_plus (n1, t); 3515 else 3516 t = fold_build2 (PLUS_EXPR, type, t, n1); 3517 t = fold_convert (TREE_TYPE (startvar), t); 3518 t = force_gimple_operand_gsi (&gsi, t, 3519 DECL_P (startvar) 3520 && TREE_ADDRESSABLE (startvar), 3521 NULL_TREE, false, GSI_CONTINUE_LINKING); 3522 assign_stmt = gimple_build_assign (startvar, t); 3523 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3524 3525 t = fold_convert (itype, e0); 3526 t = fold_build2 (MULT_EXPR, itype, t, step); 3527 if (POINTER_TYPE_P (type)) 3528 t = fold_build_pointer_plus (n1, t); 3529 else 3530 t = fold_build2 (PLUS_EXPR, type, t, n1); 3531 t = fold_convert (TREE_TYPE (startvar), t); 3532 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3533 false, GSI_CONTINUE_LINKING); 3534 if (endvar) 3535 { 3536 assign_stmt = gimple_build_assign (endvar, e); 3537 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3538 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 3539 assign_stmt = gimple_build_assign (fd->loop.v, e); 3540 else 3541 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 3542 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3543 } 3544 /* Handle linear clause adjustments. 
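For instance, with a hypothetical clause such as

     #pragma omp for linear (x:2)
     for (i = 0; i < N; i++)
       ... x ...

each thread's privatized x has to start out as the incoming value of x plus 2 * its first logical iteration; the loop below computes exactly that adjustment, using ITERCNT as the iteration number (s0 for a standalone worksharing loop, or, for a combined construct, s0 plus the offset of this construct's first iteration from the original loop start).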
*/ 3545 tree itercnt = NULL_TREE; 3546 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 3547 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 3548 c; c = OMP_CLAUSE_CHAIN (c)) 3549 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 3550 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 3551 { 3552 tree d = OMP_CLAUSE_DECL (c); 3553 bool is_ref = omp_is_reference (d); 3554 tree t = d, a, dest; 3555 if (is_ref) 3556 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 3557 if (itercnt == NULL_TREE) 3558 { 3559 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3560 { 3561 itercnt = fold_build2 (MINUS_EXPR, itype, 3562 fold_convert (itype, n1), 3563 fold_convert (itype, fd->loop.n1)); 3564 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step); 3565 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0); 3566 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 3567 NULL_TREE, false, 3568 GSI_CONTINUE_LINKING); 3569 } 3570 else 3571 itercnt = s0; 3572 } 3573 tree type = TREE_TYPE (t); 3574 if (POINTER_TYPE_P (type)) 3575 type = sizetype; 3576 a = fold_build2 (MULT_EXPR, type, 3577 fold_convert (type, itercnt), 3578 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 3579 dest = unshare_expr (t); 3580 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 3581 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a); 3582 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3583 false, GSI_CONTINUE_LINKING); 3584 assign_stmt = gimple_build_assign (dest, t); 3585 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3586 } 3587 if (fd->collapse > 1) 3588 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 3589 3590 if (!broken_loop) 3591 { 3592 /* The code controlling the sequential loop replaces the 3593 GIMPLE_OMP_CONTINUE. */ 3594 gsi = gsi_last_bb (cont_bb); 3595 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 3596 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 3597 vmain = gimple_omp_continue_control_use (cont_stmt); 3598 vback = gimple_omp_continue_control_def (cont_stmt); 3599 3600 if (!gimple_omp_for_combined_p (fd->for_stmt)) 3601 { 3602 if (POINTER_TYPE_P (type)) 3603 t = fold_build_pointer_plus (vmain, step); 3604 else 3605 t = fold_build2 (PLUS_EXPR, type, vmain, step); 3606 t = force_gimple_operand_gsi (&gsi, t, 3607 DECL_P (vback) 3608 && TREE_ADDRESSABLE (vback), 3609 NULL_TREE, true, GSI_SAME_STMT); 3610 assign_stmt = gimple_build_assign (vback, t); 3611 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3612 3613 t = build2 (fd->loop.cond_code, boolean_type_node, 3614 DECL_P (vback) && TREE_ADDRESSABLE (vback) 3615 ? t : vback, e); 3616 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 3617 } 3618 3619 /* Remove the GIMPLE_OMP_CONTINUE statement. */ 3620 gsi_remove (&gsi, true); 3621 3622 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 3623 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 3624 } 3625 3626 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ 3627 gsi = gsi_last_bb (exit_bb); 3628 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) 3629 { 3630 t = gimple_omp_return_lhs (gsi_stmt (gsi)); 3631 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); 3632 } 3633 gsi_remove (&gsi, true); 3634 3635 /* Connect all the blocks. 
*/ 3636 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE); 3637 ep->probability = REG_BR_PROB_BASE / 4 * 3; 3638 ep = find_edge (entry_bb, second_bb); 3639 ep->flags = EDGE_TRUE_VALUE; 3640 ep->probability = REG_BR_PROB_BASE / 4; 3641 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; 3642 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE; 3643 3644 if (!broken_loop) 3645 { 3646 ep = find_edge (cont_bb, body_bb); 3647 if (ep == NULL) 3648 { 3649 ep = BRANCH_EDGE (cont_bb); 3650 gcc_assert (single_succ (ep->dest) == body_bb); 3651 } 3652 if (gimple_omp_for_combined_p (fd->for_stmt)) 3653 { 3654 remove_edge (ep); 3655 ep = NULL; 3656 } 3657 else if (fd->collapse > 1) 3658 { 3659 remove_edge (ep); 3660 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 3661 } 3662 else 3663 ep->flags = EDGE_TRUE_VALUE; 3664 find_edge (cont_bb, fin_bb)->flags 3665 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 3666 } 3667 3668 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); 3669 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); 3670 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb); 3671 3672 set_immediate_dominator (CDI_DOMINATORS, body_bb, 3673 recompute_dominator (CDI_DOMINATORS, body_bb)); 3674 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 3675 recompute_dominator (CDI_DOMINATORS, fin_bb)); 3676 3677 struct loop *loop = body_bb->loop_father; 3678 if (loop != entry_bb->loop_father) 3679 { 3680 gcc_assert (broken_loop || loop->header == body_bb); 3681 gcc_assert (broken_loop 3682 || loop->latch == region->cont 3683 || single_pred (loop->latch) == region->cont); 3684 return; 3685 } 3686 3687 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) 3688 { 3689 loop = alloc_loop (); 3690 loop->header = body_bb; 3691 if (collapse_bb == NULL) 3692 loop->latch = cont_bb; 3693 add_loop (loop, body_bb->loop_father); 3694 } 3695 } 3696 3697 /* Return phi in E->DEST with ARG on edge E. */ 3698 3699 static gphi * 3700 find_phi_with_arg_on_edge (tree arg, edge e) 3701 { 3702 basic_block bb = e->dest; 3703 3704 for (gphi_iterator gpi = gsi_start_phis (bb); 3705 !gsi_end_p (gpi); 3706 gsi_next (&gpi)) 3707 { 3708 gphi *phi = gpi.phi (); 3709 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg) 3710 return phi; 3711 } 3712 3713 return NULL; 3714 } 3715 3716 /* A subroutine of expand_omp_for. Generate code for a parallel 3717 loop with static schedule and a specified chunk size. 
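This is the expansion used, for instance, for a loop annotated with something like '#pragma omp for schedule (static, CHUNK)' (and likewise for distribute with a static, chunked dist_schedule): each thread repeatedly claims the next CHUNK consecutive iterations assigned to it, round-robin over the thread (or team) ids, until the iteration space is exhausted.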
Given 3718 parameters: 3719 3720 for (V = N1; V cond N2; V += STEP) BODY; 3721 3722 where COND is "<" or ">", we generate pseudocode 3723 3724 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; 3725 if (cond is <) 3726 adj = STEP - 1; 3727 else 3728 adj = STEP + 1; 3729 if ((__typeof (V)) -1 > 0 && cond is >) 3730 n = -(adj + N2 - N1) / -STEP; 3731 else 3732 n = (adj + N2 - N1) / STEP; 3733 trip = 0; 3734 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is 3735 here so that V is defined 3736 if the loop is not entered 3737 L0: 3738 s0 = (trip * nthreads + threadid) * CHUNK; 3739 e0 = min (s0 + CHUNK, n); 3740 if (s0 < n) goto L1; else goto L4; 3741 L1: 3742 V = s0 * STEP + N1; 3743 e = e0 * STEP + N1; 3744 L2: 3745 BODY; 3746 V += STEP; 3747 if (V cond e) goto L2; else goto L3; 3748 L3: 3749 trip += 1; 3750 goto L0; 3751 L4: 3752 */ 3753 3754 static void 3755 expand_omp_for_static_chunk (struct omp_region *region, 3756 struct omp_for_data *fd, gimple *inner_stmt) 3757 { 3758 tree n, s0, e0, e, t; 3759 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; 3760 tree type, itype, vmain, vback, vextra; 3761 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; 3762 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; 3763 gimple_stmt_iterator gsi; 3764 edge se; 3765 bool broken_loop = region->cont == NULL; 3766 tree *counts = NULL; 3767 tree n1, n2, step; 3768 3769 itype = type = TREE_TYPE (fd->loop.v); 3770 if (POINTER_TYPE_P (type)) 3771 itype = signed_type_for (type); 3772 3773 entry_bb = region->entry; 3774 se = split_block (entry_bb, last_stmt (entry_bb)); 3775 entry_bb = se->src; 3776 iter_part_bb = se->dest; 3777 cont_bb = region->cont; 3778 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2); 3779 fin_bb = BRANCH_EDGE (iter_part_bb)->dest; 3780 gcc_assert (broken_loop 3781 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest); 3782 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); 3783 body_bb = single_succ (seq_start_bb); 3784 if (!broken_loop) 3785 { 3786 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb 3787 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); 3788 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 3789 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); 3790 } 3791 exit_bb = region->exit; 3792 3793 /* Trip and adjustment setup goes in ENTRY_BB. 
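(The trip counter counts how many rounds of chunk hand-outs have happened so far; as the pseudocode above shows, the start of the next chunk for this thread is s0 = (trip * nthreads + threadid) * CHUNK, so ENTRY_BB computes the iteration count n, nthreads, threadid, the simd-adjusted chunk size, the initial trip of zero, and the fallback value VEXTRA of the iteration variable.)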
*/ 3794 gsi = gsi_last_bb (entry_bb); 3795 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3796 3797 if (fd->collapse > 1) 3798 { 3799 int first_zero_iter = -1, dummy = -1; 3800 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 3801 3802 counts = XALLOCAVEC (tree, fd->collapse); 3803 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 3804 fin_bb, first_zero_iter, 3805 dummy_bb, dummy, l2_dom_bb); 3806 t = NULL_TREE; 3807 } 3808 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3809 t = integer_one_node; 3810 else 3811 t = fold_binary (fd->loop.cond_code, boolean_type_node, 3812 fold_convert (type, fd->loop.n1), 3813 fold_convert (type, fd->loop.n2)); 3814 if (fd->collapse == 1 3815 && TYPE_UNSIGNED (type) 3816 && (t == NULL_TREE || !integer_onep (t))) 3817 { 3818 n1 = fold_convert (type, unshare_expr (fd->loop.n1)); 3819 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, 3820 true, GSI_SAME_STMT); 3821 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); 3822 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, 3823 true, GSI_SAME_STMT); 3824 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, 3825 NULL_TREE, NULL_TREE); 3826 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3827 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 3828 expand_omp_regimplify_p, NULL, NULL) 3829 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 3830 expand_omp_regimplify_p, NULL, NULL)) 3831 { 3832 gsi = gsi_for_stmt (cond_stmt); 3833 gimple_regimplify_operands (cond_stmt, &gsi); 3834 } 3835 se = split_block (entry_bb, cond_stmt); 3836 se->flags = EDGE_TRUE_VALUE; 3837 entry_bb = se->dest; 3838 se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1); 3839 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE); 3840 se->probability = REG_BR_PROB_BASE / 2000 - 1; 3841 if (gimple_in_ssa_p (cfun)) 3842 { 3843 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx; 3844 for (gphi_iterator gpi = gsi_start_phis (fin_bb); 3845 !gsi_end_p (gpi); gsi_next (&gpi)) 3846 { 3847 gphi *phi = gpi.phi (); 3848 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), 3849 se, UNKNOWN_LOCATION); 3850 } 3851 } 3852 gsi = gsi_last_bb (entry_bb); 3853 } 3854 3855 switch (gimple_omp_for_kind (fd->for_stmt)) 3856 { 3857 case GF_OMP_FOR_KIND_FOR: 3858 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 3859 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 3860 break; 3861 case GF_OMP_FOR_KIND_DISTRIBUTE: 3862 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); 3863 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); 3864 break; 3865 default: 3866 gcc_unreachable (); 3867 } 3868 nthreads = build_call_expr (nthreads, 0); 3869 nthreads = fold_convert (itype, nthreads); 3870 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, 3871 true, GSI_SAME_STMT); 3872 threadid = build_call_expr (threadid, 0); 3873 threadid = fold_convert (itype, threadid); 3874 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 3875 true, GSI_SAME_STMT); 3876 3877 n1 = fd->loop.n1; 3878 n2 = fd->loop.n2; 3879 step = fd->loop.step; 3880 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3881 { 3882 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 3883 OMP_CLAUSE__LOOPTEMP_); 3884 gcc_assert (innerc); 3885 n1 = OMP_CLAUSE_DECL (innerc); 3886 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3887 OMP_CLAUSE__LOOPTEMP_); 3888 gcc_assert (innerc); 3889 n2 = OMP_CLAUSE_DECL (innerc); 3890 } 3891 n1 = 
force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 3892 true, NULL_TREE, true, GSI_SAME_STMT); 3893 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 3894 true, NULL_TREE, true, GSI_SAME_STMT); 3895 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 3896 true, NULL_TREE, true, GSI_SAME_STMT); 3897 tree chunk_size = fold_convert (itype, fd->chunk_size); 3898 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); 3899 chunk_size 3900 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true, 3901 GSI_SAME_STMT); 3902 3903 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); 3904 t = fold_build2 (PLUS_EXPR, itype, step, t); 3905 t = fold_build2 (PLUS_EXPR, itype, t, n2); 3906 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); 3907 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 3908 t = fold_build2 (TRUNC_DIV_EXPR, itype, 3909 fold_build1 (NEGATE_EXPR, itype, t), 3910 fold_build1 (NEGATE_EXPR, itype, step)); 3911 else 3912 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 3913 t = fold_convert (itype, t); 3914 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3915 true, GSI_SAME_STMT); 3916 3917 trip_var = create_tmp_reg (itype, ".trip"); 3918 if (gimple_in_ssa_p (cfun)) 3919 { 3920 trip_init = make_ssa_name (trip_var); 3921 trip_main = make_ssa_name (trip_var); 3922 trip_back = make_ssa_name (trip_var); 3923 } 3924 else 3925 { 3926 trip_init = trip_var; 3927 trip_main = trip_var; 3928 trip_back = trip_var; 3929 } 3930 3931 gassign *assign_stmt 3932 = gimple_build_assign (trip_init, build_int_cst (itype, 0)); 3933 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3934 3935 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); 3936 t = fold_build2 (MULT_EXPR, itype, t, step); 3937 if (POINTER_TYPE_P (type)) 3938 t = fold_build_pointer_plus (n1, t); 3939 else 3940 t = fold_build2 (PLUS_EXPR, type, t, n1); 3941 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3942 true, GSI_SAME_STMT); 3943 3944 /* Remove the GIMPLE_OMP_FOR. */ 3945 gsi_remove (&gsi, true); 3946 3947 gimple_stmt_iterator gsif = gsi; 3948 3949 /* Iteration space partitioning goes in ITER_PART_BB. */ 3950 gsi = gsi_last_bb (iter_part_bb); 3951 3952 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); 3953 t = fold_build2 (PLUS_EXPR, itype, t, threadid); 3954 t = fold_build2 (MULT_EXPR, itype, t, chunk_size); 3955 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3956 false, GSI_CONTINUE_LINKING); 3957 3958 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); 3959 t = fold_build2 (MIN_EXPR, itype, t, n); 3960 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3961 false, GSI_CONTINUE_LINKING); 3962 3963 t = build2 (LT_EXPR, boolean_type_node, s0, n); 3964 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING); 3965 3966 /* Setup code for sequential iteration goes in SEQ_START_BB. */ 3967 gsi = gsi_start_bb (seq_start_bb); 3968 3969 tree startvar = fd->loop.v; 3970 tree endvar = NULL_TREE; 3971 3972 if (gimple_omp_for_combined_p (fd->for_stmt)) 3973 { 3974 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL 3975 ? 
gimple_omp_parallel_clauses (inner_stmt) 3976 : gimple_omp_for_clauses (inner_stmt); 3977 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 3978 gcc_assert (innerc); 3979 startvar = OMP_CLAUSE_DECL (innerc); 3980 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3981 OMP_CLAUSE__LOOPTEMP_); 3982 gcc_assert (innerc); 3983 endvar = OMP_CLAUSE_DECL (innerc); 3984 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST 3985 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) 3986 { 3987 int i; 3988 for (i = 1; i < fd->collapse; i++) 3989 { 3990 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3991 OMP_CLAUSE__LOOPTEMP_); 3992 gcc_assert (innerc); 3993 } 3994 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3995 OMP_CLAUSE__LOOPTEMP_); 3996 if (innerc) 3997 { 3998 /* If needed (distribute parallel for with lastprivate), 3999 propagate down the total number of iterations. */ 4000 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), 4001 fd->loop.n2); 4002 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, 4003 GSI_CONTINUE_LINKING); 4004 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 4005 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4006 } 4007 } 4008 } 4009 4010 t = fold_convert (itype, s0); 4011 t = fold_build2 (MULT_EXPR, itype, t, step); 4012 if (POINTER_TYPE_P (type)) 4013 t = fold_build_pointer_plus (n1, t); 4014 else 4015 t = fold_build2 (PLUS_EXPR, type, t, n1); 4016 t = fold_convert (TREE_TYPE (startvar), t); 4017 t = force_gimple_operand_gsi (&gsi, t, 4018 DECL_P (startvar) 4019 && TREE_ADDRESSABLE (startvar), 4020 NULL_TREE, false, GSI_CONTINUE_LINKING); 4021 assign_stmt = gimple_build_assign (startvar, t); 4022 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4023 4024 t = fold_convert (itype, e0); 4025 t = fold_build2 (MULT_EXPR, itype, t, step); 4026 if (POINTER_TYPE_P (type)) 4027 t = fold_build_pointer_plus (n1, t); 4028 else 4029 t = fold_build2 (PLUS_EXPR, type, t, n1); 4030 t = fold_convert (TREE_TYPE (startvar), t); 4031 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4032 false, GSI_CONTINUE_LINKING); 4033 if (endvar) 4034 { 4035 assign_stmt = gimple_build_assign (endvar, e); 4036 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4037 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 4038 assign_stmt = gimple_build_assign (fd->loop.v, e); 4039 else 4040 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 4041 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4042 } 4043 /* Handle linear clause adjustments. 
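The same adjustment as in the non-chunked expansion is needed here, but SEQ_START_BB is now entered once per chunk, so the incoming value of each linear variable is first copied into a fresh temporary at GSIF (right where the GIMPLE_OMP_FOR used to be), and every chunk then derives the privatized value from that snapshot rather than from the variable itself, which may already have been updated by an earlier chunk.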
*/ 4044 tree itercnt = NULL_TREE, itercntbias = NULL_TREE; 4045 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 4046 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 4047 c; c = OMP_CLAUSE_CHAIN (c)) 4048 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 4049 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 4050 { 4051 tree d = OMP_CLAUSE_DECL (c); 4052 bool is_ref = omp_is_reference (d); 4053 tree t = d, a, dest; 4054 if (is_ref) 4055 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 4056 tree type = TREE_TYPE (t); 4057 if (POINTER_TYPE_P (type)) 4058 type = sizetype; 4059 dest = unshare_expr (t); 4060 tree v = create_tmp_var (TREE_TYPE (t), NULL); 4061 expand_omp_build_assign (&gsif, v, t); 4062 if (itercnt == NULL_TREE) 4063 { 4064 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4065 { 4066 itercntbias 4067 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1), 4068 fold_convert (itype, fd->loop.n1)); 4069 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype, 4070 itercntbias, step); 4071 itercntbias 4072 = force_gimple_operand_gsi (&gsif, itercntbias, true, 4073 NULL_TREE, true, 4074 GSI_SAME_STMT); 4075 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0); 4076 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 4077 NULL_TREE, false, 4078 GSI_CONTINUE_LINKING); 4079 } 4080 else 4081 itercnt = s0; 4082 } 4083 a = fold_build2 (MULT_EXPR, type, 4084 fold_convert (type, itercnt), 4085 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 4086 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 4087 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 4088 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4089 false, GSI_CONTINUE_LINKING); 4090 assign_stmt = gimple_build_assign (dest, t); 4091 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4092 } 4093 if (fd->collapse > 1) 4094 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 4095 4096 if (!broken_loop) 4097 { 4098 /* The code controlling the sequential loop goes in CONT_BB, 4099 replacing the GIMPLE_OMP_CONTINUE. */ 4100 gsi = gsi_last_bb (cont_bb); 4101 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 4102 vmain = gimple_omp_continue_control_use (cont_stmt); 4103 vback = gimple_omp_continue_control_def (cont_stmt); 4104 4105 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4106 { 4107 if (POINTER_TYPE_P (type)) 4108 t = fold_build_pointer_plus (vmain, step); 4109 else 4110 t = fold_build2 (PLUS_EXPR, type, vmain, step); 4111 if (DECL_P (vback) && TREE_ADDRESSABLE (vback)) 4112 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4113 true, GSI_SAME_STMT); 4114 assign_stmt = gimple_build_assign (vback, t); 4115 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4116 4117 if (tree_int_cst_equal (fd->chunk_size, integer_one_node)) 4118 t = build2 (EQ_EXPR, boolean_type_node, 4119 build_int_cst (itype, 0), 4120 build_int_cst (itype, 1)); 4121 else 4122 t = build2 (fd->loop.cond_code, boolean_type_node, 4123 DECL_P (vback) && TREE_ADDRESSABLE (vback) 4124 ? t : vback, e); 4125 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 4126 } 4127 4128 /* Remove GIMPLE_OMP_CONTINUE. */ 4129 gsi_remove (&gsi, true); 4130 4131 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 4132 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 4133 4134 /* Trip update code goes into TRIP_UPDATE_BB. 
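All it does is advance the round counter, trip_back = trip_main + 1, before control returns to ITER_PART_BB to claim the next chunk.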
*/ 4135 gsi = gsi_start_bb (trip_update_bb); 4136 4137 t = build_int_cst (itype, 1); 4138 t = build2 (PLUS_EXPR, itype, trip_main, t); 4139 assign_stmt = gimple_build_assign (trip_back, t); 4140 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4141 } 4142 4143 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ 4144 gsi = gsi_last_bb (exit_bb); 4145 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) 4146 { 4147 t = gimple_omp_return_lhs (gsi_stmt (gsi)); 4148 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); 4149 } 4150 gsi_remove (&gsi, true); 4151 4152 /* Connect the new blocks. */ 4153 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; 4154 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; 4155 4156 if (!broken_loop) 4157 { 4158 se = find_edge (cont_bb, body_bb); 4159 if (se == NULL) 4160 { 4161 se = BRANCH_EDGE (cont_bb); 4162 gcc_assert (single_succ (se->dest) == body_bb); 4163 } 4164 if (gimple_omp_for_combined_p (fd->for_stmt)) 4165 { 4166 remove_edge (se); 4167 se = NULL; 4168 } 4169 else if (fd->collapse > 1) 4170 { 4171 remove_edge (se); 4172 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 4173 } 4174 else 4175 se->flags = EDGE_TRUE_VALUE; 4176 find_edge (cont_bb, trip_update_bb)->flags 4177 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 4178 4179 redirect_edge_and_branch (single_succ_edge (trip_update_bb), 4180 iter_part_bb); 4181 } 4182 4183 if (gimple_in_ssa_p (cfun)) 4184 { 4185 gphi_iterator psi; 4186 gphi *phi; 4187 edge re, ene; 4188 edge_var_map *vm; 4189 size_t i; 4190 4191 gcc_assert (fd->collapse == 1 && !broken_loop); 4192 4193 /* When we redirect the edge from trip_update_bb to iter_part_bb, we 4194 remove arguments of the phi nodes in fin_bb. We need to create 4195 appropriate phi nodes in iter_part_bb instead. */ 4196 se = find_edge (iter_part_bb, fin_bb); 4197 re = single_succ_edge (trip_update_bb); 4198 vec<edge_var_map> *head = redirect_edge_var_map_vector (re); 4199 ene = single_succ_edge (entry_bb); 4200 4201 psi = gsi_start_phis (fin_bb); 4202 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); 4203 gsi_next (&psi), ++i) 4204 { 4205 gphi *nphi; 4206 source_location locus; 4207 4208 phi = psi.phi (); 4209 t = gimple_phi_result (phi); 4210 gcc_assert (t == redirect_edge_var_map_result (vm)); 4211 4212 if (!single_pred_p (fin_bb)) 4213 t = copy_ssa_name (t, phi); 4214 4215 nphi = create_phi_node (t, iter_part_bb); 4216 4217 t = PHI_ARG_DEF_FROM_EDGE (phi, se); 4218 locus = gimple_phi_arg_location_from_edge (phi, se); 4219 4220 /* A special case -- fd->loop.v is not yet computed in 4221 iter_part_bb, we need to use vextra instead. 
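(VEXTRA was computed back in ENTRY_BB as threadid * CHUNK * STEP + N1, i.e. the value the extra pre-loop definition of V in the pseudocode above gives it, so it is the correct incoming value on the edge from ENTRY_BB into ITER_PART_BB.)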
*/ 4222 if (t == fd->loop.v) 4223 t = vextra; 4224 add_phi_arg (nphi, t, ene, locus); 4225 locus = redirect_edge_var_map_location (vm); 4226 tree back_arg = redirect_edge_var_map_def (vm); 4227 add_phi_arg (nphi, back_arg, re, locus); 4228 edge ce = find_edge (cont_bb, body_bb); 4229 if (ce == NULL) 4230 { 4231 ce = BRANCH_EDGE (cont_bb); 4232 gcc_assert (single_succ (ce->dest) == body_bb); 4233 ce = single_succ_edge (ce->dest); 4234 } 4235 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce); 4236 gcc_assert (inner_loop_phi != NULL); 4237 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi), 4238 find_edge (seq_start_bb, body_bb), locus); 4239 4240 if (!single_pred_p (fin_bb)) 4241 add_phi_arg (phi, gimple_phi_result (nphi), se, locus); 4242 } 4243 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ())); 4244 redirect_edge_var_map_clear (re); 4245 if (single_pred_p (fin_bb)) 4246 while (1) 4247 { 4248 psi = gsi_start_phis (fin_bb); 4249 if (gsi_end_p (psi)) 4250 break; 4251 remove_phi_node (&psi, false); 4252 } 4253 4254 /* Make phi node for trip. */ 4255 phi = create_phi_node (trip_main, iter_part_bb); 4256 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb), 4257 UNKNOWN_LOCATION); 4258 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb), 4259 UNKNOWN_LOCATION); 4260 } 4261 4262 if (!broken_loop) 4263 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); 4264 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, 4265 recompute_dominator (CDI_DOMINATORS, iter_part_bb)); 4266 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 4267 recompute_dominator (CDI_DOMINATORS, fin_bb)); 4268 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, 4269 recompute_dominator (CDI_DOMINATORS, seq_start_bb)); 4270 set_immediate_dominator (CDI_DOMINATORS, body_bb, 4271 recompute_dominator (CDI_DOMINATORS, body_bb)); 4272 4273 if (!broken_loop) 4274 { 4275 struct loop *loop = body_bb->loop_father; 4276 struct loop *trip_loop = alloc_loop (); 4277 trip_loop->header = iter_part_bb; 4278 trip_loop->latch = trip_update_bb; 4279 add_loop (trip_loop, iter_part_bb->loop_father); 4280 4281 if (loop != entry_bb->loop_father) 4282 { 4283 gcc_assert (loop->header == body_bb); 4284 gcc_assert (loop->latch == region->cont 4285 || single_pred (loop->latch) == region->cont); 4286 trip_loop->inner = loop; 4287 return; 4288 } 4289 4290 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4291 { 4292 loop = alloc_loop (); 4293 loop->header = body_bb; 4294 if (collapse_bb == NULL) 4295 loop->latch = cont_bb; 4296 add_loop (loop, trip_loop); 4297 } 4298 } 4299 } 4300 4301 /* A subroutine of expand_omp_for. Generate code for _Cilk_for loop. 4302 Given parameters: 4303 for (V = N1; V cond N2; V += STEP) BODY; 4304 4305 where COND is "<" or ">" or "!=", we generate pseudocode 4306 4307 for (ind_var = low; ind_var < high; ind_var++) 4308 { 4309 V = n1 + (ind_var * STEP) 4310 4311 <BODY> 4312 } 4313 4314 In the above pseudocode, low and high are function parameters of the 4315 child function. In the function below, we are inserting a temp. 4316 variable that will be making a call to two OMP functions that will not be 4317 found in the body of _Cilk_for (since OMP_FOR cannot be mixed 4318 with _Cilk_for). These functions are replaced with low and high 4319 by the function that handles taskreg. 
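As an illustrative sketch (the names below are only examples), a loop such as

     _Cilk_for (int i = 0; i < n; i++)
       a[i] = b[i];

is outlined into a child function that receives __low and __high parameters; the code below rewrites the outlined copy to run a fresh induction variable from __low to __high, recomputing the user's i (fd->loop.v) from it on each iteration, and records in WS_ARGS which __libcilkrts_cilk_for_NN runtime entry point the taskreg expansion should eventually call, together with the grain size.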
*/ 4320 4321 4322 static void 4323 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd) 4324 { 4325 bool broken_loop = region->cont == NULL; 4326 basic_block entry_bb = region->entry; 4327 basic_block cont_bb = region->cont; 4328 4329 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 4330 gcc_assert (broken_loop 4331 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 4332 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest; 4333 basic_block l1_bb, l2_bb; 4334 4335 if (!broken_loop) 4336 { 4337 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); 4338 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 4339 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; 4340 l2_bb = BRANCH_EDGE (entry_bb)->dest; 4341 } 4342 else 4343 { 4344 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; 4345 l1_bb = split_edge (BRANCH_EDGE (entry_bb)); 4346 l2_bb = single_succ (l1_bb); 4347 } 4348 basic_block exit_bb = region->exit; 4349 basic_block l2_dom_bb = NULL; 4350 4351 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb); 4352 4353 /* Below statements until the "tree high_val = ..." are pseudo statements 4354 used to pass information to be used by expand_omp_taskreg. 4355 low_val and high_val will be replaced by the __low and __high 4356 parameter from the child function. 4357 4358 The call_exprs part is a place-holder, it is mainly used 4359 to distinctly identify to the top-level part that this is 4360 where we should put low and high (reasoning given in header 4361 comment). */ 4362 4363 gomp_parallel *par_stmt 4364 = as_a <gomp_parallel *> (last_stmt (region->outer->entry)); 4365 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt); 4366 tree t, low_val = NULL_TREE, high_val = NULL_TREE; 4367 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t)) 4368 { 4369 if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high")) 4370 high_val = t; 4371 else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low")) 4372 low_val = t; 4373 } 4374 gcc_assert (low_val && high_val); 4375 4376 tree type = TREE_TYPE (low_val); 4377 tree ind_var = create_tmp_reg (type, "__cilk_ind_var"); 4378 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 4379 4380 /* Not needed in SSA form right now. */ 4381 gcc_assert (!gimple_in_ssa_p (cfun)); 4382 if (l2_dom_bb == NULL) 4383 l2_dom_bb = l1_bb; 4384 4385 tree n1 = low_val; 4386 tree n2 = high_val; 4387 4388 gimple *stmt = gimple_build_assign (ind_var, n1); 4389 4390 /* Replace the GIMPLE_OMP_FOR statement. */ 4391 gsi_replace (&gsi, stmt, true); 4392 4393 if (!broken_loop) 4394 { 4395 /* Code to control the increment goes in the CONT_BB. */ 4396 gsi = gsi_last_bb (cont_bb); 4397 stmt = gsi_stmt (gsi); 4398 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); 4399 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var, 4400 build_one_cst (type)); 4401 4402 /* Replace GIMPLE_OMP_CONTINUE. */ 4403 gsi_replace (&gsi, stmt, true); 4404 } 4405 4406 /* Emit the condition in L1_BB. 
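The test is simply IND_VAR < __high, and fd->loop.v is recomputed from IND_VAR right before it, so the loop body always sees the user-visible iteration variable with its proper value.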
*/ 4407 gsi = gsi_after_labels (l1_bb); 4408 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step), 4409 fold_convert (TREE_TYPE (fd->loop.step), ind_var), 4410 fd->loop.step); 4411 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1))) 4412 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1), 4413 fd->loop.n1, fold_convert (sizetype, t)); 4414 else 4415 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1), 4416 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t)); 4417 t = fold_convert (TREE_TYPE (fd->loop.v), t); 4418 expand_omp_build_assign (&gsi, fd->loop.v, t); 4419 4420 /* The condition is always '<' since the runtime will fill in the low 4421 and high values. */ 4422 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE); 4423 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 4424 4425 /* Remove GIMPLE_OMP_RETURN. */ 4426 gsi = gsi_last_bb (exit_bb); 4427 gsi_remove (&gsi, true); 4428 4429 /* Connect the new blocks. */ 4430 remove_edge (FALLTHRU_EDGE (entry_bb)); 4431 4432 edge e, ne; 4433 if (!broken_loop) 4434 { 4435 remove_edge (BRANCH_EDGE (entry_bb)); 4436 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); 4437 4438 e = BRANCH_EDGE (l1_bb); 4439 ne = FALLTHRU_EDGE (l1_bb); 4440 e->flags = EDGE_TRUE_VALUE; 4441 } 4442 else 4443 { 4444 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 4445 4446 ne = single_succ_edge (l1_bb); 4447 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); 4448 4449 } 4450 ne->flags = EDGE_FALSE_VALUE; 4451 e->probability = REG_BR_PROB_BASE * 7 / 8; 4452 ne->probability = REG_BR_PROB_BASE / 8; 4453 4454 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); 4455 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); 4456 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); 4457 4458 if (!broken_loop) 4459 { 4460 struct loop *loop = alloc_loop (); 4461 loop->header = l1_bb; 4462 loop->latch = cont_bb; 4463 add_loop (loop, l1_bb->loop_father); 4464 loop->safelen = INT_MAX; 4465 } 4466 4467 /* Pick the correct library function based on the precision of the 4468 induction variable type. */ 4469 tree lib_fun = NULL_TREE; 4470 if (TYPE_PRECISION (type) == 32) 4471 lib_fun = cilk_for_32_fndecl; 4472 else if (TYPE_PRECISION (type) == 64) 4473 lib_fun = cilk_for_64_fndecl; 4474 else 4475 gcc_unreachable (); 4476 4477 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR); 4478 4479 /* WS_ARGS contains the library function flavor to call: 4480 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the 4481 user-defined grain value. If the user does not define one, then zero 4482 is passed in by the parser. */ 4483 vec_alloc (region->ws_args, 2); 4484 region->ws_args->quick_push (lib_fun); 4485 region->ws_args->quick_push (fd->chunk_size); 4486 } 4487 4488 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing 4489 loop. 
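This is the expansion used for '#pragma omp simd' (including the simd part of combined constructs): no runtime calls are emitted at all, the loop simply becomes ordinary sequential GIMPLE, and the safelen, simduid and force_vectorize annotations set up below let the vectorizer turn it into SIMD code later (or, for SIMT targets, let it be mapped onto SIMT lanes).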
Given parameters: 4490 4491 for (V = N1; V cond N2; V += STEP) BODY; 4492 4493 where COND is "<" or ">", we generate pseudocode 4494 4495 V = N1; 4496 goto L1; 4497 L0: 4498 BODY; 4499 V += STEP; 4500 L1: 4501 if (V cond N2) goto L0; else goto L2; 4502 L2: 4503 4504 For collapsed loops, given parameters: 4505 collapse(3) 4506 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 4507 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 4508 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 4509 BODY; 4510 4511 we generate pseudocode 4512 4513 if (cond3 is <) 4514 adj = STEP3 - 1; 4515 else 4516 adj = STEP3 + 1; 4517 count3 = (adj + N32 - N31) / STEP3; 4518 if (cond2 is <) 4519 adj = STEP2 - 1; 4520 else 4521 adj = STEP2 + 1; 4522 count2 = (adj + N22 - N21) / STEP2; 4523 if (cond1 is <) 4524 adj = STEP1 - 1; 4525 else 4526 adj = STEP1 + 1; 4527 count1 = (adj + N12 - N11) / STEP1; 4528 count = count1 * count2 * count3; 4529 V = 0; 4530 V1 = N11; 4531 V2 = N21; 4532 V3 = N31; 4533 goto L1; 4534 L0: 4535 BODY; 4536 V += 1; 4537 V3 += STEP3; 4538 V2 += (V3 cond3 N32) ? 0 : STEP2; 4539 V3 = (V3 cond3 N32) ? V3 : N31; 4540 V1 += (V2 cond2 N22) ? 0 : STEP1; 4541 V2 = (V2 cond2 N22) ? V2 : N21; 4542 L1: 4543 if (V < count) goto L0; else goto L2; 4544 L2: 4545 4546 */ 4547 4548 static void 4549 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) 4550 { 4551 tree type, t; 4552 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb; 4553 gimple_stmt_iterator gsi; 4554 gimple *stmt; 4555 gcond *cond_stmt; 4556 bool broken_loop = region->cont == NULL; 4557 edge e, ne; 4558 tree *counts = NULL; 4559 int i; 4560 int safelen_int = INT_MAX; 4561 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4562 OMP_CLAUSE_SAFELEN); 4563 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4564 OMP_CLAUSE__SIMDUID_); 4565 tree n1, n2; 4566 4567 if (safelen) 4568 { 4569 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); 4570 if (TREE_CODE (safelen) != INTEGER_CST) 4571 safelen_int = 0; 4572 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX) 4573 safelen_int = tree_to_uhwi (safelen); 4574 if (safelen_int == 1) 4575 safelen_int = 0; 4576 } 4577 type = TREE_TYPE (fd->loop.v); 4578 entry_bb = region->entry; 4579 cont_bb = region->cont; 4580 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 4581 gcc_assert (broken_loop 4582 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 4583 l0_bb = FALLTHRU_EDGE (entry_bb)->dest; 4584 if (!broken_loop) 4585 { 4586 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); 4587 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 4588 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; 4589 l2_bb = BRANCH_EDGE (entry_bb)->dest; 4590 } 4591 else 4592 { 4593 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; 4594 l1_bb = split_edge (BRANCH_EDGE (entry_bb)); 4595 l2_bb = single_succ (l1_bb); 4596 } 4597 exit_bb = region->exit; 4598 l2_dom_bb = NULL; 4599 4600 gsi = gsi_last_bb (entry_bb); 4601 4602 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 4603 /* Not needed in SSA form right now. 
*/ 4604 gcc_assert (!gimple_in_ssa_p (cfun)); 4605 if (fd->collapse > 1) 4606 { 4607 int first_zero_iter = -1, dummy = -1; 4608 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL; 4609 4610 counts = XALLOCAVEC (tree, fd->collapse); 4611 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 4612 zero_iter_bb, first_zero_iter, 4613 dummy_bb, dummy, l2_dom_bb); 4614 } 4615 if (l2_dom_bb == NULL) 4616 l2_dom_bb = l1_bb; 4617 4618 n1 = fd->loop.n1; 4619 n2 = fd->loop.n2; 4620 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4621 { 4622 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4623 OMP_CLAUSE__LOOPTEMP_); 4624 gcc_assert (innerc); 4625 n1 = OMP_CLAUSE_DECL (innerc); 4626 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4627 OMP_CLAUSE__LOOPTEMP_); 4628 gcc_assert (innerc); 4629 n2 = OMP_CLAUSE_DECL (innerc); 4630 } 4631 tree step = fd->loop.step; 4632 4633 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4634 OMP_CLAUSE__SIMT_); 4635 if (is_simt) 4636 { 4637 cfun->curr_properties &= ~PROP_gimple_lomp_dev; 4638 is_simt = safelen_int > 1; 4639 } 4640 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE; 4641 if (is_simt) 4642 { 4643 simt_lane = create_tmp_var (unsigned_type_node); 4644 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); 4645 gimple_call_set_lhs (g, simt_lane); 4646 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4647 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, 4648 fold_convert (TREE_TYPE (step), simt_lane)); 4649 n1 = fold_convert (type, n1); 4650 if (POINTER_TYPE_P (type)) 4651 n1 = fold_build_pointer_plus (n1, offset); 4652 else 4653 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset)); 4654 4655 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */ 4656 if (fd->collapse > 1) 4657 simt_maxlane = build_one_cst (unsigned_type_node); 4658 else if (safelen_int < omp_max_simt_vf ()) 4659 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int); 4660 tree vf 4661 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF, 4662 unsigned_type_node, 0); 4663 if (simt_maxlane) 4664 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane); 4665 vf = fold_convert (TREE_TYPE (step), vf); 4666 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); 4667 } 4668 4669 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); 4670 if (fd->collapse > 1) 4671 { 4672 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4673 { 4674 gsi_prev (&gsi); 4675 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1); 4676 gsi_next (&gsi); 4677 } 4678 else 4679 for (i = 0; i < fd->collapse; i++) 4680 { 4681 tree itype = TREE_TYPE (fd->loops[i].v); 4682 if (POINTER_TYPE_P (itype)) 4683 itype = signed_type_for (itype); 4684 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); 4685 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 4686 } 4687 } 4688 4689 /* Remove the GIMPLE_OMP_FOR statement. */ 4690 gsi_remove (&gsi, true); 4691 4692 if (!broken_loop) 4693 { 4694 /* Code to control the increment goes in the CONT_BB. 
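For a collapsed nest this advances the innermost variable and then, walking outwards, resets each variable and steps the next outer one once it has run past its bound, exactly as in the V1/V2/V3 pseudocode in the function comment above.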
*/ 4695 gsi = gsi_last_bb (cont_bb); 4696 stmt = gsi_stmt (gsi); 4697 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); 4698 4699 if (POINTER_TYPE_P (type)) 4700 t = fold_build_pointer_plus (fd->loop.v, step); 4701 else 4702 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 4703 expand_omp_build_assign (&gsi, fd->loop.v, t); 4704 4705 if (fd->collapse > 1) 4706 { 4707 i = fd->collapse - 1; 4708 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 4709 { 4710 t = fold_convert (sizetype, fd->loops[i].step); 4711 t = fold_build_pointer_plus (fd->loops[i].v, t); 4712 } 4713 else 4714 { 4715 t = fold_convert (TREE_TYPE (fd->loops[i].v), 4716 fd->loops[i].step); 4717 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 4718 fd->loops[i].v, t); 4719 } 4720 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 4721 4722 for (i = fd->collapse - 1; i > 0; i--) 4723 { 4724 tree itype = TREE_TYPE (fd->loops[i].v); 4725 tree itype2 = TREE_TYPE (fd->loops[i - 1].v); 4726 if (POINTER_TYPE_P (itype2)) 4727 itype2 = signed_type_for (itype2); 4728 t = fold_convert (itype2, fd->loops[i - 1].step); 4729 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, 4730 GSI_SAME_STMT); 4731 t = build3 (COND_EXPR, itype2, 4732 build2 (fd->loops[i].cond_code, boolean_type_node, 4733 fd->loops[i].v, 4734 fold_convert (itype, fd->loops[i].n2)), 4735 build_int_cst (itype2, 0), t); 4736 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v))) 4737 t = fold_build_pointer_plus (fd->loops[i - 1].v, t); 4738 else 4739 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t); 4740 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t); 4741 4742 t = fold_convert (itype, fd->loops[i].n1); 4743 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, 4744 GSI_SAME_STMT); 4745 t = build3 (COND_EXPR, itype, 4746 build2 (fd->loops[i].cond_code, boolean_type_node, 4747 fd->loops[i].v, 4748 fold_convert (itype, fd->loops[i].n2)), 4749 fd->loops[i].v, t); 4750 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 4751 } 4752 } 4753 4754 /* Remove GIMPLE_OMP_CONTINUE. */ 4755 gsi_remove (&gsi, true); 4756 } 4757 4758 /* Emit the condition in L1_BB. */ 4759 gsi = gsi_start_bb (l1_bb); 4760 4761 t = fold_convert (type, n2); 4762 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4763 false, GSI_CONTINUE_LINKING); 4764 tree v = fd->loop.v; 4765 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 4766 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 4767 false, GSI_CONTINUE_LINKING); 4768 t = build2 (fd->loop.cond_code, boolean_type_node, v, t); 4769 cond_stmt = gimple_build_cond_empty (t); 4770 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 4771 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, 4772 NULL, NULL) 4773 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p, 4774 NULL, NULL)) 4775 { 4776 gsi = gsi_for_stmt (cond_stmt); 4777 gimple_regimplify_operands (cond_stmt, &gsi); 4778 } 4779 4780 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */ 4781 if (is_simt) 4782 { 4783 gsi = gsi_start_bb (l2_bb); 4784 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step); 4785 if (POINTER_TYPE_P (type)) 4786 t = fold_build_pointer_plus (fd->loop.v, step); 4787 else 4788 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 4789 expand_omp_build_assign (&gsi, fd->loop.v, t); 4790 } 4791 4792 /* Remove GIMPLE_OMP_RETURN. */ 4793 gsi = gsi_last_bb (exit_bb); 4794 gsi_remove (&gsi, true); 4795 4796 /* Connect the new blocks. 
*/ 4797 remove_edge (FALLTHRU_EDGE (entry_bb)); 4798 4799 if (!broken_loop) 4800 { 4801 remove_edge (BRANCH_EDGE (entry_bb)); 4802 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); 4803 4804 e = BRANCH_EDGE (l1_bb); 4805 ne = FALLTHRU_EDGE (l1_bb); 4806 e->flags = EDGE_TRUE_VALUE; 4807 } 4808 else 4809 { 4810 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 4811 4812 ne = single_succ_edge (l1_bb); 4813 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); 4814 4815 } 4816 ne->flags = EDGE_FALSE_VALUE; 4817 e->probability = REG_BR_PROB_BASE * 7 / 8; 4818 ne->probability = REG_BR_PROB_BASE / 8; 4819 4820 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); 4821 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); 4822 4823 if (simt_maxlane) 4824 { 4825 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane, 4826 NULL_TREE, NULL_TREE); 4827 gsi = gsi_last_bb (entry_bb); 4828 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); 4829 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE); 4830 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; 4831 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8; 4832 BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8; 4833 l2_dom_bb = entry_bb; 4834 } 4835 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); 4836 4837 if (!broken_loop) 4838 { 4839 struct loop *loop = alloc_loop (); 4840 loop->header = l1_bb; 4841 loop->latch = cont_bb; 4842 add_loop (loop, l1_bb->loop_father); 4843 loop->safelen = safelen_int; 4844 if (simduid) 4845 { 4846 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); 4847 cfun->has_simduid_loops = true; 4848 } 4849 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize 4850 the loop. */ 4851 if ((flag_tree_loop_vectorize 4852 || (!global_options_set.x_flag_tree_loop_vectorize 4853 && !global_options_set.x_flag_tree_vectorize)) 4854 && flag_tree_loop_optimize 4855 && loop->safelen > 1) 4856 { 4857 loop->force_vectorize = true; 4858 cfun->has_force_vectorize_loops = true; 4859 } 4860 } 4861 else if (simduid) 4862 cfun->has_simduid_loops = true; 4863 } 4864 4865 /* Taskloop construct is represented after gimplification with 4866 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched 4867 in between them. This routine expands the outer GIMPLE_OMP_FOR, 4868 which should just compute all the needed loop temporaries 4869 for GIMPLE_OMP_TASK. */ 4870 4871 static void 4872 expand_omp_taskloop_for_outer (struct omp_region *region, 4873 struct omp_for_data *fd, 4874 gimple *inner_stmt) 4875 { 4876 tree type, bias = NULL_TREE; 4877 basic_block entry_bb, cont_bb, exit_bb; 4878 gimple_stmt_iterator gsi; 4879 gassign *assign_stmt; 4880 tree *counts = NULL; 4881 int i; 4882 4883 gcc_assert (inner_stmt); 4884 gcc_assert (region->cont); 4885 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK 4886 && gimple_omp_task_taskloop_p (inner_stmt)); 4887 type = TREE_TYPE (fd->loop.v); 4888 4889 /* See if we need to bias by LLONG_MIN. 
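The taskloop runtime interface (GOMP_taskloop{,_ull}, see below) works on long or unsigned long long iteration bounds. When a signed iteration variable has to go through the unsigned long long interface and its bounds may be negative, both bounds are biased by adding TYPE_MIN_VALUE of the type (LLONG_MIN, i.e. 2^63 when read as unsigned), which maps the signed range order-preservingly into the unsigned type; expand_omp_taskloop_for_inner adds the same bias again, and the two additions cancel modulo 2^64, recovering the original values.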
*/ 4890 if (fd->iter_type == long_long_unsigned_type_node 4891 && TREE_CODE (type) == INTEGER_TYPE 4892 && !TYPE_UNSIGNED (type)) 4893 { 4894 tree n1, n2; 4895 4896 if (fd->loop.cond_code == LT_EXPR) 4897 { 4898 n1 = fd->loop.n1; 4899 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 4900 } 4901 else 4902 { 4903 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 4904 n2 = fd->loop.n1; 4905 } 4906 if (TREE_CODE (n1) != INTEGER_CST 4907 || TREE_CODE (n2) != INTEGER_CST 4908 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 4909 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 4910 } 4911 4912 entry_bb = region->entry; 4913 cont_bb = region->cont; 4914 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 4915 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 4916 exit_bb = region->exit; 4917 4918 gsi = gsi_last_bb (entry_bb); 4919 gimple *for_stmt = gsi_stmt (gsi); 4920 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR); 4921 if (fd->collapse > 1) 4922 { 4923 int first_zero_iter = -1, dummy = -1; 4924 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL; 4925 4926 counts = XALLOCAVEC (tree, fd->collapse); 4927 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 4928 zero_iter_bb, first_zero_iter, 4929 dummy_bb, dummy, l2_dom_bb); 4930 4931 if (zero_iter_bb) 4932 { 4933 /* Some counts[i] vars might be uninitialized if 4934 some loop has zero iterations. But the body shouldn't 4935 be executed in that case, so just avoid uninit warnings. */ 4936 for (i = first_zero_iter; i < fd->collapse; i++) 4937 if (SSA_VAR_P (counts[i])) 4938 TREE_NO_WARNING (counts[i]) = 1; 4939 gsi_prev (&gsi); 4940 edge e = split_block (entry_bb, gsi_stmt (gsi)); 4941 entry_bb = e->dest; 4942 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU); 4943 gsi = gsi_last_bb (entry_bb); 4944 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 4945 get_immediate_dominator (CDI_DOMINATORS, 4946 zero_iter_bb)); 4947 } 4948 } 4949 4950 tree t0, t1; 4951 t1 = fd->loop.n2; 4952 t0 = fd->loop.n1; 4953 if (POINTER_TYPE_P (TREE_TYPE (t0)) 4954 && TYPE_PRECISION (TREE_TYPE (t0)) 4955 != TYPE_PRECISION (fd->iter_type)) 4956 { 4957 /* Avoid casting pointers to integer of a different size. */ 4958 tree itype = signed_type_for (type); 4959 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); 4960 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); 4961 } 4962 else 4963 { 4964 t1 = fold_convert (fd->iter_type, t1); 4965 t0 = fold_convert (fd->iter_type, t0); 4966 } 4967 if (bias) 4968 { 4969 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); 4970 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); 4971 } 4972 4973 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt), 4974 OMP_CLAUSE__LOOPTEMP_); 4975 gcc_assert (innerc); 4976 tree startvar = OMP_CLAUSE_DECL (innerc); 4977 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); 4978 gcc_assert (innerc); 4979 tree endvar = OMP_CLAUSE_DECL (innerc); 4980 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST) 4981 { 4982 gcc_assert (innerc); 4983 for (i = 1; i < fd->collapse; i++) 4984 { 4985 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4986 OMP_CLAUSE__LOOPTEMP_); 4987 gcc_assert (innerc); 4988 } 4989 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4990 OMP_CLAUSE__LOOPTEMP_); 4991 if (innerc) 4992 { 4993 /* If needed (inner taskloop has lastprivate clause), propagate 4994 down the total number of iterations. 
*/ 4995 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false, 4996 NULL_TREE, false, 4997 GSI_CONTINUE_LINKING); 4998 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 4999 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5000 } 5001 } 5002 5003 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false, 5004 GSI_CONTINUE_LINKING); 5005 assign_stmt = gimple_build_assign (startvar, t0); 5006 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5007 5008 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false, 5009 GSI_CONTINUE_LINKING); 5010 assign_stmt = gimple_build_assign (endvar, t1); 5011 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5012 if (fd->collapse > 1) 5013 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 5014 5015 /* Remove the GIMPLE_OMP_FOR statement. */ 5016 gsi = gsi_for_stmt (for_stmt); 5017 gsi_remove (&gsi, true); 5018 5019 gsi = gsi_last_bb (cont_bb); 5020 gsi_remove (&gsi, true); 5021 5022 gsi = gsi_last_bb (exit_bb); 5023 gsi_remove (&gsi, true); 5024 5025 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE; 5026 remove_edge (BRANCH_EDGE (entry_bb)); 5027 FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE; 5028 remove_edge (BRANCH_EDGE (cont_bb)); 5029 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb); 5030 set_immediate_dominator (CDI_DOMINATORS, region->entry, 5031 recompute_dominator (CDI_DOMINATORS, region->entry)); 5032 } 5033 5034 /* Taskloop construct is represented after gimplification with 5035 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched 5036 in between them. This routine expands the inner GIMPLE_OMP_FOR. 5037 GOMP_taskloop{,_ull} function arranges for each task to be given just 5038 a single range of iterations. */ 5039 5040 static void 5041 expand_omp_taskloop_for_inner (struct omp_region *region, 5042 struct omp_for_data *fd, 5043 gimple *inner_stmt) 5044 { 5045 tree e, t, type, itype, vmain, vback, bias = NULL_TREE; 5046 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL; 5047 basic_block fin_bb; 5048 gimple_stmt_iterator gsi; 5049 edge ep; 5050 bool broken_loop = region->cont == NULL; 5051 tree *counts = NULL; 5052 tree n1, n2, step; 5053 5054 itype = type = TREE_TYPE (fd->loop.v); 5055 if (POINTER_TYPE_P (type)) 5056 itype = signed_type_for (type); 5057 5058 /* See if we need to bias by LLONG_MIN. 
*/ 5059 if (fd->iter_type == long_long_unsigned_type_node 5060 && TREE_CODE (type) == INTEGER_TYPE 5061 && !TYPE_UNSIGNED (type)) 5062 { 5063 tree n1, n2; 5064 5065 if (fd->loop.cond_code == LT_EXPR) 5066 { 5067 n1 = fd->loop.n1; 5068 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 5069 } 5070 else 5071 { 5072 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 5073 n2 = fd->loop.n1; 5074 } 5075 if (TREE_CODE (n1) != INTEGER_CST 5076 || TREE_CODE (n2) != INTEGER_CST 5077 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 5078 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 5079 } 5080 5081 entry_bb = region->entry; 5082 cont_bb = region->cont; 5083 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 5084 fin_bb = BRANCH_EDGE (entry_bb)->dest; 5085 gcc_assert (broken_loop 5086 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); 5087 body_bb = FALLTHRU_EDGE (entry_bb)->dest; 5088 if (!broken_loop) 5089 { 5090 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); 5091 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 5092 } 5093 exit_bb = region->exit; 5094 5095 /* Iteration space partitioning goes in ENTRY_BB. */ 5096 gsi = gsi_last_bb (entry_bb); 5097 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 5098 5099 if (fd->collapse > 1) 5100 { 5101 int first_zero_iter = -1, dummy = -1; 5102 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 5103 5104 counts = XALLOCAVEC (tree, fd->collapse); 5105 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 5106 fin_bb, first_zero_iter, 5107 dummy_bb, dummy, l2_dom_bb); 5108 t = NULL_TREE; 5109 } 5110 else 5111 t = integer_one_node; 5112 5113 step = fd->loop.step; 5114 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5115 OMP_CLAUSE__LOOPTEMP_); 5116 gcc_assert (innerc); 5117 n1 = OMP_CLAUSE_DECL (innerc); 5118 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); 5119 gcc_assert (innerc); 5120 n2 = OMP_CLAUSE_DECL (innerc); 5121 if (bias) 5122 { 5123 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias); 5124 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias); 5125 } 5126 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 5127 true, NULL_TREE, true, GSI_SAME_STMT); 5128 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 5129 true, NULL_TREE, true, GSI_SAME_STMT); 5130 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 5131 true, NULL_TREE, true, GSI_SAME_STMT); 5132 5133 tree startvar = fd->loop.v; 5134 tree endvar = NULL_TREE; 5135 5136 if (gimple_omp_for_combined_p (fd->for_stmt)) 5137 { 5138 tree clauses = gimple_omp_for_clauses (inner_stmt); 5139 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 5140 gcc_assert (innerc); 5141 startvar = OMP_CLAUSE_DECL (innerc); 5142 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 5143 OMP_CLAUSE__LOOPTEMP_); 5144 gcc_assert (innerc); 5145 endvar = OMP_CLAUSE_DECL (innerc); 5146 } 5147 t = fold_convert (TREE_TYPE (startvar), n1); 5148 t = force_gimple_operand_gsi (&gsi, t, 5149 DECL_P (startvar) 5150 && TREE_ADDRESSABLE (startvar), 5151 NULL_TREE, false, GSI_CONTINUE_LINKING); 5152 gimple *assign_stmt = gimple_build_assign (startvar, t); 5153 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5154 5155 t = fold_convert (TREE_TYPE (startvar), n2); 5156 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 5157 false, GSI_CONTINUE_LINKING); 5158 if (endvar) 5159 { 5160 assign_stmt = gimple_build_assign (endvar, e); 5161 gsi_insert_after (&gsi, 
assign_stmt, GSI_CONTINUE_LINKING); 5162 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 5163 assign_stmt = gimple_build_assign (fd->loop.v, e); 5164 else 5165 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 5166 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5167 } 5168 if (fd->collapse > 1) 5169 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 5170 5171 if (!broken_loop) 5172 { 5173 /* The code controlling the sequential loop replaces the 5174 GIMPLE_OMP_CONTINUE. */ 5175 gsi = gsi_last_bb (cont_bb); 5176 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 5177 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 5178 vmain = gimple_omp_continue_control_use (cont_stmt); 5179 vback = gimple_omp_continue_control_def (cont_stmt); 5180 5181 if (!gimple_omp_for_combined_p (fd->for_stmt)) 5182 { 5183 if (POINTER_TYPE_P (type)) 5184 t = fold_build_pointer_plus (vmain, step); 5185 else 5186 t = fold_build2 (PLUS_EXPR, type, vmain, step); 5187 t = force_gimple_operand_gsi (&gsi, t, 5188 DECL_P (vback) 5189 && TREE_ADDRESSABLE (vback), 5190 NULL_TREE, true, GSI_SAME_STMT); 5191 assign_stmt = gimple_build_assign (vback, t); 5192 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 5193 5194 t = build2 (fd->loop.cond_code, boolean_type_node, 5195 DECL_P (vback) && TREE_ADDRESSABLE (vback) 5196 ? t : vback, e); 5197 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 5198 } 5199 5200 /* Remove the GIMPLE_OMP_CONTINUE statement. */ 5201 gsi_remove (&gsi, true); 5202 5203 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 5204 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 5205 } 5206 5207 /* Remove the GIMPLE_OMP_FOR statement. */ 5208 gsi = gsi_for_stmt (fd->for_stmt); 5209 gsi_remove (&gsi, true); 5210 5211 /* Remove the GIMPLE_OMP_RETURN statement. */ 5212 gsi = gsi_last_bb (exit_bb); 5213 gsi_remove (&gsi, true); 5214 5215 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE; 5216 if (!broken_loop) 5217 remove_edge (BRANCH_EDGE (entry_bb)); 5218 else 5219 { 5220 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb)); 5221 region->outer->cont = NULL; 5222 } 5223 5224 /* Connect all the blocks. */ 5225 if (!broken_loop) 5226 { 5227 ep = find_edge (cont_bb, body_bb); 5228 if (gimple_omp_for_combined_p (fd->for_stmt)) 5229 { 5230 remove_edge (ep); 5231 ep = NULL; 5232 } 5233 else if (fd->collapse > 1) 5234 { 5235 remove_edge (ep); 5236 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 5237 } 5238 else 5239 ep->flags = EDGE_TRUE_VALUE; 5240 find_edge (cont_bb, fin_bb)->flags 5241 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 5242 } 5243 5244 set_immediate_dominator (CDI_DOMINATORS, body_bb, 5245 recompute_dominator (CDI_DOMINATORS, body_bb)); 5246 if (!broken_loop) 5247 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 5248 recompute_dominator (CDI_DOMINATORS, fin_bb)); 5249 5250 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) 5251 { 5252 struct loop *loop = alloc_loop (); 5253 loop->header = body_bb; 5254 if (collapse_bb == NULL) 5255 loop->latch = cont_bb; 5256 add_loop (loop, body_bb->loop_father); 5257 } 5258 } 5259 5260 /* A subroutine of expand_omp_for. Generate code for an OpenACC 5261 partitioned loop. The lowering here is abstracted, in that the 5262 loop parameters are passed through internal functions, which are 5263 further lowered by oacc_device_lower, once we get to the target 5264 compiler. 
The loop is of the form: 5265 5266 for (V = B; V LTGT E; V += S) {BODY} 5267 5268 where LTGT is < or >. We may have a specified chunking size, CHUNKING 5269 (constant 0 for no chunking) and we will have a GWV partitioning 5270 mask, specifying dimensions over which the loop is to be 5271 partitioned (see note below). We generate code that looks like 5272 (this ignores tiling): 5273 5274 <entry_bb> [incoming FALL->body, BRANCH->exit] 5275 typedef signedintify (typeof (V)) T; // underlying signed integral type 5276 T range = E - B; 5277 T chunk_no = 0; 5278 T DIR = LTGT == '<' ? +1 : -1; 5279 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV); 5280 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); 5281 5282 <head_bb> [created by splitting end of entry_bb] 5283 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no); 5284 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); 5285 if (!(offset LTGT bound)) goto bottom_bb; 5286 5287 <body_bb> [incoming] 5288 V = B + offset; 5289 {BODY} 5290 5291 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb] 5292 offset += step; 5293 if (offset LTGT bound) goto body_bb; [*] 5294 5295 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb 5296 chunk_no++; 5297 if (chunk < chunk_max) goto head_bb; 5298 5299 <exit_bb> [incoming] 5300 V = B + ((range -/+ 1) / S +/- 1) * S [*] 5301 5302 [*] Needed if V live at end of loop. */ 5303 5304 static void 5305 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) 5306 { 5307 tree v = fd->loop.v; 5308 enum tree_code cond_code = fd->loop.cond_code; 5309 enum tree_code plus_code = PLUS_EXPR; 5310 5311 tree chunk_size = integer_minus_one_node; 5312 tree gwv = integer_zero_node; 5313 tree iter_type = TREE_TYPE (v); 5314 tree diff_type = iter_type; 5315 tree plus_type = iter_type; 5316 struct oacc_collapse *counts = NULL; 5317 5318 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt) 5319 == GF_OMP_FOR_KIND_OACC_LOOP); 5320 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt)); 5321 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR); 5322 5323 if (POINTER_TYPE_P (iter_type)) 5324 { 5325 plus_code = POINTER_PLUS_EXPR; 5326 plus_type = sizetype; 5327 } 5328 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) 5329 diff_type = signed_type_for (diff_type); 5330 5331 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */ 5332 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */ 5333 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */ 5334 basic_block bottom_bb = NULL; 5335 5336 /* entry_bb has two sucessors; the branch edge is to the exit 5337 block, fallthrough edge to body. */ 5338 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2 5339 && BRANCH_EDGE (entry_bb)->dest == exit_bb); 5340 5341 /* If cont_bb non-NULL, it has 2 successors. The branch successor is 5342 body_bb, or to a block whose only successor is the body_bb. Its 5343 fallthrough successor is the final block (same as the branch 5344 successor of the entry_bb). */ 5345 if (cont_bb) 5346 { 5347 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest; 5348 basic_block bed = BRANCH_EDGE (cont_bb)->dest; 5349 5350 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb); 5351 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb); 5352 } 5353 else 5354 gcc_assert (!gimple_in_ssa_p (cfun)); 5355 5356 /* The exit block only has entry_bb and cont_bb as predecessors. 
*/ 5357 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL)); 5358 5359 tree chunk_no; 5360 tree chunk_max = NULL_TREE; 5361 tree bound, offset; 5362 tree step = create_tmp_var (diff_type, ".step"); 5363 bool up = cond_code == LT_EXPR; 5364 tree dir = build_int_cst (diff_type, up ? +1 : -1); 5365 bool chunking = !gimple_in_ssa_p (cfun); 5366 bool negating; 5367 5368 /* Tiling vars. */ 5369 tree tile_size = NULL_TREE; 5370 tree element_s = NULL_TREE; 5371 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE; 5372 basic_block elem_body_bb = NULL; 5373 basic_block elem_cont_bb = NULL; 5374 5375 /* SSA instances. */ 5376 tree offset_incr = NULL_TREE; 5377 tree offset_init = NULL_TREE; 5378 5379 gimple_stmt_iterator gsi; 5380 gassign *ass; 5381 gcall *call; 5382 gimple *stmt; 5383 tree expr; 5384 location_t loc; 5385 edge split, be, fte; 5386 5387 /* Split the end of entry_bb to create head_bb. */ 5388 split = split_block (entry_bb, last_stmt (entry_bb)); 5389 basic_block head_bb = split->dest; 5390 entry_bb = split->src; 5391 5392 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */ 5393 gsi = gsi_last_bb (entry_bb); 5394 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi)); 5395 loc = gimple_location (for_stmt); 5396 5397 if (gimple_in_ssa_p (cfun)) 5398 { 5399 offset_init = gimple_omp_for_index (for_stmt, 0); 5400 gcc_assert (integer_zerop (fd->loop.n1)); 5401 /* The SSA parallelizer does gang parallelism. */ 5402 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG)); 5403 } 5404 5405 if (fd->collapse > 1 || fd->tiling) 5406 { 5407 gcc_assert (!gimple_in_ssa_p (cfun) && up); 5408 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse); 5409 tree total = expand_oacc_collapse_init (fd, &gsi, counts, 5410 TREE_TYPE (fd->loop.n2), loc); 5411 5412 if (SSA_VAR_P (fd->loop.n2)) 5413 { 5414 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE, 5415 true, GSI_SAME_STMT); 5416 ass = gimple_build_assign (fd->loop.n2, total); 5417 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5418 } 5419 } 5420 5421 tree b = fd->loop.n1; 5422 tree e = fd->loop.n2; 5423 tree s = fd->loop.step; 5424 5425 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT); 5426 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT); 5427 5428 /* Convert the step, avoiding possible unsigned->signed overflow. */ 5429 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); 5430 if (negating) 5431 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); 5432 s = fold_convert (diff_type, s); 5433 if (negating) 5434 s = fold_build1 (NEGATE_EXPR, diff_type, s); 5435 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT); 5436 5437 if (!chunking) 5438 chunk_size = integer_zero_node; 5439 expr = fold_convert (diff_type, chunk_size); 5440 chunk_size = force_gimple_operand_gsi (&gsi, expr, true, 5441 NULL_TREE, true, GSI_SAME_STMT); 5442 5443 if (fd->tiling) 5444 { 5445 /* Determine the tile size and element step, 5446 modify the outer loop step size. 
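	 An illustrative sketch (not from the original sources): for

	   #pragma acc loop tile (8, 8)

	 over two collapsed loops, .tile_size becomes 8 * 8 = 64, .element_s
	 keeps the original step S for the per-element loop inside a tile,
	 and the outer step becomes S * 64 so that each outer iteration
	 advances by exactly one tile.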
*/ 5447 tile_size = create_tmp_var (diff_type, ".tile_size"); 5448 expr = build_int_cst (diff_type, 1); 5449 for (int ix = 0; ix < fd->collapse; ix++) 5450 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr); 5451 expr = force_gimple_operand_gsi (&gsi, expr, true, 5452 NULL_TREE, true, GSI_SAME_STMT); 5453 ass = gimple_build_assign (tile_size, expr); 5454 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5455 5456 element_s = create_tmp_var (diff_type, ".element_s"); 5457 ass = gimple_build_assign (element_s, s); 5458 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5459 5460 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size); 5461 s = force_gimple_operand_gsi (&gsi, expr, true, 5462 NULL_TREE, true, GSI_SAME_STMT); 5463 } 5464 5465 /* Determine the range, avoiding possible unsigned->signed overflow. */ 5466 negating = !up && TYPE_UNSIGNED (iter_type); 5467 expr = fold_build2 (MINUS_EXPR, plus_type, 5468 fold_convert (plus_type, negating ? b : e), 5469 fold_convert (plus_type, negating ? e : b)); 5470 expr = fold_convert (diff_type, expr); 5471 if (negating) 5472 expr = fold_build1 (NEGATE_EXPR, diff_type, expr); 5473 tree range = force_gimple_operand_gsi (&gsi, expr, true, 5474 NULL_TREE, true, GSI_SAME_STMT); 5475 5476 chunk_no = build_int_cst (diff_type, 0); 5477 if (chunking) 5478 { 5479 gcc_assert (!gimple_in_ssa_p (cfun)); 5480 5481 expr = chunk_no; 5482 chunk_max = create_tmp_var (diff_type, ".chunk_max"); 5483 chunk_no = create_tmp_var (diff_type, ".chunk_no"); 5484 5485 ass = gimple_build_assign (chunk_no, expr); 5486 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5487 5488 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, 5489 build_int_cst (integer_type_node, 5490 IFN_GOACC_LOOP_CHUNKS), 5491 dir, range, s, chunk_size, gwv); 5492 gimple_call_set_lhs (call, chunk_max); 5493 gimple_set_location (call, loc); 5494 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 5495 } 5496 else 5497 chunk_size = chunk_no; 5498 5499 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, 5500 build_int_cst (integer_type_node, 5501 IFN_GOACC_LOOP_STEP), 5502 dir, range, s, chunk_size, gwv); 5503 gimple_call_set_lhs (call, step); 5504 gimple_set_location (call, loc); 5505 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 5506 5507 /* Remove the GIMPLE_OMP_FOR. */ 5508 gsi_remove (&gsi, true); 5509 5510 /* Fixup edges from head_bb. */ 5511 be = BRANCH_EDGE (head_bb); 5512 fte = FALLTHRU_EDGE (head_bb); 5513 be->flags |= EDGE_FALSE_VALUE; 5514 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; 5515 5516 basic_block body_bb = fte->dest; 5517 5518 if (gimple_in_ssa_p (cfun)) 5519 { 5520 gsi = gsi_last_bb (cont_bb); 5521 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 5522 5523 offset = gimple_omp_continue_control_use (cont_stmt); 5524 offset_incr = gimple_omp_continue_control_def (cont_stmt); 5525 } 5526 else 5527 { 5528 offset = create_tmp_var (diff_type, ".offset"); 5529 offset_init = offset_incr = offset; 5530 } 5531 bound = create_tmp_var (TREE_TYPE (offset), ".bound"); 5532 5533 /* Loop offset & bound go into head_bb. 
*/ 5534 gsi = gsi_start_bb (head_bb); 5535 5536 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, 5537 build_int_cst (integer_type_node, 5538 IFN_GOACC_LOOP_OFFSET), 5539 dir, range, s, 5540 chunk_size, gwv, chunk_no); 5541 gimple_call_set_lhs (call, offset_init); 5542 gimple_set_location (call, loc); 5543 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); 5544 5545 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, 5546 build_int_cst (integer_type_node, 5547 IFN_GOACC_LOOP_BOUND), 5548 dir, range, s, 5549 chunk_size, gwv, offset_init); 5550 gimple_call_set_lhs (call, bound); 5551 gimple_set_location (call, loc); 5552 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); 5553 5554 expr = build2 (cond_code, boolean_type_node, offset_init, bound); 5555 gsi_insert_after (&gsi, gimple_build_cond_empty (expr), 5556 GSI_CONTINUE_LINKING); 5557 5558 /* V assignment goes into body_bb. */ 5559 if (!gimple_in_ssa_p (cfun)) 5560 { 5561 gsi = gsi_start_bb (body_bb); 5562 5563 expr = build2 (plus_code, iter_type, b, 5564 fold_convert (plus_type, offset)); 5565 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 5566 true, GSI_SAME_STMT); 5567 ass = gimple_build_assign (v, expr); 5568 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5569 5570 if (fd->collapse > 1 || fd->tiling) 5571 expand_oacc_collapse_vars (fd, false, &gsi, counts, v); 5572 5573 if (fd->tiling) 5574 { 5575 /* Determine the range of the element loop -- usually simply 5576 the tile_size, but could be smaller if the final 5577 iteration of the outer loop is a partial tile. */ 5578 tree e_range = create_tmp_var (diff_type, ".e_range"); 5579 5580 expr = build2 (MIN_EXPR, diff_type, 5581 build2 (MINUS_EXPR, diff_type, bound, offset), 5582 build2 (MULT_EXPR, diff_type, tile_size, 5583 element_s)); 5584 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 5585 true, GSI_SAME_STMT); 5586 ass = gimple_build_assign (e_range, expr); 5587 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5588 5589 /* Determine bound, offset & step of inner loop. */ 5590 e_bound = create_tmp_var (diff_type, ".e_bound"); 5591 e_offset = create_tmp_var (diff_type, ".e_offset"); 5592 e_step = create_tmp_var (diff_type, ".e_step"); 5593 5594 /* Mark these as element loops. */ 5595 tree t, e_gwv = integer_minus_one_node; 5596 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */ 5597 5598 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET); 5599 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range, 5600 element_s, chunk, e_gwv, chunk); 5601 gimple_call_set_lhs (call, e_offset); 5602 gimple_set_location (call, loc); 5603 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 5604 5605 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND); 5606 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range, 5607 element_s, chunk, e_gwv, e_offset); 5608 gimple_call_set_lhs (call, e_bound); 5609 gimple_set_location (call, loc); 5610 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 5611 5612 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP); 5613 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range, 5614 element_s, chunk, e_gwv); 5615 gimple_call_set_lhs (call, e_step); 5616 gimple_set_location (call, loc); 5617 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 5618 5619 /* Add test and split block. 
*/ 5620 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound); 5621 stmt = gimple_build_cond_empty (expr); 5622 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 5623 split = split_block (body_bb, stmt); 5624 elem_body_bb = split->dest; 5625 if (cont_bb == body_bb) 5626 cont_bb = elem_body_bb; 5627 body_bb = split->src; 5628 5629 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; 5630 5631 /* Add a dummy exit for the tiled block when cont_bb is missing. */ 5632 if (cont_bb == NULL) 5633 { 5634 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE); 5635 e->probability = PROB_EVEN; 5636 split->probability = PROB_EVEN; 5637 } 5638 5639 /* Initialize the user's loop vars. */ 5640 gsi = gsi_start_bb (elem_body_bb); 5641 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset); 5642 } 5643 } 5644 5645 /* Loop increment goes into cont_bb. If this is not a loop, we 5646 will have spawned threads as if it was, and each one will 5647 execute one iteration. The specification is not explicit about 5648 whether such constructs are ill-formed or not, and they can 5649 occur, especially when noreturn routines are involved. */ 5650 if (cont_bb) 5651 { 5652 gsi = gsi_last_bb (cont_bb); 5653 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 5654 loc = gimple_location (cont_stmt); 5655 5656 if (fd->tiling) 5657 { 5658 /* Insert element loop increment and test. */ 5659 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step); 5660 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 5661 true, GSI_SAME_STMT); 5662 ass = gimple_build_assign (e_offset, expr); 5663 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5664 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound); 5665 5666 stmt = gimple_build_cond_empty (expr); 5667 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 5668 split = split_block (cont_bb, stmt); 5669 elem_cont_bb = split->src; 5670 cont_bb = split->dest; 5671 5672 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 5673 make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE); 5674 5675 make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE); 5676 5677 gsi = gsi_for_stmt (cont_stmt); 5678 } 5679 5680 /* Increment offset. */ 5681 if (gimple_in_ssa_p (cfun)) 5682 expr = build2 (plus_code, iter_type, offset, 5683 fold_convert (plus_type, step)); 5684 else 5685 expr = build2 (PLUS_EXPR, diff_type, offset, step); 5686 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 5687 true, GSI_SAME_STMT); 5688 ass = gimple_build_assign (offset_incr, expr); 5689 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5690 expr = build2 (cond_code, boolean_type_node, offset_incr, bound); 5691 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT); 5692 5693 /* Remove the GIMPLE_OMP_CONTINUE. */ 5694 gsi_remove (&gsi, true); 5695 5696 /* Fixup edges from cont_bb. */ 5697 be = BRANCH_EDGE (cont_bb); 5698 fte = FALLTHRU_EDGE (cont_bb); 5699 be->flags |= EDGE_TRUE_VALUE; 5700 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 5701 5702 if (chunking) 5703 { 5704 /* Split the beginning of exit_bb to make bottom_bb. We 5705 need to insert a nop at the start, because splitting is 5706 after a stmt, not before. */ 5707 gsi = gsi_start_bb (exit_bb); 5708 stmt = gimple_build_nop (); 5709 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 5710 split = split_block (exit_bb, stmt); 5711 bottom_bb = split->src; 5712 exit_bb = split->dest; 5713 gsi = gsi_last_bb (bottom_bb); 5714 5715 /* Chunk increment and test goes into bottom_bb. 
*/ 5716 expr = build2 (PLUS_EXPR, diff_type, chunk_no, 5717 build_int_cst (diff_type, 1)); 5718 ass = gimple_build_assign (chunk_no, expr); 5719 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING); 5720 5721 /* Chunk test at end of bottom_bb. */ 5722 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max); 5723 gsi_insert_after (&gsi, gimple_build_cond_empty (expr), 5724 GSI_CONTINUE_LINKING); 5725 5726 /* Fixup edges from bottom_bb. */ 5727 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 5728 make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE); 5729 } 5730 } 5731 5732 gsi = gsi_last_bb (exit_bb); 5733 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 5734 loc = gimple_location (gsi_stmt (gsi)); 5735 5736 if (!gimple_in_ssa_p (cfun)) 5737 { 5738 /* Insert the final value of V, in case it is live. This is the 5739 value for the only thread that survives past the join. */ 5740 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); 5741 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); 5742 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); 5743 expr = fold_build2 (MULT_EXPR, diff_type, expr, s); 5744 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr)); 5745 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 5746 true, GSI_SAME_STMT); 5747 ass = gimple_build_assign (v, expr); 5748 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 5749 } 5750 5751 /* Remove the OMP_RETURN. */ 5752 gsi_remove (&gsi, true); 5753 5754 if (cont_bb) 5755 { 5756 /* We now have one, two or three nested loops. Update the loop 5757 structures. */ 5758 struct loop *parent = entry_bb->loop_father; 5759 struct loop *body = body_bb->loop_father; 5760 5761 if (chunking) 5762 { 5763 struct loop *chunk_loop = alloc_loop (); 5764 chunk_loop->header = head_bb; 5765 chunk_loop->latch = bottom_bb; 5766 add_loop (chunk_loop, parent); 5767 parent = chunk_loop; 5768 } 5769 else if (parent != body) 5770 { 5771 gcc_assert (body->header == body_bb); 5772 gcc_assert (body->latch == cont_bb 5773 || single_pred (body->latch) == cont_bb); 5774 parent = NULL; 5775 } 5776 5777 if (parent) 5778 { 5779 struct loop *body_loop = alloc_loop (); 5780 body_loop->header = body_bb; 5781 body_loop->latch = cont_bb; 5782 add_loop (body_loop, parent); 5783 5784 if (fd->tiling) 5785 { 5786 /* Insert tiling's element loop. */ 5787 struct loop *inner_loop = alloc_loop (); 5788 inner_loop->header = elem_body_bb; 5789 inner_loop->latch = elem_cont_bb; 5790 add_loop (inner_loop, body_loop); 5791 } 5792 } 5793 } 5794 } 5795 5796 /* Expand the OMP loop defined by REGION. 
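   For orientation, on the generic (library-driven) path selected below, a
   worksharing loop such as

     #pragma omp for schedule (dynamic, 4)
     for (i = lb; i < ub; i++) body (i);

   ends up driving the libgomp iteration API in roughly this shape -- a
   hand-written sketch of the generated control flow, not literal output
   of this pass:

     if (GOMP_loop_dynamic_start (lb, ub, 1, 4, &istart, &iend))
       do
	 for (i = istart; i < iend; i++) body (i);
       while (GOMP_loop_dynamic_next (&istart, &iend));
     GOMP_loop_end ();

   The static, guided, runtime and doacross variants differ only in which
   BUILT_IN_GOMP_LOOP_* entry points are selected by the code below.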
*/ 5797 5798 static void 5799 expand_omp_for (struct omp_region *region, gimple *inner_stmt) 5800 { 5801 struct omp_for_data fd; 5802 struct omp_for_data_loop *loops; 5803 5804 loops 5805 = (struct omp_for_data_loop *) 5806 alloca (gimple_omp_for_collapse (last_stmt (region->entry)) 5807 * sizeof (struct omp_for_data_loop)); 5808 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)), 5809 &fd, loops); 5810 region->sched_kind = fd.sched_kind; 5811 region->sched_modifiers = fd.sched_modifiers; 5812 5813 gcc_assert (EDGE_COUNT (region->entry->succs) == 2); 5814 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; 5815 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; 5816 if (region->cont) 5817 { 5818 gcc_assert (EDGE_COUNT (region->cont->succs) == 2); 5819 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; 5820 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; 5821 } 5822 else 5823 /* If there isn't a continue then this is a degerate case where 5824 the introduction of abnormal edges during lowering will prevent 5825 original loops from being detected. Fix that up. */ 5826 loops_state_set (LOOPS_NEED_FIXUP); 5827 5828 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD) 5829 expand_omp_simd (region, &fd); 5830 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR) 5831 expand_cilk_for (region, &fd); 5832 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) 5833 { 5834 gcc_assert (!inner_stmt); 5835 expand_oacc_for (region, &fd); 5836 } 5837 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP) 5838 { 5839 if (gimple_omp_for_combined_into_p (fd.for_stmt)) 5840 expand_omp_taskloop_for_inner (region, &fd, inner_stmt); 5841 else 5842 expand_omp_taskloop_for_outer (region, &fd, inner_stmt); 5843 } 5844 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC 5845 && !fd.have_ordered) 5846 { 5847 if (fd.chunk_size == NULL) 5848 expand_omp_for_static_nochunk (region, &fd, inner_stmt); 5849 else 5850 expand_omp_for_static_chunk (region, &fd, inner_stmt); 5851 } 5852 else 5853 { 5854 int fn_index, start_ix, next_ix; 5855 5856 gcc_assert (gimple_omp_for_kind (fd.for_stmt) 5857 == GF_OMP_FOR_KIND_FOR); 5858 if (fd.chunk_size == NULL 5859 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) 5860 fd.chunk_size = integer_zero_node; 5861 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); 5862 switch (fd.sched_kind) 5863 { 5864 case OMP_CLAUSE_SCHEDULE_RUNTIME: 5865 fn_index = 3; 5866 break; 5867 case OMP_CLAUSE_SCHEDULE_DYNAMIC: 5868 case OMP_CLAUSE_SCHEDULE_GUIDED: 5869 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) 5870 && !fd.ordered 5871 && !fd.have_ordered) 5872 { 5873 fn_index = 3 + fd.sched_kind; 5874 break; 5875 } 5876 /* FALLTHRU */ 5877 default: 5878 fn_index = fd.sched_kind; 5879 break; 5880 } 5881 if (!fd.ordered) 5882 fn_index += fd.have_ordered * 6; 5883 if (fd.ordered) 5884 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index; 5885 else 5886 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index; 5887 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index; 5888 if (fd.iter_type == long_long_unsigned_type_node) 5889 { 5890 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START 5891 - (int)BUILT_IN_GOMP_LOOP_STATIC_START); 5892 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT 5893 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT); 5894 } 5895 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix, 5896 (enum built_in_function) next_ix, inner_stmt); 5897 } 5898 5899 
if (gimple_in_ssa_p (cfun)) 5900 update_ssa (TODO_update_ssa_only_virtuals); 5901 } 5902 5903 /* Expand code for an OpenMP sections directive. In pseudo code, we generate 5904 5905 v = GOMP_sections_start (n); 5906 L0: 5907 switch (v) 5908 { 5909 case 0: 5910 goto L2; 5911 case 1: 5912 section 1; 5913 goto L1; 5914 case 2: 5915 ... 5916 case n: 5917 ... 5918 default: 5919 abort (); 5920 } 5921 L1: 5922 v = GOMP_sections_next (); 5923 goto L0; 5924 L2: 5925 reduction; 5926 5927 If this is a combined parallel sections, replace the call to 5928 GOMP_sections_start with call to GOMP_sections_next. */ 5929 5930 static void 5931 expand_omp_sections (struct omp_region *region) 5932 { 5933 tree t, u, vin = NULL, vmain, vnext, l2; 5934 unsigned len; 5935 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb; 5936 gimple_stmt_iterator si, switch_si; 5937 gomp_sections *sections_stmt; 5938 gimple *stmt; 5939 gomp_continue *cont; 5940 edge_iterator ei; 5941 edge e; 5942 struct omp_region *inner; 5943 unsigned i, casei; 5944 bool exit_reachable = region->cont != NULL; 5945 5946 gcc_assert (region->exit != NULL); 5947 entry_bb = region->entry; 5948 l0_bb = single_succ (entry_bb); 5949 l1_bb = region->cont; 5950 l2_bb = region->exit; 5951 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb) 5952 l2 = gimple_block_label (l2_bb); 5953 else 5954 { 5955 /* This can happen if there are reductions. */ 5956 len = EDGE_COUNT (l0_bb->succs); 5957 gcc_assert (len > 0); 5958 e = EDGE_SUCC (l0_bb, len - 1); 5959 si = gsi_last_bb (e->dest); 5960 l2 = NULL_TREE; 5961 if (gsi_end_p (si) 5962 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) 5963 l2 = gimple_block_label (e->dest); 5964 else 5965 FOR_EACH_EDGE (e, ei, l0_bb->succs) 5966 { 5967 si = gsi_last_bb (e->dest); 5968 if (gsi_end_p (si) 5969 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) 5970 { 5971 l2 = gimple_block_label (e->dest); 5972 break; 5973 } 5974 } 5975 } 5976 if (exit_reachable) 5977 default_bb = create_empty_bb (l1_bb->prev_bb); 5978 else 5979 default_bb = create_empty_bb (l0_bb); 5980 5981 /* We will build a switch() with enough cases for all the 5982 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work 5983 and a default case to abort if something goes wrong. */ 5984 len = EDGE_COUNT (l0_bb->succs); 5985 5986 /* Use vec::quick_push on label_vec throughout, since we know the size 5987 in advance. */ 5988 auto_vec<tree> label_vec (len); 5989 5990 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the 5991 GIMPLE_OMP_SECTIONS statement. */ 5992 si = gsi_last_bb (entry_bb); 5993 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si)); 5994 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS); 5995 vin = gimple_omp_sections_control (sections_stmt); 5996 if (!is_combined_parallel (region)) 5997 { 5998 /* If we are not inside a combined parallel+sections region, 5999 call GOMP_sections_start. */ 6000 t = build_int_cst (unsigned_type_node, len - 1); 6001 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START); 6002 stmt = gimple_build_call (u, 1, t); 6003 } 6004 else 6005 { 6006 /* Otherwise, call GOMP_sections_next. */ 6007 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); 6008 stmt = gimple_build_call (u, 0); 6009 } 6010 gimple_call_set_lhs (stmt, vin); 6011 gsi_insert_after (&si, stmt, GSI_SAME_STMT); 6012 gsi_remove (&si, true); 6013 6014 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in 6015 L0_BB. 
*/ 6016 switch_si = gsi_last_bb (l0_bb); 6017 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH); 6018 if (exit_reachable) 6019 { 6020 cont = as_a <gomp_continue *> (last_stmt (l1_bb)); 6021 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE); 6022 vmain = gimple_omp_continue_control_use (cont); 6023 vnext = gimple_omp_continue_control_def (cont); 6024 } 6025 else 6026 { 6027 vmain = vin; 6028 vnext = NULL_TREE; 6029 } 6030 6031 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2); 6032 label_vec.quick_push (t); 6033 i = 1; 6034 6035 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */ 6036 for (inner = region->inner, casei = 1; 6037 inner; 6038 inner = inner->next, i++, casei++) 6039 { 6040 basic_block s_entry_bb, s_exit_bb; 6041 6042 /* Skip optional reduction region. */ 6043 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD) 6044 { 6045 --i; 6046 --casei; 6047 continue; 6048 } 6049 6050 s_entry_bb = inner->entry; 6051 s_exit_bb = inner->exit; 6052 6053 t = gimple_block_label (s_entry_bb); 6054 u = build_int_cst (unsigned_type_node, casei); 6055 u = build_case_label (u, NULL, t); 6056 label_vec.quick_push (u); 6057 6058 si = gsi_last_bb (s_entry_bb); 6059 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION); 6060 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si))); 6061 gsi_remove (&si, true); 6062 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU; 6063 6064 if (s_exit_bb == NULL) 6065 continue; 6066 6067 si = gsi_last_bb (s_exit_bb); 6068 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); 6069 gsi_remove (&si, true); 6070 6071 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU; 6072 } 6073 6074 /* Error handling code goes in DEFAULT_BB. */ 6075 t = gimple_block_label (default_bb); 6076 u = build_case_label (NULL, NULL, t); 6077 make_edge (l0_bb, default_bb, 0); 6078 add_bb_to_loop (default_bb, current_loops->tree_root); 6079 6080 stmt = gimple_build_switch (vmain, u, label_vec); 6081 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT); 6082 gsi_remove (&switch_si, true); 6083 6084 si = gsi_start_bb (default_bb); 6085 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0); 6086 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING); 6087 6088 if (exit_reachable) 6089 { 6090 tree bfn_decl; 6091 6092 /* Code to get the next section goes in L1_BB. */ 6093 si = gsi_last_bb (l1_bb); 6094 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE); 6095 6096 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); 6097 stmt = gimple_build_call (bfn_decl, 0); 6098 gimple_call_set_lhs (stmt, vnext); 6099 gsi_insert_after (&si, stmt, GSI_SAME_STMT); 6100 gsi_remove (&si, true); 6101 6102 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU; 6103 } 6104 6105 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. 
*/ 6106 si = gsi_last_bb (l2_bb); 6107 if (gimple_omp_return_nowait_p (gsi_stmt (si))) 6108 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT); 6109 else if (gimple_omp_return_lhs (gsi_stmt (si))) 6110 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL); 6111 else 6112 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END); 6113 stmt = gimple_build_call (t, 0); 6114 if (gimple_omp_return_lhs (gsi_stmt (si))) 6115 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si))); 6116 gsi_insert_after (&si, stmt, GSI_SAME_STMT); 6117 gsi_remove (&si, true); 6118 6119 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb); 6120 } 6121 6122 /* Expand code for an OpenMP single directive. We've already expanded 6123 much of the code, here we simply place the GOMP_barrier call. */ 6124 6125 static void 6126 expand_omp_single (struct omp_region *region) 6127 { 6128 basic_block entry_bb, exit_bb; 6129 gimple_stmt_iterator si; 6130 6131 entry_bb = region->entry; 6132 exit_bb = region->exit; 6133 6134 si = gsi_last_bb (entry_bb); 6135 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE); 6136 gsi_remove (&si, true); 6137 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 6138 6139 si = gsi_last_bb (exit_bb); 6140 if (!gimple_omp_return_nowait_p (gsi_stmt (si))) 6141 { 6142 tree t = gimple_omp_return_lhs (gsi_stmt (si)); 6143 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT); 6144 } 6145 gsi_remove (&si, true); 6146 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; 6147 } 6148 6149 /* Generic expansion for OpenMP synchronization directives: master, 6150 ordered and critical. All we need to do here is remove the entry 6151 and exit markers for REGION. */ 6152 6153 static void 6154 expand_omp_synch (struct omp_region *region) 6155 { 6156 basic_block entry_bb, exit_bb; 6157 gimple_stmt_iterator si; 6158 6159 entry_bb = region->entry; 6160 exit_bb = region->exit; 6161 6162 si = gsi_last_bb (entry_bb); 6163 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE 6164 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER 6165 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP 6166 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED 6167 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL 6168 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS); 6169 gsi_remove (&si, true); 6170 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 6171 6172 if (exit_bb) 6173 { 6174 si = gsi_last_bb (exit_bb); 6175 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); 6176 gsi_remove (&si, true); 6177 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; 6178 } 6179 } 6180 6181 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic 6182 operation as a normal volatile load. */ 6183 6184 static bool 6185 expand_omp_atomic_load (basic_block load_bb, tree addr, 6186 tree loaded_val, int index) 6187 { 6188 enum built_in_function tmpbase; 6189 gimple_stmt_iterator gsi; 6190 basic_block store_bb; 6191 location_t loc; 6192 gimple *stmt; 6193 tree decl, call, type, itype; 6194 6195 gsi = gsi_last_bb (load_bb); 6196 stmt = gsi_stmt (gsi); 6197 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD); 6198 loc = gimple_location (stmt); 6199 6200 /* ??? If the target does not implement atomic_load_optab[mode], and mode 6201 is smaller than word size, then expand_atomic_load assumes that the load 6202 is atomic. We could avoid the builtin entirely in this case. 
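   For reference, the call built below is, e.g. for a 4-byte type without
   a seq_cst clause (a sketch of the shape, not literal output):

     loaded_val = __atomic_load_4 (addr, MEMMODEL_RELAXED);

   wrapped in a VIEW_CONVERT_EXPR back to the original type when that type
   (float, for instance) differs from the builtin's integral return type.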
*/ 6203 6204 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); 6205 decl = builtin_decl_explicit (tmpbase); 6206 if (decl == NULL_TREE) 6207 return false; 6208 6209 type = TREE_TYPE (loaded_val); 6210 itype = TREE_TYPE (TREE_TYPE (decl)); 6211 6212 call = build_call_expr_loc (loc, decl, 2, addr, 6213 build_int_cst (NULL, 6214 gimple_omp_atomic_seq_cst_p (stmt) 6215 ? MEMMODEL_SEQ_CST 6216 : MEMMODEL_RELAXED)); 6217 if (!useless_type_conversion_p (type, itype)) 6218 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call); 6219 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); 6220 6221 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); 6222 gsi_remove (&gsi, true); 6223 6224 store_bb = single_succ (load_bb); 6225 gsi = gsi_last_bb (store_bb); 6226 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE); 6227 gsi_remove (&gsi, true); 6228 6229 if (gimple_in_ssa_p (cfun)) 6230 update_ssa (TODO_update_ssa_no_phi); 6231 6232 return true; 6233 } 6234 6235 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic 6236 operation as a normal volatile store. */ 6237 6238 static bool 6239 expand_omp_atomic_store (basic_block load_bb, tree addr, 6240 tree loaded_val, tree stored_val, int index) 6241 { 6242 enum built_in_function tmpbase; 6243 gimple_stmt_iterator gsi; 6244 basic_block store_bb = single_succ (load_bb); 6245 location_t loc; 6246 gimple *stmt; 6247 tree decl, call, type, itype; 6248 machine_mode imode; 6249 bool exchange; 6250 6251 gsi = gsi_last_bb (load_bb); 6252 stmt = gsi_stmt (gsi); 6253 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD); 6254 6255 /* If the load value is needed, then this isn't a store but an exchange. */ 6256 exchange = gimple_omp_atomic_need_value_p (stmt); 6257 6258 gsi = gsi_last_bb (store_bb); 6259 stmt = gsi_stmt (gsi); 6260 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE); 6261 loc = gimple_location (stmt); 6262 6263 /* ??? If the target does not implement atomic_store_optab[mode], and mode 6264 is smaller than word size, then expand_atomic_store assumes that the store 6265 is atomic. We could avoid the builtin entirely in this case. */ 6266 6267 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N); 6268 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1); 6269 decl = builtin_decl_explicit (tmpbase); 6270 if (decl == NULL_TREE) 6271 return false; 6272 6273 type = TREE_TYPE (stored_val); 6274 6275 /* Dig out the type of the function's second argument. */ 6276 itype = TREE_TYPE (decl); 6277 itype = TYPE_ARG_TYPES (itype); 6278 itype = TREE_CHAIN (itype); 6279 itype = TREE_VALUE (itype); 6280 imode = TYPE_MODE (itype); 6281 6282 if (exchange && !can_atomic_exchange_p (imode, true)) 6283 return false; 6284 6285 if (!useless_type_conversion_p (itype, type)) 6286 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val); 6287 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, 6288 build_int_cst (NULL, 6289 gimple_omp_atomic_seq_cst_p (stmt) 6290 ? MEMMODEL_SEQ_CST 6291 : MEMMODEL_RELAXED)); 6292 if (exchange) 6293 { 6294 if (!useless_type_conversion_p (type, itype)) 6295 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call); 6296 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); 6297 } 6298 6299 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); 6300 gsi_remove (&gsi, true); 6301 6302 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. 
*/ 6303 gsi = gsi_last_bb (load_bb); 6304 gsi_remove (&gsi, true); 6305 6306 if (gimple_in_ssa_p (cfun)) 6307 update_ssa (TODO_update_ssa_no_phi); 6308 6309 return true; 6310 } 6311 6312 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic 6313 operation as a __atomic_fetch_op builtin. INDEX is log2 of the 6314 size of the data type, and thus usable to find the index of the builtin 6315 decl. Returns false if the expression is not of the proper form. */ 6316 6317 static bool 6318 expand_omp_atomic_fetch_op (basic_block load_bb, 6319 tree addr, tree loaded_val, 6320 tree stored_val, int index) 6321 { 6322 enum built_in_function oldbase, newbase, tmpbase; 6323 tree decl, itype, call; 6324 tree lhs, rhs; 6325 basic_block store_bb = single_succ (load_bb); 6326 gimple_stmt_iterator gsi; 6327 gimple *stmt; 6328 location_t loc; 6329 enum tree_code code; 6330 bool need_old, need_new; 6331 machine_mode imode; 6332 bool seq_cst; 6333 6334 /* We expect to find the following sequences: 6335 6336 load_bb: 6337 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem) 6338 6339 store_bb: 6340 val = tmp OP something; (or: something OP tmp) 6341 GIMPLE_OMP_STORE (val) 6342 6343 ???FIXME: Allow a more flexible sequence. 6344 Perhaps use data flow to pick the statements. 6345 6346 */ 6347 6348 gsi = gsi_after_labels (store_bb); 6349 stmt = gsi_stmt (gsi); 6350 loc = gimple_location (stmt); 6351 if (!is_gimple_assign (stmt)) 6352 return false; 6353 gsi_next (&gsi); 6354 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE) 6355 return false; 6356 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi)); 6357 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb)); 6358 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb)); 6359 gcc_checking_assert (!need_old || !need_new); 6360 6361 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0)) 6362 return false; 6363 6364 /* Check for one of the supported fetch-op operations. */ 6365 code = gimple_assign_rhs_code (stmt); 6366 switch (code) 6367 { 6368 case PLUS_EXPR: 6369 case POINTER_PLUS_EXPR: 6370 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N; 6371 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N; 6372 break; 6373 case MINUS_EXPR: 6374 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N; 6375 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N; 6376 break; 6377 case BIT_AND_EXPR: 6378 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N; 6379 newbase = BUILT_IN_ATOMIC_AND_FETCH_N; 6380 break; 6381 case BIT_IOR_EXPR: 6382 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N; 6383 newbase = BUILT_IN_ATOMIC_OR_FETCH_N; 6384 break; 6385 case BIT_XOR_EXPR: 6386 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N; 6387 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N; 6388 break; 6389 default: 6390 return false; 6391 } 6392 6393 /* Make sure the expression is of the proper form. */ 6394 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0)) 6395 rhs = gimple_assign_rhs2 (stmt); 6396 else if (commutative_tree_code (gimple_assign_rhs_code (stmt)) 6397 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0)) 6398 rhs = gimple_assign_rhs1 (stmt); 6399 else 6400 return false; 6401 6402 tmpbase = ((enum built_in_function) 6403 ((need_new ? 
newbase : oldbase) + index + 1)); 6404 decl = builtin_decl_explicit (tmpbase); 6405 if (decl == NULL_TREE) 6406 return false; 6407 itype = TREE_TYPE (TREE_TYPE (decl)); 6408 imode = TYPE_MODE (itype); 6409 6410 /* We could test all of the various optabs involved, but the fact of the 6411 matter is that (with the exception of i486 vs i586 and xadd) all targets 6412 that support any atomic operaton optab also implements compare-and-swap. 6413 Let optabs.c take care of expanding any compare-and-swap loop. */ 6414 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode)) 6415 return false; 6416 6417 gsi = gsi_last_bb (load_bb); 6418 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD); 6419 6420 /* OpenMP does not imply any barrier-like semantics on its atomic ops. 6421 It only requires that the operation happen atomically. Thus we can 6422 use the RELAXED memory model. */ 6423 call = build_call_expr_loc (loc, decl, 3, addr, 6424 fold_convert_loc (loc, itype, rhs), 6425 build_int_cst (NULL, 6426 seq_cst ? MEMMODEL_SEQ_CST 6427 : MEMMODEL_RELAXED)); 6428 6429 if (need_old || need_new) 6430 { 6431 lhs = need_old ? loaded_val : stored_val; 6432 call = fold_convert_loc (loc, TREE_TYPE (lhs), call); 6433 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call); 6434 } 6435 else 6436 call = fold_convert_loc (loc, void_type_node, call); 6437 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); 6438 gsi_remove (&gsi, true); 6439 6440 gsi = gsi_last_bb (store_bb); 6441 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE); 6442 gsi_remove (&gsi, true); 6443 gsi = gsi_last_bb (store_bb); 6444 stmt = gsi_stmt (gsi); 6445 gsi_remove (&gsi, true); 6446 6447 if (gimple_in_ssa_p (cfun)) 6448 { 6449 release_defs (stmt); 6450 update_ssa (TODO_update_ssa_no_phi); 6451 } 6452 6453 return true; 6454 } 6455 6456 /* A subroutine of expand_omp_atomic. Implement the atomic operation as: 6457 6458 oldval = *addr; 6459 repeat: 6460 newval = rhs; // with oldval replacing *addr in rhs 6461 oldval = __sync_val_compare_and_swap (addr, oldval, newval); 6462 if (oldval != newval) 6463 goto repeat; 6464 6465 INDEX is log2 of the size of the data type, and thus usable to find the 6466 index of the builtin decl. */ 6467 6468 static bool 6469 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb, 6470 tree addr, tree loaded_val, tree stored_val, 6471 int index) 6472 { 6473 tree loadedi, storedi, initial, new_storedi, old_vali; 6474 tree type, itype, cmpxchg, iaddr; 6475 gimple_stmt_iterator si; 6476 basic_block loop_header = single_succ (load_bb); 6477 gimple *phi, *stmt; 6478 edge e; 6479 enum built_in_function fncode; 6480 6481 /* ??? We need a non-pointer interface to __atomic_compare_exchange in 6482 order to use the RELAXED memory model effectively. */ 6483 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N 6484 + index + 1); 6485 cmpxchg = builtin_decl_explicit (fncode); 6486 if (cmpxchg == NULL_TREE) 6487 return false; 6488 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); 6489 itype = TREE_TYPE (TREE_TYPE (cmpxchg)); 6490 6491 if (!can_compare_and_swap_p (TYPE_MODE (itype), true) 6492 || !can_atomic_load_p (TYPE_MODE (itype))) 6493 return false; 6494 6495 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. 
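   A sketch of what this produces for a 4-byte floating-point location
   (illustrative only): ADDR is first reinterpreted as a pointer IADDR to
   the matching integral type and the initial contents are fetched with

     loadedi = __atomic_load_4 (iaddr, MEMMODEL_RELAXED);

   or with a plain dereference of IADDR when that builtin is unavailable,
   so that the compare-and-swap loop below operates on the integer view of
   the value.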
*/ 6496 si = gsi_last_bb (load_bb); 6497 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); 6498 6499 /* For floating-point values, we'll need to view-convert them to integers 6500 so that we can perform the atomic compare and swap. Simplify the 6501 following code by always setting up the "i"ntegral variables. */ 6502 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type)) 6503 { 6504 tree iaddr_val; 6505 6506 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode, 6507 true)); 6508 iaddr_val 6509 = force_gimple_operand_gsi (&si, 6510 fold_convert (TREE_TYPE (iaddr), addr), 6511 false, NULL_TREE, true, GSI_SAME_STMT); 6512 stmt = gimple_build_assign (iaddr, iaddr_val); 6513 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6514 loadedi = create_tmp_var (itype); 6515 if (gimple_in_ssa_p (cfun)) 6516 loadedi = make_ssa_name (loadedi); 6517 } 6518 else 6519 { 6520 iaddr = addr; 6521 loadedi = loaded_val; 6522 } 6523 6524 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); 6525 tree loaddecl = builtin_decl_explicit (fncode); 6526 if (loaddecl) 6527 initial 6528 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)), 6529 build_call_expr (loaddecl, 2, iaddr, 6530 build_int_cst (NULL_TREE, 6531 MEMMODEL_RELAXED))); 6532 else 6533 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr, 6534 build_int_cst (TREE_TYPE (iaddr), 0)); 6535 6536 initial 6537 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true, 6538 GSI_SAME_STMT); 6539 6540 /* Move the value to the LOADEDI temporary. */ 6541 if (gimple_in_ssa_p (cfun)) 6542 { 6543 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header))); 6544 phi = create_phi_node (loadedi, loop_header); 6545 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)), 6546 initial); 6547 } 6548 else 6549 gsi_insert_before (&si, 6550 gimple_build_assign (loadedi, initial), 6551 GSI_SAME_STMT); 6552 if (loadedi != loaded_val) 6553 { 6554 gimple_stmt_iterator gsi2; 6555 tree x; 6556 6557 x = build1 (VIEW_CONVERT_EXPR, type, loadedi); 6558 gsi2 = gsi_start_bb (loop_header); 6559 if (gimple_in_ssa_p (cfun)) 6560 { 6561 gassign *stmt; 6562 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, 6563 true, GSI_SAME_STMT); 6564 stmt = gimple_build_assign (loaded_val, x); 6565 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT); 6566 } 6567 else 6568 { 6569 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x); 6570 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, 6571 true, GSI_SAME_STMT); 6572 } 6573 } 6574 gsi_remove (&si, true); 6575 6576 si = gsi_last_bb (store_bb); 6577 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); 6578 6579 if (iaddr == addr) 6580 storedi = stored_val; 6581 else 6582 storedi 6583 = force_gimple_operand_gsi (&si, 6584 build1 (VIEW_CONVERT_EXPR, itype, 6585 stored_val), true, NULL_TREE, true, 6586 GSI_SAME_STMT); 6587 6588 /* Build the compare&swap statement. 
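   For a 4-byte type this is, roughly,

     new_storedi = __sync_val_compare_and_swap_4 (iaddr, loadedi, storedi);

   which stores STOREDI only if the location still contains LOADEDI and in
   either case returns the value that was in memory, so comparing the
   result against the old value below tells us whether the update took.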
*/ 6589 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi); 6590 new_storedi = force_gimple_operand_gsi (&si, 6591 fold_convert (TREE_TYPE (loadedi), 6592 new_storedi), 6593 true, NULL_TREE, 6594 true, GSI_SAME_STMT); 6595 6596 if (gimple_in_ssa_p (cfun)) 6597 old_vali = loadedi; 6598 else 6599 { 6600 old_vali = create_tmp_var (TREE_TYPE (loadedi)); 6601 stmt = gimple_build_assign (old_vali, loadedi); 6602 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6603 6604 stmt = gimple_build_assign (loadedi, new_storedi); 6605 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6606 } 6607 6608 /* Note that we always perform the comparison as an integer, even for 6609 floating point. This allows the atomic operation to properly 6610 succeed even with NaNs and -0.0. */ 6611 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali); 6612 stmt = gimple_build_cond_empty (ne); 6613 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6614 6615 /* Update cfg. */ 6616 e = single_succ_edge (store_bb); 6617 e->flags &= ~EDGE_FALLTHRU; 6618 e->flags |= EDGE_FALSE_VALUE; 6619 6620 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE); 6621 6622 /* Copy the new value to loadedi (we already did that before the condition 6623 if we are not in SSA). */ 6624 if (gimple_in_ssa_p (cfun)) 6625 { 6626 phi = gimple_seq_first_stmt (phi_nodes (loop_header)); 6627 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi); 6628 } 6629 6630 /* Remove GIMPLE_OMP_ATOMIC_STORE. */ 6631 gsi_remove (&si, true); 6632 6633 struct loop *loop = alloc_loop (); 6634 loop->header = loop_header; 6635 loop->latch = store_bb; 6636 add_loop (loop, loop_header->loop_father); 6637 6638 if (gimple_in_ssa_p (cfun)) 6639 update_ssa (TODO_update_ssa_no_phi); 6640 6641 return true; 6642 } 6643 6644 /* A subroutine of expand_omp_atomic. Implement the atomic operation as: 6645 6646 GOMP_atomic_start (); 6647 *addr = rhs; 6648 GOMP_atomic_end (); 6649 6650 The result is not globally atomic, but works so long as all parallel 6651 references are within #pragma omp atomic directives. According to 6652 responses received from omp@openmp.org, appears to be within spec. 6653 Which makes sense, since that's how several other compilers handle 6654 this situation as well. 6655 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're 6656 expanding. STORED_VAL is the operand of the matching 6657 GIMPLE_OMP_ATOMIC_STORE. 
6658 6659 We replace 6660 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with 6661 loaded_val = *addr; 6662 6663 and replace 6664 GIMPLE_OMP_ATOMIC_STORE (stored_val) with 6665 *addr = stored_val; 6666 */ 6667 6668 static bool 6669 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb, 6670 tree addr, tree loaded_val, tree stored_val) 6671 { 6672 gimple_stmt_iterator si; 6673 gassign *stmt; 6674 tree t; 6675 6676 si = gsi_last_bb (load_bb); 6677 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); 6678 6679 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START); 6680 t = build_call_expr (t, 0); 6681 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); 6682 6683 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr)); 6684 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6685 gsi_remove (&si, true); 6686 6687 si = gsi_last_bb (store_bb); 6688 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); 6689 6690 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)), 6691 stored_val); 6692 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6693 6694 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END); 6695 t = build_call_expr (t, 0); 6696 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); 6697 gsi_remove (&si, true); 6698 6699 if (gimple_in_ssa_p (cfun)) 6700 update_ssa (TODO_update_ssa_no_phi); 6701 return true; 6702 } 6703 6704 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand 6705 using expand_omp_atomic_fetch_op. If it failed, we try to 6706 call expand_omp_atomic_pipeline, and if it fails too, the 6707 ultimate fallback is wrapping the operation in a mutex 6708 (expand_omp_atomic_mutex). REGION is the atomic region built 6709 by build_omp_regions_1(). */ 6710 6711 static void 6712 expand_omp_atomic (struct omp_region *region) 6713 { 6714 basic_block load_bb = region->entry, store_bb = region->exit; 6715 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb)); 6716 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb)); 6717 tree loaded_val = gimple_omp_atomic_load_lhs (load); 6718 tree addr = gimple_omp_atomic_load_rhs (load); 6719 tree stored_val = gimple_omp_atomic_store_val (store); 6720 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); 6721 HOST_WIDE_INT index; 6722 6723 /* Make sure the type is one of the supported sizes. */ 6724 index = tree_to_uhwi (TYPE_SIZE_UNIT (type)); 6725 index = exact_log2 (index); 6726 if (index >= 0 && index <= 4) 6727 { 6728 unsigned int align = TYPE_ALIGN_UNIT (type); 6729 6730 /* __sync builtins require strict data alignment. */ 6731 if (exact_log2 (align) >= index) 6732 { 6733 /* Atomic load. */ 6734 if (loaded_val == stored_val 6735 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT 6736 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT) 6737 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD 6738 && expand_omp_atomic_load (load_bb, addr, loaded_val, index)) 6739 return; 6740 6741 /* Atomic store. */ 6742 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT 6743 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT) 6744 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD 6745 && store_bb == single_succ (load_bb) 6746 && first_stmt (store_bb) == store 6747 && expand_omp_atomic_store (load_bb, addr, loaded_val, 6748 stored_val, index)) 6749 return; 6750 6751 /* When possible, use specialized atomic update functions. 
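   For instance (an illustrative sketch),

     #pragma omp atomic
     x += 1;

   on a 4-byte int is turned by expand_omp_atomic_fetch_op into a single

     __atomic_fetch_add_4 (&x, 1, MEMMODEL_RELAXED);

   call (MEMMODEL_SEQ_CST when the seq_cst clause is present), with the
   analogous fetch-sub, fetch-and, fetch-or and fetch-xor builtins used
   for the other supported operators.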
*/
6752 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6753 && store_bb == single_succ (load_bb)
6754 && expand_omp_atomic_fetch_op (load_bb, addr,
6755 loaded_val, stored_val, index))
6756 return;
6757
6758 /* If we don't have specialized __sync builtins, try and implement
6759 as a compare and swap loop. */
6760 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6761 loaded_val, stored_val, index))
6762 return;
6763 }
6764 }
6765
6766 /* The ultimate fallback is wrapping the operation in a mutex. */
6767 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6768 }
6769
6770 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6771 at REGION_EXIT. */
6772
6773 static void
6774 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6775 basic_block region_exit)
6776 {
6777 struct loop *outer = region_entry->loop_father;
6778 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6779
6780 /* Don't parallelize the kernels region if it contains more than one outer
6781 loop. */
6782 unsigned int nr_outer_loops = 0;
6783 struct loop *single_outer = NULL;
6784 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6785 {
6786 gcc_assert (loop_outer (loop) == outer);
6787
6788 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6789 continue;
6790
6791 if (region_exit != NULL
6792 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6793 continue;
6794
6795 nr_outer_loops++;
6796 single_outer = loop;
6797 }
6798 if (nr_outer_loops != 1)
6799 return;
6800
6801 for (struct loop *loop = single_outer->inner;
6802 loop != NULL;
6803 loop = loop->inner)
6804 if (loop->next)
6805 return;
6806
6807 /* Mark the loops in the region. */
6808 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6809 loop->in_oacc_kernels_region = true;
6810 }
6811
6812 /* Types used to pass grid and workgroup sizes to kernel invocation. */
6813
6814 struct GTY(()) grid_launch_attributes_trees
6815 {
6816 tree kernel_dim_array_type;
6817 tree kernel_lattrs_dimnum_decl;
6818 tree kernel_lattrs_grid_decl;
6819 tree kernel_lattrs_group_decl;
6820 tree kernel_launch_attributes_type;
6821 };
6822
6823 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6824
6825 /* Create types used to pass kernel launch attributes to target.
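   The record built below corresponds roughly to the following C
   declaration -- an illustrative sketch, not something emitted verbatim:

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;           // number of grid dimensions actually used
       uint32_t grid_size[3];   // global grid size per dimension
       uint32_t group_size[3];  // work-group size per dimension
     };

   grid_get_kernel_launch_attributes fills an instance of it and passes
   that instance's address to the launch code.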
   */

static void
grid_create_kernel_launch_attr_types (void)
{
  if (grid_attr_trees)
    return;
  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();

  tree dim_arr_index_type
    = build_index_type (build_int_cst (integer_type_node, 2));
  grid_attr_trees->kernel_dim_array_type
    = build_array_type (uint32_type_node, dim_arr_index_type);

  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
  grid_attr_trees->kernel_lattrs_dimnum_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
                  uint32_type_node);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;

  grid_attr_trees->kernel_lattrs_grid_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
                  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
    = grid_attr_trees->kernel_lattrs_dimnum_decl;
  grid_attr_trees->kernel_lattrs_group_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
                  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
    = grid_attr_trees->kernel_lattrs_grid_decl;
  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
                         "__gomp_kernel_launch_attributes",
                         grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
}

/* Insert before the current statement in GSI a store of VALUE to INDEX of
   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
   of type uint32_type_node.  */

static void
grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
                             tree fld_decl, int index, tree value)
{
  tree ref = build4 (ARRAY_REF, uint32_type_node,
                     build3 (COMPONENT_REF,
                             grid_attr_trees->kernel_dim_array_type,
                             range_var, fld_decl, NULL_TREE),
                     build_int_cst (integer_type_node, index),
                     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
}

/* Return a tree representation of a pointer to a structure with grid and
   work-group size information.  Statements filling that information will be
   inserted before GSI.  TGT_STMT is the target statement which has the
   necessary information in it.
   */

static tree
grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
                                   gomp_target *tgt_stmt)
{
  grid_create_kernel_launch_attr_types ();
  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
                                "__kernel_launch_attrs");

  unsigned max_dim = 0;
  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
       clause;
       clause = OMP_CLAUSE_CHAIN (clause))
    {
      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
        continue;

      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
      max_dim = MAX (dim, max_dim);

      grid_insert_store_range_dim (gsi, lattrs,
                                   grid_attr_trees->kernel_lattrs_grid_decl,
                                   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
      grid_insert_store_range_dim (gsi, lattrs,
                                   grid_attr_trees->kernel_lattrs_group_decl,
                                   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
    }

  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
                        grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
  gcc_checking_assert (max_dim <= 2);
  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
                     GSI_SAME_STMT);
  TREE_ADDRESSABLE (lattrs) = 1;
  return build_fold_addr_expr (lattrs);
}

/* Build a target argument identifier from the DEVICE identifier, value
   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */

static tree
get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
{
  tree t = build_int_cst (integer_type_node, device);
  if (subsequent_param)
    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
                     build_int_cst (integer_type_node,
                                    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
                   build_int_cst (integer_type_node, id));
  return t;
}

/* Like above but return it in a type that can be directly stored as an element
   of the argument array.  */

static tree
get_target_argument_identifier (int device, bool subsequent_param, int id)
{
  tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
  return fold_convert (ptr_type_node, t);
}

/* Return a target argument consisting of DEVICE identifier, value identifier
   ID, and the actual VALUE.  */

static tree
get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
                           tree value)
{
  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
                        fold_convert (integer_type_node, value),
                        build_int_cst (unsigned_type_node,
                                       GOMP_TARGET_ARG_VALUE_SHIFT));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
                   get_target_argument_identifier_1 (device, false, id));
  t = fold_convert (ptr_type_node, t);
  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
}

/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
   push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
   arguments.
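   In the embedded case the value occupies the bits above
   GOMP_TARGET_ARG_VALUE_SHIFT while the low bits hold the device and value
   identifiers, mirroring get_target_argument_value above.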
   */

static void
push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
                                         int id, tree value, vec <tree> *args)
{
  if (tree_fits_shwi_p (value)
      && tree_to_shwi (value) > -(1 << 15)
      && tree_to_shwi (value) < (1 << 15))
    args->quick_push (get_target_argument_value (gsi, device, id, value));
  else
    {
      args->quick_push (get_target_argument_identifier (device, true, id));
      value = fold_convert (ptr_type_node, value);
      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
                                        GSI_SAME_STMT);
      args->quick_push (value);
    }
}

/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
                                           GOMP_TARGET_ARG_NUM_TEAMS, t, &args);

  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
                                           GOMP_TARGET_ARG_THREAD_LIMIT, t,
                                           &args);

  /* Add HSA-specific grid sizes, if available.  */
  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
                       OMP_CLAUSE__GRIDDIM_))
    {
      int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
      args.quick_push (t);
      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
    }

  /* Produce more, perhaps device specific, arguments here.  */

  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
                                                          args.length () + 1),
                                  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
                         build_int_cst (integer_type_node, i),
                         NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
                         GSI_SAME_STMT);
    }
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
                     build_int_cst (integer_type_node, args.length ()),
                     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
                     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  return build_fold_addr_expr (argarray);
}

/* Expand the GIMPLE_OMP_TARGET starting at REGION.
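   For an offloaded region this outlines the body into a separate child
   function and replaces the directive with a call into libgomp (GOMP_target,
   GOACC_parallel etc.); stand-alone data and update directives only get the
   corresponding library call.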
   */

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded, data_region;

  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
  new_bb = region->entry;

  offloaded = is_gimple_omp_offloaded (entry_stmt);
  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      data_region = false;
      break;
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      data_region = true;
      break;
    default:
      gcc_unreachable ();
    }

  child_fn = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
    mark_loops_in_oacc_kernels_region (region->entry, region->exit);

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
         function, then the very first statement (except possible
         tree profile counter updates) of the offloading body
         is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
         &.OMP_DATA_O is passed as an argument to the child function,
         we need to replace it with the argument as seen by the child
         function.

         In most cases, this will end up being the identity assignment
         .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
         a function call that has been inlined, the original PARM_DECL
         .OMP_DATA_I may have been converted into a different local
         variable.  In which case, we need to keep the assignment.  */
      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
      if (data_arg)
        {
          basic_block entry_succ_bb = single_succ (entry_bb);
          gimple_stmt_iterator gsi;
          tree arg;
          gimple *tgtcopy_stmt = NULL;
          tree sender = TREE_VEC_ELT (data_arg, 0);

          for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
            {
              gcc_assert (!gsi_end_p (gsi));
              stmt = gsi_stmt (gsi);
              if (gimple_code (stmt) != GIMPLE_ASSIGN)
                continue;

              if (gimple_num_ops (stmt) == 2)
                {
                  tree arg = gimple_assign_rhs1 (stmt);

                  /* We're ignoring the subcode because we're
                     effectively doing a STRIP_NOPS.
                     */

                  if (TREE_CODE (arg) == ADDR_EXPR
                      && TREE_OPERAND (arg, 0) == sender)
                    {
                      tgtcopy_stmt = stmt;
                      break;
                    }
                }
            }

          gcc_assert (tgtcopy_stmt != NULL);
          arg = DECL_ARGUMENTS (child_fn);

          gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
          gsi_remove (&gsi, true);
        }

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
         rather than in containing function's local_decls chain,
         which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
        if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
          varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
        DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_*,
         so that it can be moved to the child function.  */
      gsi = gsi_last_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
                  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
        {
          gsi = gsi_last_bb (exit_bb);
          gcc_assert (!gsi_end_p (gsi)
                      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
          stmt = gimple_build_return (NULL);
          gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
          gsi_remove (&gsi, true);
        }

      /* Make sure to generate early debug for the function before
         outlining anything.  */
      if (! gimple_in_ssa_p (cfun))
        (*debug_hooks->early_global_decl) (cfun->decl);

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
        single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
         loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
        {
          t = (*child_cfun->local_decls)[srcidx];
          if (DECL_CONTEXT (t) == cfun->decl)
            continue;
          if (srcidx != dstidx)
            (*child_cfun->local_decls)[dstidx] = t;
          dstidx++;
        }
      if (dstidx != num)
        vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.
         */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
        vec_safe_push (offload_funcs, child_fn);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
                      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
         fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
        assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
         pass_cleanup_cfg isn't the first pass to happen with the
         new child, these dead EH edges might cause problems.
         Clean them up now.  */
      if (flag_exceptions)
        {
          basic_block bb;
          bool changed = false;

          FOR_EACH_BB_FN (bb, cfun)
            changed |= gimple_purge_dead_eh_edges (bb);
          if (changed)
            cleanup_tree_cfg ();
        }
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
        {
          omp_any_child_fn_dumped = true;
          dump_function_header (dump_file, child_fn, dump_flags);
          dump_function_to_file (child_fn, dump_file, dump_flags);
        }
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.  */
  tree t1, t2, t3, t4, device, cond, depend, c, clauses;
  enum built_in_function start_ix;
  location_t clause_loc;
  unsigned int flags_i = 0;
  bool oacc_kernels_p = false;

  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
      oacc_kernels_p = true;
      /* FALLTHROUGH */
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }

  clauses = gimple_omp_target_clauses (entry_stmt);

  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
     library choose) and there is no conditional.
     */
  cond = NULL_TREE;
  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
  if (c)
    {
      /* Even if we pass it to all library function calls, it is currently only
         defined/used for the OpenMP target ones.  */
      gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
                           || start_ix == BUILT_IN_GOMP_TARGET_DATA
                           || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
                           || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);

      device = OMP_CLAUSE_DEVICE_ID (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
  if (c)
    flags_i |= GOMP_TARGET_FLAG_NOWAIT;

  /* Ensure 'device' is of the correct type.  */
  device = fold_convert_loc (clause_loc, integer_type_node, device);

  /* If we found the clause 'if (cond)', build
     (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      basic_block cond_bb, then_bb, else_bb;
      edge e;
      tree tmp_var;

      tmp_var = create_tmp_var (TREE_TYPE (device));
      if (offloaded)
        e = split_block_after_labels (new_bb);
      else
        {
          gsi = gsi_last_bb (new_bb);
          gsi_prev (&gsi);
          e = split_block (new_bb, gsi_stmt (gsi));
        }
      cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      then_bb = create_empty_bb (cond_bb);
      else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      stmt = gimple_build_cond_empty (cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, device);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      stmt = gimple_build_assign (tmp_var,
                                  build_int_cst (integer_type_node,
                                                 GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      device = tmp_var;
      gsi = gsi_last_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_bb (new_bb);
      device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
                                         true, GSI_SAME_STMT);
    }

  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }

  gimple *g;
  bool tagging = false;
  /* The maximum number used by any start_ix, without varargs.
     */
  auto_vec<tree, 11> args;
  args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
        depend = OMP_CLAUSE_DECL (c);
      else
        depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
        args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      {
        oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
        tagging = true;
      }
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
        tree t_async = NULL_TREE;

        /* If present, use the value specified by the respective
           clause, making sure that is of the correct type.  */
        c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
        if (c)
          t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
                                      integer_type_node,
                                      OMP_CLAUSE_ASYNC_EXPR (c));
        else if (!tagging)
          /* Default values for t_async.  */
          t_async = fold_convert_loc (gimple_location (entry_stmt),
                                      integer_type_node,
                                      build_int_cst (integer_type_node,
                                                     GOMP_ASYNC_SYNC));
        if (tagging && t_async)
          {
            unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

            if (TREE_CODE (t_async) == INTEGER_CST)
              {
                /* See if we can pack the async arg in to the tag's
                   operand.  */
                i_async = TREE_INT_CST_LOW (t_async);
                if (i_async < GOMP_LAUNCH_OP_MAX)
                  t_async = NULL_TREE;
                else
                  i_async = GOMP_LAUNCH_OP_MAX;
              }
            args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
                                              i_async));
          }
        if (t_async)
          args.safe_push (t_async);

        /* Save the argument index, and ... */
        unsigned t_wait_idx = args.length ();
        unsigned num_waits = 0;
        c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
        if (!tagging || c)
          /* ... push a placeholder.  */
          args.safe_push (integer_zero_node);

        for (; c; c = OMP_CLAUSE_CHAIN (c))
          if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
            {
              args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
                                                integer_type_node,
                                                OMP_CLAUSE_WAIT_EXPR (c)));
              num_waits++;
            }

        if (!tagging || num_waits)
          {
            tree len;

            /* Now that we know the number, update the placeholder.  */
            if (tagging)
              len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
            else
              len = build_int_cst (integer_type_node, num_waits);
            len = fold_convert_loc (gimple_location (entry_stmt),
                                    unsigned_type_node, len);
            args[t_wait_idx] = len;
          }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.
       */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
  if (data_region && region->exit)
    {
      gsi = gsi_last_bb (region->exit);
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
      gsi_remove (&gsi, true);
    }
}

/* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
   the iteration variable derived from the thread number.  INTRA_GROUP means
   this is an expansion of a loop iterating over work-items within a separate
   iteration over groups.  */

static void
grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
{
  gimple_stmt_iterator gsi;
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
                       == GF_OMP_FOR_KIND_GRID_LOOP);
  size_t collapse = gimple_omp_for_collapse (for_stmt);
  struct omp_for_data_loop *loops
    = XALLOCAVEC (struct omp_for_data_loop,
                  gimple_omp_for_collapse (for_stmt));
  struct omp_for_data fd;

  remove_edge (BRANCH_EDGE (kfor->entry));
  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;

  gcc_assert (kfor->cont);
  omp_extract_for_data (for_stmt, &fd, loops);

  gsi = gsi_start_bb (body_bb);

  for (size_t dim = 0; dim < collapse; dim++)
    {
      tree type, itype;
      itype = type = TREE_TYPE (fd.loops[dim].v);
      if (POINTER_TYPE_P (type))
        itype = signed_type_for (type);

      tree n1 = fd.loops[dim].n1;
      tree step = fd.loops[dim].step;
      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
                                     true, NULL_TREE, true, GSI_SAME_STMT);
      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
                                       true, NULL_TREE, true, GSI_SAME_STMT);
      tree threadid;
      if (gimple_omp_for_grid_group_iter (for_stmt))
        {
          gcc_checking_assert (!intra_group);
          threadid = build_call_expr (builtin_decl_explicit
                                      (BUILT_IN_HSA_WORKGROUPID), 1,
                                      build_int_cstu (unsigned_type_node, dim));
        }
      else if (intra_group)
        threadid = build_call_expr (builtin_decl_explicit
                                    (BUILT_IN_HSA_WORKITEMID), 1,
                                    build_int_cstu (unsigned_type_node, dim));
      else
        threadid = build_call_expr (builtin_decl_explicit
                                    (BUILT_IN_HSA_WORKITEMABSID), 1,
                                    build_int_cstu (unsigned_type_node, dim));
      threadid = fold_convert (itype, threadid);
      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
                                           true, GSI_SAME_STMT);

      tree startvar = fd.loops[dim].v;
      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
      if (POINTER_TYPE_P (type))
        t = fold_build_pointer_plus (n1, t);
      else
        t = fold_build2 (PLUS_EXPR, type, t, n1);
      t = fold_convert (type, t);
      t = force_gimple_operand_gsi (&gsi, t,
                                    DECL_P (startvar)
                                    && TREE_ADDRESSABLE (startvar),
                                    NULL_TREE, true, GSI_SAME_STMT);
      gassign *assign_stmt = gimple_build_assign (startvar, t);
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }
  /* Remove the omp for statement.
     */
  gsi = gsi_last_bb (kfor->entry);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
  gsi = gsi_last_bb (kfor->cont);
  gcc_assert (!gsi_end_p (gsi)
              && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
  gsi_remove (&gsi, true);

  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
  gsi = gsi_last_bb (kfor->exit);
  gcc_assert (!gsi_end_p (gsi)
              && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  if (intra_group)
    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Fixup the much simpler CFG.  */
  remove_edge (find_edge (kfor->cont, body_bb));

  if (kfor->cont != body_bb)
    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
}

/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
   argument_decls.  */

struct grid_arg_decl_map
{
  tree old_arg;
  tree new_arg;
};

/* Invoked through walk_gimple_op.  Remaps all PARM_DECLs to the ones
   pertaining to the kernel function.  */

static tree
grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
  tree t = *tp;

  if (t == adm->old_arg)
    *tp = adm->new_arg;
  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* If the TARGET region contains a kernel body for-loop, remove its region
   from the TARGET and expand it in HSA gridified kernel fashion.  */

static void
grid_expand_target_grid_body (struct omp_region *target)
{
  if (!hsa_gen_requested_p ())
    return;

  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
  struct omp_region **pp;

  for (pp = &target->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
      break;

  struct omp_region *gpukernel = *pp;

  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
  if (!gpukernel)
    {
      /* HSA cannot handle OACC stuff.
         */
      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
        return;
      gcc_checking_assert (orig_child_fndecl);
      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
                                    OMP_CLAUSE__GRIDDIM_));
      cgraph_node *n = cgraph_node::get (orig_child_fndecl);

      hsa_register_kernel (n);
      return;
    }

  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
                               OMP_CLAUSE__GRIDDIM_));
  tree inside_block
    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
  *pp = gpukernel->next;
  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_FOR)
      break;

  struct omp_region *kfor = *pp;
  gcc_assert (kfor);
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
  *pp = kfor->next;
  if (kfor->inner)
    {
      if (gimple_omp_for_grid_group_iter (for_stmt))
        {
          struct omp_region **next_pp;
          for (pp = &kfor->inner; *pp; pp = next_pp)
            {
              next_pp = &(*pp)->next;
              if ((*pp)->type != GIMPLE_OMP_FOR)
                continue;
              gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
              gcc_assert (gimple_omp_for_kind (inner)
                          == GF_OMP_FOR_KIND_GRID_LOOP);
              grid_expand_omp_for_loop (*pp, true);
              *pp = (*pp)->next;
              next_pp = pp;
            }
        }
      expand_omp (kfor->inner);
    }
  if (gpukernel->inner)
    expand_omp (gpukernel->inner);

  tree kern_fndecl = copy_node (orig_child_fndecl);
  DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
  tree tgtblock = gimple_block (tgt_stmt);
  tree fniniblock = make_node (BLOCK);
  BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
  DECL_INITIAL (kern_fndecl) = fniniblock;
  push_struct_function (kern_fndecl);
  cfun->function_end_locus = gimple_location (tgt_stmt);
  init_tree_ssa (cfun);
  pop_cfun ();

  /* Make sure to generate early debug for the function before
     outlining anything.  */
  if (! gimple_in_ssa_p (cfun))
    (*debug_hooks->early_global_decl) (cfun->decl);

  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
  gcc_assert (!DECL_CHAIN (old_parm_decl));
  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
  kern_cfun->curr_properties = cfun->curr_properties;

  grid_expand_omp_for_loop (kfor, false);

  /* Remove the omp for statement.  */
  gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
  gsi_remove (&gsi, true);
  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
     return.
     */
  gsi = gsi_last_bb (gpukernel->exit);
  gcc_assert (!gsi_end_p (gsi)
              && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gimple *ret_stmt = gimple_build_return (NULL);
  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Statements in the first BB in the target construct have been produced by
     target lowering and must be copied inside the GPUKERNEL, with the two
     exceptions of the first OMP statement and the OMP_DATA assignment
     statement.  */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
        break;
      if (sender
          && is_gimple_assign (stmt)
          && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
          && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
        continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
                          gpukernel->exit, inside_block);

  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          struct walk_stmt_info wi;
          memset (&wi, 0, sizeof (wi));
          wi.info = &adm;
          walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
        }
    }
  pop_cfun ();

  return;
}

/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
         region.
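         Combined regions are later expanded with a single library call such
         as GOMP_parallel_loop_static instead of separate parallel and
         workshare constructs.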
         */
      if (region->type == GIMPLE_OMP_PARALLEL)
        determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
        grid_expand_target_grid_body (region);

      if (region->type == GIMPLE_OMP_FOR
          && gimple_omp_for_combined_p (last_stmt (region->entry)))
        inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
        expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
        input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
        {
        case GIMPLE_OMP_PARALLEL:
        case GIMPLE_OMP_TASK:
          expand_omp_taskreg (region);
          break;

        case GIMPLE_OMP_FOR:
          expand_omp_for (region, inner_stmt);
          break;

        case GIMPLE_OMP_SECTIONS:
          expand_omp_sections (region);
          break;

        case GIMPLE_OMP_SECTION:
          /* Individual omp sections are handled together with their
             parent GIMPLE_OMP_SECTIONS region.  */
          break;

        case GIMPLE_OMP_SINGLE:
          expand_omp_single (region);
          break;

        case GIMPLE_OMP_ORDERED:
          {
            gomp_ordered *ord_stmt
              = as_a <gomp_ordered *> (last_stmt (region->entry));
            if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
                                 OMP_CLAUSE_DEPEND))
              {
                /* We'll expand these when expanding corresponding
                   worksharing region with ordered(n) clause.  */
                gcc_assert (region->outer
                            && region->outer->type == GIMPLE_OMP_FOR);
                region->ord_stmt = ord_stmt;
                break;
              }
          }
          /* FALLTHRU */
        case GIMPLE_OMP_MASTER:
        case GIMPLE_OMP_TASKGROUP:
        case GIMPLE_OMP_CRITICAL:
        case GIMPLE_OMP_TEAMS:
          expand_omp_synch (region);
          break;

        case GIMPLE_OMP_ATOMIC_LOAD:
          expand_omp_atomic (region);
          break;

        case GIMPLE_OMP_TARGET:
          expand_omp_target (region);
          break;

        default:
          gcc_unreachable ();
        }

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
        dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, the
   whole forest of OMP constructs may be built).  */

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
                     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
        {
          /* STMT is the return point out of region PARENT.  Mark it
             as the exit point and make PARENT the immediately
             enclosing region.  */
          gcc_assert (parent);
          region = parent;
          region->exit = bb;
          parent = parent->outer;
        }
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
        {
          /* GIMPLE_OMP_ATOMIC_STORE is analogous to
             GIMPLE_OMP_RETURN, but matches with
             GIMPLE_OMP_ATOMIC_LOAD.
             */
          gcc_assert (parent);
          gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
          region = parent;
          region->exit = bb;
          parent = parent->outer;
        }
      else if (code == GIMPLE_OMP_CONTINUE)
        {
          gcc_assert (parent);
          parent->cont = bb;
        }
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
        {
          /* GIMPLE_OMP_SECTIONS_SWITCH is part of
             GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
        }
      else
        {
          region = new_omp_region (bb, code, parent);
          /* Otherwise...  */
          if (code == GIMPLE_OMP_TARGET)
            {
              switch (gimple_omp_target_kind (stmt))
                {
                case GF_OMP_TARGET_KIND_REGION:
                case GF_OMP_TARGET_KIND_DATA:
                case GF_OMP_TARGET_KIND_OACC_PARALLEL:
                case GF_OMP_TARGET_KIND_OACC_KERNELS:
                case GF_OMP_TARGET_KIND_OACC_DATA:
                case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
                  break;
                case GF_OMP_TARGET_KIND_UPDATE:
                case GF_OMP_TARGET_KIND_ENTER_DATA:
                case GF_OMP_TARGET_KIND_EXIT_DATA:
                case GF_OMP_TARGET_KIND_OACC_UPDATE:
                case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
                case GF_OMP_TARGET_KIND_OACC_DECLARE:
                  /* ..., other than for those stand-alone directives...  */
                  region = NULL;
                  break;
                default:
                  gcc_unreachable ();
                }
            }
          else if (code == GIMPLE_OMP_ORDERED
                   && omp_find_clause (gimple_omp_ordered_clauses
                                         (as_a <gomp_ordered *> (stmt)),
                                       OMP_CLAUSE_DEPEND))
            /* #pragma omp ordered depend is also just a stand-alone
               directive.  */
            region = NULL;
          /* ..., this directive becomes the parent for a new region.  */
          if (region)
            parent = region;
        }
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}

/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a forest of OMP regions, storing it in
   root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.
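   The regions found in the CFG are expanded bottom-up; redundant exit
   barriers are removed before expansion and the CFG is cleaned up
   afterwards.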
   */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}

/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
                    || flag_openmp_simd != 0)
                   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
         But often, there is nothing to do.  */
      if (!gate)
        return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.
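   Returns true when the caller should still add the normal fallthru edge
   from BB to the following block.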
   */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
                       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
                             (as_a <gomp_ordered *> (last)),
                           OMP_CLAUSE_DEPEND))
        cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
        {
        case GF_OMP_TARGET_KIND_REGION:
        case GF_OMP_TARGET_KIND_DATA:
        case GF_OMP_TARGET_KIND_OACC_PARALLEL:
        case GF_OMP_TARGET_KIND_OACC_KERNELS:
        case GF_OMP_TARGET_KIND_OACC_DATA:
        case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
          break;
        case GF_OMP_TARGET_KIND_UPDATE:
        case GF_OMP_TARGET_KIND_ENTER_DATA:
        case GF_OMP_TARGET_KIND_EXIT_DATA:
        case GF_OMP_TARGET_KIND_OACC_UPDATE:
        case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
        case GF_OMP_TARGET_KIND_OACC_DECLARE:
          cur_region = cur_region->outer;
          break;
        default:
          gcc_unreachable ();
        }
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
         somewhere other than the next block.  This will be
         created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
        /* Add an edge corresponding to not scheduling the task
           immediately.  */
        make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
        {
        case GIMPLE_OMP_FOR:
          /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
             succs edges as abnormal to prevent splitting
             them.  */
          single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
          /* Make the loopback edge.  */
          make_edge (bb, single_succ (cur_region->entry),
                     EDGE_ABNORMAL);

          /* Create an edge from GIMPLE_OMP_FOR to exit, which
             corresponds to the case that the body of the loop
             is not executed at all.  */
          make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
          make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
          fallthru = false;
          break;

        case GIMPLE_OMP_SECTIONS:
          /* Wire up the edges into and out of the nested sections.
             */
          {
            basic_block switch_bb = single_succ (cur_region->entry);

            struct omp_region *i;
            for (i = cur_region->inner; i ; i = i->next)
              {
                gcc_assert (i->type == GIMPLE_OMP_SECTION);
                make_edge (switch_bb, i->entry, 0);
                make_edge (i->exit, bb, EDGE_FALLTHRU);
              }

            /* Make the loopback edge to the block with
               GIMPLE_OMP_SECTIONS_SWITCH.  */
            make_edge (bb, switch_bb, 0);

            /* Make the edge from the switch to exit.  */
            make_edge (switch_bb, bb->next_bb, 0);
            fallthru = false;
          }
          break;

        case GIMPLE_OMP_TASK:
          fallthru = true;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
        *region_idx = cur_region->entry->index;
      else
        *region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"