/* Lowering and expansion of OpenMP directives for HSA GPU agents.

   Copyright (C) 2013-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "omp-general.h"
#include "omp-low.h"
#include "omp-grid.h"
#include "gimple-pretty-print.h"

/* Return the lastprivate predicate for a given gridified loop described by
   FD.  */

tree
omp_grid_lastprivate_predicate (struct omp_for_data *fd)
{
  /* When dealing with a gridified loop, we need to check up to three collapsed
     iteration variables but they are not actually captured in this fd.
     Fortunately, we can easily rely on HSA builtins to get this
     information.  */

  tree id, size;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
      && gimple_omp_for_grid_intra_group (fd->for_stmt))
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
      size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
    }
  else
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
      size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
    }
  tree cond = NULL;
  for (int dim = 0; dim < fd->collapse; dim++)
    {
      tree dim_tree = build_int_cstu (unsigned_type_node, dim);
      tree u1 = build_int_cstu (unsigned_type_node, 1);
      tree c2
        = build2 (EQ_EXPR, boolean_type_node,
                  build2 (PLUS_EXPR, unsigned_type_node,
                          build_call_expr (id, 1, dim_tree), u1),
                  build_call_expr (size, 1, dim_tree));
      if (cond)
        cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
      else
        cond = c2;
    }
  return cond;
}

/* Structure describing the basic properties of the loop we are analyzing for
   gridification and of the loop once it is gridified.  */

class grid_prop
{
public:
  /* True when we are doing tiling gridification, i.e. when there is a distinct
     distribute loop over groups and a loop construct over work-items.  False
     when distribute and parallel for loops form a combined construct.  */
  bool tiling;
  /* Location of the target construct for optimization information
     messages.  */
  dump_user_location_t target_loc;
  /* The collapse clause of the involved loops.  The collapse value of all of
     them must be the same for gridification to take place.  */
  size_t collapse;
  /* Group sizes requested by the user, or NULL trees if not requested.
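     Only entries with an index lower than the collapse value above are
     used.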
*/ 100 tree group_sizes[3]; 101 }; 102 103 #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \ 104 "gridified HSA kernel because " 105 106 /* Return true if STMT is an assignment of a register-type into a local 107 VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to 108 any of the trees specifying group sizes there. */ 109 110 static bool 111 grid_safe_assignment_p (gimple *stmt, grid_prop *grid) 112 { 113 gassign *assign = dyn_cast <gassign *> (stmt); 114 if (!assign) 115 return false; 116 if (gimple_clobber_p (assign)) 117 return true; 118 tree lhs = gimple_assign_lhs (assign); 119 if (!VAR_P (lhs) 120 || !is_gimple_reg_type (TREE_TYPE (lhs)) 121 || is_global_var (lhs)) 122 return false; 123 if (grid) 124 for (unsigned i = 0; i < grid->collapse; i++) 125 if (lhs == grid->group_sizes[i]) 126 return false; 127 return true; 128 } 129 130 /* Return true if all statements in SEQ are assignments to local register-type 131 variables that do not hold group size information. */ 132 133 static bool 134 grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid) 135 { 136 if (!seq) 137 return true; 138 139 gimple_stmt_iterator gsi; 140 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) 141 if (!grid_safe_assignment_p (gsi_stmt (gsi), grid)) 142 return false; 143 return true; 144 } 145 146 /* Scan statements in SEQ and call itself recursively on any bind. GRID 147 describes hitherto discovered properties of the loop that is evaluated for 148 possible gridification. If during whole search only assignments to 149 register-type local variables (that do not overwrite group size information) 150 and one single OMP statement is encountered, return true, otherwise return 151 false. RET is where we store any OMP statement encountered. */ 152 153 static bool 154 grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid, 155 const char *name, gimple **ret) 156 { 157 gimple_stmt_iterator gsi; 158 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) 159 { 160 gimple *stmt = gsi_stmt (gsi); 161 162 if (grid_safe_assignment_p (stmt, grid)) 163 continue; 164 if (gbind *bind = dyn_cast <gbind *> (stmt)) 165 { 166 gimple_seq bind_body = gimple_bind_body (bind); 167 if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name, 168 ret)) 169 return false; 170 } 171 else if (is_gimple_omp (stmt)) 172 { 173 if (*ret) 174 { 175 if (dump_enabled_p ()) 176 { 177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 178 GRID_MISSED_MSG_PREFIX "%s construct " 179 "contains multiple OpenMP constructs\n", 180 name); 181 dump_printf_loc (MSG_NOTE, *ret, 182 "The first OpenMP construct within " 183 "a parallel\n"); 184 dump_printf_loc (MSG_NOTE, stmt, 185 "The second OpenMP construct within " 186 "a parallel\n"); 187 } 188 return false; 189 } 190 *ret = stmt; 191 } 192 else 193 { 194 if (dump_enabled_p ()) 195 { 196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 197 GRID_MISSED_MSG_PREFIX "%s construct contains " 198 "a complex statement\n", name); 199 dump_printf_loc (MSG_NOTE, stmt, 200 "This statement cannot be analyzed for " 201 "gridification\n"); 202 } 203 return false; 204 } 205 } 206 return true; 207 } 208 209 /* Scan statements in SEQ and make sure that it and any binds in it contain 210 only assignments to local register-type variables (that do not overwrite 211 group size information) and one OMP construct. If so, return that 212 construct, otherwise return NULL. 
GRID describes hitherto discovered 213 properties of the loop that is evaluated for possible gridification. If 214 dumping is enabled and function fails, use NAME to dump a note with the 215 reason for failure. */ 216 217 static gimple * 218 grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid, 219 const char *name) 220 { 221 if (!seq) 222 { 223 if (dump_enabled_p ()) 224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 225 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n", 226 name); 227 return NULL; 228 } 229 230 gimple *ret = NULL; 231 if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret)) 232 { 233 if (!ret && dump_enabled_p ()) 234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 235 GRID_MISSED_MSG_PREFIX "%s construct does not contain" 236 " any other OpenMP construct\n", name); 237 return ret; 238 } 239 else 240 return NULL; 241 } 242 243 /* Walker function looking for statements there is no point gridifying (and for 244 noreturn function calls which we cannot do). Return non-NULL if such a 245 function is found. */ 246 247 static tree 248 grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, 249 bool *handled_ops_p, 250 struct walk_stmt_info *wi) 251 { 252 *handled_ops_p = false; 253 gimple *stmt = gsi_stmt (*gsi); 254 switch (gimple_code (stmt)) 255 { 256 case GIMPLE_CALL: 257 if (gimple_call_noreturn_p (as_a <gcall *> (stmt))) 258 { 259 *handled_ops_p = true; 260 wi->info = stmt; 261 return error_mark_node; 262 } 263 break; 264 265 /* We may reduce the following list if we find a way to implement the 266 clauses, but now there is no point trying further. */ 267 case GIMPLE_OMP_CRITICAL: 268 case GIMPLE_OMP_TASKGROUP: 269 case GIMPLE_OMP_TASK: 270 case GIMPLE_OMP_SECTION: 271 case GIMPLE_OMP_SECTIONS: 272 case GIMPLE_OMP_SECTIONS_SWITCH: 273 case GIMPLE_OMP_TARGET: 274 case GIMPLE_OMP_ORDERED: 275 *handled_ops_p = true; 276 wi->info = stmt; 277 return error_mark_node; 278 default: 279 break; 280 } 281 return NULL; 282 } 283 284 /* Examine clauses of omp parallel statement PAR and if any prevents 285 gridification, issue a missed-optimization diagnostics and return false, 286 otherwise return true. GRID describes hitherto discovered properties of the 287 loop that is evaluated for possible gridification. */ 288 289 static bool 290 grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc) 291 { 292 tree clauses = gimple_omp_parallel_clauses (par); 293 while (clauses) 294 { 295 switch (OMP_CLAUSE_CODE (clauses)) 296 { 297 case OMP_CLAUSE_NUM_THREADS: 298 if (dump_enabled_p ()) 299 { 300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 301 GRID_MISSED_MSG_PREFIX "because there is " 302 "a num_threads clause of the parallel " 303 "construct\n"); 304 dump_printf_loc (MSG_NOTE, par, 305 "Parallel construct has a num_threads clause\n"); 306 } 307 return false; 308 309 case OMP_CLAUSE_REDUCTION: 310 if (dump_enabled_p ()) 311 { 312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 313 GRID_MISSED_MSG_PREFIX "a reduction clause " 314 "is present\n "); 315 dump_printf_loc (MSG_NOTE, par, 316 "Parallel construct has a reduction clause\n"); 317 } 318 return false; 319 320 default: 321 break; 322 } 323 clauses = OMP_CLAUSE_CHAIN (clauses); 324 } 325 return true; 326 } 327 328 /* Examine clauses and the body of omp loop statement GFOR and if something 329 prevents gridification, issue a missed-optimization diagnostics and return 330 false, otherwise return true. 
GRID describes hitherto discovered properties
   of the loop that is evaluated for possible gridification.  */

static bool
grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
{
  if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
                                                 grid))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "the inner loop "
                           "bounds computation contains a complex "
                           "statement\n");
          dump_printf_loc (MSG_NOTE, gfor,
                           "Loop construct cannot be analyzed for "
                           "gridification\n");
        }
      return false;
    }

  tree clauses = gimple_omp_for_clauses (gfor);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_SCHEDULE:
          if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "the inner loop "
                                   "has a non-automatic schedule clause\n");
                  dump_printf_loc (MSG_NOTE, gfor,
                                   "Loop construct has a non-automatic "
                                   "schedule clause\n");
                }
              return false;
            }
          break;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "a reduction "
                               "clause is present\n");
              dump_printf_loc (MSG_NOTE, gfor,
                               "Loop construct has a reduction "
                               "clause\n");
            }
          return false;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  if (walk_gimple_seq (gimple_omp_body (gfor),
                       grid_find_ungridifiable_statement,
                       NULL, &wi))
    {
      gimple *bad = (gimple *) wi.info;
      if (dump_enabled_p ())
        {
          if (is_gimple_call (bad))
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                             GRID_MISSED_MSG_PREFIX "the inner loop contains "
                             "a call to a noreturn function\n");
          else
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                             GRID_MISSED_MSG_PREFIX "the inner loop contains "
                             "statement %s which cannot be transformed\n",
                             gimple_code_name[(int) gimple_code (bad)]);
          dump_printf_loc (MSG_NOTE, bad,
                           "This statement cannot be analyzed for "
                           "gridification\n");
        }
      return false;
    }
  return true;
}

/* Given a distribute omp construct represented by DIST, which in the original
   source forms a compound construct with a looping construct, return true if
   it can be turned into a gridified HSA kernel.  Otherwise return false.
   GRID describes hitherto discovered properties of the loop that is evaluated
   for possible gridification.
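   As an illustration, the simple pattern roughly corresponds to a target
   region written like

     #pragma omp target teams
     #pragma omp distribute parallel for
     for (int i = 0; i < n; i++)
       ...

   where the distribute and the parallel for loops form one combined
   construct.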
*/

static bool
grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
{
  dump_user_location_t tloc = grid->target_loc;
  gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
                                                         grid, "distribute");
  gomp_parallel *par;
  if (!stmt
      || !(par = dyn_cast <gomp_parallel *> (stmt))
      || !grid_parallel_clauses_gridifiable (par, tloc))
    return false;

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
                                                 "parallel");
  gomp_for *gfor;
  if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
    return false;

  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the inner loop is not "
                         "a simple for loop\n");
      return false;
    }
  gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  return true;
}

/* Given an omp loop statement GFOR, return true if it can participate in
   tiling gridification, i.e. in one where the distribute and parallel for
   loops do not form a compound statement.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  */

static bool
grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
{
  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "an inner loop is not "
                           "a simple for loop\n");
          dump_printf_loc (MSG_NOTE, gfor,
                           "This statement is not a simple for loop\n");
        }
      return false;
    }

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  if (gimple_omp_for_collapse (gfor) != grid->collapse)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "an inner loop does not "
                           "use the same collapse clause\n");
          dump_printf_loc (MSG_NOTE, gfor,
                           "Loop construct uses a different collapse clause\n");
        }
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *loops
    = (struct omp_for_data_loop *)alloca (grid->collapse
                                          * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (gfor, &fd, loops);
  for (unsigned i = 0; i < grid->collapse; i++)
    {
      tree itype, type = TREE_TYPE (fd.loops[i].v);
      if (POINTER_TYPE_P (type))
        itype = signed_type_for (type);
      else
        itype = type;

      tree n1 = fold_convert (itype, fd.loops[i].n1);
      tree n2 = fold_convert (itype, fd.loops[i].n2);
      tree t = build_int_cst (itype,
                              (fd.loops[i].cond_code == LT_EXPR ?
-1 : 1)); 511 t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t); 512 t = fold_build2 (PLUS_EXPR, itype, t, n2); 513 t = fold_build2 (MINUS_EXPR, itype, t, n1); 514 if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR) 515 t = fold_build2 (TRUNC_DIV_EXPR, itype, 516 fold_build1 (NEGATE_EXPR, itype, t), 517 fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step)); 518 else 519 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step); 520 521 if (!operand_equal_p (grid->group_sizes[i], t, 0)) 522 { 523 if (dump_enabled_p ()) 524 { 525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 526 GRID_MISSED_MSG_PREFIX "the distribute and " 527 "an internal loop do not agree on tile size\n"); 528 dump_printf_loc (MSG_NOTE, gfor, 529 "Loop construct does not seem to loop over " 530 "a tile size\n"); 531 } 532 return false; 533 } 534 } 535 return true; 536 } 537 538 /* Facing a call to FNDECL in the body of a distribute construct, return true 539 if we can handle it or false if it precludes gridification. */ 540 541 static bool 542 grid_call_permissible_in_distribute_p (tree fndecl) 543 { 544 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) 545 return true; 546 547 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); 548 if (strstr (name, "omp_") != name) 549 return false; 550 551 if ((strcmp (name, "omp_get_thread_num") == 0) 552 || (strcmp (name, "omp_get_num_threads") == 0) 553 || (strcmp (name, "omp_get_num_teams") == 0) 554 || (strcmp (name, "omp_get_team_num") == 0) 555 || (strcmp (name, "omp_get_level") == 0) 556 || (strcmp (name, "omp_get_active_level") == 0) 557 || (strcmp (name, "omp_in_parallel") == 0)) 558 return true; 559 560 return false; 561 } 562 563 /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body 564 of a distribute construct that is pointed at by GSI, modify it as necessary 565 for gridification. If the statement itself got removed, return true. */ 566 567 static bool 568 grid_handle_call_in_distribute (gimple_stmt_iterator *gsi) 569 { 570 gimple *stmt = gsi_stmt (*gsi); 571 tree fndecl = gimple_call_fndecl (stmt); 572 gcc_checking_assert (stmt); 573 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) 574 return false; 575 576 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); 577 if ((strcmp (name, "omp_get_thread_num") == 0) 578 || (strcmp (name, "omp_get_level") == 0) 579 || (strcmp (name, "omp_get_active_level") == 0) 580 || (strcmp (name, "omp_in_parallel") == 0)) 581 { 582 tree lhs = gimple_call_lhs (stmt); 583 if (lhs) 584 { 585 gassign *assign 586 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); 587 gsi_insert_before (gsi, assign, GSI_SAME_STMT); 588 } 589 gsi_remove (gsi, true); 590 return true; 591 } 592 593 /* The rest of the omp functions can stay as they are, HSA back-end will 594 handle them correctly. */ 595 gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0) 596 || (strcmp (name, "omp_get_num_teams") == 0) 597 || (strcmp (name, "omp_get_team_num") == 0)); 598 return false; 599 } 600 601 /* Given a sequence of statements within a distribute omp construct or a 602 parallel construct, which in the original source does not form a compound 603 construct with a looping construct, return true if it does not prevent us 604 from turning it into a gridified HSA kernel. Otherwise return false. GRID 605 describes hitherto discovered properties of the loop that is evaluated for 606 possible gridification. 
IN_PARALLEL must be true if seq is within a 607 parallel construct and flase if it is only within a distribute 608 construct. */ 609 610 static bool 611 grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid, 612 bool in_parallel) 613 { 614 gimple_stmt_iterator gsi; 615 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) 616 { 617 gimple *stmt = gsi_stmt (gsi); 618 619 if (grid_safe_assignment_p (stmt, grid) 620 || gimple_code (stmt) == GIMPLE_GOTO 621 || gimple_code (stmt) == GIMPLE_LABEL 622 || gimple_code (stmt) == GIMPLE_COND) 623 continue; 624 else if (gbind *bind = dyn_cast <gbind *> (stmt)) 625 { 626 if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind), 627 grid, in_parallel)) 628 return false; 629 continue; 630 } 631 else if (gtry *try_stmt = dyn_cast <gtry *> (stmt)) 632 { 633 if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH) 634 { 635 if (dump_enabled_p ()) 636 { 637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 638 GRID_MISSED_MSG_PREFIX "the distribute " 639 "construct contains a try..catch region\n"); 640 dump_printf_loc (MSG_NOTE, try_stmt, 641 "This statement cannot be analyzed for " 642 "tiled gridification\n"); 643 } 644 return false; 645 } 646 if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt), 647 grid, in_parallel)) 648 return false; 649 if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt), 650 grid, in_parallel)) 651 return false; 652 continue; 653 } 654 else if (is_gimple_call (stmt)) 655 { 656 tree fndecl = gimple_call_fndecl (stmt); 657 if (fndecl && grid_call_permissible_in_distribute_p (fndecl)) 658 continue; 659 660 if (dump_enabled_p ()) 661 { 662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 663 GRID_MISSED_MSG_PREFIX "the distribute " 664 "construct contains a call\n"); 665 dump_printf_loc (MSG_NOTE, stmt, 666 "This statement cannot be analyzed for " 667 "tiled gridification\n"); 668 } 669 return false; 670 } 671 else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt)) 672 { 673 if (in_parallel) 674 { 675 if (dump_enabled_p ()) 676 { 677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 678 GRID_MISSED_MSG_PREFIX "a parallel " 679 "construct contains another parallel " 680 "construct\n"); 681 dump_printf_loc (MSG_NOTE, stmt, 682 "This parallel construct is nested in " 683 "another one\n"); 684 } 685 return false; 686 } 687 if (!grid_parallel_clauses_gridifiable (par, grid->target_loc) 688 || !grid_dist_follows_tiling_pattern (gimple_omp_body (par), 689 grid, true)) 690 return false; 691 } 692 else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt)) 693 { 694 if (!in_parallel) 695 { 696 if (dump_enabled_p ()) 697 { 698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 699 GRID_MISSED_MSG_PREFIX "a loop " 700 "construct is not nested within a parallel " 701 "construct\n"); 702 dump_printf_loc (MSG_NOTE, stmt, 703 "This loop construct is not nested in " 704 "a parallel construct\n"); 705 } 706 return false; 707 } 708 if (!grid_gfor_follows_tiling_pattern (gfor, grid)) 709 return false; 710 } 711 else 712 { 713 if (dump_enabled_p ()) 714 { 715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 716 GRID_MISSED_MSG_PREFIX "the distribute " 717 "construct contains a complex statement\n"); 718 dump_printf_loc (MSG_NOTE, stmt, 719 "This statement cannot be analyzed for " 720 "tiled gridification\n"); 721 } 722 return false; 723 } 724 } 725 return true; 726 } 727 728 /* If TARGET follows a pattern that can be turned into a gridified HSA 
kernel, 729 return true, otherwise return false. In the case of success, also fill in 730 GRID with information describing the kernel grid. */ 731 732 static bool 733 grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid) 734 { 735 if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION) 736 return false; 737 738 dump_user_location_t tloc = target; 739 grid->target_loc = tloc; 740 gimple *stmt 741 = grid_find_single_omp_among_assignments (gimple_omp_body (target), 742 grid, "target"); 743 if (!stmt) 744 return false; 745 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); 746 tree group_size = NULL; 747 if (!teams) 748 { 749 if (dump_enabled_p ()) 750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 751 GRID_MISSED_MSG_PREFIX "it does not have a sole " 752 "teams construct in it.\n"); 753 return false; 754 } 755 756 tree clauses = gimple_omp_teams_clauses (teams); 757 while (clauses) 758 { 759 switch (OMP_CLAUSE_CODE (clauses)) 760 { 761 case OMP_CLAUSE_NUM_TEAMS: 762 if (dump_enabled_p ()) 763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 764 GRID_MISSED_MSG_PREFIX "the teams construct " 765 "contains a num_teams clause\n "); 766 return false; 767 768 case OMP_CLAUSE_REDUCTION: 769 if (dump_enabled_p ()) 770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 771 GRID_MISSED_MSG_PREFIX "a reduction " 772 "clause is present\n "); 773 return false; 774 775 case OMP_CLAUSE_THREAD_LIMIT: 776 if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0))) 777 group_size = OMP_CLAUSE_OPERAND (clauses, 0); 778 break; 779 780 default: 781 break; 782 } 783 clauses = OMP_CLAUSE_CHAIN (clauses); 784 } 785 786 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid, 787 "teams"); 788 if (!stmt) 789 return false; 790 gomp_for *dist = dyn_cast <gomp_for *> (stmt); 791 if (!dist) 792 { 793 if (dump_enabled_p ()) 794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 795 GRID_MISSED_MSG_PREFIX "the teams construct does not " 796 "have a single distribute construct in it.\n"); 797 return false; 798 } 799 800 gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE); 801 802 grid->collapse = gimple_omp_for_collapse (dist); 803 if (grid->collapse > 3) 804 { 805 if (dump_enabled_p ()) 806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 807 GRID_MISSED_MSG_PREFIX "the distribute construct " 808 "contains collapse clause with parameter greater " 809 "than 3\n"); 810 return false; 811 } 812 813 struct omp_for_data fd; 814 struct omp_for_data_loop *dist_loops 815 = (struct omp_for_data_loop *)alloca (grid->collapse 816 * sizeof (struct omp_for_data_loop)); 817 omp_extract_for_data (dist, &fd, dist_loops); 818 if (fd.chunk_size) 819 { 820 if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0)) 821 { 822 if (dump_enabled_p ()) 823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 824 GRID_MISSED_MSG_PREFIX "the teams " 825 "thread limit is different from distribute " 826 "schedule chunk\n"); 827 return false; 828 } 829 group_size = fd.chunk_size; 830 } 831 if (group_size && grid->collapse > 1) 832 { 833 if (dump_enabled_p ()) 834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 835 GRID_MISSED_MSG_PREFIX "group size cannot be " 836 "set using thread_limit or schedule clauses " 837 "when also using a collapse clause greater than 1\n"); 838 return false; 839 } 840 841 if (gimple_omp_for_combined_p (dist)) 842 { 843 grid->tiling = false; 844 grid->group_sizes[0] = group_size; 845 for (unsigned i = 1; i < grid->collapse; i++) 846 grid->group_sizes[i] = NULL; 
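      /* In this combined case only the first dimension can carry a group size
         (a request together with a collapse value greater than one has been
         rejected above); for the remaining dimensions a zero group size,
         i.e. no specific request, is emitted further below.  */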
      return grid_dist_follows_simple_pattern (dist, grid);
    }
  else
    {
      grid->tiling = true;
      if (group_size)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "group size cannot be set "
                             "using thread_limit or schedule clauses when "
                             "distribute and loop constructs do not form "
                             "one combined construct\n");
          return false;
        }
      for (unsigned i = 0; i < grid->collapse; i++)
        {
          if (fd.loops[i].cond_code == GT_EXPR)
            grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
                                                TREE_TYPE (fd.loops[i].step),
                                                fd.loops[i].step);
          else
            grid->group_sizes[i] = fd.loops[i].step;
        }
      return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
                                               false);
    }
}

/* Operand walker, used to remap pre-body declarations according to a hash map
   provided in DATA.  */

static tree
grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
{
  tree t = *tp;

  if (DECL_P (t) || TYPE_P (t))
    *walk_subtrees = 0;
  else
    *walk_subtrees = 1;

  if (VAR_P (t))
    {
      struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
      hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
      tree *repl = declmap->get (t);
      if (repl)
        *tp = *repl;
    }
  return NULL_TREE;
}

/* Identifiers of segments into which a particular variable should be placed
   when gridifying.  */

enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
                       GRID_SEGMENT_GLOBAL};

/* Mark VAR so that it is eventually placed into SEGMENT.  */

static void
grid_mark_variable_segment (tree var, enum grid_var_segment segment)
{
  /* Making addressable variables non-addressable would require that we
     re-gimplify all their uses.  Fortunately, we do not have to do this
     because if they are not addressable, it means they are not used in atomic
     or parallel statements and so relaxed GPU consistency rules mean we can
     just keep them private.  */
  if (!TREE_ADDRESSABLE (var))
    return;

  switch (segment)
    {
    case GRID_SEGMENT_GROUP:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
                                         NULL, DECL_ATTRIBUTES (var));
      break;
    case GRID_SEGMENT_GLOBAL:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
                                         NULL, DECL_ATTRIBUTES (var));
      break;
    default:
      gcc_unreachable ();
    }

  if (!TREE_STATIC (var))
    {
      TREE_STATIC (var) = 1;
      const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var));
      SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix));
      varpool_node::finalize_decl (var);
    }
}

/* Copy leading register-type assignments to local variables in SRC to just
   before DST, creating temporaries, adjusting the mapping of operands in WI
   and remapping operands as necessary.  Add any new temporaries to TGT_BIND.
   Return the first statement that does not conform to grid_safe_assignment_p
   or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
   variables in traversed bind statements so that they are put into the
   appropriate segment.
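   These leading assignments typically compute loop bounds and clause
   operands; copying them in front of the target statement makes the values
   they produce available when the kernel grid sizes are computed there.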
*/

static gimple *
grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
                                     gbind *tgt_bind,
                                     enum grid_var_segment var_segment,
                                     struct walk_stmt_info *wi)
{
  hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          gimple *r = grid_copy_leading_local_assignments
            (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);

          if (var_segment != GRID_SEGMENT_PRIVATE)
            for (tree var = gimple_bind_vars (bind);
                 var;
                 var = DECL_CHAIN (var))
              grid_mark_variable_segment (var, var_segment);
          if (r)
            return r;
          else
            continue;
        }
      if (!grid_safe_assignment_p (stmt, NULL))
        return stmt;
      tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
      tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
                                 TREE_TYPE (lhs));
      DECL_CONTEXT (repl) = current_function_decl;
      gimple_bind_append_vars (tgt_bind, repl);

      declmap->put (lhs, repl);
      gassign *copy = as_a <gassign *> (gimple_copy (stmt));
      walk_gimple_op (copy, grid_remap_prebody_decls, wi);
      gsi_insert_before (dst, copy, GSI_SAME_STMT);
    }
  return NULL;
}

/* Statement walker function to make adjustments to statements within the
   gridified kernel copy.  */

static tree
grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
                        struct walk_stmt_info *)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gimple_code (stmt) == GIMPLE_OMP_FOR
      && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
    {
      gomp_for *loop = as_a <gomp_for *> (stmt);
      tree clauses = gimple_omp_for_clauses (loop);
      tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
      if (cl)
        OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
      else
        {
          tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
          OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
          OMP_CLAUSE_CHAIN (c) = clauses;
          gimple_omp_for_set_clauses (loop, c);
        }
    }
  return NULL_TREE;
}

/* Given a PARLOOP that is a normal for looping construct but also a part of a
   combined construct with a simd loop, eliminate the simd loop.  */

static void
grid_eliminate_combined_simd_part (gomp_for *parloop)
{
  struct walk_stmt_info wi;

  memset (&wi, 0, sizeof (wi));
  wi.val_only = true;
  enum gf_mask msk = GF_OMP_FOR_KIND_SIMD;
  wi.info = (void *) &msk;
  walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
  gimple *stmt = (gimple *) wi.info;
  /* We expect that the SIMD loop is the only statement in the parallel
     loop.  */
  gcc_assert (stmt
              && gimple_code (stmt) == GIMPLE_OMP_FOR
              && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
              && gimple_omp_for_combined_into_p (stmt)
              && !gimple_omp_for_combined_p (stmt));
  gomp_for *simd = as_a <gomp_for *> (stmt);

  /* Copy over the iteration properties because the body refers to the index in
     the bottom-most loop.
*/
  unsigned i, collapse = gimple_omp_for_collapse (parloop);
  gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
  for (i = 0; i < collapse; i++)
    {
      gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
      gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
      gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
      gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
    }

  tree *tgt = gimple_omp_for_clauses_ptr (parloop);
  while (*tgt)
    tgt = &OMP_CLAUSE_CHAIN (*tgt);

  /* Copy over all clauses, except for linear clauses, which are turned into
     private clauses, and all other simd-specific clauses, which are
     ignored.  */
  tree *pc = gimple_omp_for_clauses_ptr (simd);
  while (*pc)
    {
      tree c = *pc;
      switch (OMP_CLAUSE_CODE (c))
        {
        case OMP_CLAUSE_LINEAR:
          {
            tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
            OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
            OMP_CLAUSE_CHAIN (priv) = NULL;
            *tgt = priv;
            tgt = &OMP_CLAUSE_CHAIN (priv);
            pc = &OMP_CLAUSE_CHAIN (c);
            break;
          }

        case OMP_CLAUSE_SAFELEN:
        case OMP_CLAUSE_SIMDLEN:
        case OMP_CLAUSE_ALIGNED:
          pc = &OMP_CLAUSE_CHAIN (c);
          break;

        default:
          *pc = OMP_CLAUSE_CHAIN (c);
          OMP_CLAUSE_CHAIN (c) = NULL;
          *tgt = c;
          tgt = &OMP_CLAUSE_CHAIN (c);
          break;
        }
    }

  /* Finally, throw away the simd and mark the parallel loop as not
     combined.  */
  gimple_omp_set_body (parloop, gimple_omp_body (simd));
  gimple_omp_for_set_combined_p (parloop, false);
}

/* Statement walker function marking loop constructs as grid loops that
   represent the threads of a particular thread group.
*/ 1104 1105 static tree 1106 grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p, 1107 struct walk_stmt_info *wi_in) 1108 { 1109 *handled_ops_p = false; 1110 if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi))) 1111 { 1112 *handled_ops_p = true; 1113 gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP); 1114 gimple_omp_for_set_grid_intra_group (loop, true); 1115 if (gimple_omp_for_combined_p (loop)) 1116 grid_eliminate_combined_simd_part (loop); 1117 1118 struct walk_stmt_info body_wi; 1119 memset (&body_wi, 0, sizeof (body_wi)); 1120 walk_gimple_seq_mod (gimple_omp_body_ptr (loop), 1121 grid_process_grid_body, NULL, &body_wi); 1122 1123 gbind *bind = (gbind *) wi_in->info; 1124 tree c; 1125 for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c)) 1126 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE) 1127 { 1128 push_gimplify_context (); 1129 tree ov = OMP_CLAUSE_DECL (c); 1130 tree gv = copy_var_decl (ov, create_tmp_var_name (NULL), 1131 TREE_TYPE (ov)); 1132 1133 grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP); 1134 DECL_CONTEXT (gv) = current_function_decl; 1135 gimple_bind_append_vars (bind, gv); 1136 tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov); 1137 gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c)); 1138 x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv); 1139 gimple_seq l = NULL; 1140 gimplify_and_add (x, &l); 1141 gsi_insert_seq_after (gsi, l, GSI_SAME_STMT); 1142 pop_gimplify_context (bind); 1143 } 1144 } 1145 return NULL_TREE; 1146 } 1147 1148 /* Statement walker function marking all parallels as grid_phony and loops as 1149 grid ones representing threads of a particular thread group. */ 1150 1151 static tree 1152 grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi, 1153 bool *handled_ops_p, 1154 struct walk_stmt_info *wi_in) 1155 { 1156 *handled_ops_p = false; 1157 wi_in->removed_stmt = false; 1158 gimple *stmt = gsi_stmt (*gsi); 1159 if (gbind *bind = dyn_cast <gbind *> (stmt)) 1160 { 1161 for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) 1162 grid_mark_variable_segment (var, GRID_SEGMENT_GROUP); 1163 } 1164 else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt)) 1165 { 1166 *handled_ops_p = true; 1167 gimple_omp_parallel_set_grid_phony (parallel, true); 1168 1169 gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); 1170 gimple_bind_set_body (new_bind, gimple_omp_body (parallel)); 1171 gimple_seq s = NULL; 1172 gimple_seq_add_stmt (&s, new_bind); 1173 gimple_omp_set_body (parallel, s); 1174 1175 struct walk_stmt_info wi_par; 1176 memset (&wi_par, 0, sizeof (wi_par)); 1177 wi_par.info = new_bind; 1178 walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind), 1179 grid_mark_tiling_loops, NULL, &wi_par); 1180 } 1181 else if (is_a <gcall *> (stmt)) 1182 wi_in->removed_stmt = grid_handle_call_in_distribute (gsi); 1183 return NULL_TREE; 1184 } 1185 1186 /* Given freshly copied top level kernel SEQ, identify the individual OMP 1187 components, mark them as part of kernel, copy assignment leading to them 1188 just before DST, remapping them using WI and adding new temporaries to 1189 TGT_BIND, and return the loop that will be used for kernel dispatch. 
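   In the tiling case the distribute loop itself becomes the grid loop
   iterating over groups and is returned; otherwise the distribute and
   parallel statements are marked as phony and the innermost for loop of the
   combined construct becomes the grid loop that is returned.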
*/ 1190 1191 static gomp_for * 1192 grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq, 1193 gimple_stmt_iterator *dst, 1194 gbind *tgt_bind, struct walk_stmt_info *wi) 1195 { 1196 gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind, 1197 GRID_SEGMENT_GLOBAL, wi); 1198 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); 1199 gcc_assert (teams); 1200 gimple_omp_teams_set_grid_phony (teams, true); 1201 stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst, 1202 tgt_bind, GRID_SEGMENT_GLOBAL, 1203 wi); 1204 gcc_checking_assert (stmt); 1205 gomp_for *dist = dyn_cast <gomp_for *> (stmt); 1206 gcc_assert (dist); 1207 gimple_seq prebody = gimple_omp_for_pre_body (dist); 1208 if (prebody) 1209 grid_copy_leading_local_assignments (prebody, dst, tgt_bind, 1210 GRID_SEGMENT_GROUP, wi); 1211 1212 if (grid->tiling) 1213 { 1214 gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP); 1215 gimple_omp_for_set_grid_group_iter (dist, true); 1216 1217 struct walk_stmt_info wi_tiled; 1218 memset (&wi_tiled, 0, sizeof (wi_tiled)); 1219 walk_gimple_seq_mod (gimple_omp_body_ptr (dist), 1220 grid_mark_tiling_parallels_and_loops, NULL, 1221 &wi_tiled); 1222 return dist; 1223 } 1224 else 1225 { 1226 gimple_omp_for_set_grid_phony (dist, true); 1227 stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst, 1228 tgt_bind, 1229 GRID_SEGMENT_PRIVATE, wi); 1230 gcc_checking_assert (stmt); 1231 gomp_parallel *parallel = as_a <gomp_parallel *> (stmt); 1232 gimple_omp_parallel_set_grid_phony (parallel, true); 1233 stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel), 1234 dst, tgt_bind, 1235 GRID_SEGMENT_PRIVATE, wi); 1236 gomp_for *inner_loop = as_a <gomp_for *> (stmt); 1237 gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP); 1238 prebody = gimple_omp_for_pre_body (inner_loop); 1239 if (prebody) 1240 grid_copy_leading_local_assignments (prebody, dst, tgt_bind, 1241 GRID_SEGMENT_PRIVATE, wi); 1242 1243 if (gimple_omp_for_combined_p (inner_loop)) 1244 grid_eliminate_combined_simd_part (inner_loop); 1245 struct walk_stmt_info body_wi; 1246 memset (&body_wi, 0, sizeof (body_wi)); 1247 walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop), 1248 grid_process_grid_body, NULL, &body_wi); 1249 1250 return inner_loop; 1251 } 1252 } 1253 1254 /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern, 1255 create a GPU kernel for it. GSI must point to the same statement, TGT_BIND 1256 is the bind into which temporaries inserted before TARGET should be 1257 added. */ 1258 1259 static void 1260 grid_attempt_target_gridification (gomp_target *target, 1261 gimple_stmt_iterator *gsi, 1262 gbind *tgt_bind) 1263 { 1264 /* removed group_size */ 1265 grid_prop grid = {}; 1266 if (!target || !grid_target_follows_gridifiable_pattern (target, &grid)) 1267 return; 1268 1269 location_t loc = gimple_location (target); 1270 if (dump_enabled_p ()) 1271 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target, 1272 "Target construct will be turned into a gridified HSA " 1273 "kernel\n"); 1274 1275 /* Copy target body to a GPUKERNEL construct: */ 1276 gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals 1277 (gimple_omp_body (target)); 1278 1279 hash_map<tree, tree> *declmap = new hash_map<tree, tree>; 1280 struct walk_stmt_info wi; 1281 memset (&wi, 0, sizeof (struct walk_stmt_info)); 1282 wi.info = declmap; 1283 1284 /* Copy assignments in between OMP statements before target, mark OMP 1285 statements within copy appropriately. 
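   The returned loop is subsequently used to compute the grid and group sizes
   of each collapsed dimension.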
*/ 1286 gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi, 1287 tgt_bind, &wi); 1288 1289 gbind *old_bind 1290 = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target))); 1291 gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq)); 1292 tree new_block = gimple_bind_block (new_bind); 1293 tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind)); 1294 BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block); 1295 BLOCK_SUBBLOCKS (enc_block) = new_block; 1296 BLOCK_SUPERCONTEXT (new_block) = enc_block; 1297 gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq); 1298 gimple_seq_add_stmt 1299 (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))), 1300 gpukernel); 1301 1302 for (size_t i = 0; i < grid.collapse; i++) 1303 walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL); 1304 push_gimplify_context (); 1305 for (size_t i = 0; i < grid.collapse; i++) 1306 { 1307 tree index_var = gimple_omp_for_index (inner_loop, i); 1308 tree itype, type = TREE_TYPE (index_var); 1309 if (POINTER_TYPE_P (type)) 1310 itype = signed_type_for (type); 1311 else 1312 itype = type; 1313 1314 enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i); 1315 tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i)); 1316 walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL); 1317 tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i)); 1318 walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL); 1319 tree step 1320 = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i)); 1321 omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step); 1322 n1 = fold_convert (itype, n1); 1323 n2 = fold_convert (itype, n2); 1324 1325 tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2); 1326 1327 tree t = build_int_cst (itype, (cond_code == LT_EXPR ? 
-1 : 1)); 1328 t = fold_build2 (PLUS_EXPR, itype, step, t); 1329 t = fold_build2 (PLUS_EXPR, itype, t, n2); 1330 t = fold_build2 (MINUS_EXPR, itype, t, n1); 1331 if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR) 1332 t = fold_build2 (TRUNC_DIV_EXPR, itype, 1333 fold_build1 (NEGATE_EXPR, itype, t), 1334 fold_build1 (NEGATE_EXPR, itype, step)); 1335 else 1336 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 1337 t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype)); 1338 if (grid.tiling) 1339 { 1340 if (cond_code == GT_EXPR) 1341 step = fold_build1 (NEGATE_EXPR, itype, step); 1342 t = fold_build2 (MULT_EXPR, itype, t, step); 1343 } 1344 1345 tree gs = fold_convert (uint32_type_node, t); 1346 gimple_seq tmpseq = NULL; 1347 gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue); 1348 if (!gimple_seq_empty_p (tmpseq)) 1349 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); 1350 1351 tree ws; 1352 if (grid.group_sizes[i]) 1353 { 1354 ws = fold_convert (uint32_type_node, grid.group_sizes[i]); 1355 tmpseq = NULL; 1356 gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue); 1357 if (!gimple_seq_empty_p (tmpseq)) 1358 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); 1359 } 1360 else 1361 ws = build_zero_cst (uint32_type_node); 1362 1363 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_); 1364 OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i; 1365 OMP_CLAUSE__GRIDDIM__SIZE (c) = gs; 1366 OMP_CLAUSE__GRIDDIM__GROUP (c) = ws; 1367 OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target); 1368 gimple_omp_target_set_clauses (target, c); 1369 } 1370 pop_gimplify_context (tgt_bind); 1371 delete declmap; 1372 return; 1373 } 1374 1375 /* Walker function doing all the work for create_target_kernels. */ 1376 1377 static tree 1378 grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi, 1379 bool *handled_ops_p, 1380 struct walk_stmt_info *incoming) 1381 { 1382 *handled_ops_p = false; 1383 1384 gimple *stmt = gsi_stmt (*gsi); 1385 gomp_target *target = dyn_cast <gomp_target *> (stmt); 1386 if (target) 1387 { 1388 gbind *tgt_bind = (gbind *) incoming->info; 1389 gcc_checking_assert (tgt_bind); 1390 grid_attempt_target_gridification (target, gsi, tgt_bind); 1391 return NULL_TREE; 1392 } 1393 gbind *bind = dyn_cast <gbind *> (stmt); 1394 if (bind) 1395 { 1396 *handled_ops_p = true; 1397 struct walk_stmt_info wi; 1398 memset (&wi, 0, sizeof (wi)); 1399 wi.info = bind; 1400 walk_gimple_seq_mod (gimple_bind_body_ptr (bind), 1401 grid_gridify_all_targets_stmt, NULL, &wi); 1402 } 1403 return NULL_TREE; 1404 } 1405 1406 /* Attempt to gridify all target constructs in BODY_P. All such targets will 1407 have their bodies duplicated, with the new copy being put into a 1408 gimple_omp_grid_body statement. All kernel-related construct within the 1409 grid_body will be marked with phony flags or kernel kinds. Moreover, some 1410 re-structuring is often needed, such as copying pre-bodies before the target 1411 construct so that kernel grid sizes can be computed. */ 1412 1413 void 1414 omp_grid_gridify_all_targets (gimple_seq *body_p) 1415 { 1416 struct walk_stmt_info wi; 1417 memset (&wi, 0, sizeof (wi)); 1418 walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi); 1419 } 1420