1 /* Lowering and expansion of OpenMP directives for HSA GPU agents. 2 3 Copyright (C) 2013-2019 Free Software Foundation, Inc. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 3, or (at your option) any later 10 version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "backend.h" 25 #include "tree.h" 26 #include "gimple.h" 27 #include "tree-pass.h" 28 #include "ssa.h" 29 #include "cgraph.h" 30 #include "pretty-print.h" 31 #include "fold-const.h" 32 #include "gimplify.h" 33 #include "gimple-iterator.h" 34 #include "gimple-walk.h" 35 #include "tree-inline.h" 36 #include "langhooks.h" 37 #include "omp-general.h" 38 #include "omp-low.h" 39 #include "omp-grid.h" 40 #include "gimple-pretty-print.h" 41 42 /* Return the lastprivate predicate for a given gridified loop described by 43 FD). */ 44 45 tree 46 omp_grid_lastprivate_predicate (struct omp_for_data *fd) 47 { 48 /* When dealing with a gridified loop, we need to check up to three collapsed 49 iteration variables but they are not actually captured in this fd. 50 Fortunately, we can easily rely on HSA builtins to get this 51 information. */ 52 53 tree id, size; 54 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP 55 && gimple_omp_for_grid_intra_group (fd->for_stmt)) 56 { 57 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID); 58 size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE); 59 } 60 else 61 { 62 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID); 63 size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE); 64 } 65 tree cond = NULL; 66 for (int dim = 0; dim < fd->collapse; dim++) 67 { 68 tree dim_tree = build_int_cstu (unsigned_type_node, dim); 69 tree u1 = build_int_cstu (unsigned_type_node, 1); 70 tree c2 71 = build2 (EQ_EXPR, boolean_type_node, 72 build2 (PLUS_EXPR, unsigned_type_node, 73 build_call_expr (id, 1, dim_tree), u1), 74 build_call_expr (size, 1, dim_tree)); 75 if (cond) 76 cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2); 77 else 78 cond = c2; 79 } 80 return cond; 81 } 82 83 /* Structure describing the basic properties of the loop we ara analyzing 84 whether it can be gridified and when it is gridified. */ 85 86 struct grid_prop 87 { 88 /* True when we are doing tiling gridification, i.e. when there is a distinct 89 distribute loop over groups and a loop construct over work-items. False 90 when distribute and parallel for loops form a combined construct. */ 91 bool tiling; 92 /* Location of the target construct for optimization information 93 messages. */ 94 dump_user_location_t target_loc; 95 /* The collapse clause of the involved loops. Collapse value of all of them 96 must be the same for gridification to take place. */ 97 size_t collapse; 98 /* Group sizes, if requested by the user or NULL if not requested. 
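Only the first COLLAPSE elements (at most three) are meaningful.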
*/ 99 tree group_sizes[3]; 100 }; 101 102 #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \ 103 "gridified HSA kernel because " 104 105 /* Return true if STMT is an assignment of a register-type into a local 106 VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to 107 any of the trees specifying group sizes there. */ 108 109 static bool 110 grid_safe_assignment_p (gimple *stmt, grid_prop *grid) 111 { 112 gassign *assign = dyn_cast <gassign *> (stmt); 113 if (!assign) 114 return false; 115 if (gimple_clobber_p (assign)) 116 return true; 117 tree lhs = gimple_assign_lhs (assign); 118 if (!VAR_P (lhs) 119 || !is_gimple_reg_type (TREE_TYPE (lhs)) 120 || is_global_var (lhs)) 121 return false; 122 if (grid) 123 for (unsigned i = 0; i < grid->collapse; i++) 124 if (lhs == grid->group_sizes[i]) 125 return false; 126 return true; 127 } 128 129 /* Return true if all statements in SEQ are assignments to local register-type 130 variables that do not hold group size information. */ 131 132 static bool 133 grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid) 134 { 135 if (!seq) 136 return true; 137 138 gimple_stmt_iterator gsi; 139 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) 140 if (!grid_safe_assignment_p (gsi_stmt (gsi), grid)) 141 return false; 142 return true; 143 } 144 145 /* Scan statements in SEQ and call itself recursively on any bind. GRID 146 describes hitherto discovered properties of the loop that is evaluated for 147 possible gridification. If during whole search only assignments to 148 register-type local variables (that do not overwrite group size information) 149 and one single OMP statement is encountered, return true, otherwise return 150 false. RET is where we store any OMP statement encountered. */ 151 152 static bool 153 grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid, 154 const char *name, gimple **ret) 155 { 156 gimple_stmt_iterator gsi; 157 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) 158 { 159 gimple *stmt = gsi_stmt (gsi); 160 161 if (grid_safe_assignment_p (stmt, grid)) 162 continue; 163 if (gbind *bind = dyn_cast <gbind *> (stmt)) 164 { 165 gimple_seq bind_body = gimple_bind_body (bind); 166 if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name, 167 ret)) 168 return false; 169 } 170 else if (is_gimple_omp (stmt)) 171 { 172 if (*ret) 173 { 174 if (dump_enabled_p ()) 175 { 176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 177 GRID_MISSED_MSG_PREFIX "%s construct " 178 "contains multiple OpenMP constructs\n", 179 name); 180 dump_printf_loc (MSG_NOTE, *ret, 181 "The first OpenMP construct within " 182 "a parallel\n"); 183 dump_printf_loc (MSG_NOTE, stmt, 184 "The second OpenMP construct within " 185 "a parallel\n"); 186 } 187 return false; 188 } 189 *ret = stmt; 190 } 191 else 192 { 193 if (dump_enabled_p ()) 194 { 195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 196 GRID_MISSED_MSG_PREFIX "%s construct contains " 197 "a complex statement\n", name); 198 dump_printf_loc (MSG_NOTE, stmt, 199 "This statement cannot be analyzed for " 200 "gridification\n"); 201 } 202 return false; 203 } 204 } 205 return true; 206 } 207 208 /* Scan statements in SEQ and make sure that it and any binds in it contain 209 only assignments to local register-type variables (that do not overwrite 210 group size information) and one OMP construct. If so, return that 211 construct, otherwise return NULL. 
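Typically such a body consists of a few register assignments produced by
   gimplifying clause operands, followed by the single nested OMP statement.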
GRID describes hitherto discovered 212 properties of the loop that is evaluated for possible gridification. If 213 dumping is enabled and function fails, use NAME to dump a note with the 214 reason for failure. */ 215 216 static gimple * 217 grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid, 218 const char *name) 219 { 220 if (!seq) 221 { 222 if (dump_enabled_p ()) 223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 224 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n", 225 name); 226 return NULL; 227 } 228 229 gimple *ret = NULL; 230 if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret)) 231 { 232 if (!ret && dump_enabled_p ()) 233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 234 GRID_MISSED_MSG_PREFIX "%s construct does not contain" 235 " any other OpenMP construct\n", name); 236 return ret; 237 } 238 else 239 return NULL; 240 } 241 242 /* Walker function looking for statements there is no point gridifying (and for 243 noreturn function calls which we cannot do). Return non-NULL if such a 244 function is found. */ 245 246 static tree 247 grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, 248 bool *handled_ops_p, 249 struct walk_stmt_info *wi) 250 { 251 *handled_ops_p = false; 252 gimple *stmt = gsi_stmt (*gsi); 253 switch (gimple_code (stmt)) 254 { 255 case GIMPLE_CALL: 256 if (gimple_call_noreturn_p (as_a <gcall *> (stmt))) 257 { 258 *handled_ops_p = true; 259 wi->info = stmt; 260 return error_mark_node; 261 } 262 break; 263 264 /* We may reduce the following list if we find a way to implement the 265 clauses, but now there is no point trying further. */ 266 case GIMPLE_OMP_CRITICAL: 267 case GIMPLE_OMP_TASKGROUP: 268 case GIMPLE_OMP_TASK: 269 case GIMPLE_OMP_SECTION: 270 case GIMPLE_OMP_SECTIONS: 271 case GIMPLE_OMP_SECTIONS_SWITCH: 272 case GIMPLE_OMP_TARGET: 273 case GIMPLE_OMP_ORDERED: 274 *handled_ops_p = true; 275 wi->info = stmt; 276 return error_mark_node; 277 default: 278 break; 279 } 280 return NULL; 281 } 282 283 /* Examine clauses of omp parallel statement PAR and if any prevents 284 gridification, issue a missed-optimization diagnostics and return false, 285 otherwise return true. GRID describes hitherto discovered properties of the 286 loop that is evaluated for possible gridification. */ 287 288 static bool 289 grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc) 290 { 291 tree clauses = gimple_omp_parallel_clauses (par); 292 while (clauses) 293 { 294 switch (OMP_CLAUSE_CODE (clauses)) 295 { 296 case OMP_CLAUSE_NUM_THREADS: 297 if (dump_enabled_p ()) 298 { 299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 300 GRID_MISSED_MSG_PREFIX "because there is " 301 "a num_threads clause of the parallel " 302 "construct\n"); 303 dump_printf_loc (MSG_NOTE, par, 304 "Parallel construct has a num_threads clause\n"); 305 } 306 return false; 307 308 case OMP_CLAUSE_REDUCTION: 309 if (dump_enabled_p ()) 310 { 311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 312 GRID_MISSED_MSG_PREFIX "a reduction clause " 313 "is present\n "); 314 dump_printf_loc (MSG_NOTE, par, 315 "Parallel construct has a reduction clause\n"); 316 } 317 return false; 318 319 default: 320 break; 321 } 322 clauses = OMP_CLAUSE_CHAIN (clauses); 323 } 324 return true; 325 } 326 327 /* Examine clauses and the body of omp loop statement GFOR and if something 328 prevents gridification, issue a missed-optimization diagnostics and return 329 false, otherwise return true. 
GRID describes hitherto discovered properties 330 of the loop that is evaluated for possible gridification. */ 331 332 static bool 333 grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid) 334 { 335 if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor), 336 grid)) 337 { 338 if (dump_enabled_p ()) 339 { 340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 341 GRID_MISSED_MSG_PREFIX "the inner loop " 342 "loop bounds computation contains a complex " 343 "statement\n"); 344 dump_printf_loc (MSG_NOTE, gfor, 345 "Loop construct cannot be analyzed for " 346 "gridification\n"); 347 } 348 return false; 349 } 350 351 tree clauses = gimple_omp_for_clauses (gfor); 352 while (clauses) 353 { 354 switch (OMP_CLAUSE_CODE (clauses)) 355 { 356 case OMP_CLAUSE_SCHEDULE: 357 if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO) 358 { 359 if (dump_enabled_p ()) 360 { 361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 362 GRID_MISSED_MSG_PREFIX "the inner loop " 363 "has a non-automatic schedule clause\n"); 364 dump_printf_loc (MSG_NOTE, gfor, 365 "Loop construct has a non automatic " 366 "schedule clause\n"); 367 } 368 return false; 369 } 370 break; 371 372 case OMP_CLAUSE_REDUCTION: 373 if (dump_enabled_p ()) 374 { 375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 376 GRID_MISSED_MSG_PREFIX "a reduction " 377 "clause is present\n "); 378 dump_printf_loc (MSG_NOTE, gfor, 379 "Loop construct has a reduction schedule " 380 "clause\n"); 381 } 382 return false; 383 384 default: 385 break; 386 } 387 clauses = OMP_CLAUSE_CHAIN (clauses); 388 } 389 struct walk_stmt_info wi; 390 memset (&wi, 0, sizeof (wi)); 391 if (walk_gimple_seq (gimple_omp_body (gfor), 392 grid_find_ungridifiable_statement, 393 NULL, &wi)) 394 { 395 gimple *bad = (gimple *) wi.info; 396 if (dump_enabled_p ()) 397 { 398 if (is_gimple_call (bad)) 399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 400 GRID_MISSED_MSG_PREFIX "the inner loop contains " 401 "call to a noreturn function\n"); 402 else 403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 404 GRID_MISSED_MSG_PREFIX "the inner loop contains " 405 "statement %s which cannot be transformed\n", 406 gimple_code_name[(int) gimple_code (bad)]); 407 dump_printf_loc (MSG_NOTE, bad, 408 "This statement cannot be analyzed for " 409 "gridification\n"); 410 } 411 return false; 412 } 413 return true; 414 } 415 416 /* Given distribute omp construct represented by DIST, which in the original 417 source forms a compound construct with a looping construct, return true if it 418 can be turned into a gridified HSA kernel. Otherwise return false. GRID 419 describes hitherto discovered properties of the loop that is evaluated for 420 possible gridification. 
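As an illustration, a hypothetical user construct matching this simple
   pattern could look like:

     #pragma omp target teams
     #pragma omp distribute parallel for
     for (int i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   where the distribute and the parallel for loops form one combined
   construct.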
*/ 421 422 static bool 423 grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid) 424 { 425 dump_user_location_t tloc = grid->target_loc; 426 gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist), 427 grid, "distribute"); 428 gomp_parallel *par; 429 if (!stmt 430 || !(par = dyn_cast <gomp_parallel *> (stmt)) 431 || !grid_parallel_clauses_gridifiable (par, tloc)) 432 return false; 433 434 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid, 435 "parallel"); 436 gomp_for *gfor; 437 if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt))) 438 return false; 439 440 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) 441 { 442 if (dump_enabled_p ()) 443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 444 GRID_MISSED_MSG_PREFIX "the inner loop is not " 445 "a simple for loop\n"); 446 return false; 447 } 448 gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse); 449 450 if (!grid_inner_loop_gridifiable_p (gfor, grid)) 451 return false; 452 453 return true; 454 } 455 456 /* Given an omp loop statement GFOR, return true if it can participate in 457 tiling gridification, i.e. in one where the distribute and parallel for 458 loops do not form a compound statement. GRID describes hitherto discovered 459 properties of the loop that is evaluated for possible gridification. */ 460 461 static bool 462 grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid) 463 { 464 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) 465 { 466 if (dump_enabled_p ()) 467 { 468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 469 GRID_MISSED_MSG_PREFIX "an inner loop is not " 470 "a simple for loop\n"); 471 dump_printf_loc (MSG_NOTE, gfor, 472 "This statement is not a simple for loop\n"); 473 } 474 return false; 475 } 476 477 if (!grid_inner_loop_gridifiable_p (gfor, grid)) 478 return false; 479 480 if (gimple_omp_for_collapse (gfor) != grid->collapse) 481 { 482 if (dump_enabled_p ()) 483 { 484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 485 GRID_MISSED_MSG_PREFIX "an inner loop does not " 486 "have use the same collapse clause\n"); 487 dump_printf_loc (MSG_NOTE, gfor, 488 "Loop construct uses a different collapse clause\n"); 489 } 490 return false; 491 } 492 493 struct omp_for_data fd; 494 struct omp_for_data_loop *loops 495 = (struct omp_for_data_loop *)alloca (grid->collapse 496 * sizeof (struct omp_for_data_loop)); 497 omp_extract_for_data (gfor, &fd, loops); 498 for (unsigned i = 0; i < grid->collapse; i++) 499 { 500 tree itype, type = TREE_TYPE (fd.loops[i].v); 501 if (POINTER_TYPE_P (type)) 502 itype = signed_type_for (type); 503 else 504 itype = type; 505 506 tree n1 = fold_convert (itype, fd.loops[i].n1); 507 tree n2 = fold_convert (itype, fd.loops[i].n2); 508 tree t = build_int_cst (itype, 509 (fd.loops[i].cond_code == LT_EXPR ? 
-1 : 1)); 510 t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t); 511 t = fold_build2 (PLUS_EXPR, itype, t, n2); 512 t = fold_build2 (MINUS_EXPR, itype, t, n1); 513 if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR) 514 t = fold_build2 (TRUNC_DIV_EXPR, itype, 515 fold_build1 (NEGATE_EXPR, itype, t), 516 fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step)); 517 else 518 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step); 519 520 if (!operand_equal_p (grid->group_sizes[i], t, 0)) 521 { 522 if (dump_enabled_p ()) 523 { 524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 525 GRID_MISSED_MSG_PREFIX "the distribute and " 526 "an internal loop do not agree on tile size\n"); 527 dump_printf_loc (MSG_NOTE, gfor, 528 "Loop construct does not seem to loop over " 529 "a tile size\n"); 530 } 531 return false; 532 } 533 } 534 return true; 535 } 536 537 /* Facing a call to FNDECL in the body of a distribute construct, return true 538 if we can handle it or false if it precludes gridification. */ 539 540 static bool 541 grid_call_permissible_in_distribute_p (tree fndecl) 542 { 543 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) 544 return true; 545 546 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); 547 if (strstr (name, "omp_") != name) 548 return false; 549 550 if ((strcmp (name, "omp_get_thread_num") == 0) 551 || (strcmp (name, "omp_get_num_threads") == 0) 552 || (strcmp (name, "omp_get_num_teams") == 0) 553 || (strcmp (name, "omp_get_team_num") == 0) 554 || (strcmp (name, "omp_get_level") == 0) 555 || (strcmp (name, "omp_get_active_level") == 0) 556 || (strcmp (name, "omp_in_parallel") == 0)) 557 return true; 558 559 return false; 560 } 561 562 /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body 563 of a distribute construct that is pointed at by GSI, modify it as necessary 564 for gridification. If the statement itself got removed, return true. */ 565 566 static bool 567 grid_handle_call_in_distribute (gimple_stmt_iterator *gsi) 568 { 569 gimple *stmt = gsi_stmt (*gsi); 570 tree fndecl = gimple_call_fndecl (stmt); 571 gcc_checking_assert (stmt); 572 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) 573 return false; 574 575 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); 576 if ((strcmp (name, "omp_get_thread_num") == 0) 577 || (strcmp (name, "omp_get_level") == 0) 578 || (strcmp (name, "omp_get_active_level") == 0) 579 || (strcmp (name, "omp_in_parallel") == 0)) 580 { 581 tree lhs = gimple_call_lhs (stmt); 582 if (lhs) 583 { 584 gassign *assign 585 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); 586 gsi_insert_before (gsi, assign, GSI_SAME_STMT); 587 } 588 gsi_remove (gsi, true); 589 return true; 590 } 591 592 /* The rest of the omp functions can stay as they are, HSA back-end will 593 handle them correctly. */ 594 gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0) 595 || (strcmp (name, "omp_get_num_teams") == 0) 596 || (strcmp (name, "omp_get_team_num") == 0)); 597 return false; 598 } 599 600 /* Given a sequence of statements within a distribute omp construct or a 601 parallel construct, which in the original source does not form a compound 602 construct with a looping construct, return true if it does not prevent us 603 from turning it into a gridified HSA kernel. Otherwise return false. GRID 604 describes hitherto discovered properties of the loop that is evaluated for 605 possible gridification. 
IN_PARALLEL must be true if seq is within a 606 parallel construct and flase if it is only within a distribute 607 construct. */ 608 609 static bool 610 grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid, 611 bool in_parallel) 612 { 613 gimple_stmt_iterator gsi; 614 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) 615 { 616 gimple *stmt = gsi_stmt (gsi); 617 618 if (grid_safe_assignment_p (stmt, grid) 619 || gimple_code (stmt) == GIMPLE_GOTO 620 || gimple_code (stmt) == GIMPLE_LABEL 621 || gimple_code (stmt) == GIMPLE_COND) 622 continue; 623 else if (gbind *bind = dyn_cast <gbind *> (stmt)) 624 { 625 if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind), 626 grid, in_parallel)) 627 return false; 628 continue; 629 } 630 else if (gtry *try_stmt = dyn_cast <gtry *> (stmt)) 631 { 632 if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH) 633 { 634 if (dump_enabled_p ()) 635 { 636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 637 GRID_MISSED_MSG_PREFIX "the distribute " 638 "construct contains a try..catch region\n"); 639 dump_printf_loc (MSG_NOTE, try_stmt, 640 "This statement cannot be analyzed for " 641 "tiled gridification\n"); 642 } 643 return false; 644 } 645 if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt), 646 grid, in_parallel)) 647 return false; 648 if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt), 649 grid, in_parallel)) 650 return false; 651 continue; 652 } 653 else if (is_gimple_call (stmt)) 654 { 655 tree fndecl = gimple_call_fndecl (stmt); 656 if (fndecl && grid_call_permissible_in_distribute_p (fndecl)) 657 continue; 658 659 if (dump_enabled_p ()) 660 { 661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 662 GRID_MISSED_MSG_PREFIX "the distribute " 663 "construct contains a call\n"); 664 dump_printf_loc (MSG_NOTE, stmt, 665 "This statement cannot be analyzed for " 666 "tiled gridification\n"); 667 } 668 return false; 669 } 670 else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt)) 671 { 672 if (in_parallel) 673 { 674 if (dump_enabled_p ()) 675 { 676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 677 GRID_MISSED_MSG_PREFIX "a parallel " 678 "construct contains another parallel " 679 "construct\n"); 680 dump_printf_loc (MSG_NOTE, stmt, 681 "This parallel construct is nested in " 682 "another one\n"); 683 } 684 return false; 685 } 686 if (!grid_parallel_clauses_gridifiable (par, grid->target_loc) 687 || !grid_dist_follows_tiling_pattern (gimple_omp_body (par), 688 grid, true)) 689 return false; 690 } 691 else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt)) 692 { 693 if (!in_parallel) 694 { 695 if (dump_enabled_p ()) 696 { 697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 698 GRID_MISSED_MSG_PREFIX "a loop " 699 "construct is not nested within a parallel " 700 "construct\n"); 701 dump_printf_loc (MSG_NOTE, stmt, 702 "This loop construct is not nested in " 703 "a parallel construct\n"); 704 } 705 return false; 706 } 707 if (!grid_gfor_follows_tiling_pattern (gfor, grid)) 708 return false; 709 } 710 else 711 { 712 if (dump_enabled_p ()) 713 { 714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, 715 GRID_MISSED_MSG_PREFIX "the distribute " 716 "construct contains a complex statement\n"); 717 dump_printf_loc (MSG_NOTE, stmt, 718 "This statement cannot be analyzed for " 719 "tiled gridification\n"); 720 } 721 return false; 722 } 723 } 724 return true; 725 } 726 727 /* If TARGET follows a pattern that can be turned into a gridified HSA 
kernel, 728 return true, otherwise return false. In the case of success, also fill in 729 GRID with information describing the kernel grid. */ 730 731 static bool 732 grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid) 733 { 734 if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION) 735 return false; 736 737 dump_user_location_t tloc = target; 738 grid->target_loc = tloc; 739 gimple *stmt 740 = grid_find_single_omp_among_assignments (gimple_omp_body (target), 741 grid, "target"); 742 if (!stmt) 743 return false; 744 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); 745 tree group_size = NULL; 746 if (!teams) 747 { 748 if (dump_enabled_p ()) 749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 750 GRID_MISSED_MSG_PREFIX "it does not have a sole " 751 "teams construct in it.\n"); 752 return false; 753 } 754 755 tree clauses = gimple_omp_teams_clauses (teams); 756 while (clauses) 757 { 758 switch (OMP_CLAUSE_CODE (clauses)) 759 { 760 case OMP_CLAUSE_NUM_TEAMS: 761 if (dump_enabled_p ()) 762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 763 GRID_MISSED_MSG_PREFIX "the teams construct " 764 "contains a num_teams clause\n "); 765 return false; 766 767 case OMP_CLAUSE_REDUCTION: 768 if (dump_enabled_p ()) 769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 770 GRID_MISSED_MSG_PREFIX "a reduction " 771 "clause is present\n "); 772 return false; 773 774 case OMP_CLAUSE_THREAD_LIMIT: 775 if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0))) 776 group_size = OMP_CLAUSE_OPERAND (clauses, 0); 777 break; 778 779 default: 780 break; 781 } 782 clauses = OMP_CLAUSE_CHAIN (clauses); 783 } 784 785 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid, 786 "teams"); 787 if (!stmt) 788 return false; 789 gomp_for *dist = dyn_cast <gomp_for *> (stmt); 790 if (!dist) 791 { 792 if (dump_enabled_p ()) 793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 794 GRID_MISSED_MSG_PREFIX "the teams construct does not " 795 "have a single distribute construct in it.\n"); 796 return false; 797 } 798 799 gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE); 800 801 grid->collapse = gimple_omp_for_collapse (dist); 802 if (grid->collapse > 3) 803 { 804 if (dump_enabled_p ()) 805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 806 GRID_MISSED_MSG_PREFIX "the distribute construct " 807 "contains collapse clause with parameter greater " 808 "than 3\n"); 809 return false; 810 } 811 812 struct omp_for_data fd; 813 struct omp_for_data_loop *dist_loops 814 = (struct omp_for_data_loop *)alloca (grid->collapse 815 * sizeof (struct omp_for_data_loop)); 816 omp_extract_for_data (dist, &fd, dist_loops); 817 if (fd.chunk_size) 818 { 819 if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0)) 820 { 821 if (dump_enabled_p ()) 822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 823 GRID_MISSED_MSG_PREFIX "the teams " 824 "thread limit is different from distribute " 825 "schedule chunk\n"); 826 return false; 827 } 828 group_size = fd.chunk_size; 829 } 830 if (group_size && grid->collapse > 1) 831 { 832 if (dump_enabled_p ()) 833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 834 GRID_MISSED_MSG_PREFIX "group size cannot be " 835 "set using thread_limit or schedule clauses " 836 "when also using a collapse clause greater than 1\n"); 837 return false; 838 } 839 840 if (gimple_omp_for_combined_p (dist)) 841 { 842 grid->tiling = false; 843 grid->group_sizes[0] = group_size; 844 for (unsigned i = 1; i < grid->collapse; i++) 845 grid->group_sizes[i] = NULL; 
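/* In the combined case the only group size the user can request comes from
	 the thread_limit clause or the distribute schedule chunk, and it applies
	 to the first dimension only; see the checks above.  */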
846 return grid_dist_follows_simple_pattern (dist, grid); 847 } 848 else 849 { 850 grid->tiling = true; 851 if (group_size) 852 { 853 if (dump_enabled_p ()) 854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, 855 GRID_MISSED_MSG_PREFIX "group size cannot be set " 856 "using thread_limit or schedule clauses when " 857 "distribute and loop constructs do not form " 858 "one combined construct\n"); 859 return false; 860 } 861 for (unsigned i = 0; i < grid->collapse; i++) 862 { 863 if (fd.loops[i].cond_code == GT_EXPR) 864 grid->group_sizes[i] = fold_build1 (NEGATE_EXPR, 865 TREE_TYPE (fd.loops[i].step), 866 fd.loops[i].step); 867 else 868 grid->group_sizes[i] = fd.loops[i].step; 869 } 870 return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid, 871 false); 872 } 873 } 874 875 /* Operand walker, used to remap pre-body declarations according to a hash map 876 provided in DATA. */ 877 878 static tree 879 grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data) 880 { 881 tree t = *tp; 882 883 if (DECL_P (t) || TYPE_P (t)) 884 *walk_subtrees = 0; 885 else 886 *walk_subtrees = 1; 887 888 if (VAR_P (t)) 889 { 890 struct walk_stmt_info *wi = (struct walk_stmt_info *) data; 891 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; 892 tree *repl = declmap->get (t); 893 if (repl) 894 *tp = *repl; 895 } 896 return NULL_TREE; 897 } 898 899 /* Identifiers of segments into which a particular variable should be places 900 when gridifying. */ 901 902 enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP, 903 GRID_SEGMENT_GLOBAL}; 904 905 /* Mark VAR so that it is eventually placed into SEGMENT. Place an artificial 906 builtin call into SEQ that will make sure the variable is always considered 907 address taken. */ 908 909 static void 910 grid_mark_variable_segment (tree var, enum grid_var_segment segment) 911 { 912 /* Making a non-addressable variables would require that we re-gimplify all 913 their uses. Fortunately, we do not have to do this because if they are 914 not addressable, it means they are not used in atomic or parallel 915 statements and so relaxed GPU consistency rules mean we can just keep them 916 private. */ 917 if (!TREE_ADDRESSABLE (var)) 918 return; 919 920 switch (segment) 921 { 922 case GRID_SEGMENT_GROUP: 923 DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"), 924 NULL, DECL_ATTRIBUTES (var)); 925 break; 926 case GRID_SEGMENT_GLOBAL: 927 DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"), 928 NULL, DECL_ATTRIBUTES (var)); 929 break; 930 default: 931 gcc_unreachable (); 932 } 933 934 if (!TREE_STATIC (var)) 935 { 936 TREE_STATIC (var) = 1; 937 const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var)); 938 SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix)); 939 varpool_node::finalize_decl (var); 940 } 941 942 } 943 944 /* Copy leading register-type assignments to local variables in SRC to just 945 before DST, Creating temporaries, adjusting mapping of operands in WI and 946 remapping operands as necessary. Add any new temporaries to TGT_BIND. 947 Return the first statement that does not conform to grid_safe_assignment_p 948 or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all 949 variables in traversed bind statements so that they are put into the 950 appropriate segment. 
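Typically the copied statements are gimplified computations of loop bounds
   and clause operands; making them available before the target statement is
   what allows kernel grid sizes to be computed there.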
*/ 951 952 static gimple * 953 grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst, 954 gbind *tgt_bind, 955 enum grid_var_segment var_segment, 956 struct walk_stmt_info *wi) 957 { 958 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; 959 gimple_stmt_iterator gsi; 960 for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi)) 961 { 962 gimple *stmt = gsi_stmt (gsi); 963 if (gbind *bind = dyn_cast <gbind *> (stmt)) 964 { 965 gimple *r = grid_copy_leading_local_assignments 966 (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi); 967 968 if (var_segment != GRID_SEGMENT_PRIVATE) 969 for (tree var = gimple_bind_vars (bind); 970 var; 971 var = DECL_CHAIN (var)) 972 grid_mark_variable_segment (var, var_segment); 973 if (r) 974 return r; 975 else 976 continue; 977 } 978 if (!grid_safe_assignment_p (stmt, NULL)) 979 return stmt; 980 tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt)); 981 tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL), 982 TREE_TYPE (lhs)); 983 DECL_CONTEXT (repl) = current_function_decl; 984 gimple_bind_append_vars (tgt_bind, repl); 985 986 declmap->put (lhs, repl); 987 gassign *copy = as_a <gassign *> (gimple_copy (stmt)); 988 walk_gimple_op (copy, grid_remap_prebody_decls, wi); 989 gsi_insert_before (dst, copy, GSI_SAME_STMT); 990 } 991 return NULL; 992 } 993 994 /* Statement walker function to make adjustments to statements within the 995 gridifed kernel copy. */ 996 997 static tree 998 grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p, 999 struct walk_stmt_info *) 1000 { 1001 *handled_ops_p = false; 1002 gimple *stmt = gsi_stmt (*gsi); 1003 if (gimple_code (stmt) == GIMPLE_OMP_FOR 1004 && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD)) 1005 { 1006 gomp_for *loop = as_a <gomp_for *> (stmt); 1007 tree clauses = gimple_omp_for_clauses (loop); 1008 tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN); 1009 if (cl) 1010 OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node; 1011 else 1012 { 1013 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN); 1014 OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node; 1015 OMP_CLAUSE_CHAIN (c) = clauses; 1016 gimple_omp_for_set_clauses (loop, c); 1017 } 1018 } 1019 return NULL_TREE; 1020 } 1021 1022 /* Given a PARLOOP that is a normal for looping construct but also a part of a 1023 combined construct with a simd loop, eliminate the simd loop. */ 1024 1025 static void 1026 grid_eliminate_combined_simd_part (gomp_for *parloop) 1027 { 1028 struct walk_stmt_info wi; 1029 1030 memset (&wi, 0, sizeof (wi)); 1031 wi.val_only = true; 1032 enum gf_mask msk = GF_OMP_FOR_SIMD; 1033 wi.info = (void *) &msk; 1034 walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi); 1035 gimple *stmt = (gimple *) wi.info; 1036 /* We expect that the SIMD id the only statement in the parallel loop. */ 1037 gcc_assert (stmt 1038 && gimple_code (stmt) == GIMPLE_OMP_FOR 1039 && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD) 1040 && gimple_omp_for_combined_into_p (stmt) 1041 && !gimple_omp_for_combined_p (stmt)); 1042 gomp_for *simd = as_a <gomp_for *> (stmt); 1043 1044 /* Copy over the iteration properties because the body refers to the index in 1045 the bottmom-most loop. 
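For example, in a hypothetical combined construct such as

     #pragma omp parallel for simd
     for (int i = 0; i < n; i++)
       a[i] = b[i];

   the body uses the iteration variable of the innermost (simd) loop, so the
   enclosing parallel loop must take over its index, bounds and increment
   when the simd part is removed.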
*/ 1046 unsigned i, collapse = gimple_omp_for_collapse (parloop); 1047 gcc_checking_assert (collapse == gimple_omp_for_collapse (simd)); 1048 for (i = 0; i < collapse; i++) 1049 { 1050 gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i)); 1051 gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i)); 1052 gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i)); 1053 gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i)); 1054 } 1055 1056 tree *tgt= gimple_omp_for_clauses_ptr (parloop); 1057 while (*tgt) 1058 tgt = &OMP_CLAUSE_CHAIN (*tgt); 1059 1060 /* Copy over all clauses, except for linear clauses, which are turned into 1061 private clauses, and all other simd-specific clauses, which are 1062 ignored. */ 1063 tree *pc = gimple_omp_for_clauses_ptr (simd); 1064 while (*pc) 1065 { 1066 tree c = *pc; 1067 switch (TREE_CODE (c)) 1068 { 1069 case OMP_CLAUSE_LINEAR: 1070 { 1071 tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE); 1072 OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c); 1073 OMP_CLAUSE_CHAIN (priv) = NULL; 1074 *tgt = priv; 1075 tgt = &OMP_CLAUSE_CHAIN (priv); 1076 pc = &OMP_CLAUSE_CHAIN (c); 1077 break; 1078 } 1079 1080 case OMP_CLAUSE_SAFELEN: 1081 case OMP_CLAUSE_SIMDLEN: 1082 case OMP_CLAUSE_ALIGNED: 1083 pc = &OMP_CLAUSE_CHAIN (c); 1084 break; 1085 1086 default: 1087 *pc = OMP_CLAUSE_CHAIN (c); 1088 OMP_CLAUSE_CHAIN (c) = NULL; 1089 *tgt = c; 1090 tgt = &OMP_CLAUSE_CHAIN (c); 1091 break; 1092 } 1093 } 1094 1095 /* Finally, throw away the simd and mark the parallel loop as not 1096 combined. */ 1097 gimple_omp_set_body (parloop, gimple_omp_body (simd)); 1098 gimple_omp_for_set_combined_p (parloop, false); 1099 } 1100 1101 /* Statement walker function marking all parallels as grid_phony and loops as 1102 grid ones representing threads of a particular thread group. 
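For illustration, in a hypothetical tiled source pattern such as

     #pragma omp distribute
     for (int j = 0; j < n; j += 64)
       #pragma omp parallel for
       for (int i = j; i < j + 64; i++)
         a[i] = b[i];

   each iteration of the inner loop is mapped to one work-item of a
   work-group of size 64.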
*/ 1103 1104 static tree 1105 grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p, 1106 struct walk_stmt_info *wi_in) 1107 { 1108 *handled_ops_p = false; 1109 if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi))) 1110 { 1111 *handled_ops_p = true; 1112 gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP); 1113 gimple_omp_for_set_grid_intra_group (loop, true); 1114 if (gimple_omp_for_combined_p (loop)) 1115 grid_eliminate_combined_simd_part (loop); 1116 1117 struct walk_stmt_info body_wi; 1118 memset (&body_wi, 0, sizeof (body_wi)); 1119 walk_gimple_seq_mod (gimple_omp_body_ptr (loop), 1120 grid_process_grid_body, NULL, &body_wi); 1121 1122 gbind *bind = (gbind *) wi_in->info; 1123 tree c; 1124 for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c)) 1125 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE) 1126 { 1127 push_gimplify_context (); 1128 tree ov = OMP_CLAUSE_DECL (c); 1129 tree gv = copy_var_decl (ov, create_tmp_var_name (NULL), 1130 TREE_TYPE (ov)); 1131 1132 grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP); 1133 DECL_CONTEXT (gv) = current_function_decl; 1134 gimple_bind_append_vars (bind, gv); 1135 tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov); 1136 gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c)); 1137 x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv); 1138 gimple_seq l = NULL; 1139 gimplify_and_add (x, &l); 1140 gsi_insert_seq_after (gsi, l, GSI_SAME_STMT); 1141 pop_gimplify_context (bind); 1142 } 1143 } 1144 return NULL_TREE; 1145 } 1146 1147 /* Statement walker function marking all parallels as grid_phony and loops as 1148 grid ones representing threads of a particular thread group. */ 1149 1150 static tree 1151 grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi, 1152 bool *handled_ops_p, 1153 struct walk_stmt_info *wi_in) 1154 { 1155 *handled_ops_p = false; 1156 wi_in->removed_stmt = false; 1157 gimple *stmt = gsi_stmt (*gsi); 1158 if (gbind *bind = dyn_cast <gbind *> (stmt)) 1159 { 1160 for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) 1161 grid_mark_variable_segment (var, GRID_SEGMENT_GROUP); 1162 } 1163 else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt)) 1164 { 1165 *handled_ops_p = true; 1166 gimple_omp_parallel_set_grid_phony (parallel, true); 1167 1168 gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); 1169 gimple_bind_set_body (new_bind, gimple_omp_body (parallel)); 1170 gimple_seq s = NULL; 1171 gimple_seq_add_stmt (&s, new_bind); 1172 gimple_omp_set_body (parallel, s); 1173 1174 struct walk_stmt_info wi_par; 1175 memset (&wi_par, 0, sizeof (wi_par)); 1176 wi_par.info = new_bind; 1177 walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind), 1178 grid_mark_tiling_loops, NULL, &wi_par); 1179 } 1180 else if (is_a <gcall *> (stmt)) 1181 wi_in->removed_stmt = grid_handle_call_in_distribute (gsi); 1182 return NULL_TREE; 1183 } 1184 1185 /* Given freshly copied top level kernel SEQ, identify the individual OMP 1186 components, mark them as part of kernel, copy assignment leading to them 1187 just before DST, remapping them using WI and adding new temporaries to 1188 TGT_BIND, and and return the loop that will be used for kernel dispatch. 
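The copied sequence is expected to have the shape established by
   grid_target_follows_gridifiable_pattern, i.e. a teams statement wrapping a
   distribute loop which in turn contains either a combined parallel for or,
   in the tiling case, separate parallel and loop constructs.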
*/ 1189 1190 static gomp_for * 1191 grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq, 1192 gimple_stmt_iterator *dst, 1193 gbind *tgt_bind, struct walk_stmt_info *wi) 1194 { 1195 gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind, 1196 GRID_SEGMENT_GLOBAL, wi); 1197 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); 1198 gcc_assert (teams); 1199 gimple_omp_teams_set_grid_phony (teams, true); 1200 stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst, 1201 tgt_bind, GRID_SEGMENT_GLOBAL, 1202 wi); 1203 gcc_checking_assert (stmt); 1204 gomp_for *dist = dyn_cast <gomp_for *> (stmt); 1205 gcc_assert (dist); 1206 gimple_seq prebody = gimple_omp_for_pre_body (dist); 1207 if (prebody) 1208 grid_copy_leading_local_assignments (prebody, dst, tgt_bind, 1209 GRID_SEGMENT_GROUP, wi); 1210 1211 if (grid->tiling) 1212 { 1213 gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP); 1214 gimple_omp_for_set_grid_group_iter (dist, true); 1215 1216 struct walk_stmt_info wi_tiled; 1217 memset (&wi_tiled, 0, sizeof (wi_tiled)); 1218 walk_gimple_seq_mod (gimple_omp_body_ptr (dist), 1219 grid_mark_tiling_parallels_and_loops, NULL, 1220 &wi_tiled); 1221 return dist; 1222 } 1223 else 1224 { 1225 gimple_omp_for_set_grid_phony (dist, true); 1226 stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst, 1227 tgt_bind, 1228 GRID_SEGMENT_PRIVATE, wi); 1229 gcc_checking_assert (stmt); 1230 gomp_parallel *parallel = as_a <gomp_parallel *> (stmt); 1231 gimple_omp_parallel_set_grid_phony (parallel, true); 1232 stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel), 1233 dst, tgt_bind, 1234 GRID_SEGMENT_PRIVATE, wi); 1235 gomp_for *inner_loop = as_a <gomp_for *> (stmt); 1236 gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP); 1237 prebody = gimple_omp_for_pre_body (inner_loop); 1238 if (prebody) 1239 grid_copy_leading_local_assignments (prebody, dst, tgt_bind, 1240 GRID_SEGMENT_PRIVATE, wi); 1241 1242 if (gimple_omp_for_combined_p (inner_loop)) 1243 grid_eliminate_combined_simd_part (inner_loop); 1244 struct walk_stmt_info body_wi; 1245 memset (&body_wi, 0, sizeof (body_wi)); 1246 walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop), 1247 grid_process_grid_body, NULL, &body_wi); 1248 1249 return inner_loop; 1250 } 1251 } 1252 1253 /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern, 1254 create a GPU kernel for it. GSI must point to the same statement, TGT_BIND 1255 is the bind into which temporaries inserted before TARGET should be 1256 added. */ 1257 1258 static void 1259 grid_attempt_target_gridification (gomp_target *target, 1260 gimple_stmt_iterator *gsi, 1261 gbind *tgt_bind) 1262 { 1263 /* removed group_size */ 1264 grid_prop grid = {}; 1265 if (!target || !grid_target_follows_gridifiable_pattern (target, &grid)) 1266 return; 1267 1268 location_t loc = gimple_location (target); 1269 if (dump_enabled_p ()) 1270 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target, 1271 "Target construct will be turned into a gridified HSA " 1272 "kernel\n"); 1273 1274 /* Copy target body to a GPUKERNEL construct: */ 1275 gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals 1276 (gimple_omp_body (target)); 1277 1278 hash_map<tree, tree> *declmap = new hash_map<tree, tree>; 1279 struct walk_stmt_info wi; 1280 memset (&wi, 0, sizeof (struct walk_stmt_info)); 1281 wi.info = declmap; 1282 1283 /* Copy assignments in between OMP statements before target, mark OMP 1284 statements within copy appropriately. 
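The returned inner loop is then used below to compute the kernel grid
     dimensions; for each collapsed dimension the iteration count is derived
     as (N2 - N1 + STEP + (COND == LT_EXPR ? -1 : 1)) / STEP, guarded by the
     loop entry condition.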
*/ 1285 gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi, 1286 tgt_bind, &wi); 1287 1288 gbind *old_bind 1289 = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target))); 1290 gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq)); 1291 tree new_block = gimple_bind_block (new_bind); 1292 tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind)); 1293 BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block); 1294 BLOCK_SUBBLOCKS (enc_block) = new_block; 1295 BLOCK_SUPERCONTEXT (new_block) = enc_block; 1296 gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq); 1297 gimple_seq_add_stmt 1298 (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))), 1299 gpukernel); 1300 1301 for (size_t i = 0; i < grid.collapse; i++) 1302 walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL); 1303 push_gimplify_context (); 1304 for (size_t i = 0; i < grid.collapse; i++) 1305 { 1306 tree index_var = gimple_omp_for_index (inner_loop, i); 1307 tree itype, type = TREE_TYPE (index_var); 1308 if (POINTER_TYPE_P (type)) 1309 itype = signed_type_for (type); 1310 else 1311 itype = type; 1312 1313 enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i); 1314 tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i)); 1315 walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL); 1316 tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i)); 1317 walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL); 1318 tree step 1319 = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i)); 1320 omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step); 1321 n1 = fold_convert (itype, n1); 1322 n2 = fold_convert (itype, n2); 1323 1324 tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2); 1325 1326 tree t = build_int_cst (itype, (cond_code == LT_EXPR ? 
-1 : 1)); 1327 t = fold_build2 (PLUS_EXPR, itype, step, t); 1328 t = fold_build2 (PLUS_EXPR, itype, t, n2); 1329 t = fold_build2 (MINUS_EXPR, itype, t, n1); 1330 if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR) 1331 t = fold_build2 (TRUNC_DIV_EXPR, itype, 1332 fold_build1 (NEGATE_EXPR, itype, t), 1333 fold_build1 (NEGATE_EXPR, itype, step)); 1334 else 1335 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 1336 t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype)); 1337 if (grid.tiling) 1338 { 1339 if (cond_code == GT_EXPR) 1340 step = fold_build1 (NEGATE_EXPR, itype, step); 1341 t = fold_build2 (MULT_EXPR, itype, t, step); 1342 } 1343 1344 tree gs = fold_convert (uint32_type_node, t); 1345 gimple_seq tmpseq = NULL; 1346 gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue); 1347 if (!gimple_seq_empty_p (tmpseq)) 1348 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); 1349 1350 tree ws; 1351 if (grid.group_sizes[i]) 1352 { 1353 ws = fold_convert (uint32_type_node, grid.group_sizes[i]); 1354 tmpseq = NULL; 1355 gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue); 1356 if (!gimple_seq_empty_p (tmpseq)) 1357 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); 1358 } 1359 else 1360 ws = build_zero_cst (uint32_type_node); 1361 1362 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_); 1363 OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i; 1364 OMP_CLAUSE__GRIDDIM__SIZE (c) = gs; 1365 OMP_CLAUSE__GRIDDIM__GROUP (c) = ws; 1366 OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target); 1367 gimple_omp_target_set_clauses (target, c); 1368 } 1369 pop_gimplify_context (tgt_bind); 1370 delete declmap; 1371 return; 1372 } 1373 1374 /* Walker function doing all the work for create_target_kernels. */ 1375 1376 static tree 1377 grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi, 1378 bool *handled_ops_p, 1379 struct walk_stmt_info *incoming) 1380 { 1381 *handled_ops_p = false; 1382 1383 gimple *stmt = gsi_stmt (*gsi); 1384 gomp_target *target = dyn_cast <gomp_target *> (stmt); 1385 if (target) 1386 { 1387 gbind *tgt_bind = (gbind *) incoming->info; 1388 gcc_checking_assert (tgt_bind); 1389 grid_attempt_target_gridification (target, gsi, tgt_bind); 1390 return NULL_TREE; 1391 } 1392 gbind *bind = dyn_cast <gbind *> (stmt); 1393 if (bind) 1394 { 1395 *handled_ops_p = true; 1396 struct walk_stmt_info wi; 1397 memset (&wi, 0, sizeof (wi)); 1398 wi.info = bind; 1399 walk_gimple_seq_mod (gimple_bind_body_ptr (bind), 1400 grid_gridify_all_targets_stmt, NULL, &wi); 1401 } 1402 return NULL_TREE; 1403 } 1404 1405 /* Attempt to gridify all target constructs in BODY_P. All such targets will 1406 have their bodies duplicated, with the new copy being put into a 1407 gimple_omp_grid_body statement. All kernel-related construct within the 1408 grid_body will be marked with phony flags or kernel kinds. Moreover, some 1409 re-structuring is often needed, such as copying pre-bodies before the target 1410 construct so that kernel grid sizes can be computed. */ 1411 1412 void 1413 omp_grid_gridify_all_targets (gimple_seq *body_p) 1414 { 1415 struct walk_stmt_info wi; 1416 memset (&wi, 0, sizeof (wi)); 1417 walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi); 1418 } 1419