1 /* brig-function.cc -- declaration of brig_function class. 2 Copyright (C) 2016-2020 Free Software Foundation, Inc. 3 Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> 4 for General Processor Tech. 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free 10 Software Foundation; either version 3, or (at your option) any later 11 version. 12 13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 14 WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16 for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include <sstream> 23 #include <iomanip> 24 25 #include "brig-function.h" 26 #include "stringpool.h" 27 #include "tree-iterator.h" 28 #include "toplev.h" 29 #include "gimplify.h" 30 #include "gimple-expr.h" 31 #include "print-tree.h" 32 #include "hsa-brig-format.h" 33 #include "stor-layout.h" 34 #include "diagnostic-core.h" 35 #include "brig-code-entry-handler.h" 36 #include "brig-machine.h" 37 #include "brig-util.h" 38 #include "phsa.h" 39 #include "tree-pretty-print.h" 40 #include "dumpfile.h" 41 #include "profile-count.h" 42 #include "tree-cfg.h" 43 #include "errors.h" 44 #include "function.h" 45 #include "brig-to-generic.h" 46 #include "brig-builtins.h" 47 #include "options.h" 48 #include "fold-const.h" 49 #include "target.h" 50 #include "builtins.h" 51 52 brig_function::builtin_map brig_function::s_custom_builtins; 53 54 brig_function::brig_function (const BrigDirectiveExecutable *exec, 55 brig_to_generic *parent) 56 : m_brig_def (exec), m_is_kernel (false), m_is_finished (false), m_name (""), 57 m_current_bind_expr (NULL_TREE), m_func_decl (NULL_TREE), 58 m_context_arg (NULL_TREE), 
m_group_base_arg (NULL_TREE), 59 m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE), 60 m_next_kernarg_offset (0), m_kernarg_max_align (0), 61 m_ret_value_brig_var (NULL), m_has_barriers (false), m_has_allocas (false), 62 m_has_function_calls_with_barriers (false), m_calls_analyzed (false), 63 m_is_wg_function (false), m_has_unexpanded_dp_builtins (false), 64 m_generating_arg_block (false), m_parent (parent) 65 { 66 memset (m_regs, 0, 67 BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *)); 68 memset (&m_descriptor, 0, sizeof (phsa_descriptor)); 69 70 if (s_custom_builtins.size () > 0) return; 71 72 /* Populate the builtin index. */ 73 #undef DEF_HSAIL_ATOMIC_BUILTIN 74 #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN 75 #undef DEF_HSAIL_INTR_BUILTIN 76 #undef DEF_HSAIL_SAT_BUILTIN 77 #undef DEF_HSAIL_BUILTIN 78 #define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \ 79 s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)] \ 80 = builtin_decl_explicit (ENUM); 81 82 #include "brig-builtins.def" 83 } 84 85 brig_function::~brig_function () 86 { 87 for (size_t i = 0; i < BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT; ++i) 88 { 89 if (m_regs[i] != NULL) 90 { 91 delete m_regs[i]; 92 m_regs[i] = NULL; 93 } 94 } 95 } 96 97 /* Returns a GENERIC label with the given name in the given function. 98 Creates it, if not yet found. */ 99 100 tree 101 brig_function::label (const std::string &name) 102 { 103 label_index::const_iterator i = m_label_index.find (name); 104 if (i == m_label_index.end ()) 105 { 106 tree name_identifier 107 = get_identifier_with_length (name.c_str (), name.size ()); 108 109 tree label_decl = build_decl (UNKNOWN_LOCATION, LABEL_DECL, 110 name_identifier, void_type_node); 111 112 DECL_CONTEXT (label_decl) = m_func_decl; 113 DECL_ARTIFICIAL (label_decl) = 0; 114 115 m_label_index[name] = label_decl; 116 return label_decl; 117 } 118 else 119 return (*i).second; 120 } 121 122 /* Record an argument variable for later use. 
This includes both local 123 variables inside arg blocks and incoming function arguments. */ 124 125 void 126 brig_function::add_arg_variable (const BrigDirectiveVariable *brigVar, 127 tree treeDecl) 128 { 129 m_arg_variables[brigVar] = treeDecl; 130 } 131 132 tree 133 brig_function::arg_variable (const BrigDirectiveVariable *var) const 134 { 135 variable_index::const_iterator i = m_arg_variables.find (var); 136 if (i == m_arg_variables.end ()) 137 return NULL_TREE; 138 else 139 return (*i).second; 140 } 141 142 /* Appends a new kernel argument descriptor for the current kernel's 143 arg space. */ 144 145 void 146 brig_function::append_kernel_arg (const BrigDirectiveVariable *var, size_t size, 147 size_t alignment) 148 { 149 gcc_assert (m_func_decl != NULL_TREE); 150 gcc_assert (m_is_kernel); 151 152 size_t align_padding = m_next_kernarg_offset % alignment == 0 ? 153 0 : (alignment - m_next_kernarg_offset % alignment); 154 m_next_kernarg_offset += align_padding; 155 m_kernarg_offsets[var] = m_next_kernarg_offset; 156 m_next_kernarg_offset += size; 157 158 m_kernarg_max_align 159 = m_kernarg_max_align < alignment ? alignment : m_kernarg_max_align; 160 } 161 162 size_t 163 brig_function::kernel_arg_offset (const BrigDirectiveVariable *var) const 164 { 165 var_offset_table::const_iterator i = m_kernarg_offsets.find (var); 166 gcc_assert (i != m_kernarg_offsets.end ()); 167 return (*i).second; 168 } 169 170 /* Add work-item ID variables to the beginning of the kernel function 171 which can be used for address computation as kernel dispatch packet 172 instructions can be expanded to GENERIC nodes referring to them. */ 173 174 void 175 brig_function::add_id_variables () 176 { 177 tree bind_expr = m_current_bind_expr; 178 tree stmts = BIND_EXPR_BODY (bind_expr); 179 180 /* Initialize the WG limits and local ids. 
*/ 181 m_kernel_entry = tsi_start (stmts); 182 183 for (int i = 0; i < 3; ++i) 184 { 185 char dim_char = (char) ((int) 'x' + i); 186 187 /* The local sizes are limited to 16b values, but let's still use 32b 188 to avoid unnecessary casts (the ID functions are 32b). */ 189 m_local_id_vars[i] 190 = add_local_variable (std::string ("__local_") + dim_char, 191 long_long_integer_type_node); 192 193 tree workitemid_call 194 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKITEMID), 2, 195 uint32_type_node, uint32_type_node, 196 build_int_cst (uint32_type_node, i), ptr_type_node, 197 m_context_arg); 198 199 tree id_init = build2 (MODIFY_EXPR, TREE_TYPE (m_local_id_vars[i]), 200 m_local_id_vars[i], 201 convert (TREE_TYPE (m_local_id_vars[i]), 202 workitemid_call)); 203 204 append_statement (id_init); 205 206 m_cur_wg_size_vars[i] 207 = add_local_variable (std::string ("__cur_wg_size_") + dim_char, 208 long_long_integer_type_node); 209 210 tree cwgz_call; 211 if (flag_assume_phsa) 212 { 213 tree_stl_vec operands 214 = tree_stl_vec (1, build_int_cst (uint32_type_node, i)); 215 cwgz_call 216 = expand_or_call_builtin (BRIG_OPCODE_CURRENTWORKGROUPSIZE, 217 BRIG_TYPE_U32, uint32_type_node, 218 operands); 219 } 220 else 221 cwgz_call = call_builtin 222 (builtin_decl_explicit (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE), 223 2, uint32_type_node, uint32_type_node, 224 build_int_cst (uint32_type_node, i), ptr_type_node, m_context_arg); 225 226 tree limit_init = build2 (MODIFY_EXPR, TREE_TYPE (m_cur_wg_size_vars[i]), 227 m_cur_wg_size_vars[i], 228 convert (TREE_TYPE (m_cur_wg_size_vars[i]), 229 cwgz_call)); 230 231 append_statement (limit_init); 232 233 m_wg_id_vars[i] 234 = add_local_variable (std::string ("__workgroupid_") + dim_char, 235 uint32_type_node); 236 237 tree wgid_call; 238 if (flag_assume_phsa) 239 { 240 tree_stl_vec operands 241 = tree_stl_vec (1, build_int_cst (uint32_type_node, i)); 242 wgid_call 243 = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPID, BRIG_TYPE_U32, 
244 uint32_type_node, operands); 245 } 246 else 247 wgid_call 248 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPID), 249 2, uint32_type_node, uint32_type_node, 250 build_int_cst (uint32_type_node, i), ptr_type_node, 251 m_context_arg); 252 253 tree wgid_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_id_vars[i]), 254 m_wg_id_vars[i], wgid_call); 255 256 append_statement (wgid_init); 257 258 m_wg_size_vars[i] 259 = add_local_variable (std::string ("__workgroupsize_") + dim_char, 260 uint32_type_node); 261 262 tree wgsize_call; 263 if (flag_assume_phsa) 264 { 265 tree_stl_vec operands 266 = tree_stl_vec (1, build_int_cst (uint32_type_node, i)); 267 wgsize_call 268 = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32, 269 uint32_type_node, operands); 270 } 271 else 272 wgsize_call 273 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPSIZE), 274 2, uint32_type_node, uint32_type_node, 275 build_int_cst (uint32_type_node, i), ptr_type_node, 276 m_context_arg); 277 278 tree wgsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_size_vars[i]), 279 m_wg_size_vars[i], wgsize_call); 280 281 append_statement (wgsize_init); 282 283 m_grid_size_vars[i] 284 = add_local_variable (std::string ("__gridsize_") + dim_char, 285 uint32_type_node); 286 287 tree gridsize_call 288 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_GRIDSIZE), 2, 289 uint32_type_node, uint32_type_node, 290 build_int_cst (uint32_type_node, i), ptr_type_node, 291 m_context_arg); 292 293 tree gridsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_grid_size_vars[i]), 294 m_grid_size_vars[i], gridsize_call); 295 296 append_statement (gridsize_init); 297 298 m_abs_id_base_vars[i] 299 = add_local_variable (std::string ("__abs_id_base_") + dim_char, 300 long_long_integer_type_node); 301 302 m_abs_id_vars[i] 303 = add_local_variable (std::string ("__abs_id_") + dim_char, 304 long_long_integer_type_node); 305 306 tree abs_id_base 307 = build2 (MULT_EXPR, long_long_integer_type_node, 
308 convert (long_long_integer_type_node, m_wg_id_vars[i]), 309 convert (long_long_integer_type_node, m_wg_size_vars[i])); 310 tree abs_id 311 = build2 (PLUS_EXPR, long_long_integer_type_node, abs_id_base, 312 convert (long_long_integer_type_node, m_local_id_vars[i])); 313 314 tree abs_id_base_init 315 = build2 (MODIFY_EXPR, TREE_TYPE (m_abs_id_base_vars[i]), 316 m_abs_id_base_vars[i], abs_id_base); 317 append_statement (abs_id_base_init); 318 319 tree abs_id_init = build2 (MODIFY_EXPR, 320 TREE_TYPE (m_abs_id_vars[i]), 321 m_abs_id_vars[i], abs_id); 322 append_statement (abs_id_init); 323 } 324 } 325 326 /* Creates a new local variable with the given NAME and given GENERIC 327 TYPE. */ 328 329 tree 330 brig_function::add_local_variable (std::string name, tree type) 331 { 332 tree name_identifier 333 = get_identifier_with_length (name.c_str (), name.size ()); 334 tree variable 335 = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier, type); 336 337 DECL_NONLOCAL (variable) = 0; 338 TREE_ADDRESSABLE (variable) = 0; 339 TREE_STATIC (variable) = 0; 340 TREE_USED (variable) = 1; 341 DECL_ARTIFICIAL (variable) = 0; 342 343 tree bind_expr = DECL_SAVED_TREE (m_func_decl); 344 345 DECL_CONTEXT (variable) = m_func_decl; 346 347 DECL_CHAIN (variable) = BIND_EXPR_VARS (bind_expr); 348 BIND_EXPR_VARS (bind_expr) = variable; 349 return variable; 350 } 351 352 /* Return tree type for an HSA register. 353 354 The tree type can be anything (scalar, vector, int, float, etc.) 355 but its size is guaranteed to match the HSA register size. 356 357 HSA registers are untyped but we select a type based on their use 358 to reduce (sometimes unoptimizable) VIEW_CONVERT_EXPR nodes (seems 359 to occur when use or def reaches over current BB). */ 360 361 tree 362 brig_function::get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const 363 { 364 size_t reg_size = gccbrig_reg_size (reg); 365 366 /* The default type. 
*/ 367 tree type = build_nonstandard_integer_type (reg_size, true); 368 369 if (m_parent->m_fn_regs_use_index.count (m_name) == 0) 370 return type; 371 372 const regs_use_index &index = m_parent->m_fn_regs_use_index[m_name]; 373 size_t reg_id = gccbrig_hsa_reg_id (*reg); 374 if (index.count (reg_id) == 0) 375 return type; 376 377 const reg_use_info &info = index.find (reg_id)->second; 378 std::vector<std::pair<tree, size_t> >::const_iterator it 379 = info.m_type_refs.begin (); 380 std::vector<std::pair<tree, size_t> >::const_iterator it_end 381 = info.m_type_refs.end (); 382 size_t max_refs_as_type_count = 0; 383 for (; it != it_end; it++) 384 { 385 size_t type_bit_size = int_size_in_bytes (it->first) * BITS_PER_UNIT; 386 if (type_bit_size != reg_size) continue; 387 if (it->second > max_refs_as_type_count) 388 { 389 type = it->first; 390 max_refs_as_type_count = it->second; 391 } 392 } 393 394 return type; 395 } 396 397 /* Returns a DECL_VAR for the given HSAIL operand register. 398 If it has not been created yet for the function being generated, 399 creates it as a type determined by analysis phase. */ 400 401 tree 402 brig_function::get_m_var_declfor_reg (const BrigOperandRegister *reg) 403 { 404 size_t offset = gccbrig_hsa_reg_id (*reg); 405 406 reg_decl_index_entry *regEntry = m_regs[offset]; 407 if (regEntry == NULL) 408 { 409 size_t reg_size = gccbrig_reg_size (reg); 410 tree type; 411 if (reg_size > 1) 412 type = get_tree_type_for_hsa_reg (reg); 413 else 414 type = boolean_type_node; 415 416 /* Drop the const qualifier so we do not end up with a read only 417 register variable which cannot be written to later. */ 418 tree nonconst_type = build_type_variant (type, false, false); 419 420 regEntry = new reg_decl_index_entry; 421 422 regEntry->m_var_decl 423 = add_local_variable (gccbrig_reg_name (reg), nonconst_type); 424 m_regs[offset] = regEntry; 425 } 426 return regEntry->m_var_decl; 427 } 428 429 /* Builds a work-item do..while loop for a single DIM. 
HEADER_ENTRY is 430 a statement after which the iteration variables should be initialized and 431 the loop body starts. BRANCH_AFTER is the statement after which the loop 432 predicate check and the back edge goto will be appended. */ 433 434 void 435 brig_function::add_wi_loop (int dim, tree_stmt_iterator *header_entry, 436 tree_stmt_iterator *branch_after) 437 { 438 tree ivar = m_local_id_vars[dim]; 439 tree abs_id_base_var = m_abs_id_base_vars[dim]; 440 tree abs_id_var = m_abs_id_vars[dim]; 441 tree ivar_max = m_cur_wg_size_vars[dim]; 442 tree_stmt_iterator entry = *header_entry; 443 444 /* TODO: this is not a parallel loop as we share the "register variables" 445 across work-items. Should create a copy of them per WI instance. That 446 is, declare temporaries for new definitions inside the loop body, not at 447 function scope. */ 448 449 tree ivar_init = build2 (MODIFY_EXPR, TREE_TYPE (ivar), ivar, 450 build_zero_cst (TREE_TYPE (ivar))); 451 tsi_link_after (&entry, ivar_init, TSI_NEW_STMT); 452 453 tree abs_id_var_init = build2 (MODIFY_EXPR, TREE_TYPE (abs_id_var), 454 abs_id_var, 455 convert (TREE_TYPE (abs_id_var), 456 abs_id_base_var)); 457 tsi_link_after (&entry, abs_id_var_init, TSI_NEW_STMT); 458 459 tree loop_body_label 460 = label (std::string ("__wi_loop_") + (char) ((int) 'x' + dim)); 461 tree loop_body_label_stmt = build_stmt (LABEL_EXPR, loop_body_label); 462 463 tsi_link_after (&entry, loop_body_label_stmt, TSI_NEW_STMT); 464 465 if (m_has_unexpanded_dp_builtins) 466 { 467 if (!flag_assume_phsa) 468 { 469 tree id_set_builtin 470 = builtin_decl_explicit (BUILT_IN_HSAIL_SETWORKITEMID); 471 /* Set the local ID to the current wi-loop iteration variable value 472 to ensure the builtins see the correct values. 
*/ 473 tree id_set_call 474 = call_builtin (id_set_builtin, 3, 475 void_type_node, uint32_type_node, 476 build_int_cst (uint32_type_node, dim), 477 uint32_type_node, convert (uint32_type_node, ivar), 478 ptr_type_node, m_context_arg); 479 tsi_link_after (&entry, id_set_call, TSI_NEW_STMT); 480 } 481 else 482 { 483 tree ptr_type = build_pointer_type (uint32_type_node); 484 tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg, 485 build_int_cst (ptr_type, dim * 4)); 486 tree assign = build2 (MODIFY_EXPR, uint32_type_node, ctx, 487 convert (uint32_type_node, ivar)); 488 489 tsi_link_after (&entry, assign, TSI_NEW_STMT); 490 } 491 } 492 493 /* Increment the WI iteration variable. */ 494 tree incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (ivar), ivar, 495 build_one_cst (TREE_TYPE (ivar))); 496 497 tsi_link_after (branch_after, incr, TSI_NEW_STMT); 498 499 /* ...and the abs id variable. */ 500 tree abs_id_incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (abs_id_var), 501 abs_id_var, 502 build_one_cst (TREE_TYPE (abs_id_var))); 503 504 tsi_link_after (branch_after, abs_id_incr, TSI_NEW_STMT); 505 506 /* Append the predicate check with the back edge goto. */ 507 tree condition = build2 (LT_EXPR, TREE_TYPE (ivar), ivar, ivar_max); 508 tree target_goto = build1 (GOTO_EXPR, void_type_node, loop_body_label); 509 tree if_stmt 510 = build3 (COND_EXPR, void_type_node, condition, target_goto, NULL_TREE); 511 tsi_link_after (branch_after, if_stmt, TSI_NEW_STMT); 512 } 513 514 /* Recursively analyzes the function and its callees for barrier usage. */ 515 516 void 517 brig_function::analyze_calls () 518 { 519 if (m_calls_analyzed) 520 return; 521 522 /* Set this early to not get stuck in case of recursive call graphs. 523 This is safe because if the function calls itself, either the function 524 has barrier calls which implies a call to a function with barrier calls, 525 or it doesn't in which case the result depends on the later called 526 functions. 
*/ 527 m_calls_analyzed = true; 528 529 for (size_t i = 0; i < m_called_functions.size (); ++i) 530 { 531 tree f = m_called_functions[i]; 532 brig_function *called_f = m_parent->get_finished_function (f); 533 if (called_f == NULL) 534 { 535 /* Unfinished function (only declaration within the set of BRIGs) 536 found. Cannot finish the CG analysis. Have to assume it does have 537 a barrier for safety. */ 538 m_has_function_calls_with_barriers = true; 539 m_has_unexpanded_dp_builtins = true; 540 break; 541 } 542 called_f->analyze_calls (); 543 /* We can assume m_has_barriers has been correctly set during the 544 construction of the function decl. No need to reanalyze it. */ 545 m_has_function_calls_with_barriers |= called_f->m_has_barriers; 546 547 /* If the function or any of its called functions has dispatch 548 packet builtin calls that require the local id, we need to 549 set the local id to the context in the work item loop before 550 the functions are called. If we analyze the opposite, these 551 function calls can be omitted. */ 552 m_has_unexpanded_dp_builtins |= called_f->m_has_unexpanded_dp_builtins; 553 } 554 } 555 556 /* Tries to convert the current kernel to a work-group function that executes 557 all work-items using loops. Returns true in case the conversion was 558 successful. */ 559 560 bool 561 brig_function::convert_to_wg_function () 562 { 563 if (!m_calls_analyzed) 564 analyze_calls (); 565 566 if (m_has_barriers || m_has_function_calls_with_barriers) 567 return false; 568 569 /* The most trivial case: No barriers at all in the kernel. 570 We can create one big work-item loop around the whole kernel. */ 571 tree bind_expr = m_current_bind_expr; 572 tree stmts = BIND_EXPR_BODY (bind_expr); 573 574 for (int i = 0; i < 3; ++i) 575 { 576 /* The previous loop has added a new label to the end of the function, 577 the next level loop should wrap around it also. 
*/ 578 tree_stmt_iterator function_exit = tsi_last (stmts); 579 add_wi_loop (i, &m_kernel_entry, &function_exit); 580 } 581 582 m_is_wg_function = true; 583 return false; 584 } 585 586 /* Emits a kernel description to a special ELF section so it can be 587 utilized by an HSA runtime implementation. The assembly block 588 must be emitted to a statement list of an function, which is given 589 as an argument. Returns the assembly block used to emit the section. */ 590 591 tree 592 brig_function::emit_metadata (tree stmt_list) 593 { 594 /* Emit an ELF section via an assembly directive that generates a special 595 ELF section for each kernel that contains raw bytes of a descriptor 596 object. This is pretty disgusting, but life is never perfect ;) */ 597 598 /* Use the original kernel name without the '_' prefix in the section name. */ 599 std::string kern_name = m_is_kernel ? m_name.substr (1) : m_name; 600 601 std::ostringstream strstr; 602 strstr << std::endl 603 << ".pushsection " << PHSA_DESC_SECTION_PREFIX << kern_name 604 << std::endl 605 << "\t.p2align 1, 1, 1" << std::endl 606 << "\t.byte "; 607 608 for (size_t i = 0; i < sizeof (phsa_descriptor); ++i) 609 { 610 strstr << "0x" << std::setw (2) << std::setfill ('0') << std::hex 611 << (unsigned) *((unsigned char *) &m_descriptor + i); 612 if (i + 1 < sizeof (phsa_descriptor)) 613 strstr << ", "; 614 } 615 616 strstr << std::endl << ".popsection" << std::endl << std::endl; 617 618 tree metadata_asm 619 = build_stmt (ASM_EXPR, 620 build_string (strstr.str ().size (), strstr.str ().c_str ()), 621 NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE); 622 623 append_to_statement_list_force (metadata_asm, &stmt_list); 624 return metadata_asm; 625 } 626 627 /* Emits the kernel launcher function. Also emits the metadata section 628 creation statements in it. 629 630 The launcher function calls the device-side runtime 631 that runs the kernel for all work-items. 
In C: 632 633 void KernelName (void* context, void* group_base_addr) 634 { 635 __hsail_launch_kernel (_KernelName, context, group_base_addr); 636 } 637 638 or, in case of a successful conversion to a work-group function: 639 640 void KernelName (void* context, void* group_base_addr) 641 { 642 __hsail_launch_wg_function (_KernelName, context, group_base_addr); 643 } 644 645 The user/host sees this function as the kernel to call from the 646 outside. The actual kernel generated from HSAIL was named _KernelName. 647 */ 648 649 tree 650 brig_function::emit_launcher_and_metadata () 651 { 652 /* The original kernel name without the '_' prefix. */ 653 std::string kern_name = m_name.substr (1); 654 655 tree name_identifier 656 = get_identifier_with_length (kern_name.c_str (), kern_name.size ()); 657 658 tree restrict_void_ptr 659 = build_qualified_type (build_pointer_type (void_type_node), 660 TYPE_QUAL_RESTRICT); 661 tree restrict_char_ptr 662 = build_qualified_type (build_pointer_type (char_type_node), 663 TYPE_QUAL_RESTRICT); 664 tree launcher 665 = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier, 666 build_function_type_list (void_type_node, restrict_void_ptr, 667 restrict_char_ptr, NULL_TREE)); 668 669 TREE_USED (launcher) = 1; 670 DECL_ARTIFICIAL (launcher) = 1; 671 672 tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL, 673 get_identifier ("__context"), 674 restrict_void_ptr); 675 676 DECL_ARGUMENTS (launcher) = context_arg; 677 DECL_ARG_TYPE (context_arg) = restrict_void_ptr; 678 DECL_CONTEXT (context_arg) = launcher; 679 TREE_USED (context_arg) = 1; 680 DECL_ARTIFICIAL (context_arg) = 1; 681 682 tree group_base_addr_arg 683 = build_decl (UNKNOWN_LOCATION, PARM_DECL, 684 get_identifier ("__group_base_addr"), restrict_char_ptr); 685 686 chainon (DECL_ARGUMENTS (launcher), group_base_addr_arg); 687 DECL_ARG_TYPE (group_base_addr_arg) = restrict_char_ptr; 688 DECL_CONTEXT (group_base_addr_arg) = launcher; 689 TREE_USED (group_base_addr_arg) = 1; 
690 DECL_ARTIFICIAL (group_base_addr_arg) = 1; 691 692 tree resdecl 693 = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node); 694 695 DECL_RESULT (launcher) = resdecl; 696 DECL_CONTEXT (resdecl) = launcher; 697 698 DECL_INITIAL (launcher) = make_node (BLOCK); 699 TREE_USED (DECL_INITIAL (launcher)) = 1; 700 701 tree stmt_list = alloc_stmt_list (); 702 703 tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL); 704 705 TREE_STATIC (launcher) = 1; 706 TREE_PUBLIC (launcher) = 1; 707 708 DECL_SAVED_TREE (launcher) = bind_expr; 709 710 if (DECL_STRUCT_FUNCTION (launcher) == NULL) 711 push_struct_function (launcher); 712 else 713 push_cfun (DECL_STRUCT_FUNCTION (launcher)); 714 715 tree kernel_func_ptr = build1 (ADDR_EXPR, ptr_type_node, m_func_decl); 716 717 tree phsail_launch_kernel_call; 718 719 /* Compute the local group segment frame start pointer. */ 720 tree group_local_offset_temp 721 = create_tmp_var (uint32_type_node, "group_local_offset"); 722 tree group_local_offset_arg 723 = build2 (MODIFY_EXPR, uint32_type_node, 724 group_local_offset_temp, 725 build_int_cst (uint32_type_node, 726 m_parent->m_module_group_variables.size())); 727 728 /* Emit a launcher depending whether we converted the kernel function to 729 a work group function or not. 
*/ 730 if (m_is_wg_function) 731 phsail_launch_kernel_call 732 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC), 733 4, void_type_node, 734 ptr_type_node, kernel_func_ptr, restrict_void_ptr, 735 context_arg, restrict_char_ptr, group_base_addr_arg, 736 uint32_type_node, group_local_offset_arg); 737 else 738 phsail_launch_kernel_call 739 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL), 740 4, void_type_node, 741 ptr_type_node, kernel_func_ptr, restrict_void_ptr, 742 context_arg, restrict_char_ptr, group_base_addr_arg, 743 uint32_type_node, group_local_offset_arg); 744 745 append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list); 746 747 emit_metadata (stmt_list); 748 749 set_externally_visible (launcher); 750 751 return launcher; 752 } 753 754 tree 755 brig_function::append_statement (tree stmt) 756 { 757 gcc_assert (m_func_decl != NULL); 758 759 tree bind_expr = m_current_bind_expr; 760 tree stmts = BIND_EXPR_BODY (bind_expr); 761 762 append_to_statement_list_force (stmt, &stmts); 763 return stmt; 764 } 765 766 /* Creates a new "alloca frame" for the current function by 767 injecting an alloca frame push in the beginning of the function 768 and an alloca frame pop before all function exit points. */ 769 770 void 771 brig_function::create_alloca_frame () 772 { 773 tree_stmt_iterator entry; 774 775 /* Adds the alloca push only after the ids have been initialized 776 in case of a kernel function. 
*/ 777 if (m_is_kernel) 778 entry = m_kernel_entry; 779 else 780 { 781 tree bind_expr = m_current_bind_expr; 782 tree stmts = BIND_EXPR_BODY (bind_expr); 783 entry = tsi_start (stmts); 784 } 785 786 tree push_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_PUSH_FRAME); 787 tree push_frame_call 788 = call_builtin (push_frame_builtin, 1, void_type_node, ptr_type_node, 789 m_context_arg); 790 791 tsi_link_before (&entry, push_frame_call, TSI_NEW_STMT); 792 793 tree pop_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_POP_FRAME); 794 795 do 796 { 797 tree stmt = tsi_stmt (entry); 798 if (TREE_CODE (stmt) == RETURN_EXPR) 799 { 800 tree pop_frame_call 801 = call_builtin (pop_frame_builtin, 1, void_type_node, 802 ptr_type_node, m_context_arg); 803 804 tsi_link_before (&entry, pop_frame_call, TSI_SAME_STMT); 805 } 806 tsi_next (&entry); 807 } 808 while (!tsi_end_p (entry)); 809 } 810 811 /* Finishes the currently built function. After calling this, no new 812 statements should be appeneded to the function. */ 813 void 814 brig_function::finish () 815 { 816 append_return_stmt (); 817 818 /* Currently assume single alloca frame per WG. */ 819 if (m_has_allocas) 820 create_alloca_frame (); 821 } 822 823 void 824 brig_function::finish_kernel () 825 { 826 /* Kernel functions should have a single exit point. 827 Let's create one. The return instructions should have 828 been converted to branches to this label. */ 829 append_statement (build_stmt (LABEL_EXPR, m_exit_label)); 830 /* Attempt to convert the kernel to a work-group function that 831 executes all work-items of the WG using a loop. */ 832 convert_to_wg_function (); 833 834 append_return_stmt (); 835 836 /* Currently assume single alloca frame per WG. 
*/ 837 if (m_has_allocas) 838 create_alloca_frame (); 839 } 840 841 void 842 brig_function::append_return_stmt () 843 { 844 gcc_assert (m_current_bind_expr != NULL_TREE); 845 tree stmts = BIND_EXPR_BODY (m_current_bind_expr); 846 847 if (STATEMENT_LIST_TAIL (stmts) == NULL) 848 return; /* Empty function. */ 849 850 tree last_stmt = tsi_stmt (tsi_last (stmts)); 851 852 if (TREE_CODE (last_stmt) == RETURN_EXPR) 853 return; 854 855 if (m_ret_value != NULL_TREE) 856 { 857 tree result_assign 858 = build2 (MODIFY_EXPR, TREE_TYPE (m_ret_value), m_ret_value, 859 m_ret_temp); 860 861 tree return_expr 862 = build1 (RETURN_EXPR, TREE_TYPE (result_assign), result_assign); 863 append_to_statement_list_force (return_expr, &stmts); 864 } 865 else 866 { 867 tree return_stmt = build_stmt (RETURN_EXPR, NULL); 868 append_to_statement_list_force (return_stmt, &stmts); 869 } 870 } 871 872 bool 873 brig_function::has_function_scope_var (const BrigBase* var) const 874 { 875 return m_function_scope_vars.find (var) != m_function_scope_vars.end (); 876 } 877 878 size_t 879 brig_function::group_variable_segment_offset (const std::string &name) const 880 { 881 if (m_local_group_variables.has_variable (name)) 882 return m_local_group_variables.segment_offset (name); 883 884 gcc_assert (m_parent->m_module_group_variables.has_variable (name)); 885 return m_parent->m_module_group_variables.segment_offset (name); 886 } 887 888 /* Try to expand the given builtin call to reuse a previously generated 889 variable, if possible. If not, just call the given builtin. 890 BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type, 891 ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's 892 input operands. 
*/

tree
brig_function::expand_or_call_builtin (BrigOpcode16_t brig_opcode,
                                       BrigType16_t brig_type,
                                       tree arith_type,
                                       tree_stl_vec &operands)
{
  if (needs_workitem_context_data (brig_opcode))
    m_has_unexpanded_dp_builtins = true;

  if (can_expand_builtin (brig_opcode))
    return expand_builtin (brig_opcode, operands);

  tree built_in
    = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type);

  /* A vector ARITH_TYPE with a built-in that returns a non-vector is
     handled element by element, except for the opcodes listed here.  */
  if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in)))
      && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type)
      && brig_opcode != BRIG_OPCODE_LERP
      && brig_opcode != BRIG_OPCODE_PACKCVT
      && brig_opcode != BRIG_OPCODE_SAD
      && brig_opcode != BRIG_OPCODE_SADHI)
    {
      /* Call the scalar built-in for all elements in the vector.  */
      tree_stl_vec operand0_elements;
      if (operands.size () > 0)
        unpack (operands[0], operand0_elements);

      tree_stl_vec operand1_elements;
      if (operands.size () > 1)
        unpack (operands[1], operand1_elements);

      tree_stl_vec result_elements;

      size_t element_count = gccbrig_type_vector_subparts (arith_type);
      for (size_t i = 0; i < element_count; ++i)
        {
          tree_stl_vec call_operands;
          if (operand0_elements.size () > 0)
            call_operands.push_back (operand0_elements.at (i));

          if (operand1_elements.size () > 0)
            call_operands.push_back (operand1_elements.at (i));

          result_elements.push_back
            (expand_or_call_builtin (brig_opcode, brig_type,
                                     TREE_TYPE (arith_type),
                                     call_operands));
        }
      return pack (result_elements);
    }

  tree_stl_vec call_operands;
  tree_stl_vec operand_types;

  tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in));

  /* Convert each operand to the corresponding parameter type of the
     built-in.  */
  for (size_t i = 0; i < operands.size (); ++i)
    {
      tree operand_type = TREE_VALUE (arg_type_chain);
      call_operands.push_back (convert (operand_type, operands[i]));
      operand_types.push_back (operand_type);
      arg_type_chain = TREE_CHAIN (arg_type_chain);
    }

  /* Built-ins that need work-item context data take the context pointer
     as an additional trailing argument.  */
  if (needs_workitem_context_data (brig_opcode))
    {
      call_operands.push_back (m_context_arg);
      operand_types.push_back (ptr_type_node);
    }

  size_t operand_count = call_operands.size ();

  /* Keep the vectors at a fixed size of four so that &call_operands[0]
     below is valid even when there are no operands.  */
  call_operands.resize (4, NULL_TREE);
  operand_types.resize (4, NULL_TREE);
  for (size_t i = 0; i < operand_count; ++i)
    call_operands.at (i) = build_resize_convert_view (operand_types.at (i),
                                                      call_operands.at (i));

  tree fnptr = build_fold_addr_expr (built_in);
  return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr,
                           operand_count, &call_operands[0]);
}

/* Instead of calling a built-in function, use a more efficient mechanism
   such as reuse a previously returned value known to be still valid, or
   access the work-item context struct directly.  This is beneficial especially
   for the work-item identification related builtins as not having them as
   unanalyzable black box calls can lead to more easily vectorizable parallel
   loops for multi work-item work-groups.  BRIG_OPCODE identifies the builtin
   and OPERANDS store the operands.
*/

tree
brig_function::expand_builtin (BrigOpcode16_t brig_opcode,
                               tree_stl_vec &operands)
{
  /* Single-element operand lists selecting dimension 0, 1 and 2.  They
     are used when a flattened ID is composed from the per-dimension
     expansions of this same function.  */
  tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));
  tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));
  tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));

  /* NOTE: every branch below that reads a dimension operand asserts it
     to be 0, 1 or 2 before use.  The value comes from the input program
     and is used as an index to the per-dimension variable arrays (which
     this file indexes with 0..2 only), as a multiplier of a context
     struct offset and as an offset to the 'x' suffix of the temp name.  */

  if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID)
    {
      /* flat abs id = id0 + id1 * max0 + id2 * max0 * max1, all widened
         to 64 bits, where max0/max1 are the grid sizes of the first two
         dimensions.  */
      tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0);
      id0 = convert (uint64_type_node, id0);

      tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1);
      id1 = convert (uint64_type_node, id1);

      tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2);
      id2 = convert (uint64_type_node, id2);

      tree max0 = convert (uint64_type_node, m_grid_size_vars[0]);
      tree max1 = convert (uint64_type_node, m_grid_size_vars[1]);

      tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0);
      id2_x_max0_x_max1
        = build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1);

      tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0);

      tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0);
      sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1);

      return add_temp_var ("workitemflatabsid", sum);
    }
  else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID)
    {
      HOST_WIDE_INT dim = int_constant_value (operands[0]);
      gcc_assert (dim >= 0 && dim <= 2);
      return m_abs_id_vars[dim];
    }
  else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID)
    {
      /* flat id = x + y * wgsize_x + z * wgsize_x * wgsize_y in 32 bits.
         The sub-expansions are requested in a fixed order as each of
         them may append a temp variable assignment.  */
      tree wg_size_x = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_0);
      tree wg_size_y = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_1);

      tree id_z
        = convert (uint32_type_node,
                   expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_2));
      tree z_x_wgsx_wgsy
        = build2 (MULT_EXPR, uint32_type_node, id_z, wg_size_x);
      z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy,
                              wg_size_y);

      tree id_y
        = convert (uint32_type_node,
                   expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_1));
      tree y_x_wgsx = build2 (MULT_EXPR, uint32_type_node, id_y, wg_size_x);

      tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy);

      tree id_x
        = convert (uint32_type_node,
                   expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_0));
      sum = build2 (PLUS_EXPR, uint32_type_node, id_x, sum);
      return add_temp_var ("workitemflatid", sum);
    }
  else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE)
    {
      HOST_WIDE_INT dim = int_constant_value (operands[0]);
      gcc_assert (dim >= 0 && dim <= 2);
      if (flag_assume_phsa)
        {
          /* Load the 32-bit value straight from the context struct.  */
          tree ptr_type = build_pointer_type (uint32_type_node);
          tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
                             build_int_cst (ptr_type,
                                            PHSA_CONTEXT_WG_SIZES
                                            + dim * 4));
          /* Turn the trailing 'x' into 'x', 'y' or 'z' according to DIM.  */
          std::string name ("wgsize_x");
          name[name.length () - 1] += dim;
          return add_temp_var (name.c_str (), ctx);
        }
      else if (m_is_kernel)
        {
          /* For kernels without phsa we generate certain temps before
             the WI loop, which means we don't need to rely on LICM to get
             them moved out.  */
          return m_wg_size_vars[dim];
        }
      else
        gcc_unreachable ();
    }
  else if (brig_opcode == BRIG_OPCODE_WORKITEMID)
    {
      HOST_WIDE_INT dim = int_constant_value (operands[0]);
      gcc_assert (dim >= 0 && dim <= 2);
      /* Unlike the other opcodes, the kernel-local variable is preferred
         here even when the phsa context is assumed.  */
      if (m_is_kernel)
        {
          return m_local_id_vars[dim];
        }
      else if (flag_assume_phsa)
        {
          tree ptr_type = build_pointer_type (uint32_type_node);
          tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
                             build_int_cst (ptr_type,
                                            PHSA_CONTEXT_OFFS_WI_IDS
                                            + dim * 4));
          std::string name ("wiid_x");
          name[name.length () - 1] += dim;
          return add_temp_var (name.c_str (), ctx);
        }
      else
        gcc_unreachable ();
    }
  else if (brig_opcode == BRIG_OPCODE_WORKGROUPID)
    {
      HOST_WIDE_INT dim = int_constant_value (operands[0]);
      gcc_assert (dim >= 0 && dim <= 2);
      if (flag_assume_phsa)
        {
          tree ptr_type = build_pointer_type (uint32_type_node);
          tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
                             build_int_cst (ptr_type,
                                            PHSA_CONTEXT_OFFS_WG_IDS
                                            + dim * 4));
          std::string name ("wgid_x");
          name[name.length () - 1] += dim;
          return add_temp_var (name.c_str (), ctx);
        }
      else if (m_is_kernel)
        return m_wg_id_vars[dim];
      else
        gcc_unreachable ();
    }
  else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE)
    {
      HOST_WIDE_INT dim = int_constant_value (operands[0]);
      gcc_assert (dim >= 0 && dim <= 2);
      if (flag_assume_phsa)
        {
          tree ptr_type = build_pointer_type (uint32_type_node);
          tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
                             build_int_cst (ptr_type,
                                            PHSA_CONTEXT_CURRENT_WG_SIZES
                                            + dim * 4));
          std::string name ("curwgsize_x");
          name[name.length () - 1] += dim;
          return add_temp_var (name.c_str (), ctx);
        }
      else if (m_is_kernel)
        return m_cur_wg_size_vars[dim];
      else
        gcc_unreachable ();
    }
  else
    /* Callers must consult can_expand_builtin first.  */
    gcc_unreachable ();

  return NULL_TREE;
}

/* Returns true in case the given opcode that would
normally be generated 1139 as a builtin call can be expanded to tree nodes. */ 1140 1141 bool 1142 brig_function::can_expand_builtin (BrigOpcode16_t brig_opcode) const 1143 { 1144 switch (brig_opcode) 1145 { 1146 case BRIG_OPCODE_CURRENTWORKGROUPSIZE: 1147 case BRIG_OPCODE_WORKITEMFLATID: 1148 case BRIG_OPCODE_WORKITEMID: 1149 case BRIG_OPCODE_WORKGROUPID: 1150 case BRIG_OPCODE_WORKGROUPSIZE: 1151 return m_is_kernel || flag_assume_phsa; 1152 case BRIG_OPCODE_WORKITEMFLATABSID: 1153 case BRIG_OPCODE_WORKITEMABSID: 1154 return m_is_kernel; 1155 default: 1156 return false; 1157 }; 1158 } 1159 1160 /* In case the HSA instruction must be implemented using a builtin, 1161 this function is called to get the correct builtin function. 1162 TYPE is the instruction tree type, BRIG_OPCODE the opcode of the 1163 brig instruction and BRIG_TYPE the brig instruction's type. */ 1164 1165 tree 1166 brig_function::get_builtin_for_hsa_opcode 1167 (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const 1168 { 1169 tree builtin = NULL_TREE; 1170 tree builtin_type = type; 1171 1172 /* For vector types, first find the scalar version of the builtin. */ 1173 if (type != NULL_TREE && VECTOR_TYPE_P (type)) 1174 builtin_type = TREE_TYPE (type); 1175 BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; 1176 1177 /* Some BRIG opcodes can use the same builtins for unsigned and 1178 signed types. Force these cases to unsigned types. 
*/ 1179 1180 if (brig_opcode == BRIG_OPCODE_BORROW 1181 || brig_opcode == BRIG_OPCODE_CARRY 1182 || brig_opcode == BRIG_OPCODE_LASTBIT 1183 || brig_opcode == BRIG_OPCODE_BITINSERT) 1184 { 1185 if (brig_type == BRIG_TYPE_S32) 1186 brig_type = BRIG_TYPE_U32; 1187 else if (brig_type == BRIG_TYPE_S64) 1188 brig_type = BRIG_TYPE_U64; 1189 } 1190 1191 switch (brig_opcode) 1192 { 1193 case BRIG_OPCODE_FLOOR: 1194 builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR); 1195 break; 1196 case BRIG_OPCODE_CEIL: 1197 builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL); 1198 break; 1199 case BRIG_OPCODE_SQRT: 1200 case BRIG_OPCODE_NSQRT: 1201 builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT); 1202 break; 1203 case BRIG_OPCODE_RINT: 1204 builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT); 1205 break; 1206 case BRIG_OPCODE_TRUNC: 1207 builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC); 1208 break; 1209 case BRIG_OPCODE_COPYSIGN: 1210 builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN); 1211 break; 1212 case BRIG_OPCODE_NSIN: 1213 builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN); 1214 break; 1215 case BRIG_OPCODE_NLOG2: 1216 builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2); 1217 break; 1218 case BRIG_OPCODE_NEXP2: 1219 builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2); 1220 break; 1221 case BRIG_OPCODE_FMA: 1222 case BRIG_OPCODE_NFMA: 1223 builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA); 1224 break; 1225 case BRIG_OPCODE_NCOS: 1226 builtin = mathfn_built_in (builtin_type, BUILT_IN_COS); 1227 break; 1228 case BRIG_OPCODE_POPCOUNT: 1229 /* Popcount should be typed by its argument type (the return value 1230 is always u32). Let's use a b64 version for also for b32 for now. */ 1231 return builtin_decl_explicit (BUILT_IN_POPCOUNTL); 1232 case BRIG_OPCODE_BORROW: 1233 /* Borrow uses the same builtin for unsigned and signed types. 
*/ 1234 if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32) 1235 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32); 1236 else 1237 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64); 1238 case BRIG_OPCODE_CARRY: 1239 /* Carry also uses the same builtin for unsigned and signed types. */ 1240 if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32) 1241 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32); 1242 else 1243 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64); 1244 default: 1245 1246 /* Use our builtin index for finding a proper builtin for the BRIG 1247 opcode and BRIG type. This takes care most of the builtin cases, 1248 the special cases are handled in the separate 'case' statements 1249 above. */ 1250 builtin_map::const_iterator i 1251 = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type)); 1252 if (i != s_custom_builtins.end ()) 1253 return (*i).second; 1254 1255 if (brig_inner_type != brig_type) 1256 { 1257 /* Try to find a scalar built-in we could use. */ 1258 i = s_custom_builtins.find 1259 (std::make_pair (brig_opcode, brig_inner_type)); 1260 if (i != s_custom_builtins.end ()) 1261 return (*i).second; 1262 } 1263 1264 /* In case this is an fp16 operation that is promoted to fp32, 1265 try to find a fp32 scalar built-in. */ 1266 if (brig_inner_type == BRIG_TYPE_F16) 1267 { 1268 i = s_custom_builtins.find 1269 (std::make_pair (brig_opcode, BRIG_TYPE_F32)); 1270 if (i != s_custom_builtins.end ()) 1271 return (*i).second; 1272 } 1273 gcc_unreachable (); 1274 } 1275 1276 if (VECTOR_TYPE_P (type) && builtin != NULL_TREE) 1277 { 1278 /* Try to find a vectorized version of the built-in. 1279 TODO: properly assert that builtin is a mathfn builtin? 
*/ 1280 tree vec_builtin 1281 = targetm.vectorize.builtin_vectorized_function 1282 (builtin_mathfn_code (builtin), type, type); 1283 if (vec_builtin != NULL_TREE) 1284 return vec_builtin; 1285 else 1286 return builtin; 1287 } 1288 if (builtin == NULL_TREE) 1289 gcc_unreachable (); 1290 return builtin; 1291 } 1292 1293 /* Unpacks the elements of the vector in VALUE to scalars (bit field 1294 references) in ELEMENTS. */ 1295 1296 void 1297 brig_function::unpack (tree value, tree_stl_vec &elements) 1298 { 1299 size_t vec_size = int_size_in_bytes (TREE_TYPE (value)); 1300 size_t element_size 1301 = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT; 1302 size_t element_count 1303 = vec_size * BITS_PER_UNIT / element_size; 1304 1305 tree input_element_type = TREE_TYPE (TREE_TYPE (value)); 1306 1307 value = add_temp_var ("unpack_input", value); 1308 1309 for (size_t i = 0; i < element_count; ++i) 1310 { 1311 tree element 1312 = build3 (BIT_FIELD_REF, input_element_type, value, 1313 TYPE_SIZE (input_element_type), 1314 bitsize_int(i * element_size)); 1315 1316 element = add_temp_var ("scalar", element); 1317 elements.push_back (element); 1318 } 1319 } 1320 1321 /* Pack the elements of the scalars in ELEMENTS to the returned vector. */ 1322 1323 tree 1324 brig_function::pack (tree_stl_vec &elements) 1325 { 1326 size_t element_count = elements.size (); 1327 1328 gcc_assert (element_count > 1); 1329 1330 tree output_element_type = TREE_TYPE (elements.at (0)); 1331 1332 vec<constructor_elt, va_gc> *constructor_vals = NULL; 1333 for (size_t i = 0; i < element_count; ++i) 1334 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i)); 1335 1336 tree vec_type = build_vector_type (output_element_type, element_count); 1337 1338 /* build_constructor creates a vector type which is not a vector_cst 1339 that requires compile time constant elements. 
*/ 1340 tree vec = build_constructor (vec_type, constructor_vals); 1341 1342 /* Add a temp variable for readability. */ 1343 tree tmp_var = create_tmp_var (vec_type, "vec_out"); 1344 tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec); 1345 append_statement (vec_tmp_assign); 1346 return tmp_var; 1347 } 1348 1349 /* Returns true in case the given opcode needs to know about work-item context 1350 data. In such case the context data is passed as a pointer to a work-item 1351 context object, as the last argument in the builtin call. */ 1352 1353 bool 1354 brig_function::needs_workitem_context_data 1355 (BrigOpcode16_t brig_opcode) 1356 { 1357 switch (brig_opcode) 1358 { 1359 case BRIG_OPCODE_WORKITEMABSID: 1360 case BRIG_OPCODE_WORKITEMFLATABSID: 1361 case BRIG_OPCODE_WORKITEMFLATID: 1362 case BRIG_OPCODE_CURRENTWORKITEMFLATID: 1363 case BRIG_OPCODE_WORKITEMID: 1364 case BRIG_OPCODE_WORKGROUPID: 1365 case BRIG_OPCODE_WORKGROUPSIZE: 1366 case BRIG_OPCODE_CURRENTWORKGROUPSIZE: 1367 case BRIG_OPCODE_GRIDGROUPS: 1368 case BRIG_OPCODE_GRIDSIZE: 1369 case BRIG_OPCODE_DIM: 1370 case BRIG_OPCODE_PACKETID: 1371 case BRIG_OPCODE_PACKETCOMPLETIONSIG: 1372 case BRIG_OPCODE_BARRIER: 1373 case BRIG_OPCODE_WAVEBARRIER: 1374 case BRIG_OPCODE_ARRIVEFBAR: 1375 case BRIG_OPCODE_INITFBAR: 1376 case BRIG_OPCODE_JOINFBAR: 1377 case BRIG_OPCODE_LEAVEFBAR: 1378 case BRIG_OPCODE_RELEASEFBAR: 1379 case BRIG_OPCODE_WAITFBAR: 1380 case BRIG_OPCODE_CUID: 1381 case BRIG_OPCODE_MAXCUID: 1382 case BRIG_OPCODE_DEBUGTRAP: 1383 case BRIG_OPCODE_GROUPBASEPTR: 1384 case BRIG_OPCODE_KERNARGBASEPTR: 1385 case BRIG_OPCODE_ALLOCA: 1386 return true; 1387 default: 1388 return false; 1389 }; 1390 } 1391 1392 /* Appends and returns a new temp variable and an accompanying assignment 1393 statement that stores the value of the given EXPR and has the given NAME. 
*/ 1394 1395 tree 1396 brig_function::add_temp_var (std::string name, tree expr) 1397 { 1398 tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ()); 1399 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr); 1400 append_statement (assign); 1401 return temp_var; 1402 } 1403 1404 /* Returns the integer constant value of the given node. 1405 If it's a cast, looks into the source of the cast. */ 1406 1407 HOST_WIDE_INT 1408 brig_function::int_constant_value (tree node) 1409 { 1410 tree n = node; 1411 if (TREE_CODE (n) == VIEW_CONVERT_EXPR) 1412 n = TREE_OPERAND (n, 0); 1413 return int_cst_value (n); 1414 } 1415 1416 /* Returns the tree code that should be used to implement the given 1417 HSA instruction opcode (BRIG_OPCODE) for the given type of instruction 1418 (BRIG_TYPE). In case the opcode cannot be mapped to a TREE node directly, 1419 returns TREE_LIST (if it can be emulated with a simple chain of tree 1420 nodes) or CALL_EXPR if the opcode should be implemented using a builtin 1421 call. 
*/

tree_code
brig_function::get_tree_code_for_hsa_opcode
  (BrigOpcode16_t brig_opcode, BrigType16_t brig_type)
{
  BrigType16_t brig_element_type = brig_type & BRIG_TYPE_BASE_MASK;
  const bool is_float = gccbrig_is_float_type (brig_element_type);

  /* First the opcodes with a fixed mapping.  Everything not listed here
     is resolved with the builtin index after the switch.  */
  switch (brig_opcode)
    {
    case BRIG_OPCODE_NOP:
      return NOP_EXPR;
    case BRIG_OPCODE_ADD:
      return PLUS_EXPR;
    case BRIG_OPCODE_SUB:
      return MINUS_EXPR;
    case BRIG_OPCODE_NEG:
      return NEGATE_EXPR;
    case BRIG_OPCODE_ABS:
      return ABS_EXPR;
    case BRIG_OPCODE_MUL:
    case BRIG_OPCODE_MUL24:
      return MULT_EXPR;
    case BRIG_OPCODE_MULHI:
    case BRIG_OPCODE_MUL24HI:
      return MULT_HIGHPART_EXPR;
    case BRIG_OPCODE_CMOV:
      /* A base type equal to the full type means a non-packed type.  */
      return brig_element_type == brig_type ? COND_EXPR : VEC_COND_EXPR;
    case BRIG_OPCODE_DIV:
      return is_float ? RDIV_EXPR : TRUNC_DIV_EXPR;
    case BRIG_OPCODE_MIN:
      return is_float ? CALL_EXPR : MIN_EXPR;
    case BRIG_OPCODE_MAX:
      return is_float ? CALL_EXPR : MAX_EXPR;
    case BRIG_OPCODE_REM:
      if (brig_type == BRIG_TYPE_U64 || brig_type == BRIG_TYPE_U32)
        return TRUNC_MOD_EXPR;
      return CALL_EXPR;
    case BRIG_OPCODE_SHL:
      return LSHIFT_EXPR;
    case BRIG_OPCODE_SHR:
      return RSHIFT_EXPR;
    case BRIG_OPCODE_OR:
      return BIT_IOR_EXPR;
    case BRIG_OPCODE_XOR:
      return BIT_XOR_EXPR;
    case BRIG_OPCODE_AND:
      return BIT_AND_EXPR;
    case BRIG_OPCODE_NOT:
      return BIT_NOT_EXPR;
    case BRIG_OPCODE_RET:
      return RETURN_EXPR;
    case BRIG_OPCODE_BR:
      return GOTO_EXPR;
    case BRIG_OPCODE_MOV:
    case BRIG_OPCODE_LDF:
      return MODIFY_EXPR;
    case BRIG_OPCODE_LD:
    case BRIG_OPCODE_ST:
      return MEM_REF;
    case BRIG_OPCODE_NRCP:
    case BRIG_OPCODE_NRSQRT:
      /* Implement as 1/f (x).  gcc should pattern detect that and
         use a native instruction, if available, for it.  */
      return TREE_LIST;
    case BRIG_OPCODE_FMA:
    case BRIG_OPCODE_FLOOR:
    case BRIG_OPCODE_CEIL:
    case BRIG_OPCODE_SQRT:
    case BRIG_OPCODE_NSQRT:
    case BRIG_OPCODE_RINT:
    case BRIG_OPCODE_TRUNC:
    case BRIG_OPCODE_POPCOUNT:
    case BRIG_OPCODE_COPYSIGN:
    case BRIG_OPCODE_NCOS:
    case BRIG_OPCODE_NSIN:
    case BRIG_OPCODE_NLOG2:
    case BRIG_OPCODE_NEXP2:
    case BRIG_OPCODE_NFMA:
      /* Class has type B1 regardless of the float type, thus
         the below builtin map search cannot find it.  */
    case BRIG_OPCODE_CLASS:
    case BRIG_OPCODE_WORKITEMABSID:
      return CALL_EXPR;
    default:
      break;
    }

  /* Some BRIG opcodes can use the same builtins for unsigned and
     signed types.  Force these cases to unsigned types.  */
  switch (brig_opcode)
    {
    case BRIG_OPCODE_BORROW:
    case BRIG_OPCODE_CARRY:
    case BRIG_OPCODE_LASTBIT:
    case BRIG_OPCODE_BITINSERT:
      if (brig_type == BRIG_TYPE_S32)
        brig_type = BRIG_TYPE_U32;
      else if (brig_type == BRIG_TYPE_S64)
        brig_type = BRIG_TYPE_U64;
      break;
    default:
      break;
    }

  /* A call is used when the builtin index has an entry for the full
     type, for the base type, or (for fp16) for the F32 type.  */
  const builtin_map::const_iterator not_found = s_custom_builtins.end ();

  if (s_custom_builtins.find (std::make_pair (brig_opcode, brig_type))
      != not_found)
    return CALL_EXPR;

  if (s_custom_builtins.find (std::make_pair (brig_opcode, brig_element_type))
      != not_found)
    return CALL_EXPR;

  if (brig_element_type == BRIG_TYPE_F16
      && (s_custom_builtins.find (std::make_pair (brig_opcode, BRIG_TYPE_F32))
          != not_found))
    return CALL_EXPR;

  return TREE_LIST; /* Emulate using a chain of nodes.  */
}

/* Inform of an update to the REG_VAR.
*/ 1554 1555 void 1556 brig_function::add_reg_var_update (tree reg_var, tree var) 1557 { 1558 if (var == m_abs_id_vars[0] || var == m_abs_id_vars[1] 1559 || var == m_abs_id_vars[2] || var == m_local_id_vars[0] 1560 || var == m_local_id_vars[1] || var == m_local_id_vars[2]) 1561 m_id_val_defs [reg_var] = var; 1562 else 1563 { 1564 /* Possible overwrite of an ID value. */ 1565 1566 id_val_map::iterator i = m_id_val_defs.find (reg_var); 1567 if (i != m_id_val_defs.end()) 1568 m_id_val_defs.erase (i); 1569 } 1570 } 1571 1572 /* If the REG_VAR is known to contain an ID value at this point in 1573 the basic block, return true. */ 1574 1575 bool 1576 brig_function::is_id_val (tree reg_var) 1577 { 1578 id_val_map::iterator i = m_id_val_defs.find (reg_var); 1579 return i != m_id_val_defs.end(); 1580 } 1581 1582 /* Return an ID value for the given REG_VAR if its known to contain 1583 one at this point in the BB, NULL_TREE otherwise. */ 1584 1585 tree 1586 brig_function::id_val (tree reg_var) 1587 { 1588 id_val_map::iterator i = m_id_val_defs.find (reg_var); 1589 if (i != m_id_val_defs.end()) 1590 return (*i).second; 1591 else 1592 return NULL_TREE; 1593 } 1594 1595 /* Informs of starting a new basic block. Called when generating 1596 a label, a call, a jump, or a return. */ 1597 1598 void 1599 brig_function::start_new_bb () 1600 { 1601 m_id_val_defs.clear (); 1602 } 1603