1 /* Producing binary form of HSA BRIG from our internal representation. 2 Copyright (C) 2013-2020 Free Software Foundation, Inc. 3 Contributed by Martin Jambor <mjambor@suse.cz> and 4 Martin Liska <mliska@suse.cz>. 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3, or (at your option) 11 any later version. 12 13 GCC is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include "config.h" 23 #include "system.h" 24 #include "coretypes.h" 25 #include "tm.h" 26 #include "target.h" 27 #include "memmodel.h" 28 #include "tm_p.h" 29 #include "is-a.h" 30 #include "vec.h" 31 #include "hash-table.h" 32 #include "hash-map.h" 33 #include "tree.h" 34 #include "tree-iterator.h" 35 #include "stor-layout.h" 36 #include "output.h" 37 #include "basic-block.h" 38 #include "function.h" 39 #include "cfg.h" 40 #include "fold-const.h" 41 #include "stringpool.h" 42 #include "gimple-pretty-print.h" 43 #include "diagnostic-core.h" 44 #include "cgraph.h" 45 #include "dumpfile.h" 46 #include "print-tree.h" 47 #include "alloc-pool.h" 48 #include "symbol-summary.h" 49 #include "hsa-common.h" 50 #include "gomp-constants.h" 51 52 /* Convert VAL to little endian form, if necessary. 
*/ 53 54 static uint16_t 55 lendian16 (uint16_t val) 56 { 57 #if GCC_VERSION >= 4008 58 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 59 return val; 60 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 61 return __builtin_bswap16 (val); 62 #else /* __ORDER_PDP_ENDIAN__ */ 63 return val; 64 #endif 65 #else 66 // provide a safe slower default, with shifts and masking 67 #ifndef WORDS_BIGENDIAN 68 return val; 69 #else 70 return (val >> 8) | (val << 8); 71 #endif 72 #endif 73 } 74 75 /* Convert VAL to little endian form, if necessary. */ 76 77 static uint32_t 78 lendian32 (uint32_t val) 79 { 80 #if GCC_VERSION >= 4006 81 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 82 return val; 83 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 84 return __builtin_bswap32 (val); 85 #else /* __ORDER_PDP_ENDIAN__ */ 86 return (val >> 16) | (val << 16); 87 #endif 88 #else 89 // provide a safe slower default, with shifts and masking 90 #ifndef WORDS_BIGENDIAN 91 return val; 92 #else 93 val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8); 94 return (val >> 16) | (val << 16); 95 #endif 96 #endif 97 } 98 99 /* Convert VAL to little endian form, if necessary. 
*/ 100 101 static uint64_t 102 lendian64 (uint64_t val) 103 { 104 #if GCC_VERSION >= 4006 105 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 106 return val; 107 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 108 return __builtin_bswap64 (val); 109 #else /* __ORDER_PDP_ENDIAN__ */ 110 return (((val & 0xffffll) << 48) 111 | ((val & 0xffff0000ll) << 16) 112 | ((val & 0xffff00000000ll) >> 16) 113 | ((val & 0xffff000000000000ll) >> 48)); 114 #endif 115 #else 116 // provide a safe slower default, with shifts and masking 117 #ifndef WORDS_BIGENDIAN 118 return val; 119 #else 120 val = (((val & 0xff00ff00ff00ff00ll) >> 8) 121 | ((val & 0x00ff00ff00ff00ffll) << 8)); 122 val = ((( val & 0xffff0000ffff0000ll) >> 16) 123 | (( val & 0x0000ffff0000ffffll) << 16)); 124 return (val >> 32) | (val << 32); 125 #endif 126 #endif 127 } 128 129 #define BRIG_ELF_SECTION_NAME ".brig" 130 #define BRIG_LABEL_STRING "hsa_brig" 131 #define BRIG_SECTION_DATA_NAME "hsa_data" 132 #define BRIG_SECTION_CODE_NAME "hsa_code" 133 #define BRIG_SECTION_OPERAND_NAME "hsa_operand" 134 135 #define BRIG_CHUNK_MAX_SIZE (64 * 1024) 136 137 /* Required HSA section alignment. */ 138 139 #define HSA_SECTION_ALIGNMENT 16 140 141 /* Chunks of BRIG binary data. */ 142 143 struct hsa_brig_data_chunk 144 { 145 /* Size of the data already stored into a chunk. */ 146 unsigned size; 147 148 /* Pointer to the data. */ 149 char *data; 150 }; 151 152 /* Structure representing a BRIG section, holding and writing its data. */ 153 154 struct hsa_brig_section 155 { 156 /* Section name that will be output to the BRIG. */ 157 const char *section_name; 158 /* Size in bytes of all data stored in the section. */ 159 unsigned total_size; 160 /* The size of the header of the section including padding. */ 161 unsigned header_byte_count; 162 /* The size of the header of the section without any padding. 
*/ 163 unsigned header_byte_delta; 164 165 void init (const char *name); 166 void release (); 167 void output (); 168 unsigned add (const void *data, unsigned len, void **output = NULL); 169 void round_size_up (int factor); 170 void *get_ptr_by_offset (unsigned int offset); 171 172 private: 173 void allocate_new_chunk (); 174 175 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */ 176 vec <struct hsa_brig_data_chunk> chunks; 177 178 /* More convenient access to the last chunk from the vector above. */ 179 struct hsa_brig_data_chunk *cur_chunk; 180 }; 181 182 static struct hsa_brig_section brig_data, brig_code, brig_operand; 183 static uint32_t brig_insn_count; 184 static bool brig_initialized = false; 185 186 /* Mapping between emitted HSA functions and their offset in code segment. */ 187 static hash_map<tree, BrigCodeOffset32_t> *function_offsets; 188 189 /* Hash map of emitted function declarations. */ 190 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations; 191 192 /* Hash table of emitted internal function declaration offsets. */ 193 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls; 194 195 /* List of sbr instructions. */ 196 static vec <hsa_insn_sbr *> *switch_instructions; 197 198 class function_linkage_pair 199 { 200 public: 201 function_linkage_pair (tree decl, unsigned int off) 202 : function_decl (decl), offset (off) {} 203 204 /* Declaration of called function. */ 205 tree function_decl; 206 207 /* Offset in operand section. */ 208 unsigned int offset; 209 }; 210 211 /* Vector of function calls where we need to resolve function offsets. */ 212 static auto_vec <function_linkage_pair> function_call_linkage; 213 214 /* Add a new chunk, allocate data for it and initialize it. 
*/ 215 216 void 217 hsa_brig_section::allocate_new_chunk () 218 { 219 struct hsa_brig_data_chunk new_chunk; 220 221 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE); 222 new_chunk.size = 0; 223 cur_chunk = chunks.safe_push (new_chunk); 224 } 225 226 /* Initialize the brig section. */ 227 228 void 229 hsa_brig_section::init (const char *name) 230 { 231 section_name = name; 232 /* While the following computation is basically wrong, because the intent 233 certainly wasn't to have the first character of name and padding, which 234 are a part of sizeof (BrigSectionHeader), included in the first addend, 235 this is what the disassembler expects. */ 236 total_size = sizeof (BrigSectionHeader) + strlen (section_name); 237 chunks.create (1); 238 allocate_new_chunk (); 239 header_byte_delta = total_size; 240 round_size_up (4); 241 header_byte_count = total_size; 242 } 243 244 /* Free all data in the section. */ 245 246 void 247 hsa_brig_section::release () 248 { 249 for (unsigned i = 0; i < chunks.length (); i++) 250 free (chunks[i].data); 251 chunks.release (); 252 cur_chunk = NULL; 253 } 254 255 /* Write the section to the output file to a section with the name given at 256 initialization. Switches the output section and does not restore it. */ 257 258 void 259 hsa_brig_section::output () 260 { 261 struct BrigSectionHeader section_header; 262 char padding[8]; 263 264 section_header.byteCount = lendian64 (total_size); 265 section_header.headerByteCount = lendian32 (header_byte_count); 266 section_header.nameLength = lendian32 (strlen (section_name)); 267 assemble_string ((const char *) §ion_header, 16); 268 assemble_string (section_name, (section_header.nameLength)); 269 memset (&padding, 0, sizeof (padding)); 270 /* This is also a consequence of the wrong header size computation described 271 in a comment in hsa_brig_section::init. 
*/ 272 assemble_string (padding, 8); 273 for (unsigned i = 0; i < chunks.length (); i++) 274 assemble_string (chunks[i].data, chunks[i].size); 275 } 276 277 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at 278 which it was stored. If OUTPUT is not NULL, store into it the pointer to 279 the place where DATA was actually stored. */ 280 281 unsigned 282 hsa_brig_section::add (const void *data, unsigned len, void **output) 283 { 284 unsigned offset = total_size; 285 286 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE); 287 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len)) 288 allocate_new_chunk (); 289 290 char *dst = cur_chunk->data + cur_chunk->size; 291 memcpy (dst, data, len); 292 if (output) 293 *output = dst; 294 cur_chunk->size += len; 295 total_size += len; 296 297 return offset; 298 } 299 300 /* Add padding to section so that its size is divisible by FACTOR. */ 301 302 void 303 hsa_brig_section::round_size_up (int factor) 304 { 305 unsigned padding, res = total_size % factor; 306 307 if (res == 0) 308 return; 309 310 padding = factor - res; 311 total_size += padding; 312 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding)) 313 { 314 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size; 315 cur_chunk->size = BRIG_CHUNK_MAX_SIZE; 316 allocate_new_chunk (); 317 } 318 319 cur_chunk->size += padding; 320 } 321 322 /* Return pointer to data by global OFFSET in the section. */ 323 324 void * 325 hsa_brig_section::get_ptr_by_offset (unsigned int offset) 326 { 327 gcc_assert (offset < total_size); 328 offset -= header_byte_delta; 329 330 unsigned i; 331 for (i = 0; offset >= chunks[i].size; i++) 332 offset -= chunks[i].size; 333 334 return chunks[i].data + offset; 335 } 336 337 /* BRIG string data hashing. */ 338 339 struct brig_string_slot 340 { 341 const char *s; 342 char prefix; 343 int len; 344 uint32_t offset; 345 }; 346 347 /* Hash table helpers. 
*/ 348 349 struct brig_string_slot_hasher : pointer_hash <brig_string_slot> 350 { 351 static inline hashval_t hash (const value_type); 352 static inline bool equal (const value_type, const compare_type); 353 static inline void remove (value_type); 354 }; 355 356 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string 357 to support strings that may not end in '\0'. */ 358 359 inline hashval_t 360 brig_string_slot_hasher::hash (const value_type ds) 361 { 362 hashval_t r = ds->len; 363 int i; 364 365 for (i = 0; i < ds->len; i++) 366 r = r * 67 + (unsigned) ds->s[i] - 113; 367 r = r * 67 + (unsigned) ds->prefix - 113; 368 return r; 369 } 370 371 /* Returns nonzero if DS1 and DS2 are equal. */ 372 373 inline bool 374 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2) 375 { 376 if (ds1->len == ds2->len) 377 return ds1->prefix == ds2->prefix 378 && memcmp (ds1->s, ds2->s, ds1->len) == 0; 379 380 return 0; 381 } 382 383 /* Deallocate memory for DS upon its removal. */ 384 385 inline void 386 brig_string_slot_hasher::remove (value_type ds) 387 { 388 free (const_cast<char *> (ds->s)); 389 free (ds); 390 } 391 392 /* Hash for strings we output in order not to duplicate them needlessly. */ 393 394 static hash_table<brig_string_slot_hasher> *brig_string_htab; 395 396 /* Emit a null terminated string STR to the data section and return its 397 offset in it. If PREFIX is non-zero, output it just before STR too. 398 Sanitize the string if SANITIZE option is set to true. */ 399 400 static unsigned 401 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true) 402 { 403 unsigned slen = strlen (str); 404 unsigned offset, len = slen + (prefix ? 
1 : 0); 405 uint32_t hdr_len = lendian32 (len); 406 brig_string_slot s_slot; 407 brig_string_slot **slot; 408 char *str2; 409 410 str2 = xstrdup (str); 411 412 if (sanitize) 413 hsa_sanitize_name (str2); 414 s_slot.s = str2; 415 s_slot.len = slen; 416 s_slot.prefix = prefix; 417 s_slot.offset = 0; 418 419 slot = brig_string_htab->find_slot (&s_slot, INSERT); 420 if (*slot == NULL) 421 { 422 brig_string_slot *new_slot = XCNEW (brig_string_slot); 423 424 /* In theory we should fill in BrigData but that would mean copying 425 the string to a buffer for no reason, so we just emulate it. */ 426 offset = brig_data.add (&hdr_len, sizeof (hdr_len)); 427 if (prefix) 428 brig_data.add (&prefix, 1); 429 430 brig_data.add (str2, slen); 431 brig_data.round_size_up (4); 432 433 /* TODO: could use the string we just copied into 434 brig_string->cur_chunk */ 435 new_slot->s = str2; 436 new_slot->len = slen; 437 new_slot->prefix = prefix; 438 new_slot->offset = offset; 439 *slot = new_slot; 440 } 441 else 442 { 443 offset = (*slot)->offset; 444 free (str2); 445 } 446 447 return offset; 448 } 449 450 /* Linked list of queued operands. */ 451 452 static struct operand_queue 453 { 454 /* First from the chain of queued operands. */ 455 hsa_op_base *first_op, *last_op; 456 457 /* The offset at which the next operand will be enqueued. */ 458 unsigned projected_size; 459 460 } op_queue; 461 462 /* Unless already initialized, initialize infrastructure to produce BRIG. 
*/

/* The instruction counter is reset on every call.  The remaining work is
   done only once: create the string hash table, initialize the three
   sections and emit the module directive as the first entry of the code
   section.  */

static void
brig_init (void)
{
  brig_insn_count = 0;

  if (brig_initialized)
    return;

  brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
  brig_data.init (BRIG_SECTION_DATA_NAME);
  brig_code.init (BRIG_SECTION_CODE_NAME);
  brig_operand.init (BRIG_SECTION_OPERAND_NAME);
  brig_initialized = true;

  struct BrigDirectiveModule moddir;
  memset (&moddir, 0, sizeof (moddir));
  moddir.base.byteCount = lendian16 (sizeof (moddir));

  char *modname;
  if (main_input_filename && *main_input_filename != '\0')
    {
      /* Derive the module name from the base name of the main input file,
         cut at its first dot.  */
      const char *part = strrchr (main_input_filename, '/');
      if (!part)
        part = main_input_filename;
      else
        part++;
      modname = concat ("&__hsa_module_", part, NULL);
      char *extension = strchr (modname, '.');
      if (extension)
        *extension = '\0';

      /* As in LTO mode, we have to emit a different module names.  */
      if (flag_ltrans)
        {
          part = strrchr (asm_file_name, '/');
          if (!part)
            part = asm_file_name;
          else
            part++;
          char *modname2;
          modname2 = xasprintf ("%s_%s", modname, part);
          free (modname);
          modname = modname2;
        }

      hsa_sanitize_name (modname);
      /* The string offset is a 32-bit BRIG field and is stored in little
         endian form like the other name offsets emitted by this file.  */
      moddir.name = lendian32 (brig_emit_string (modname));
      free (modname);
    }
  else
    moddir.name = lendian32 (brig_emit_string ("__hsa_module_unnamed", '&'));
  moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
  moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
  moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
  moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
  if (hsa_machine_large_p ())
    moddir.machineModel = BRIG_MACHINE_LARGE;
  else
    moddir.machineModel = BRIG_MACHINE_SMALL;
  moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
  brig_code.add (&moddir, sizeof (moddir));
}

/* Free all BRIG data.
*/ 528 529 static void 530 brig_release_data (void) 531 { 532 delete brig_string_htab; 533 brig_data.release (); 534 brig_code.release (); 535 brig_operand.release (); 536 537 brig_initialized = 0; 538 } 539 540 /* Enqueue operation OP. Return the offset at which it will be stored. */ 541 542 static unsigned int 543 enqueue_op (hsa_op_base *op) 544 { 545 unsigned ret; 546 547 if (op->m_brig_op_offset) 548 return op->m_brig_op_offset; 549 550 ret = op_queue.projected_size; 551 op->m_brig_op_offset = op_queue.projected_size; 552 553 if (!op_queue.first_op) 554 op_queue.first_op = op; 555 else 556 op_queue.last_op->m_next = op; 557 op_queue.last_op = op; 558 559 if (is_a <hsa_op_immed *> (op)) 560 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes); 561 else if (is_a <hsa_op_reg *> (op)) 562 op_queue.projected_size += sizeof (struct BrigOperandRegister); 563 else if (is_a <hsa_op_address *> (op)) 564 op_queue.projected_size += sizeof (struct BrigOperandAddress); 565 else if (is_a <hsa_op_code_ref *> (op)) 566 op_queue.projected_size += sizeof (struct BrigOperandCodeRef); 567 else if (is_a <hsa_op_code_list *> (op)) 568 op_queue.projected_size += sizeof (struct BrigOperandCodeList); 569 else if (is_a <hsa_op_operand_list *> (op)) 570 op_queue.projected_size += sizeof (struct BrigOperandOperandList); 571 else 572 gcc_unreachable (); 573 return ret; 574 } 575 576 static void emit_immediate_operand (hsa_op_immed *imm); 577 578 /* Emit directive describing a symbol if it has not been emitted already. 579 Return the offset of the directive. 
*/ 580 581 static unsigned 582 emit_directive_variable (class hsa_symbol *symbol) 583 { 584 struct BrigDirectiveVariable dirvar; 585 unsigned name_offset; 586 static unsigned res_name_offset; 587 588 if (symbol->m_directive_offset) 589 return symbol->m_directive_offset; 590 591 memset (&dirvar, 0, sizeof (dirvar)); 592 dirvar.base.byteCount = lendian16 (sizeof (dirvar)); 593 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE); 594 dirvar.allocation = symbol->m_allocation; 595 596 char prefix = symbol->m_global_scope_p ? '&' : '%'; 597 598 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL) 599 { 600 if (res_name_offset == 0) 601 res_name_offset = brig_emit_string (symbol->m_name, '%'); 602 name_offset = res_name_offset; 603 } 604 else if (symbol->m_name) 605 name_offset = brig_emit_string (symbol->m_name, prefix); 606 else 607 { 608 char buf[64]; 609 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment), 610 symbol->m_name_number); 611 name_offset = brig_emit_string (buf, prefix); 612 } 613 614 dirvar.name = lendian32 (name_offset); 615 616 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL) 617 { 618 hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl)); 619 dirvar.init = lendian32 (enqueue_op (tmp)); 620 } 621 else 622 dirvar.init = 0; 623 dirvar.type = lendian16 (symbol->m_type); 624 dirvar.segment = symbol->m_segment; 625 dirvar.align = symbol->m_align; 626 dirvar.linkage = symbol->m_linkage; 627 dirvar.dim.lo = symbol->m_dim; 628 dirvar.dim.hi = symbol->m_dim >> 32; 629 630 /* Global variables are just declared and linked via HSA runtime. 
*/ 631 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM) 632 dirvar.modifier |= BRIG_VARIABLE_DEFINITION; 633 dirvar.reserved = 0; 634 635 if (symbol->m_cst_value) 636 { 637 dirvar.modifier |= BRIG_VARIABLE_CONST; 638 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value)); 639 } 640 641 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar)); 642 return symbol->m_directive_offset; 643 } 644 645 /* Emit directives describing either a function declaration or definition F and 646 return the produced BrigDirectiveExecutable structure. The function does 647 not take into account any instructions when calculating nextModuleEntry 648 field of the produced BrigDirectiveExecutable structure so when emitting 649 actual definitions, this field needs to be updated after all of the function 650 is actually added to the code section. */ 651 652 static BrigDirectiveExecutable * 653 emit_function_directives (hsa_function_representation *f, bool is_declaration) 654 { 655 struct BrigDirectiveExecutable fndir; 656 unsigned name_offset, inarg_off, scoped_off, next_toplev_off; 657 int count = 0; 658 void *ptr_to_fndir; 659 hsa_symbol *sym; 660 661 if (!f->m_declaration_p) 662 for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++) 663 { 664 gcc_assert (!sym->m_emitted_to_brig); 665 sym->m_emitted_to_brig = true; 666 emit_directive_variable (sym); 667 brig_insn_count++; 668 } 669 670 name_offset = brig_emit_string (f->m_name, '&'); 671 inarg_off = brig_code.total_size + sizeof (fndir) 672 + (f->m_output_arg ? 
sizeof (struct BrigDirectiveVariable) : 0); 673 scoped_off = inarg_off 674 + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable); 675 676 if (!f->m_declaration_p) 677 { 678 count += f->m_spill_symbols.length (); 679 count += f->m_private_variables.length (); 680 } 681 682 next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable); 683 684 memset (&fndir, 0, sizeof (fndir)); 685 fndir.base.byteCount = lendian16 (sizeof (fndir)); 686 fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL 687 : BRIG_KIND_DIRECTIVE_FUNCTION); 688 fndir.name = lendian32 (name_offset); 689 fndir.inArgCount = lendian16 (f->m_input_args.length ()); 690 fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0); 691 fndir.firstInArg = lendian32 (inarg_off); 692 fndir.firstCodeBlockEntry = lendian32 (scoped_off); 693 fndir.nextModuleEntry = lendian32 (next_toplev_off); 694 fndir.linkage = f->get_linkage (); 695 if (!f->m_declaration_p) 696 fndir.modifier |= BRIG_EXECUTABLE_DEFINITION; 697 memset (&fndir.reserved, 0, sizeof (fndir.reserved)); 698 699 /* Once we put a definition of function_offsets, we should not overwrite 700 it with a declaration of the function. */ 701 if (f->m_internal_fn == NULL) 702 { 703 if (!function_offsets->get (f->m_decl) || !is_declaration) 704 function_offsets->put (f->m_decl, brig_code.total_size); 705 } 706 else 707 { 708 /* Internal function. 
*/ 709 hsa_internal_fn **slot 710 = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT); 711 hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn); 712 int_fn->m_offset = brig_code.total_size; 713 *slot = int_fn; 714 } 715 716 brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir); 717 718 if (f->m_output_arg) 719 emit_directive_variable (f->m_output_arg); 720 for (unsigned i = 0; i < f->m_input_args.length (); i++) 721 emit_directive_variable (f->m_input_args[i]); 722 723 if (!f->m_declaration_p) 724 { 725 for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++) 726 { 727 emit_directive_variable (sym); 728 brig_insn_count++; 729 } 730 for (unsigned i = 0; i < f->m_private_variables.length (); i++) 731 { 732 emit_directive_variable (f->m_private_variables[i]); 733 brig_insn_count++; 734 } 735 } 736 737 return (BrigDirectiveExecutable *) ptr_to_fndir; 738 } 739 740 /* Emit a label directive for the given HBB. We assume it is about to start on 741 the current offset in the code section. */ 742 743 static void 744 emit_bb_label_directive (hsa_bb *hbb) 745 { 746 struct BrigDirectiveLabel lbldir; 747 748 lbldir.base.byteCount = lendian16 (sizeof (lbldir)); 749 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL); 750 char buf[32]; 751 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl), 752 hbb->m_index); 753 lbldir.name = lendian32 (brig_emit_string (buf, '@')); 754 755 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir, 756 sizeof (lbldir)); 757 brig_insn_count++; 758 } 759 760 /* Map a normal HSAIL type to the type of the equivalent BRIG operand 761 holding such, for constants and registers. 
*/

static BrigType16_t
regtype_for_type (BrigType16_t t)
{
  BrigType16_t operand_type;

  switch (t)
    {
    /* Types that map to a 128-bit operand.  */
    case BRIG_TYPE_B128:
    case BRIG_TYPE_U8X16:
    case BRIG_TYPE_S8X16:
    case BRIG_TYPE_U16X8:
    case BRIG_TYPE_S16X8:
    case BRIG_TYPE_F16X8:
    case BRIG_TYPE_U32X4:
    case BRIG_TYPE_S32X4:
    case BRIG_TYPE_F32X4:
    case BRIG_TYPE_U64X2:
    case BRIG_TYPE_S64X2:
    case BRIG_TYPE_F64X2:
      operand_type = BRIG_TYPE_B128;
      break;

    /* Types that map to a 64-bit operand.  */
    case BRIG_TYPE_B64:
    case BRIG_TYPE_U64:
    case BRIG_TYPE_S64:
    case BRIG_TYPE_F64:
    case BRIG_TYPE_U8X8:
    case BRIG_TYPE_S8X8:
    case BRIG_TYPE_U16X4:
    case BRIG_TYPE_S16X4:
    case BRIG_TYPE_F16X4:
    case BRIG_TYPE_U32X2:
    case BRIG_TYPE_S32X2:
    case BRIG_TYPE_F32X2:
      operand_type = BRIG_TYPE_B64;
      break;

    /* Types that map to a 32-bit operand; this includes the 8-bit and
       16-bit scalar types.  */
    case BRIG_TYPE_B8:
    case BRIG_TYPE_U8:
    case BRIG_TYPE_S8:
    case BRIG_TYPE_B16:
    case BRIG_TYPE_U16:
    case BRIG_TYPE_S16:
    case BRIG_TYPE_F16:
    case BRIG_TYPE_B32:
    case BRIG_TYPE_U32:
    case BRIG_TYPE_S32:
    case BRIG_TYPE_F32:
    case BRIG_TYPE_U8X4:
    case BRIG_TYPE_S8X4:
    case BRIG_TYPE_U16X2:
    case BRIG_TYPE_S16X2:
    case BRIG_TYPE_F16X2:
      operand_type = BRIG_TYPE_B32;
      break;

    /* The one-bit type maps to itself.  */
    case BRIG_TYPE_B1:
      operand_type = BRIG_TYPE_B1;
      break;

    default:
      gcc_unreachable ();
    }

  return operand_type;
}

/* Return the length of the BRIG type TYPE that is going to be streamed out as
   an immediate constant (so it must not be B1).
*/

unsigned
hsa_get_imm_brig_type_len (BrigType16_t type)
{
  /* For packed types the length follows from the packing alone.  */
  const BrigType16_t packing = type & BRIG_TYPE_PACK_MASK;

  if (packing == BRIG_TYPE_PACK_32)
    return 4;
  if (packing == BRIG_TYPE_PACK_64)
    return 8;
  if (packing == BRIG_TYPE_PACK_128)
    return 16;
  if (packing != BRIG_TYPE_PACK_NONE)
    gcc_unreachable ();

  /* Not packed, so the base type decides.  */
  const BrigType16_t base = type & BRIG_TYPE_BASE_MASK;
  unsigned length;

  switch (base)
    {
    case BRIG_TYPE_B8:
    case BRIG_TYPE_U8:
    case BRIG_TYPE_S8:
      length = 1;
      break;

    case BRIG_TYPE_B16:
    case BRIG_TYPE_U16:
    case BRIG_TYPE_S16:
    case BRIG_TYPE_F16:
      length = 2;
      break;

    case BRIG_TYPE_B32:
    case BRIG_TYPE_U32:
    case BRIG_TYPE_S32:
    case BRIG_TYPE_F32:
      length = 4;
      break;

    case BRIG_TYPE_B64:
    case BRIG_TYPE_U64:
    case BRIG_TYPE_S64:
    case BRIG_TYPE_F64:
      length = 8;
      break;

    case BRIG_TYPE_B128:
      length = 16;
      break;

    default:
      gcc_unreachable ();
    }

  return length;
}

/* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
   If NEED_LEN is not equal to zero, shrink or extend the value
   to NEED_LEN bytes.  Return how many bytes were written.
*/ 876 877 static int 878 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len) 879 { 880 union hsa_bytes bytes; 881 882 memset (&bytes, 0, sizeof (bytes)); 883 tree type = TREE_TYPE (value); 884 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE); 885 886 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT; 887 if (INTEGRAL_TYPE_P (type) 888 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST)) 889 switch (data_len) 890 { 891 case 1: 892 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value); 893 break; 894 case 2: 895 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value); 896 break; 897 case 4: 898 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value); 899 break; 900 case 8: 901 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value); 902 break; 903 default: 904 gcc_unreachable (); 905 } 906 else if (SCALAR_FLOAT_TYPE_P (type)) 907 { 908 if (data_len == 2) 909 { 910 sorry ("Support for HSA does not implement immediate 16 bit FPU " 911 "operands"); 912 return 2; 913 } 914 unsigned int_len = GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type)); 915 /* There are always 32 bits in each long, no matter the size of 916 the hosts long. */ 917 long tmp[6]; 918 919 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type)); 920 921 if (int_len == 4) 922 bytes.b32 = (uint32_t) tmp[0]; 923 else 924 { 925 bytes.b64 = (uint64_t)(uint32_t) tmp[1]; 926 bytes.b64 <<= 32; 927 bytes.b64 |= (uint32_t) tmp[0]; 928 } 929 } 930 else 931 gcc_unreachable (); 932 933 int len; 934 if (need_len == 0) 935 len = data_len; 936 else 937 len = need_len; 938 939 memcpy (data, &bytes, len); 940 return len; 941 } 942 943 char * 944 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size) 945 { 946 char *brig_repr; 947 *brig_repr_size = hsa_get_imm_brig_type_len (m_type); 948 949 if (m_tree_value != NULL_TREE) 950 { 951 /* Update brig_repr_size for special tree values. 
*/ 952 if (TREE_CODE (m_tree_value) == STRING_CST) 953 *brig_repr_size = TREE_STRING_LENGTH (m_tree_value); 954 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR) 955 *brig_repr_size 956 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value))); 957 958 unsigned total_len = *brig_repr_size; 959 960 /* As we can have a constructor with fewer elements, fill the memory 961 with zeros. */ 962 brig_repr = XCNEWVEC (char, total_len); 963 char *p = brig_repr; 964 965 if (TREE_CODE (m_tree_value) == VECTOR_CST) 966 { 967 /* Variable-length vectors aren't supported. */ 968 int i, num = VECTOR_CST_NELTS (m_tree_value).to_constant (); 969 for (i = 0; i < num; i++) 970 { 971 tree v = VECTOR_CST_ELT (m_tree_value, i); 972 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0); 973 total_len -= actual; 974 p += actual; 975 } 976 /* Vectors should have the exact size. */ 977 gcc_assert (total_len == 0); 978 } 979 else if (TREE_CODE (m_tree_value) == STRING_CST) 980 memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value), 981 TREE_STRING_LENGTH (m_tree_value)); 982 else if (TREE_CODE (m_tree_value) == COMPLEX_CST) 983 { 984 gcc_assert (total_len % 2 == 0); 985 unsigned actual; 986 actual 987 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p, 988 total_len / 2); 989 990 gcc_assert (actual == total_len / 2); 991 p += actual; 992 993 actual 994 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p, 995 total_len / 2); 996 gcc_assert (actual == total_len / 2); 997 } 998 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR) 999 { 1000 unsigned len = CONSTRUCTOR_NELTS (m_tree_value); 1001 for (unsigned i = 0; i < len; i++) 1002 { 1003 tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value; 1004 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0); 1005 total_len -= actual; 1006 p += actual; 1007 } 1008 } 1009 else 1010 emit_immediate_scalar_to_buffer (m_tree_value, p, total_len); 1011 } 1012 else 1013 { 1014 hsa_bytes bytes; 1015 1016 switch 
(*brig_repr_size) 1017 { 1018 case 1: 1019 bytes.b8 = (uint8_t) m_int_value; 1020 break; 1021 case 2: 1022 bytes.b16 = (uint16_t) m_int_value; 1023 break; 1024 case 4: 1025 bytes.b32 = (uint32_t) m_int_value; 1026 break; 1027 case 8: 1028 bytes.b64 = (uint64_t) m_int_value; 1029 break; 1030 default: 1031 gcc_unreachable (); 1032 } 1033 1034 brig_repr = XNEWVEC (char, *brig_repr_size); 1035 memcpy (brig_repr, &bytes, *brig_repr_size); 1036 } 1037 1038 return brig_repr; 1039 } 1040 1041 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might 1042 have been massaged to comply with various HSA/BRIG type requirements, so the 1043 only important aspect of that is the length (because HSAIL might expect 1044 smaller constants or become bit-data). The data should be represented 1045 according to what is in the tree representation. */ 1046 1047 static void 1048 emit_immediate_operand (hsa_op_immed *imm) 1049 { 1050 unsigned brig_repr_size; 1051 char *brig_repr = imm->emit_to_buffer (&brig_repr_size); 1052 struct BrigOperandConstantBytes out; 1053 1054 memset (&out, 0, sizeof (out)); 1055 out.base.byteCount = lendian16 (sizeof (out)); 1056 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES); 1057 uint32_t byteCount = lendian32 (brig_repr_size); 1058 out.type = lendian16 (imm->m_type); 1059 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); 1060 brig_operand.add (&out, sizeof (out)); 1061 brig_data.add (brig_repr, brig_repr_size); 1062 brig_data.round_size_up (4); 1063 1064 free (brig_repr); 1065 } 1066 1067 /* Emit a register BRIG operand REG. 
*/ 1068 1069 static void 1070 emit_register_operand (hsa_op_reg *reg) 1071 { 1072 struct BrigOperandRegister out; 1073 1074 out.base.byteCount = lendian16 (sizeof (out)); 1075 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER); 1076 out.regNum = lendian32 (reg->m_hard_num); 1077 1078 switch (regtype_for_type (reg->m_type)) 1079 { 1080 case BRIG_TYPE_B32: 1081 out.regKind = BRIG_REGISTER_KIND_SINGLE; 1082 break; 1083 case BRIG_TYPE_B64: 1084 out.regKind = BRIG_REGISTER_KIND_DOUBLE; 1085 break; 1086 case BRIG_TYPE_B128: 1087 out.regKind = BRIG_REGISTER_KIND_QUAD; 1088 break; 1089 case BRIG_TYPE_B1: 1090 out.regKind = BRIG_REGISTER_KIND_CONTROL; 1091 break; 1092 default: 1093 gcc_unreachable (); 1094 } 1095 1096 brig_operand.add (&out, sizeof (out)); 1097 } 1098 1099 /* Emit an address BRIG operand ADDR. */ 1100 1101 static void 1102 emit_address_operand (hsa_op_address *addr) 1103 { 1104 struct BrigOperandAddress out; 1105 1106 out.base.byteCount = lendian16 (sizeof (out)); 1107 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS); 1108 out.symbol = addr->m_symbol 1109 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0; 1110 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0; 1111 1112 if (sizeof (addr->m_imm_offset) == 8) 1113 { 1114 out.offset.lo = lendian32 (addr->m_imm_offset); 1115 out.offset.hi = lendian32 (addr->m_imm_offset >> 32); 1116 } 1117 else 1118 { 1119 gcc_assert (sizeof (addr->m_imm_offset) == 4); 1120 out.offset.lo = lendian32 (addr->m_imm_offset); 1121 out.offset.hi = 0; 1122 } 1123 1124 brig_operand.add (&out, sizeof (out)); 1125 } 1126 1127 /* Emit a code reference operand REF. 
*/ 1128 1129 static void 1130 emit_code_ref_operand (hsa_op_code_ref *ref) 1131 { 1132 struct BrigOperandCodeRef out; 1133 1134 out.base.byteCount = lendian16 (sizeof (out)); 1135 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF); 1136 out.ref = lendian32 (ref->m_directive_offset); 1137 brig_operand.add (&out, sizeof (out)); 1138 } 1139 1140 /* Emit a code list operand CODE_LIST. */ 1141 1142 static void 1143 emit_code_list_operand (hsa_op_code_list *code_list) 1144 { 1145 struct BrigOperandCodeList out; 1146 unsigned args = code_list->m_offsets.length (); 1147 1148 for (unsigned i = 0; i < args; i++) 1149 gcc_assert (code_list->m_offsets[i]); 1150 1151 out.base.byteCount = lendian16 (sizeof (out)); 1152 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST); 1153 1154 uint32_t byteCount = lendian32 (4 * args); 1155 1156 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); 1157 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t)); 1158 brig_data.round_size_up (4); 1159 brig_operand.add (&out, sizeof (out)); 1160 } 1161 1162 /* Emit an operand list operand OPERAND_LIST. */ 1163 1164 static void 1165 emit_operand_list_operand (hsa_op_operand_list *operand_list) 1166 { 1167 struct BrigOperandOperandList out; 1168 unsigned args = operand_list->m_offsets.length (); 1169 1170 for (unsigned i = 0; i < args; i++) 1171 gcc_assert (operand_list->m_offsets[i]); 1172 1173 out.base.byteCount = lendian16 (sizeof (out)); 1174 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST); 1175 1176 uint32_t byteCount = lendian32 (4 * args); 1177 1178 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); 1179 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t)); 1180 brig_data.round_size_up (4); 1181 brig_operand.add (&out, sizeof (out)); 1182 } 1183 1184 /* Emit all operands queued for writing. 
*/ 1185 1186 static void 1187 emit_queued_operands (void) 1188 { 1189 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next) 1190 { 1191 gcc_assert (op->m_brig_op_offset == brig_operand.total_size); 1192 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op)) 1193 emit_immediate_operand (imm); 1194 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op)) 1195 emit_register_operand (reg); 1196 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op)) 1197 emit_address_operand (addr); 1198 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op)) 1199 emit_code_ref_operand (ref); 1200 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op)) 1201 emit_code_list_operand (code_list); 1202 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op)) 1203 emit_operand_list_operand (l); 1204 else 1205 gcc_unreachable (); 1206 } 1207 } 1208 1209 /* Emit directives describing the function that is used for 1210 a function declaration. */ 1211 1212 static BrigDirectiveExecutable * 1213 emit_function_declaration (tree decl) 1214 { 1215 hsa_function_representation *f = hsa_generate_function_declaration (decl); 1216 1217 BrigDirectiveExecutable *e = emit_function_directives (f, true); 1218 emit_queued_operands (); 1219 1220 delete f; 1221 1222 return e; 1223 } 1224 1225 /* Emit directives describing the function that is used for 1226 an internal function declaration. */ 1227 1228 static BrigDirectiveExecutable * 1229 emit_internal_fn_decl (hsa_internal_fn *fn) 1230 { 1231 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn); 1232 1233 BrigDirectiveExecutable *e = emit_function_directives (f, true); 1234 emit_queued_operands (); 1235 1236 delete f; 1237 1238 return e; 1239 } 1240 1241 /* Enqueue all operands of INSN and return offset to BRIG data section 1242 to list of operand offsets. 
*/ 1243 1244 static unsigned 1245 emit_insn_operands (hsa_insn_basic *insn) 1246 { 1247 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> 1248 operand_offsets; 1249 1250 unsigned l = insn->operand_count (); 1251 1252 /* We have N operands so use 4 * N for the byte_count. */ 1253 uint32_t byte_count = lendian32 (4 * l); 1254 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); 1255 if (l > 0) 1256 { 1257 operand_offsets.safe_grow (l); 1258 for (unsigned i = 0; i < l; i++) 1259 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i))); 1260 1261 brig_data.add (operand_offsets.address (), 1262 l * sizeof (BrigOperandOffset32_t)); 1263 } 1264 brig_data.round_size_up (4); 1265 return offset; 1266 } 1267 1268 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset 1269 to BRIG data section to list of operand offsets. */ 1270 1271 static unsigned 1272 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL, 1273 hsa_op_base *op2 = NULL) 1274 { 1275 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> 1276 operand_offsets; 1277 1278 gcc_checking_assert (op0 != NULL); 1279 operand_offsets.safe_push (enqueue_op (op0)); 1280 1281 if (op1 != NULL) 1282 { 1283 operand_offsets.safe_push (enqueue_op (op1)); 1284 if (op2 != NULL) 1285 operand_offsets.safe_push (enqueue_op (op2)); 1286 } 1287 1288 unsigned l = operand_offsets.length (); 1289 1290 /* We have N operands so use 4 * N for the byte_count. */ 1291 uint32_t byte_count = lendian32 (4 * l); 1292 1293 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); 1294 brig_data.add (operand_offsets.address (), 1295 l * sizeof (BrigOperandOffset32_t)); 1296 1297 brig_data.round_size_up (4); 1298 1299 return offset; 1300 } 1301 1302 /* Emit an HSA memory instruction and all necessary directives, schedule 1303 necessary operands for writing. 
*/ 1304 1305 static void 1306 emit_memory_insn (hsa_insn_mem *mem) 1307 { 1308 struct BrigInstMem repr; 1309 gcc_checking_assert (mem->operand_count () == 2); 1310 1311 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1)); 1312 1313 /* This is necessary because of the erroneous typedef of 1314 BrigMemoryModifier8_t which introduces padding which may then contain 1315 random stuff (which we do not want so that we can test things don't 1316 change). */ 1317 memset (&repr, 0, sizeof (repr)); 1318 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1319 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); 1320 repr.base.opcode = lendian16 (mem->m_opcode); 1321 repr.base.type = lendian16 (mem->m_type); 1322 repr.base.operands = lendian32 (emit_insn_operands (mem)); 1323 1324 if (addr->m_symbol) 1325 repr.segment = addr->m_symbol->m_segment; 1326 else 1327 repr.segment = BRIG_SEGMENT_FLAT; 1328 repr.modifier = 0; 1329 repr.equivClass = mem->m_equiv_class; 1330 repr.align = mem->m_align; 1331 if (mem->m_opcode == BRIG_OPCODE_LD) 1332 repr.width = BRIG_WIDTH_1; 1333 else 1334 repr.width = BRIG_WIDTH_NONE; 1335 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1336 brig_code.add (&repr, sizeof (repr)); 1337 brig_insn_count++; 1338 } 1339 1340 /* Emit an HSA signal memory instruction and all necessary directives, schedule 1341 necessary operands for writing. */ 1342 1343 static void 1344 emit_signal_insn (hsa_insn_signal *mem) 1345 { 1346 struct BrigInstSignal repr; 1347 1348 memset (&repr, 0, sizeof (repr)); 1349 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1350 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL); 1351 repr.base.opcode = lendian16 (mem->m_opcode); 1352 repr.base.type = lendian16 (mem->m_type); 1353 repr.base.operands = lendian32 (emit_insn_operands (mem)); 1354 1355 repr.memoryOrder = mem->m_memory_order; 1356 repr.signalOperation = mem->m_signalop; 1357 repr.signalType = hsa_machine_large_p () ? 
BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32; 1358 1359 brig_code.add (&repr, sizeof (repr)); 1360 brig_insn_count++; 1361 } 1362 1363 /* Emit an HSA atomic memory instruction and all necessary directives, schedule 1364 necessary operands for writing. */ 1365 1366 static void 1367 emit_atomic_insn (hsa_insn_atomic *mem) 1368 { 1369 struct BrigInstAtomic repr; 1370 1371 /* Either operand[0] or operand[1] must be an address operand. */ 1372 hsa_op_address *addr = NULL; 1373 if (is_a <hsa_op_address *> (mem->get_op (0))) 1374 addr = as_a <hsa_op_address *> (mem->get_op (0)); 1375 else 1376 addr = as_a <hsa_op_address *> (mem->get_op (1)); 1377 1378 memset (&repr, 0, sizeof (repr)); 1379 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1380 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC); 1381 repr.base.opcode = lendian16 (mem->m_opcode); 1382 repr.base.type = lendian16 (mem->m_type); 1383 repr.base.operands = lendian32 (emit_insn_operands (mem)); 1384 1385 if (addr->m_symbol) 1386 repr.segment = addr->m_symbol->m_segment; 1387 else 1388 repr.segment = BRIG_SEGMENT_FLAT; 1389 repr.memoryOrder = mem->m_memoryorder; 1390 repr.memoryScope = mem->m_memoryscope; 1391 repr.atomicOperation = mem->m_atomicop; 1392 1393 brig_code.add (&repr, sizeof (repr)); 1394 brig_insn_count++; 1395 } 1396 1397 /* Emit an HSA LDA instruction and all necessary directives, schedule 1398 necessary operands for writing. 
*/ 1399 1400 static void 1401 emit_addr_insn (hsa_insn_basic *insn) 1402 { 1403 struct BrigInstAddr repr; 1404 1405 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1)); 1406 1407 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1408 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR); 1409 repr.base.opcode = lendian16 (insn->m_opcode); 1410 repr.base.type = lendian16 (insn->m_type); 1411 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1412 1413 if (addr->m_symbol) 1414 repr.segment = addr->m_symbol->m_segment; 1415 else 1416 repr.segment = BRIG_SEGMENT_FLAT; 1417 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1418 1419 brig_code.add (&repr, sizeof (repr)); 1420 brig_insn_count++; 1421 } 1422 1423 /* Emit an HSA segment conversion instruction and all necessary directives, 1424 schedule necessary operands for writing. */ 1425 1426 static void 1427 emit_segment_insn (hsa_insn_seg *seg) 1428 { 1429 struct BrigInstSegCvt repr; 1430 1431 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1432 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT); 1433 repr.base.opcode = lendian16 (seg->m_opcode); 1434 repr.base.type = lendian16 (seg->m_type); 1435 repr.base.operands = lendian32 (emit_insn_operands (seg)); 1436 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type); 1437 repr.segment = seg->m_segment; 1438 repr.modifier = 0; 1439 1440 brig_code.add (&repr, sizeof (repr)); 1441 1442 brig_insn_count++; 1443 } 1444 1445 /* Emit an HSA alloca instruction and all necessary directives, 1446 schedule necessary operands for writing. 
*/ 1447 1448 static void 1449 emit_alloca_insn (hsa_insn_alloca *alloca) 1450 { 1451 struct BrigInstMem repr; 1452 gcc_checking_assert (alloca->operand_count () == 2); 1453 1454 memset (&repr, 0, sizeof (repr)); 1455 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1456 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); 1457 repr.base.opcode = lendian16 (alloca->m_opcode); 1458 repr.base.type = lendian16 (alloca->m_type); 1459 repr.base.operands = lendian32 (emit_insn_operands (alloca)); 1460 repr.segment = BRIG_SEGMENT_PRIVATE; 1461 repr.modifier = 0; 1462 repr.equivClass = 0; 1463 repr.align = alloca->m_align; 1464 repr.width = BRIG_WIDTH_NONE; 1465 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1466 brig_code.add (&repr, sizeof (repr)); 1467 brig_insn_count++; 1468 } 1469 1470 /* Emit an HSA comparison instruction and all necessary directives, 1471 schedule necessary operands for writing. */ 1472 1473 static void 1474 emit_cmp_insn (hsa_insn_cmp *cmp) 1475 { 1476 struct BrigInstCmp repr; 1477 1478 memset (&repr, 0, sizeof (repr)); 1479 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1480 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP); 1481 repr.base.opcode = lendian16 (cmp->m_opcode); 1482 repr.base.type = lendian16 (cmp->m_type); 1483 repr.base.operands = lendian32 (emit_insn_operands (cmp)); 1484 1485 if (is_a <hsa_op_reg *> (cmp->get_op (1))) 1486 repr.sourceType 1487 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type); 1488 else 1489 repr.sourceType 1490 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type); 1491 repr.modifier = 0; 1492 repr.compare = cmp->m_compare; 1493 repr.pack = 0; 1494 1495 brig_code.add (&repr, sizeof (repr)); 1496 brig_insn_count++; 1497 } 1498 1499 /* Emit an HSA generic branching/sycnronization instruction. 
*/ 1500 1501 static void 1502 emit_generic_branch_insn (hsa_insn_br *br) 1503 { 1504 struct BrigInstBr repr; 1505 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1506 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1507 repr.base.opcode = lendian16 (br->m_opcode); 1508 repr.width = br->m_width; 1509 repr.base.type = lendian16 (br->m_type); 1510 repr.base.operands = lendian32 (emit_insn_operands (br)); 1511 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1512 1513 brig_code.add (&repr, sizeof (repr)); 1514 brig_insn_count++; 1515 } 1516 1517 /* Emit an HSA conditional branching instruction and all necessary directives, 1518 schedule necessary operands for writing. */ 1519 1520 static void 1521 emit_cond_branch_insn (hsa_insn_cbr *br) 1522 { 1523 struct BrigInstBr repr; 1524 1525 basic_block target = NULL; 1526 edge_iterator ei; 1527 edge e; 1528 1529 /* At the moment we only handle direct conditional jumps. */ 1530 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR); 1531 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1532 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1533 repr.base.opcode = lendian16 (br->m_opcode); 1534 repr.width = br->m_width; 1535 /* For Conditional jumps the type is always B1. */ 1536 repr.base.type = lendian16 (BRIG_TYPE_B1); 1537 1538 FOR_EACH_EDGE (e, ei, br->m_bb->succs) 1539 if (e->flags & EDGE_TRUE_VALUE) 1540 { 1541 target = e->dest; 1542 break; 1543 } 1544 gcc_assert (target); 1545 1546 repr.base.operands 1547 = lendian32 (emit_operands (br->get_op (0), 1548 &hsa_bb_for_bb (target)->m_label_ref)); 1549 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1550 1551 brig_code.add (&repr, sizeof (repr)); 1552 brig_insn_count++; 1553 } 1554 1555 /* Emit an HSA unconditional jump branching instruction that points to 1556 a label REFERENCE. 
*/ 1557 1558 static void 1559 emit_unconditional_jump (hsa_op_code_ref *reference) 1560 { 1561 struct BrigInstBr repr; 1562 1563 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1564 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1565 repr.base.opcode = lendian16 (BRIG_OPCODE_BR); 1566 repr.base.type = lendian16 (BRIG_TYPE_NONE); 1567 /* Direct branches to labels must be width(all). */ 1568 repr.width = BRIG_WIDTH_ALL; 1569 1570 repr.base.operands = lendian32 (emit_operands (reference)); 1571 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1572 brig_code.add (&repr, sizeof (repr)); 1573 brig_insn_count++; 1574 } 1575 1576 /* Emit an HSA switch jump instruction that uses a jump table to 1577 jump to a destination label. */ 1578 1579 static void 1580 emit_switch_insn (hsa_insn_sbr *sbr) 1581 { 1582 struct BrigInstBr repr; 1583 1584 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR); 1585 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1586 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1587 repr.base.opcode = lendian16 (sbr->m_opcode); 1588 repr.width = BRIG_WIDTH_1; 1589 /* For Conditional jumps the type is always B1. */ 1590 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0)); 1591 repr.base.type = lendian16 (index->m_type); 1592 repr.base.operands 1593 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list)); 1594 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1595 1596 brig_code.add (&repr, sizeof (repr)); 1597 brig_insn_count++; 1598 } 1599 1600 /* Emit a HSA convert instruction and all necessary directives, schedule 1601 necessary operands for writing. 
*/ 1602 1603 static void 1604 emit_cvt_insn (hsa_insn_cvt *insn) 1605 { 1606 struct BrigInstCvt repr; 1607 BrigType16_t srctype; 1608 1609 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1610 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT); 1611 repr.base.opcode = lendian16 (insn->m_opcode); 1612 repr.base.type = lendian16 (insn->m_type); 1613 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1614 1615 if (is_a <hsa_op_reg *> (insn->get_op (1))) 1616 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type; 1617 else 1618 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type; 1619 repr.sourceType = lendian16 (srctype); 1620 repr.modifier = 0; 1621 /* float to smaller float requires a rounding setting (we default 1622 to 'near'. */ 1623 if (hsa_type_float_p (insn->m_type) 1624 && (!hsa_type_float_p (srctype) 1625 || ((insn->m_type & BRIG_TYPE_BASE_MASK) 1626 < (srctype & BRIG_TYPE_BASE_MASK)))) 1627 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; 1628 else if (hsa_type_integer_p (insn->m_type) && 1629 hsa_type_float_p (srctype)) 1630 repr.round = BRIG_ROUND_INTEGER_ZERO; 1631 else 1632 repr.round = BRIG_ROUND_NONE; 1633 brig_code.add (&repr, sizeof (repr)); 1634 brig_insn_count++; 1635 } 1636 1637 /* Emit call instruction INSN, where this instruction must be closed 1638 within a call block instruction. */ 1639 1640 static void 1641 emit_call_insn (hsa_insn_call *call) 1642 { 1643 struct BrigInstBr repr; 1644 1645 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1646 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1647 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL); 1648 repr.base.type = lendian16 (BRIG_TYPE_NONE); 1649 1650 repr.base.operands 1651 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func, 1652 call->m_args_code_list)); 1653 1654 /* Internal functions have not set m_called_function. 
*/ 1655 if (call->m_called_function) 1656 { 1657 function_linkage_pair pair (call->m_called_function, 1658 call->m_func.m_brig_op_offset); 1659 function_call_linkage.safe_push (pair); 1660 } 1661 else 1662 { 1663 hsa_internal_fn *slot 1664 = hsa_emitted_internal_decls->find (call->m_called_internal_fn); 1665 gcc_assert (slot); 1666 gcc_assert (slot->m_offset > 0); 1667 call->m_func.m_directive_offset = slot->m_offset; 1668 } 1669 1670 repr.width = BRIG_WIDTH_ALL; 1671 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1672 1673 brig_code.add (&repr, sizeof (repr)); 1674 brig_insn_count++; 1675 } 1676 1677 /* Emit argument block directive. */ 1678 1679 static void 1680 emit_arg_block_insn (hsa_insn_arg_block *insn) 1681 { 1682 switch (insn->m_kind) 1683 { 1684 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START: 1685 { 1686 struct BrigDirectiveArgBlock repr; 1687 repr.base.byteCount = lendian16 (sizeof (repr)); 1688 repr.base.kind = lendian16 (insn->m_kind); 1689 brig_code.add (&repr, sizeof (repr)); 1690 1691 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++) 1692 { 1693 insn->m_call_insn->m_args_code_list->m_offsets[i] 1694 = lendian32 (emit_directive_variable 1695 (insn->m_call_insn->m_input_args[i])); 1696 brig_insn_count++; 1697 } 1698 1699 if (insn->m_call_insn->m_output_arg) 1700 { 1701 insn->m_call_insn->m_result_code_list->m_offsets[0] 1702 = lendian32 (emit_directive_variable 1703 (insn->m_call_insn->m_output_arg)); 1704 brig_insn_count++; 1705 } 1706 1707 break; 1708 } 1709 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END: 1710 { 1711 struct BrigDirectiveArgBlock repr; 1712 repr.base.byteCount = lendian16 (sizeof (repr)); 1713 repr.base.kind = lendian16 (insn->m_kind); 1714 brig_code.add (&repr, sizeof (repr)); 1715 break; 1716 } 1717 default: 1718 gcc_unreachable (); 1719 } 1720 1721 brig_insn_count++; 1722 } 1723 1724 /* Emit comment directive. 
*/ 1725 1726 static void 1727 emit_comment_insn (hsa_insn_comment *insn) 1728 { 1729 struct BrigDirectiveComment repr; 1730 memset (&repr, 0, sizeof (repr)); 1731 1732 repr.base.byteCount = lendian16 (sizeof (repr)); 1733 repr.base.kind = lendian16 (insn->m_opcode); 1734 repr.name = brig_emit_string (insn->m_comment, '\0', false); 1735 brig_code.add (&repr, sizeof (repr)); 1736 } 1737 1738 /* Emit queue instruction INSN. */ 1739 1740 static void 1741 emit_queue_insn (hsa_insn_queue *insn) 1742 { 1743 BrigInstQueue repr; 1744 memset (&repr, 0, sizeof (repr)); 1745 1746 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1747 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE); 1748 repr.base.opcode = lendian16 (insn->m_opcode); 1749 repr.base.type = lendian16 (insn->m_type); 1750 repr.segment = insn->m_segment; 1751 repr.memoryOrder = insn->m_memory_order; 1752 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1753 brig_data.round_size_up (4); 1754 brig_code.add (&repr, sizeof (repr)); 1755 1756 brig_insn_count++; 1757 } 1758 1759 /* Emit source type instruction INSN. */ 1760 1761 static void 1762 emit_srctype_insn (hsa_insn_srctype *insn) 1763 { 1764 /* We assume that BrigInstMod has a BrigInstBasic prefix. */ 1765 struct BrigInstSourceType repr; 1766 unsigned operand_count = insn->operand_count (); 1767 gcc_checking_assert (operand_count >= 2); 1768 1769 memset (&repr, 0, sizeof (repr)); 1770 repr.sourceType = lendian16 (insn->m_source_type); 1771 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1772 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); 1773 repr.base.opcode = lendian16 (insn->m_opcode); 1774 repr.base.type = lendian16 (insn->m_type); 1775 1776 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1777 brig_code.add (&repr, sizeof (struct BrigInstSourceType)); 1778 brig_insn_count++; 1779 } 1780 1781 /* Emit packed instruction INSN. 
*/ 1782 1783 static void 1784 emit_packed_insn (hsa_insn_packed *insn) 1785 { 1786 /* We assume that BrigInstMod has a BrigInstBasic prefix. */ 1787 struct BrigInstSourceType repr; 1788 unsigned operand_count = insn->operand_count (); 1789 gcc_checking_assert (operand_count >= 2); 1790 1791 memset (&repr, 0, sizeof (repr)); 1792 repr.sourceType = lendian16 (insn->m_source_type); 1793 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1794 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); 1795 repr.base.opcode = lendian16 (insn->m_opcode); 1796 repr.base.type = lendian16 (insn->m_type); 1797 1798 if (insn->m_opcode == BRIG_OPCODE_COMBINE) 1799 { 1800 /* Create operand list for packed type. */ 1801 for (unsigned i = 1; i < operand_count; i++) 1802 { 1803 gcc_checking_assert (insn->get_op (i)); 1804 insn->m_operand_list->m_offsets[i - 1] 1805 = lendian32 (enqueue_op (insn->get_op (i))); 1806 } 1807 1808 repr.base.operands = lendian32 (emit_operands (insn->get_op (0), 1809 insn->m_operand_list)); 1810 } 1811 else if (insn->m_opcode == BRIG_OPCODE_EXPAND) 1812 { 1813 /* Create operand list for packed type. */ 1814 for (unsigned i = 0; i < operand_count - 1; i++) 1815 { 1816 gcc_checking_assert (insn->get_op (i)); 1817 insn->m_operand_list->m_offsets[i] 1818 = lendian32 (enqueue_op (insn->get_op (i))); 1819 } 1820 1821 unsigned ops = emit_operands (insn->m_operand_list, 1822 insn->get_op (insn->operand_count () - 1)); 1823 repr.base.operands = lendian32 (ops); 1824 } 1825 1826 1827 brig_code.add (&repr, sizeof (struct BrigInstSourceType)); 1828 brig_insn_count++; 1829 } 1830 1831 /* Emit a basic HSA instruction and all necessary directives, schedule 1832 necessary operands for writing. */ 1833 1834 static void 1835 emit_basic_insn (hsa_insn_basic *insn) 1836 { 1837 /* We assume that BrigInstMod has a BrigInstBasic prefix. 
*/ 1838 struct BrigInstMod repr; 1839 BrigType16_t type; 1840 1841 memset (&repr, 0, sizeof (repr)); 1842 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic)); 1843 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC); 1844 repr.base.opcode = lendian16 (insn->m_opcode); 1845 switch (insn->m_opcode) 1846 { 1847 /* And the bit-logical operations need bit types and whine about 1848 arithmetic types :-/ */ 1849 case BRIG_OPCODE_AND: 1850 case BRIG_OPCODE_OR: 1851 case BRIG_OPCODE_XOR: 1852 case BRIG_OPCODE_NOT: 1853 type = regtype_for_type (insn->m_type); 1854 break; 1855 default: 1856 type = insn->m_type; 1857 break; 1858 } 1859 repr.base.type = lendian16 (type); 1860 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1861 1862 if (hsa_type_packed_p (type)) 1863 { 1864 if (hsa_type_float_p (type) 1865 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode)) 1866 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; 1867 else 1868 repr.round = 0; 1869 /* We assume that destination and sources agree in packing layout. */ 1870 if (insn->num_used_ops () >= 2) 1871 repr.pack = BRIG_PACK_PP; 1872 else 1873 repr.pack = BRIG_PACK_P; 1874 repr.reserved = 0; 1875 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod)); 1876 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD); 1877 brig_code.add (&repr, sizeof (struct BrigInstMod)); 1878 } 1879 else 1880 brig_code.add (&repr, sizeof (struct BrigInstBasic)); 1881 brig_insn_count++; 1882 } 1883 1884 /* Emit an HSA instruction and all necessary directives, schedule necessary 1885 operands for writing. 
*/ 1886 1887 static void 1888 emit_insn (hsa_insn_basic *insn) 1889 { 1890 gcc_assert (!is_a <hsa_insn_phi *> (insn)); 1891 1892 insn->m_brig_offset = brig_code.total_size; 1893 1894 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn)) 1895 emit_signal_insn (signal); 1896 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn)) 1897 emit_atomic_insn (atom); 1898 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn)) 1899 emit_memory_insn (mem); 1900 else if (insn->m_opcode == BRIG_OPCODE_LDA) 1901 emit_addr_insn (insn); 1902 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn)) 1903 emit_segment_insn (seg); 1904 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn)) 1905 emit_cmp_insn (cmp); 1906 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn)) 1907 emit_cond_branch_insn (br); 1908 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn)) 1909 { 1910 if (switch_instructions == NULL) 1911 switch_instructions = new vec <hsa_insn_sbr *> (); 1912 1913 switch_instructions->safe_push (sbr); 1914 emit_switch_insn (sbr); 1915 } 1916 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn)) 1917 emit_generic_branch_insn (br); 1918 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn)) 1919 emit_arg_block_insn (block); 1920 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn)) 1921 emit_call_insn (call); 1922 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn)) 1923 emit_comment_insn (comment); 1924 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn)) 1925 emit_queue_insn (queue); 1926 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn)) 1927 emit_srctype_insn (srctype); 1928 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn)) 1929 emit_packed_insn (packed); 1930 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn)) 1931 emit_cvt_insn (cvt); 1932 else if (hsa_insn_alloca *alloca = 
dyn_cast <hsa_insn_alloca *> (insn)) 1933 emit_alloca_insn (alloca); 1934 else 1935 emit_basic_insn (insn); 1936 } 1937 1938 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL, 1939 or we are about to finish emitting code, if it is NULL. If the fall through 1940 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */ 1941 1942 static void 1943 perhaps_emit_branch (basic_block bb, basic_block next_bb) 1944 { 1945 basic_block t_bb = NULL, ff = NULL; 1946 1947 edge_iterator ei; 1948 edge e; 1949 1950 /* If the last instruction of BB is a switch, ignore emission of all 1951 edges. */ 1952 if (hsa_bb_for_bb (bb)->m_last_insn 1953 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn)) 1954 return; 1955 1956 FOR_EACH_EDGE (e, ei, bb->succs) 1957 if (e->flags & EDGE_TRUE_VALUE) 1958 { 1959 gcc_assert (!t_bb); 1960 t_bb = e->dest; 1961 } 1962 else 1963 { 1964 gcc_assert (!ff); 1965 ff = e->dest; 1966 } 1967 1968 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun)) 1969 return; 1970 1971 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref); 1972 } 1973 1974 /* Emit the a function with name NAME to the various brig sections. */ 1975 1976 void 1977 hsa_brig_emit_function (void) 1978 { 1979 basic_block bb, prev_bb; 1980 hsa_insn_basic *insn; 1981 BrigDirectiveExecutable *ptr_to_fndir; 1982 1983 brig_init (); 1984 1985 brig_insn_count = 0; 1986 memset (&op_queue, 0, sizeof (op_queue)); 1987 op_queue.projected_size = brig_operand.total_size; 1988 1989 if (!function_offsets) 1990 function_offsets = new hash_map<tree, BrigCodeOffset32_t> (); 1991 1992 if (!emitted_declarations) 1993 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> (); 1994 1995 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++) 1996 { 1997 tree called = hsa_cfun->m_called_functions[i]; 1998 1999 /* If the function has no definition, emit a declaration. 
*/ 2000 if (!emitted_declarations->get (called)) 2001 { 2002 BrigDirectiveExecutable *e = emit_function_declaration (called); 2003 emitted_declarations->put (called, e); 2004 } 2005 } 2006 2007 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++) 2008 { 2009 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i]; 2010 emit_internal_fn_decl (called); 2011 } 2012 2013 ptr_to_fndir = emit_function_directives (hsa_cfun, false); 2014 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn; 2015 insn; 2016 insn = insn->m_next) 2017 emit_insn (insn); 2018 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun); 2019 FOR_EACH_BB_FN (bb, cfun) 2020 { 2021 perhaps_emit_branch (prev_bb, bb); 2022 emit_bb_label_directive (hsa_bb_for_bb (bb)); 2023 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next) 2024 emit_insn (insn); 2025 prev_bb = bb; 2026 } 2027 perhaps_emit_branch (prev_bb, NULL); 2028 ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size); 2029 2030 /* Fill up label references for all sbr instructions. */ 2031 if (switch_instructions) 2032 { 2033 for (unsigned i = 0; i < switch_instructions->length (); i++) 2034 { 2035 hsa_insn_sbr *sbr = (*switch_instructions)[i]; 2036 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++) 2037 { 2038 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]); 2039 sbr->m_label_code_list->m_offsets[j] 2040 = hbb->m_label_ref.m_directive_offset; 2041 } 2042 } 2043 2044 switch_instructions->release (); 2045 delete switch_instructions; 2046 switch_instructions = NULL; 2047 } 2048 2049 if (dump_file) 2050 { 2051 fprintf (dump_file, "------- After BRIG emission: -------\n"); 2052 dump_hsa_cfun (dump_file); 2053 } 2054 2055 emit_queued_operands (); 2056 } 2057 2058 /* Emit all OMP symbols related to OMP. 
*/ 2059 2060 void 2061 hsa_brig_emit_omp_symbols (void) 2062 { 2063 brig_init (); 2064 emit_directive_variable (hsa_num_threads); 2065 } 2066 2067 /* Create and return __hsa_global_variables symbol that contains 2068 all informations consumed by libgomp to link global variables 2069 with their string names used by an HSA kernel. */ 2070 2071 static tree 2072 hsa_output_global_variables () 2073 { 2074 unsigned l = hsa_global_variable_symbols->elements (); 2075 2076 tree variable_info_type = make_node (RECORD_TYPE); 2077 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2078 get_identifier ("name"), ptr_type_node); 2079 DECL_CHAIN (id_f1) = NULL_TREE; 2080 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2081 get_identifier ("omp_data_size"), 2082 ptr_type_node); 2083 DECL_CHAIN (id_f2) = id_f1; 2084 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2, 2085 NULL_TREE); 2086 2087 tree int_num_of_global_vars; 2088 int_num_of_global_vars = build_int_cst (uint32_type_node, l); 2089 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars); 2090 tree global_vars_array_type = build_array_type (variable_info_type, 2091 global_vars_num_index_type); 2092 TYPE_ARTIFICIAL (global_vars_array_type) = 1; 2093 2094 vec<constructor_elt, va_gc> *global_vars_vec = NULL; 2095 2096 for (hash_table <hsa_noop_symbol_hasher>::iterator it 2097 = hsa_global_variable_symbols->begin (); 2098 it != hsa_global_variable_symbols->end (); ++it) 2099 { 2100 unsigned len = strlen ((*it)->m_name); 2101 char *copy = XNEWVEC (char, len + 2); 2102 copy[0] = '&'; 2103 memcpy (copy + 1, (*it)->m_name, len); 2104 copy[len + 1] = '\0'; 2105 len++; 2106 hsa_sanitize_name (copy); 2107 2108 tree var_name = build_string (len, copy); 2109 TREE_TYPE (var_name) 2110 = build_array_type (char_type_node, build_index_type (size_int (len))); 2111 free (copy); 2112 2113 vec<constructor_elt, va_gc> *variable_info_vec = NULL; 2114 CONSTRUCTOR_APPEND_ELT (variable_info_vec, 
NULL_TREE, 2115 build1 (ADDR_EXPR, 2116 build_pointer_type (TREE_TYPE (var_name)), 2117 var_name)); 2118 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE, 2119 build_fold_addr_expr ((*it)->m_decl)); 2120 2121 tree variable_info_ctor = build_constructor (variable_info_type, 2122 variable_info_vec); 2123 2124 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE, 2125 variable_info_ctor); 2126 } 2127 2128 tree global_vars_ctor = build_constructor (global_vars_array_type, 2129 global_vars_vec); 2130 2131 char tmp_name[64]; 2132 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1); 2133 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, 2134 get_identifier (tmp_name), 2135 global_vars_array_type); 2136 TREE_STATIC (global_vars_table) = 1; 2137 TREE_READONLY (global_vars_table) = 1; 2138 TREE_PUBLIC (global_vars_table) = 0; 2139 DECL_ARTIFICIAL (global_vars_table) = 1; 2140 DECL_IGNORED_P (global_vars_table) = 1; 2141 DECL_EXTERNAL (global_vars_table) = 0; 2142 TREE_CONSTANT (global_vars_table) = 1; 2143 DECL_INITIAL (global_vars_table) = global_vars_ctor; 2144 varpool_node::finalize_decl (global_vars_table); 2145 2146 return global_vars_table; 2147 } 2148 2149 /* Create __hsa_host_functions and __hsa_kernels that contain 2150 all informations consumed by libgomp to register all kernels 2151 in the BRIG binary. 
*/

/* On return *HOST_FUNC_TABLE is the VAR_DECL of the array of host function
   addresses and *KERNELS is the VAR_DECL of the array of __hsa_kernel_info
   records; both arrays have one entry per decl-kernel mapping.  */

static void
hsa_output_kernels (tree *host_func_table, tree *kernels)
{
  unsigned num_kernels = hsa_get_number_decl_kernel_mappings ();
  char label[64];

  /* Part one: the table with the address of the host function of every
     mapped kernel.  */
  tree host_table_type
    = build_array_type (ptr_type_node,
                        build_index_type (build_int_cst (uint32_type_node,
                                                         num_kernels)));
  TYPE_ARTIFICIAL (host_table_type) = 1;

  vec<constructor_elt, va_gc> *host_table_elts = NULL;
  for (unsigned k = 0; k < num_kernels; ++k)
    {
      tree kernel_decl = hsa_get_decl_kernel_mapping_decl (k);
      tree host_addr
        = build_fold_addr_expr (hsa_get_host_function (kernel_decl));
      CONSTRUCTOR_APPEND_ELT (host_table_elts, NULL_TREE, host_addr);
    }
  tree host_table_init = build_constructor (host_table_type,
                                            host_table_elts);

  ASM_GENERATE_INTERNAL_LABEL (label, "__hsa_host_functions", 1);
  tree host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                get_identifier (label), host_table_type);
  TREE_PUBLIC (host_table) = 0;
  DECL_EXTERNAL (host_table) = 0;
  TREE_STATIC (host_table) = 1;
  TREE_READONLY (host_table) = 1;
  TREE_CONSTANT (host_table) = 1;
  DECL_ARTIFICIAL (host_table) = 1;
  DECL_IGNORED_P (host_table) = 1;
  DECL_INITIAL (host_table) = host_table_init;
  varpool_node::finalize_decl (host_table);
  *host_func_table = host_table;

  /* Part two: the array of __hsa_kernel_info records.  The field chain is
     linked from the last field back to the first, which is the form in
     which it is handed to finish_builtin_struct.  */
  tree info_record = make_node (RECORD_TYPE);
  tree name_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                                get_identifier ("name"), ptr_type_node);
  DECL_CHAIN (name_field) = NULL_TREE;
  tree size_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                                get_identifier ("omp_data_size"),
                                unsigned_type_node);
  DECL_CHAIN (size_field) = name_field;
  tree gridified_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                                     get_identifier ("gridified_kernel_p"),
                                     boolean_type_node);
  DECL_CHAIN (gridified_field) = size_field;
  tree dep_count_field
    = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                  get_identifier ("kernel_dependencies_count"),
                  unsigned_type_node);
  DECL_CHAIN (dep_count_field) = gridified_field;
  tree deps_field
    = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                  get_identifier ("kernel_dependencies"),
                  build_pointer_type (build_pointer_type (char_type_node)));
  DECL_CHAIN (deps_field) = dep_count_field;
  finish_builtin_struct (info_record, "__hsa_kernel_info", deps_field,
                         NULL_TREE);

  tree info_array_type
    = build_array_type (info_record,
                        build_index_type (build_int_cstu (uint32_type_node,
                                                          num_kernels)));
  TYPE_ARTIFICIAL (info_array_type) = 1;

  vec<constructor_elt, va_gc> *info_array_elts = NULL;

  for (unsigned k = 0; k < num_kernels; ++k)
    {
      tree kernel_decl = hsa_get_decl_kernel_mapping_decl (k);

      /* The kernel name is emitted with a leading '&'; the scratch buffer
         has room for the prefix and the terminating NUL.  */
      char *raw_name = hsa_get_decl_kernel_mapping_name (k);
      unsigned raw_len = strlen (raw_name);
      char *prefixed = XNEWVEC (char, raw_len + 2);
      prefixed[0] = '&';
      memcpy (prefixed + 1, raw_name, raw_len);
      prefixed[raw_len + 1] = '\0';

      /* Length of the prefixed string, not counting the NUL.  */
      unsigned name_len = raw_len + 1;
      tree name_cst = build_string (name_len, prefixed);
      TREE_TYPE (name_cst)
        = build_array_type (char_type_node,
                            build_index_type (size_int (name_len)));
      free (prefixed);

      unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (k);
      tree omp_size_cst = build_int_cstu (unsigned_type_node, omp_size);
      bool gridified = hsa_get_decl_kernel_mapping_gridified (k);
      tree gridified_cst = build_int_cstu (boolean_type_node, gridified);

      /* Collect the addresses of the dependency name strings, if any are
         recorded for this kernel.  */
      unsigned num_deps = 0;
      tree deps_array_type = NULL_TREE;
      vec<constructor_elt, va_gc> *dep_elts = NULL;
      vec<const char *> **slot = NULL;
      if (hsa_decl_kernel_dependencies)
        slot = hsa_decl_kernel_dependencies->get (kernel_decl);
      if (slot)
        {
          vec <const char *> *deps = *slot;
          num_deps = deps->length ();

          deps_array_type
            = build_array_type (build_pointer_type (char_type_node),
                                build_index_type (size_int (num_deps)));
          TYPE_ARTIFICIAL (deps_array_type) = 1;

          for (unsigned j = 0; j < num_deps; j++)
            {
              const char *dep = (*deps)[j];
              unsigned dep_len = strlen (dep);
              tree dep_cst = build_string (dep_len, dep);
              TREE_TYPE (dep_cst)
                = build_array_type (char_type_node,
                                    build_index_type (size_int (dep_len)));

              tree dep_addr
                = build1 (ADDR_EXPR,
                          build_pointer_type (TREE_TYPE (dep_cst)), dep_cst);
              CONSTRUCTOR_APPEND_ELT (dep_elts, NULL_TREE, dep_addr);
            }
        }

      tree num_deps_cst = build_int_cstu (unsigned_type_node, num_deps);

      /* Fill the record in field order: name, omp_data_size,
         gridified_kernel_p, kernel_dependencies_count,
         kernel_dependencies.  */
      vec<constructor_elt, va_gc> *entry_elts = NULL;
      tree name_addr
        = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (name_cst)),
                  name_cst);
      CONSTRUCTOR_APPEND_ELT (entry_elts, NULL_TREE, name_addr);
      CONSTRUCTOR_APPEND_ELT (entry_elts, NULL_TREE, omp_size_cst);
      CONSTRUCTOR_APPEND_ELT (entry_elts, NULL_TREE, gridified_cst);
      CONSTRUCTOR_APPEND_ELT (entry_elts, NULL_TREE, num_deps_cst);

      /* The last field points to a per-kernel static array of dependency
         names, or is a null pointer when there are none.  */
      tree deps_addr = null_pointer_node;
      if (num_deps > 0)
        {
          ASM_GENERATE_INTERNAL_LABEL (label, "__hsa_dependencies_list", k);
          gcc_checking_assert (deps_array_type);
          tree deps_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                       get_identifier (label),
                                       deps_array_type);

          TREE_PUBLIC (deps_list) = 0;
          DECL_EXTERNAL (deps_list) = 0;
          TREE_STATIC (deps_list) = 1;
          TREE_READONLY (deps_list) = 1;
          TREE_CONSTANT (deps_list) = 1;
          DECL_ARTIFICIAL (deps_list) = 1;
          DECL_IGNORED_P (deps_list) = 1;
          DECL_INITIAL (deps_list) = build_constructor (deps_array_type,
                                                        dep_elts);
          varpool_node::finalize_decl (deps_list);

          deps_addr = build1 (ADDR_EXPR,
                              build_pointer_type (TREE_TYPE (deps_list)),
                              deps_list);
        }
      CONSTRUCTOR_APPEND_ELT (entry_elts, NULL_TREE, deps_addr);

      tree entry_ctor = build_constructor (info_record, entry_elts);
      CONSTRUCTOR_APPEND_ELT (info_array_elts, NULL_TREE, entry_ctor);
    }

  ASM_GENERATE_INTERNAL_LABEL (label, "__hsa_kernels", 1);
  tree info_array = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                get_identifier (label), info_array_type);

  TREE_PUBLIC (info_array) = 0;
  DECL_EXTERNAL (info_array) = 0;
  TREE_STATIC (info_array) = 1;
  TREE_READONLY (info_array) = 1;
  TREE_CONSTANT (info_array) = 1;
  DECL_ARTIFICIAL (info_array) = 1;
  DECL_IGNORED_P (info_array) = 1;
  DECL_INITIAL (info_array) = build_constructor (info_array_type,
                                                 info_array_elts);
  varpool_node::finalize_decl (info_array);
  *kernels = info_array;
}

/* Create a static constructor that will register our brig stuff with
   libgomp.
*/ 2349 2350 static void 2351 hsa_output_libgomp_mapping (tree brig_decl) 2352 { 2353 unsigned kernel_count = hsa_get_number_decl_kernel_mappings (); 2354 unsigned global_variable_count = hsa_global_variable_symbols->elements (); 2355 2356 tree kernels; 2357 tree host_func_table; 2358 2359 hsa_output_kernels (&host_func_table, &kernels); 2360 tree global_vars = hsa_output_global_variables (); 2361 2362 tree hsa_image_desc_type = make_node (RECORD_TYPE); 2363 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2364 get_identifier ("brig_module"), ptr_type_node); 2365 DECL_CHAIN (id_f1) = NULL_TREE; 2366 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2367 get_identifier ("kernel_count"), 2368 unsigned_type_node); 2369 2370 DECL_CHAIN (id_f2) = id_f1; 2371 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2372 get_identifier ("hsa_kernel_infos"), 2373 ptr_type_node); 2374 DECL_CHAIN (id_f3) = id_f2; 2375 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2376 get_identifier ("global_variable_count"), 2377 unsigned_type_node); 2378 DECL_CHAIN (id_f4) = id_f3; 2379 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2380 get_identifier ("hsa_global_variable_infos"), 2381 ptr_type_node); 2382 DECL_CHAIN (id_f5) = id_f4; 2383 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5, 2384 NULL_TREE); 2385 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1; 2386 2387 vec<constructor_elt, va_gc> *img_desc_vec = NULL; 2388 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2389 build_fold_addr_expr (brig_decl)); 2390 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2391 build_int_cstu (unsigned_type_node, kernel_count)); 2392 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2393 build1 (ADDR_EXPR, 2394 build_pointer_type (TREE_TYPE (kernels)), 2395 kernels)); 2396 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2397 build_int_cstu (unsigned_type_node, 2398 global_variable_count)); 2399 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2400 build1 
(ADDR_EXPR, 2401 build_pointer_type (TREE_TYPE (global_vars)), 2402 global_vars)); 2403 2404 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec); 2405 2406 char tmp_name[64]; 2407 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1); 2408 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL, 2409 get_identifier (tmp_name), 2410 hsa_image_desc_type); 2411 TREE_STATIC (hsa_img_descriptor) = 1; 2412 TREE_READONLY (hsa_img_descriptor) = 1; 2413 TREE_PUBLIC (hsa_img_descriptor) = 0; 2414 DECL_ARTIFICIAL (hsa_img_descriptor) = 1; 2415 DECL_IGNORED_P (hsa_img_descriptor) = 1; 2416 DECL_EXTERNAL (hsa_img_descriptor) = 0; 2417 TREE_CONSTANT (hsa_img_descriptor) = 1; 2418 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor; 2419 varpool_node::finalize_decl (hsa_img_descriptor); 2420 2421 /* Construct the "host_table" libgomp expects. */ 2422 tree index_type = build_index_type (build_int_cst (integer_type_node, 4)); 2423 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type); 2424 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1; 2425 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL; 2426 tree host_func_table_addr = build_fold_addr_expr (host_func_table); 2427 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, 2428 host_func_table_addr); 2429 offset_int func_table_size 2430 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count; 2431 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, 2432 fold_build2 (POINTER_PLUS_EXPR, 2433 TREE_TYPE (host_func_table_addr), 2434 host_func_table_addr, 2435 build_int_cst (size_type_node, 2436 func_table_size.to_uhwi 2437 ()))); 2438 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node); 2439 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node); 2440 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type, 2441 libgomp_host_table_vec); 2442 ASM_GENERATE_INTERNAL_LABEL (tmp_name, 
"__hsa_libgomp_host_table", 1); 2443 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, 2444 get_identifier (tmp_name), 2445 libgomp_host_table_type); 2446 2447 TREE_STATIC (hsa_libgomp_host_table) = 1; 2448 TREE_READONLY (hsa_libgomp_host_table) = 1; 2449 TREE_PUBLIC (hsa_libgomp_host_table) = 0; 2450 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1; 2451 DECL_IGNORED_P (hsa_libgomp_host_table) = 1; 2452 DECL_EXTERNAL (hsa_libgomp_host_table) = 0; 2453 TREE_CONSTANT (hsa_libgomp_host_table) = 1; 2454 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor; 2455 varpool_node::finalize_decl (hsa_libgomp_host_table); 2456 2457 /* Generate an initializer with a call to the registration routine. */ 2458 2459 tree offload_register 2460 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER); 2461 gcc_checking_assert (offload_register); 2462 2463 tree *hsa_ctor_stmts = hsa_get_ctor_statements (); 2464 append_to_statement_list 2465 (build_call_expr (offload_register, 4, 2466 build_int_cstu (unsigned_type_node, 2467 GOMP_VERSION_PACK (GOMP_VERSION, 2468 GOMP_VERSION_HSA)), 2469 build_fold_addr_expr (hsa_libgomp_host_table), 2470 build_int_cst (integer_type_node, GOMP_DEVICE_HSA), 2471 build_fold_addr_expr (hsa_img_descriptor)), 2472 hsa_ctor_stmts); 2473 2474 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY); 2475 2476 tree offload_unregister 2477 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER); 2478 gcc_checking_assert (offload_unregister); 2479 2480 tree *hsa_dtor_stmts = hsa_get_dtor_statements (); 2481 append_to_statement_list 2482 (build_call_expr (offload_unregister, 4, 2483 build_int_cstu (unsigned_type_node, 2484 GOMP_VERSION_PACK (GOMP_VERSION, 2485 GOMP_VERSION_HSA)), 2486 build_fold_addr_expr (hsa_libgomp_host_table), 2487 build_int_cst (integer_type_node, GOMP_DEVICE_HSA), 2488 build_fold_addr_expr (hsa_img_descriptor)), 2489 hsa_dtor_stmts); 2490 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, 
DEFAULT_INIT_PRIORITY); 2491 } 2492 2493 /* Emit the brig module we have compiled to a section in the final assembly and 2494 also create a compile unit static constructor that will register the brig 2495 module with libgomp. */ 2496 2497 void 2498 hsa_output_brig (void) 2499 { 2500 section *saved_section; 2501 2502 if (!brig_initialized) 2503 return; 2504 2505 for (unsigned i = 0; i < function_call_linkage.length (); i++) 2506 { 2507 function_linkage_pair p = function_call_linkage[i]; 2508 2509 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl); 2510 gcc_assert (*func_offset); 2511 BrigOperandCodeRef *code_ref 2512 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset)); 2513 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF); 2514 code_ref->ref = lendian32 (*func_offset); 2515 } 2516 2517 /* Iterate all function declarations and if we meet a function that should 2518 have module linkage and we are unable to emit HSAIL for the function, 2519 then change the linkage to program linkage. Doing so, we will emit 2520 a valid BRIG image. 
*/ 2521 if (hsa_failed_functions != NULL && emitted_declarations != NULL) 2522 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it 2523 = emitted_declarations->begin (); 2524 it != emitted_declarations->end (); 2525 ++it) 2526 { 2527 if (hsa_failed_functions->contains ((*it).first)) 2528 (*it).second->linkage = BRIG_LINKAGE_PROGRAM; 2529 } 2530 2531 saved_section = in_section; 2532 2533 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL)); 2534 char tmp_name[64]; 2535 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1); 2536 ASM_OUTPUT_LABEL (asm_out_file, tmp_name); 2537 tree brig_id = get_identifier (tmp_name); 2538 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id, 2539 char_type_node); 2540 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id); 2541 TREE_ADDRESSABLE (brig_decl) = 1; 2542 TREE_READONLY (brig_decl) = 1; 2543 DECL_ARTIFICIAL (brig_decl) = 1; 2544 DECL_IGNORED_P (brig_decl) = 1; 2545 TREE_STATIC (brig_decl) = 1; 2546 TREE_PUBLIC (brig_decl) = 0; 2547 TREE_USED (brig_decl) = 1; 2548 DECL_INITIAL (brig_decl) = brig_decl; 2549 TREE_ASM_WRITTEN (brig_decl) = 1; 2550 2551 BrigModuleHeader module_header; 2552 memcpy (&module_header.identification, "HSA BRIG", 2553 sizeof (module_header.identification)); 2554 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR); 2555 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR); 2556 uint64_t section_index[3]; 2557 2558 int data_padding, code_padding, operand_padding; 2559 data_padding = HSA_SECTION_ALIGNMENT 2560 - brig_data.total_size % HSA_SECTION_ALIGNMENT; 2561 code_padding = HSA_SECTION_ALIGNMENT 2562 - brig_code.total_size % HSA_SECTION_ALIGNMENT; 2563 operand_padding = HSA_SECTION_ALIGNMENT 2564 - brig_operand.total_size % HSA_SECTION_ALIGNMENT; 2565 2566 uint64_t module_size = sizeof (module_header) 2567 + sizeof (section_index) 2568 + brig_data.total_size 2569 + data_padding 2570 + brig_code.total_size 2571 + code_padding 2572 + 
brig_operand.total_size 2573 + operand_padding; 2574 gcc_assert ((module_size % 16) == 0); 2575 module_header.byteCount = lendian64 (module_size); 2576 memset (&module_header.hash, 0, sizeof (module_header.hash)); 2577 module_header.reserved = 0; 2578 module_header.sectionCount = lendian32 (3); 2579 module_header.sectionIndex = lendian64 (sizeof (module_header)); 2580 assemble_string ((const char *) &module_header, sizeof (module_header)); 2581 uint64_t off = sizeof (module_header) + sizeof (section_index); 2582 section_index[0] = lendian64 (off); 2583 off += brig_data.total_size + data_padding; 2584 section_index[1] = lendian64 (off); 2585 off += brig_code.total_size + code_padding; 2586 section_index[2] = lendian64 (off); 2587 assemble_string ((const char *) §ion_index, sizeof (section_index)); 2588 2589 char padding[HSA_SECTION_ALIGNMENT]; 2590 memset (padding, 0, sizeof (padding)); 2591 2592 brig_data.output (); 2593 assemble_string (padding, data_padding); 2594 brig_code.output (); 2595 assemble_string (padding, code_padding); 2596 brig_operand.output (); 2597 assemble_string (padding, operand_padding); 2598 2599 if (saved_section) 2600 switch_to_section (saved_section); 2601 2602 hsa_output_libgomp_mapping (brig_decl); 2603 2604 hsa_free_decl_kernel_mapping (); 2605 brig_release_data (); 2606 hsa_deinit_compilation_unit_data (); 2607 2608 delete emitted_declarations; 2609 emitted_declarations = NULL; 2610 delete function_offsets; 2611 function_offsets = NULL; 2612 } 2613