1 /* Producing binary form of HSA BRIG from our internal representation. 2 Copyright (C) 2013-2017 Free Software Foundation, Inc. 3 Contributed by Martin Jambor <mjambor@suse.cz> and 4 Martin Liska <mliska@suse.cz>. 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3, or (at your option) 11 any later version. 12 13 GCC is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include "config.h" 23 #include "system.h" 24 #include "coretypes.h" 25 #include "tm.h" 26 #include "target.h" 27 #include "memmodel.h" 28 #include "tm_p.h" 29 #include "is-a.h" 30 #include "vec.h" 31 #include "hash-table.h" 32 #include "hash-map.h" 33 #include "tree.h" 34 #include "tree-iterator.h" 35 #include "stor-layout.h" 36 #include "output.h" 37 #include "cfg.h" 38 #include "function.h" 39 #include "fold-const.h" 40 #include "stringpool.h" 41 #include "gimple-pretty-print.h" 42 #include "diagnostic-core.h" 43 #include "cgraph.h" 44 #include "dumpfile.h" 45 #include "print-tree.h" 46 #include "symbol-summary.h" 47 #include "hsa-common.h" 48 #include "gomp-constants.h" 49 50 /* Convert VAL to little endian form, if necessary. 
*/ 51 52 static uint16_t 53 lendian16 (uint16_t val) 54 { 55 #if GCC_VERSION >= 4008 56 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 57 return val; 58 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 59 return __builtin_bswap16 (val); 60 #else /* __ORDER_PDP_ENDIAN__ */ 61 return val; 62 #endif 63 #else 64 // provide a safe slower default, with shifts and masking 65 #ifndef WORDS_BIGENDIAN 66 return val; 67 #else 68 return (val >> 8) | (val << 8); 69 #endif 70 #endif 71 } 72 73 /* Convert VAL to little endian form, if necessary. */ 74 75 static uint32_t 76 lendian32 (uint32_t val) 77 { 78 #if GCC_VERSION >= 4006 79 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 80 return val; 81 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 82 return __builtin_bswap32 (val); 83 #else /* __ORDER_PDP_ENDIAN__ */ 84 return (val >> 16) | (val << 16); 85 #endif 86 #else 87 // provide a safe slower default, with shifts and masking 88 #ifndef WORDS_BIGENDIAN 89 return val; 90 #else 91 val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8); 92 return (val >> 16) | (val << 16); 93 #endif 94 #endif 95 } 96 97 /* Convert VAL to little endian form, if necessary. 
*/ 98 99 static uint64_t 100 lendian64 (uint64_t val) 101 { 102 #if GCC_VERSION >= 4006 103 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 104 return val; 105 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 106 return __builtin_bswap64 (val); 107 #else /* __ORDER_PDP_ENDIAN__ */ 108 return (((val & 0xffffll) << 48) 109 | ((val & 0xffff0000ll) << 16) 110 | ((val & 0xffff00000000ll) >> 16) 111 | ((val & 0xffff000000000000ll) >> 48)); 112 #endif 113 #else 114 // provide a safe slower default, with shifts and masking 115 #ifndef WORDS_BIGENDIAN 116 return val; 117 #else 118 val = (((val & 0xff00ff00ff00ff00ll) >> 8) 119 | ((val & 0x00ff00ff00ff00ffll) << 8)); 120 val = ((( val & 0xffff0000ffff0000ll) >> 16) 121 | (( val & 0x0000ffff0000ffffll) << 16)); 122 return (val >> 32) | (val << 32); 123 #endif 124 #endif 125 } 126 127 #define BRIG_ELF_SECTION_NAME ".brig" 128 #define BRIG_LABEL_STRING "hsa_brig" 129 #define BRIG_SECTION_DATA_NAME "hsa_data" 130 #define BRIG_SECTION_CODE_NAME "hsa_code" 131 #define BRIG_SECTION_OPERAND_NAME "hsa_operand" 132 133 #define BRIG_CHUNK_MAX_SIZE (64 * 1024) 134 135 /* Required HSA section alignment. */ 136 137 #define HSA_SECTION_ALIGNMENT 16 138 139 /* Chunks of BRIG binary data. */ 140 141 struct hsa_brig_data_chunk 142 { 143 /* Size of the data already stored into a chunk. */ 144 unsigned size; 145 146 /* Pointer to the data. */ 147 char *data; 148 }; 149 150 /* Structure representing a BRIG section, holding and writing its data. */ 151 152 class hsa_brig_section 153 { 154 public: 155 /* Section name that will be output to the BRIG. */ 156 const char *section_name; 157 /* Size in bytes of all data stored in the section. */ 158 unsigned total_size; 159 /* The size of the header of the section including padding. */ 160 unsigned header_byte_count; 161 /* The size of the header of the section without any padding. 
*/ 162 unsigned header_byte_delta; 163 164 void init (const char *name); 165 void release (); 166 void output (); 167 unsigned add (const void *data, unsigned len, void **output = NULL); 168 void round_size_up (int factor); 169 void *get_ptr_by_offset (unsigned int offset); 170 171 private: 172 void allocate_new_chunk (); 173 174 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */ 175 vec <struct hsa_brig_data_chunk> chunks; 176 177 /* More convenient access to the last chunk from the vector above. */ 178 struct hsa_brig_data_chunk *cur_chunk; 179 }; 180 181 static struct hsa_brig_section brig_data, brig_code, brig_operand; 182 static uint32_t brig_insn_count; 183 static bool brig_initialized = false; 184 185 /* Mapping between emitted HSA functions and their offset in code segment. */ 186 static hash_map<tree, BrigCodeOffset32_t> *function_offsets; 187 188 /* Hash map of emitted function declarations. */ 189 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations; 190 191 /* Hash table of emitted internal function declaration offsets. */ 192 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls; 193 194 /* List of sbr instructions. */ 195 static vec <hsa_insn_sbr *> *switch_instructions; 196 197 struct function_linkage_pair 198 { 199 function_linkage_pair (tree decl, unsigned int off) 200 : function_decl (decl), offset (off) {} 201 202 /* Declaration of called function. */ 203 tree function_decl; 204 205 /* Offset in operand section. */ 206 unsigned int offset; 207 }; 208 209 /* Vector of function calls where we need to resolve function offsets. */ 210 static auto_vec <function_linkage_pair> function_call_linkage; 211 212 /* Add a new chunk, allocate data for it and initialize it. 
*/ 213 214 void 215 hsa_brig_section::allocate_new_chunk () 216 { 217 struct hsa_brig_data_chunk new_chunk; 218 219 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE); 220 new_chunk.size = 0; 221 cur_chunk = chunks.safe_push (new_chunk); 222 } 223 224 /* Initialize the brig section. */ 225 226 void 227 hsa_brig_section::init (const char *name) 228 { 229 section_name = name; 230 /* While the following computation is basically wrong, because the intent 231 certainly wasn't to have the first character of name and padding, which 232 are a part of sizeof (BrigSectionHeader), included in the first addend, 233 this is what the disassembler expects. */ 234 total_size = sizeof (BrigSectionHeader) + strlen (section_name); 235 chunks.create (1); 236 allocate_new_chunk (); 237 header_byte_delta = total_size; 238 round_size_up (4); 239 header_byte_count = total_size; 240 } 241 242 /* Free all data in the section. */ 243 244 void 245 hsa_brig_section::release () 246 { 247 for (unsigned i = 0; i < chunks.length (); i++) 248 free (chunks[i].data); 249 chunks.release (); 250 cur_chunk = NULL; 251 } 252 253 /* Write the section to the output file to a section with the name given at 254 initialization. Switches the output section and does not restore it. */ 255 256 void 257 hsa_brig_section::output () 258 { 259 struct BrigSectionHeader section_header; 260 char padding[8]; 261 262 section_header.byteCount = lendian64 (total_size); 263 section_header.headerByteCount = lendian32 (header_byte_count); 264 section_header.nameLength = lendian32 (strlen (section_name)); 265 assemble_string ((const char *) §ion_header, 16); 266 assemble_string (section_name, (section_header.nameLength)); 267 memset (&padding, 0, sizeof (padding)); 268 /* This is also a consequence of the wrong header size computation described 269 in a comment in hsa_brig_section::init. 
*/ 270 assemble_string (padding, 8); 271 for (unsigned i = 0; i < chunks.length (); i++) 272 assemble_string (chunks[i].data, chunks[i].size); 273 } 274 275 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at 276 which it was stored. If OUTPUT is not NULL, store into it the pointer to 277 the place where DATA was actually stored. */ 278 279 unsigned 280 hsa_brig_section::add (const void *data, unsigned len, void **output) 281 { 282 unsigned offset = total_size; 283 284 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE); 285 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len)) 286 allocate_new_chunk (); 287 288 char *dst = cur_chunk->data + cur_chunk->size; 289 memcpy (dst, data, len); 290 if (output) 291 *output = dst; 292 cur_chunk->size += len; 293 total_size += len; 294 295 return offset; 296 } 297 298 /* Add padding to section so that its size is divisible by FACTOR. */ 299 300 void 301 hsa_brig_section::round_size_up (int factor) 302 { 303 unsigned padding, res = total_size % factor; 304 305 if (res == 0) 306 return; 307 308 padding = factor - res; 309 total_size += padding; 310 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding)) 311 { 312 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size; 313 cur_chunk->size = BRIG_CHUNK_MAX_SIZE; 314 allocate_new_chunk (); 315 } 316 317 cur_chunk->size += padding; 318 } 319 320 /* Return pointer to data by global OFFSET in the section. */ 321 322 void * 323 hsa_brig_section::get_ptr_by_offset (unsigned int offset) 324 { 325 gcc_assert (offset < total_size); 326 offset -= header_byte_delta; 327 328 unsigned i; 329 for (i = 0; offset >= chunks[i].size; i++) 330 offset -= chunks[i].size; 331 332 return chunks[i].data + offset; 333 } 334 335 /* BRIG string data hashing. */ 336 337 struct brig_string_slot 338 { 339 const char *s; 340 char prefix; 341 int len; 342 uint32_t offset; 343 }; 344 345 /* Hash table helpers. 
*/ 346 347 struct brig_string_slot_hasher : pointer_hash <brig_string_slot> 348 { 349 static inline hashval_t hash (const value_type); 350 static inline bool equal (const value_type, const compare_type); 351 static inline void remove (value_type); 352 }; 353 354 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string 355 to support strings that may not end in '\0'. */ 356 357 inline hashval_t 358 brig_string_slot_hasher::hash (const value_type ds) 359 { 360 hashval_t r = ds->len; 361 int i; 362 363 for (i = 0; i < ds->len; i++) 364 r = r * 67 + (unsigned) ds->s[i] - 113; 365 r = r * 67 + (unsigned) ds->prefix - 113; 366 return r; 367 } 368 369 /* Returns nonzero if DS1 and DS2 are equal. */ 370 371 inline bool 372 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2) 373 { 374 if (ds1->len == ds2->len) 375 return ds1->prefix == ds2->prefix 376 && memcmp (ds1->s, ds2->s, ds1->len) == 0; 377 378 return 0; 379 } 380 381 /* Deallocate memory for DS upon its removal. */ 382 383 inline void 384 brig_string_slot_hasher::remove (value_type ds) 385 { 386 free (const_cast<char *> (ds->s)); 387 free (ds); 388 } 389 390 /* Hash for strings we output in order not to duplicate them needlessly. */ 391 392 static hash_table<brig_string_slot_hasher> *brig_string_htab; 393 394 /* Emit a null terminated string STR to the data section and return its 395 offset in it. If PREFIX is non-zero, output it just before STR too. 396 Sanitize the string if SANITIZE option is set to true. */ 397 398 static unsigned 399 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true) 400 { 401 unsigned slen = strlen (str); 402 unsigned offset, len = slen + (prefix ? 
1 : 0); 403 uint32_t hdr_len = lendian32 (len); 404 brig_string_slot s_slot; 405 brig_string_slot **slot; 406 char *str2; 407 408 str2 = xstrdup (str); 409 410 if (sanitize) 411 hsa_sanitize_name (str2); 412 s_slot.s = str2; 413 s_slot.len = slen; 414 s_slot.prefix = prefix; 415 s_slot.offset = 0; 416 417 slot = brig_string_htab->find_slot (&s_slot, INSERT); 418 if (*slot == NULL) 419 { 420 brig_string_slot *new_slot = XCNEW (brig_string_slot); 421 422 /* In theory we should fill in BrigData but that would mean copying 423 the string to a buffer for no reason, so we just emulate it. */ 424 offset = brig_data.add (&hdr_len, sizeof (hdr_len)); 425 if (prefix) 426 brig_data.add (&prefix, 1); 427 428 brig_data.add (str2, slen); 429 brig_data.round_size_up (4); 430 431 /* TODO: could use the string we just copied into 432 brig_string->cur_chunk */ 433 new_slot->s = str2; 434 new_slot->len = slen; 435 new_slot->prefix = prefix; 436 new_slot->offset = offset; 437 *slot = new_slot; 438 } 439 else 440 { 441 offset = (*slot)->offset; 442 free (str2); 443 } 444 445 return offset; 446 } 447 448 /* Linked list of queued operands. */ 449 450 static struct operand_queue 451 { 452 /* First from the chain of queued operands. */ 453 hsa_op_base *first_op, *last_op; 454 455 /* The offset at which the next operand will be enqueued. */ 456 unsigned projected_size; 457 458 } op_queue; 459 460 /* Unless already initialized, initialize infrastructure to produce BRIG. 
*/ 461 462 static void 463 brig_init (void) 464 { 465 brig_insn_count = 0; 466 467 if (brig_initialized) 468 return; 469 470 brig_string_htab = new hash_table<brig_string_slot_hasher> (37); 471 brig_data.init (BRIG_SECTION_DATA_NAME); 472 brig_code.init (BRIG_SECTION_CODE_NAME); 473 brig_operand.init (BRIG_SECTION_OPERAND_NAME); 474 brig_initialized = true; 475 476 struct BrigDirectiveModule moddir; 477 memset (&moddir, 0, sizeof (moddir)); 478 moddir.base.byteCount = lendian16 (sizeof (moddir)); 479 480 char *modname; 481 if (main_input_filename && *main_input_filename != '\0') 482 { 483 const char *part = strrchr (main_input_filename, '/'); 484 if (!part) 485 part = main_input_filename; 486 else 487 part++; 488 modname = concat ("&__hsa_module_", part, NULL); 489 char *extension = strchr (modname, '.'); 490 if (extension) 491 *extension = '\0'; 492 493 /* As in LTO mode, we have to emit a different module names. */ 494 if (flag_ltrans) 495 { 496 part = strrchr (asm_file_name, '/'); 497 if (!part) 498 part = asm_file_name; 499 else 500 part++; 501 char *modname2; 502 modname2 = xasprintf ("%s_%s", modname, part); 503 free (modname); 504 modname = modname2; 505 } 506 507 hsa_sanitize_name (modname); 508 moddir.name = brig_emit_string (modname); 509 free (modname); 510 } 511 else 512 moddir.name = brig_emit_string ("__hsa_module_unnamed", '&'); 513 moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE); 514 moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR); 515 moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR); 516 moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE; 517 if (hsa_machine_large_p ()) 518 moddir.machineModel = BRIG_MACHINE_LARGE; 519 else 520 moddir.machineModel = BRIG_MACHINE_SMALL; 521 moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT; 522 brig_code.add (&moddir, sizeof (moddir)); 523 } 524 525 /* Free all BRIG data. 
*/ 526 527 static void 528 brig_release_data (void) 529 { 530 delete brig_string_htab; 531 brig_data.release (); 532 brig_code.release (); 533 brig_operand.release (); 534 535 brig_initialized = 0; 536 } 537 538 /* Enqueue operation OP. Return the offset at which it will be stored. */ 539 540 static unsigned int 541 enqueue_op (hsa_op_base *op) 542 { 543 unsigned ret; 544 545 if (op->m_brig_op_offset) 546 return op->m_brig_op_offset; 547 548 ret = op_queue.projected_size; 549 op->m_brig_op_offset = op_queue.projected_size; 550 551 if (!op_queue.first_op) 552 op_queue.first_op = op; 553 else 554 op_queue.last_op->m_next = op; 555 op_queue.last_op = op; 556 557 if (is_a <hsa_op_immed *> (op)) 558 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes); 559 else if (is_a <hsa_op_reg *> (op)) 560 op_queue.projected_size += sizeof (struct BrigOperandRegister); 561 else if (is_a <hsa_op_address *> (op)) 562 op_queue.projected_size += sizeof (struct BrigOperandAddress); 563 else if (is_a <hsa_op_code_ref *> (op)) 564 op_queue.projected_size += sizeof (struct BrigOperandCodeRef); 565 else if (is_a <hsa_op_code_list *> (op)) 566 op_queue.projected_size += sizeof (struct BrigOperandCodeList); 567 else if (is_a <hsa_op_operand_list *> (op)) 568 op_queue.projected_size += sizeof (struct BrigOperandOperandList); 569 else 570 gcc_unreachable (); 571 return ret; 572 } 573 574 static void emit_immediate_operand (hsa_op_immed *imm); 575 576 /* Emit directive describing a symbol if it has not been emitted already. 577 Return the offset of the directive. 
*/ 578 579 static unsigned 580 emit_directive_variable (struct hsa_symbol *symbol) 581 { 582 struct BrigDirectiveVariable dirvar; 583 unsigned name_offset; 584 static unsigned res_name_offset; 585 586 if (symbol->m_directive_offset) 587 return symbol->m_directive_offset; 588 589 memset (&dirvar, 0, sizeof (dirvar)); 590 dirvar.base.byteCount = lendian16 (sizeof (dirvar)); 591 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE); 592 dirvar.allocation = symbol->m_allocation; 593 594 char prefix = symbol->m_global_scope_p ? '&' : '%'; 595 596 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL) 597 { 598 if (res_name_offset == 0) 599 res_name_offset = brig_emit_string (symbol->m_name, '%'); 600 name_offset = res_name_offset; 601 } 602 else if (symbol->m_name) 603 name_offset = brig_emit_string (symbol->m_name, prefix); 604 else 605 { 606 char buf[64]; 607 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment), 608 symbol->m_name_number); 609 name_offset = brig_emit_string (buf, prefix); 610 } 611 612 dirvar.name = lendian32 (name_offset); 613 614 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL) 615 { 616 hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl)); 617 dirvar.init = lendian32 (enqueue_op (tmp)); 618 } 619 else 620 dirvar.init = 0; 621 dirvar.type = lendian16 (symbol->m_type); 622 dirvar.segment = symbol->m_segment; 623 dirvar.align = symbol->m_align; 624 dirvar.linkage = symbol->m_linkage; 625 dirvar.dim.lo = symbol->m_dim; 626 dirvar.dim.hi = symbol->m_dim >> 32; 627 628 /* Global variables are just declared and linked via HSA runtime. 
*/ 629 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM) 630 dirvar.modifier |= BRIG_VARIABLE_DEFINITION; 631 dirvar.reserved = 0; 632 633 if (symbol->m_cst_value) 634 { 635 dirvar.modifier |= BRIG_VARIABLE_CONST; 636 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value)); 637 } 638 639 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar)); 640 return symbol->m_directive_offset; 641 } 642 643 /* Emit directives describing either a function declaration or definition F and 644 return the produced BrigDirectiveExecutable structure. The function does 645 not take into account any instructions when calculating nextModuleEntry 646 field of the produced BrigDirectiveExecutable structure so when emitting 647 actual definitions, this field needs to be updated after all of the function 648 is actually added to the code section. */ 649 650 static BrigDirectiveExecutable * 651 emit_function_directives (hsa_function_representation *f, bool is_declaration) 652 { 653 struct BrigDirectiveExecutable fndir; 654 unsigned name_offset, inarg_off, scoped_off, next_toplev_off; 655 int count = 0; 656 void *ptr_to_fndir; 657 hsa_symbol *sym; 658 659 if (!f->m_declaration_p) 660 for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++) 661 { 662 gcc_assert (!sym->m_emitted_to_brig); 663 sym->m_emitted_to_brig = true; 664 emit_directive_variable (sym); 665 brig_insn_count++; 666 } 667 668 name_offset = brig_emit_string (f->m_name, '&'); 669 inarg_off = brig_code.total_size + sizeof (fndir) 670 + (f->m_output_arg ? 
sizeof (struct BrigDirectiveVariable) : 0); 671 scoped_off = inarg_off 672 + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable); 673 674 if (!f->m_declaration_p) 675 { 676 count += f->m_spill_symbols.length (); 677 count += f->m_private_variables.length (); 678 } 679 680 next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable); 681 682 memset (&fndir, 0, sizeof (fndir)); 683 fndir.base.byteCount = lendian16 (sizeof (fndir)); 684 fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL 685 : BRIG_KIND_DIRECTIVE_FUNCTION); 686 fndir.name = lendian32 (name_offset); 687 fndir.inArgCount = lendian16 (f->m_input_args.length ()); 688 fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0); 689 fndir.firstInArg = lendian32 (inarg_off); 690 fndir.firstCodeBlockEntry = lendian32 (scoped_off); 691 fndir.nextModuleEntry = lendian32 (next_toplev_off); 692 fndir.linkage = f->get_linkage (); 693 if (!f->m_declaration_p) 694 fndir.modifier |= BRIG_EXECUTABLE_DEFINITION; 695 memset (&fndir.reserved, 0, sizeof (fndir.reserved)); 696 697 /* Once we put a definition of function_offsets, we should not overwrite 698 it with a declaration of the function. */ 699 if (f->m_internal_fn == NULL) 700 { 701 if (!function_offsets->get (f->m_decl) || !is_declaration) 702 function_offsets->put (f->m_decl, brig_code.total_size); 703 } 704 else 705 { 706 /* Internal function. 
*/ 707 hsa_internal_fn **slot 708 = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT); 709 hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn); 710 int_fn->m_offset = brig_code.total_size; 711 *slot = int_fn; 712 } 713 714 brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir); 715 716 if (f->m_output_arg) 717 emit_directive_variable (f->m_output_arg); 718 for (unsigned i = 0; i < f->m_input_args.length (); i++) 719 emit_directive_variable (f->m_input_args[i]); 720 721 if (!f->m_declaration_p) 722 { 723 for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++) 724 { 725 emit_directive_variable (sym); 726 brig_insn_count++; 727 } 728 for (unsigned i = 0; i < f->m_private_variables.length (); i++) 729 { 730 emit_directive_variable (f->m_private_variables[i]); 731 brig_insn_count++; 732 } 733 } 734 735 return (BrigDirectiveExecutable *) ptr_to_fndir; 736 } 737 738 /* Emit a label directive for the given HBB. We assume it is about to start on 739 the current offset in the code section. */ 740 741 static void 742 emit_bb_label_directive (hsa_bb *hbb) 743 { 744 struct BrigDirectiveLabel lbldir; 745 746 lbldir.base.byteCount = lendian16 (sizeof (lbldir)); 747 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL); 748 char buf[32]; 749 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl), 750 hbb->m_index); 751 lbldir.name = lendian32 (brig_emit_string (buf, '@')); 752 753 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir, 754 sizeof (lbldir)); 755 brig_insn_count++; 756 } 757 758 /* Map a normal HSAIL type to the type of the equivalent BRIG operand 759 holding such, for constants and registers. 
*/ 760 761 static BrigType16_t 762 regtype_for_type (BrigType16_t t) 763 { 764 switch (t) 765 { 766 case BRIG_TYPE_B1: 767 return BRIG_TYPE_B1; 768 769 case BRIG_TYPE_U8: 770 case BRIG_TYPE_U16: 771 case BRIG_TYPE_U32: 772 case BRIG_TYPE_S8: 773 case BRIG_TYPE_S16: 774 case BRIG_TYPE_S32: 775 case BRIG_TYPE_B8: 776 case BRIG_TYPE_B16: 777 case BRIG_TYPE_B32: 778 case BRIG_TYPE_F16: 779 case BRIG_TYPE_F32: 780 case BRIG_TYPE_U8X4: 781 case BRIG_TYPE_U16X2: 782 case BRIG_TYPE_S8X4: 783 case BRIG_TYPE_S16X2: 784 case BRIG_TYPE_F16X2: 785 return BRIG_TYPE_B32; 786 787 case BRIG_TYPE_U64: 788 case BRIG_TYPE_S64: 789 case BRIG_TYPE_F64: 790 case BRIG_TYPE_B64: 791 case BRIG_TYPE_U8X8: 792 case BRIG_TYPE_U16X4: 793 case BRIG_TYPE_U32X2: 794 case BRIG_TYPE_S8X8: 795 case BRIG_TYPE_S16X4: 796 case BRIG_TYPE_S32X2: 797 case BRIG_TYPE_F16X4: 798 case BRIG_TYPE_F32X2: 799 return BRIG_TYPE_B64; 800 801 case BRIG_TYPE_B128: 802 case BRIG_TYPE_U8X16: 803 case BRIG_TYPE_U16X8: 804 case BRIG_TYPE_U32X4: 805 case BRIG_TYPE_U64X2: 806 case BRIG_TYPE_S8X16: 807 case BRIG_TYPE_S16X8: 808 case BRIG_TYPE_S32X4: 809 case BRIG_TYPE_S64X2: 810 case BRIG_TYPE_F16X8: 811 case BRIG_TYPE_F32X4: 812 case BRIG_TYPE_F64X2: 813 return BRIG_TYPE_B128; 814 815 default: 816 gcc_unreachable (); 817 } 818 } 819 820 /* Return the length of the BRIG type TYPE that is going to be streamed out as 821 an immediate constant (so it must not be B1). 
*/ 822 823 unsigned 824 hsa_get_imm_brig_type_len (BrigType16_t type) 825 { 826 BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK; 827 BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK; 828 829 switch (pack_type) 830 { 831 case BRIG_TYPE_PACK_NONE: 832 break; 833 case BRIG_TYPE_PACK_32: 834 return 4; 835 case BRIG_TYPE_PACK_64: 836 return 8; 837 case BRIG_TYPE_PACK_128: 838 return 16; 839 default: 840 gcc_unreachable (); 841 } 842 843 switch (base_type) 844 { 845 case BRIG_TYPE_U8: 846 case BRIG_TYPE_S8: 847 case BRIG_TYPE_B8: 848 return 1; 849 case BRIG_TYPE_U16: 850 case BRIG_TYPE_S16: 851 case BRIG_TYPE_F16: 852 case BRIG_TYPE_B16: 853 return 2; 854 case BRIG_TYPE_U32: 855 case BRIG_TYPE_S32: 856 case BRIG_TYPE_F32: 857 case BRIG_TYPE_B32: 858 return 4; 859 case BRIG_TYPE_U64: 860 case BRIG_TYPE_S64: 861 case BRIG_TYPE_F64: 862 case BRIG_TYPE_B64: 863 return 8; 864 case BRIG_TYPE_B128: 865 return 16; 866 default: 867 gcc_unreachable (); 868 } 869 } 870 871 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission. 872 If NEED_LEN is not equal to zero, shrink or extend the value 873 to NEED_LEN bytes. Return how many bytes were written. 
*/ 874 875 static int 876 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len) 877 { 878 union hsa_bytes bytes; 879 880 memset (&bytes, 0, sizeof (bytes)); 881 tree type = TREE_TYPE (value); 882 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE); 883 884 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT; 885 if (INTEGRAL_TYPE_P (type) 886 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST)) 887 switch (data_len) 888 { 889 case 1: 890 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value); 891 break; 892 case 2: 893 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value); 894 break; 895 case 4: 896 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value); 897 break; 898 case 8: 899 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value); 900 break; 901 default: 902 gcc_unreachable (); 903 } 904 else if (SCALAR_FLOAT_TYPE_P (type)) 905 { 906 if (data_len == 2) 907 { 908 sorry ("Support for HSA does not implement immediate 16 bit FPU " 909 "operands"); 910 return 2; 911 } 912 unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type)); 913 /* There are always 32 bits in each long, no matter the size of 914 the hosts long. */ 915 long tmp[6]; 916 917 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type)); 918 919 if (int_len == 4) 920 bytes.b32 = (uint32_t) tmp[0]; 921 else 922 { 923 bytes.b64 = (uint64_t)(uint32_t) tmp[1]; 924 bytes.b64 <<= 32; 925 bytes.b64 |= (uint32_t) tmp[0]; 926 } 927 } 928 else 929 gcc_unreachable (); 930 931 int len; 932 if (need_len == 0) 933 len = data_len; 934 else 935 len = need_len; 936 937 memcpy (data, &bytes, len); 938 return len; 939 } 940 941 char * 942 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size) 943 { 944 char *brig_repr; 945 *brig_repr_size = hsa_get_imm_brig_type_len (m_type); 946 947 if (m_tree_value != NULL_TREE) 948 { 949 /* Update brig_repr_size for special tree values. 
*/ 950 if (TREE_CODE (m_tree_value) == STRING_CST) 951 *brig_repr_size = TREE_STRING_LENGTH (m_tree_value); 952 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR) 953 *brig_repr_size 954 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value))); 955 956 unsigned total_len = *brig_repr_size; 957 958 /* As we can have a constructor with fewer elements, fill the memory 959 with zeros. */ 960 brig_repr = XCNEWVEC (char, total_len); 961 char *p = brig_repr; 962 963 if (TREE_CODE (m_tree_value) == VECTOR_CST) 964 { 965 int i, num = VECTOR_CST_NELTS (m_tree_value); 966 for (i = 0; i < num; i++) 967 { 968 tree v = VECTOR_CST_ELT (m_tree_value, i); 969 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0); 970 total_len -= actual; 971 p += actual; 972 } 973 /* Vectors should have the exact size. */ 974 gcc_assert (total_len == 0); 975 } 976 else if (TREE_CODE (m_tree_value) == STRING_CST) 977 memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value), 978 TREE_STRING_LENGTH (m_tree_value)); 979 else if (TREE_CODE (m_tree_value) == COMPLEX_CST) 980 { 981 gcc_assert (total_len % 2 == 0); 982 unsigned actual; 983 actual 984 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p, 985 total_len / 2); 986 987 gcc_assert (actual == total_len / 2); 988 p += actual; 989 990 actual 991 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p, 992 total_len / 2); 993 gcc_assert (actual == total_len / 2); 994 } 995 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR) 996 { 997 unsigned len = CONSTRUCTOR_NELTS (m_tree_value); 998 for (unsigned i = 0; i < len; i++) 999 { 1000 tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value; 1001 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0); 1002 total_len -= actual; 1003 p += actual; 1004 } 1005 } 1006 else 1007 emit_immediate_scalar_to_buffer (m_tree_value, p, total_len); 1008 } 1009 else 1010 { 1011 hsa_bytes bytes; 1012 1013 switch (*brig_repr_size) 1014 { 1015 case 1: 1016 bytes.b8 = (uint8_t) m_int_value; 1017 
break; 1018 case 2: 1019 bytes.b16 = (uint16_t) m_int_value; 1020 break; 1021 case 4: 1022 bytes.b32 = (uint32_t) m_int_value; 1023 break; 1024 case 8: 1025 bytes.b64 = (uint64_t) m_int_value; 1026 break; 1027 default: 1028 gcc_unreachable (); 1029 } 1030 1031 brig_repr = XNEWVEC (char, *brig_repr_size); 1032 memcpy (brig_repr, &bytes, *brig_repr_size); 1033 } 1034 1035 return brig_repr; 1036 } 1037 1038 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might 1039 have been massaged to comply with various HSA/BRIG type requirements, so the 1040 only important aspect of that is the length (because HSAIL might expect 1041 smaller constants or become bit-data). The data should be represented 1042 according to what is in the tree representation. */ 1043 1044 static void 1045 emit_immediate_operand (hsa_op_immed *imm) 1046 { 1047 unsigned brig_repr_size; 1048 char *brig_repr = imm->emit_to_buffer (&brig_repr_size); 1049 struct BrigOperandConstantBytes out; 1050 1051 memset (&out, 0, sizeof (out)); 1052 out.base.byteCount = lendian16 (sizeof (out)); 1053 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES); 1054 uint32_t byteCount = lendian32 (brig_repr_size); 1055 out.type = lendian16 (imm->m_type); 1056 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); 1057 brig_operand.add (&out, sizeof (out)); 1058 brig_data.add (brig_repr, brig_repr_size); 1059 brig_data.round_size_up (4); 1060 1061 free (brig_repr); 1062 } 1063 1064 /* Emit a register BRIG operand REG. 
*/ 1065 1066 static void 1067 emit_register_operand (hsa_op_reg *reg) 1068 { 1069 struct BrigOperandRegister out; 1070 1071 out.base.byteCount = lendian16 (sizeof (out)); 1072 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER); 1073 out.regNum = lendian32 (reg->m_hard_num); 1074 1075 switch (regtype_for_type (reg->m_type)) 1076 { 1077 case BRIG_TYPE_B32: 1078 out.regKind = BRIG_REGISTER_KIND_SINGLE; 1079 break; 1080 case BRIG_TYPE_B64: 1081 out.regKind = BRIG_REGISTER_KIND_DOUBLE; 1082 break; 1083 case BRIG_TYPE_B128: 1084 out.regKind = BRIG_REGISTER_KIND_QUAD; 1085 break; 1086 case BRIG_TYPE_B1: 1087 out.regKind = BRIG_REGISTER_KIND_CONTROL; 1088 break; 1089 default: 1090 gcc_unreachable (); 1091 } 1092 1093 brig_operand.add (&out, sizeof (out)); 1094 } 1095 1096 /* Emit an address BRIG operand ADDR. */ 1097 1098 static void 1099 emit_address_operand (hsa_op_address *addr) 1100 { 1101 struct BrigOperandAddress out; 1102 1103 out.base.byteCount = lendian16 (sizeof (out)); 1104 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS); 1105 out.symbol = addr->m_symbol 1106 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0; 1107 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0; 1108 1109 if (sizeof (addr->m_imm_offset) == 8) 1110 { 1111 out.offset.lo = lendian32 (addr->m_imm_offset); 1112 out.offset.hi = lendian32 (addr->m_imm_offset >> 32); 1113 } 1114 else 1115 { 1116 gcc_assert (sizeof (addr->m_imm_offset) == 4); 1117 out.offset.lo = lendian32 (addr->m_imm_offset); 1118 out.offset.hi = 0; 1119 } 1120 1121 brig_operand.add (&out, sizeof (out)); 1122 } 1123 1124 /* Emit a code reference operand REF. 
*/ 1125 1126 static void 1127 emit_code_ref_operand (hsa_op_code_ref *ref) 1128 { 1129 struct BrigOperandCodeRef out; 1130 1131 out.base.byteCount = lendian16 (sizeof (out)); 1132 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF); 1133 out.ref = lendian32 (ref->m_directive_offset); 1134 brig_operand.add (&out, sizeof (out)); 1135 } 1136 1137 /* Emit a code list operand CODE_LIST. */ 1138 1139 static void 1140 emit_code_list_operand (hsa_op_code_list *code_list) 1141 { 1142 struct BrigOperandCodeList out; 1143 unsigned args = code_list->m_offsets.length (); 1144 1145 for (unsigned i = 0; i < args; i++) 1146 gcc_assert (code_list->m_offsets[i]); 1147 1148 out.base.byteCount = lendian16 (sizeof (out)); 1149 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST); 1150 1151 uint32_t byteCount = lendian32 (4 * args); 1152 1153 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); 1154 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t)); 1155 brig_data.round_size_up (4); 1156 brig_operand.add (&out, sizeof (out)); 1157 } 1158 1159 /* Emit an operand list operand OPERAND_LIST. */ 1160 1161 static void 1162 emit_operand_list_operand (hsa_op_operand_list *operand_list) 1163 { 1164 struct BrigOperandOperandList out; 1165 unsigned args = operand_list->m_offsets.length (); 1166 1167 for (unsigned i = 0; i < args; i++) 1168 gcc_assert (operand_list->m_offsets[i]); 1169 1170 out.base.byteCount = lendian16 (sizeof (out)); 1171 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST); 1172 1173 uint32_t byteCount = lendian32 (4 * args); 1174 1175 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); 1176 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t)); 1177 brig_data.round_size_up (4); 1178 brig_operand.add (&out, sizeof (out)); 1179 } 1180 1181 /* Emit all operands queued for writing. 
*/ 1182 1183 static void 1184 emit_queued_operands (void) 1185 { 1186 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next) 1187 { 1188 gcc_assert (op->m_brig_op_offset == brig_operand.total_size); 1189 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op)) 1190 emit_immediate_operand (imm); 1191 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op)) 1192 emit_register_operand (reg); 1193 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op)) 1194 emit_address_operand (addr); 1195 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op)) 1196 emit_code_ref_operand (ref); 1197 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op)) 1198 emit_code_list_operand (code_list); 1199 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op)) 1200 emit_operand_list_operand (l); 1201 else 1202 gcc_unreachable (); 1203 } 1204 } 1205 1206 /* Emit directives describing the function that is used for 1207 a function declaration. */ 1208 1209 static BrigDirectiveExecutable * 1210 emit_function_declaration (tree decl) 1211 { 1212 hsa_function_representation *f = hsa_generate_function_declaration (decl); 1213 1214 BrigDirectiveExecutable *e = emit_function_directives (f, true); 1215 emit_queued_operands (); 1216 1217 delete f; 1218 1219 return e; 1220 } 1221 1222 /* Emit directives describing the function that is used for 1223 an internal function declaration. */ 1224 1225 static BrigDirectiveExecutable * 1226 emit_internal_fn_decl (hsa_internal_fn *fn) 1227 { 1228 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn); 1229 1230 BrigDirectiveExecutable *e = emit_function_directives (f, true); 1231 emit_queued_operands (); 1232 1233 delete f; 1234 1235 return e; 1236 } 1237 1238 /* Enqueue all operands of INSN and return offset to BRIG data section 1239 to list of operand offsets. 
*/ 1240 1241 static unsigned 1242 emit_insn_operands (hsa_insn_basic *insn) 1243 { 1244 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> 1245 operand_offsets; 1246 1247 unsigned l = insn->operand_count (); 1248 1249 /* We have N operands so use 4 * N for the byte_count. */ 1250 uint32_t byte_count = lendian32 (4 * l); 1251 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); 1252 if (l > 0) 1253 { 1254 operand_offsets.safe_grow (l); 1255 for (unsigned i = 0; i < l; i++) 1256 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i))); 1257 1258 brig_data.add (operand_offsets.address (), 1259 l * sizeof (BrigOperandOffset32_t)); 1260 } 1261 brig_data.round_size_up (4); 1262 return offset; 1263 } 1264 1265 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset 1266 to BRIG data section to list of operand offsets. */ 1267 1268 static unsigned 1269 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL, 1270 hsa_op_base *op2 = NULL) 1271 { 1272 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> 1273 operand_offsets; 1274 1275 gcc_checking_assert (op0 != NULL); 1276 operand_offsets.safe_push (enqueue_op (op0)); 1277 1278 if (op1 != NULL) 1279 { 1280 operand_offsets.safe_push (enqueue_op (op1)); 1281 if (op2 != NULL) 1282 operand_offsets.safe_push (enqueue_op (op2)); 1283 } 1284 1285 unsigned l = operand_offsets.length (); 1286 1287 /* We have N operands so use 4 * N for the byte_count. */ 1288 uint32_t byte_count = lendian32 (4 * l); 1289 1290 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); 1291 brig_data.add (operand_offsets.address (), 1292 l * sizeof (BrigOperandOffset32_t)); 1293 1294 brig_data.round_size_up (4); 1295 1296 return offset; 1297 } 1298 1299 /* Emit an HSA memory instruction and all necessary directives, schedule 1300 necessary operands for writing. 
*/ 1301 1302 static void 1303 emit_memory_insn (hsa_insn_mem *mem) 1304 { 1305 struct BrigInstMem repr; 1306 gcc_checking_assert (mem->operand_count () == 2); 1307 1308 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1)); 1309 1310 /* This is necessary because of the erroneous typedef of 1311 BrigMemoryModifier8_t which introduces padding which may then contain 1312 random stuff (which we do not want so that we can test things don't 1313 change). */ 1314 memset (&repr, 0, sizeof (repr)); 1315 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1316 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); 1317 repr.base.opcode = lendian16 (mem->m_opcode); 1318 repr.base.type = lendian16 (mem->m_type); 1319 repr.base.operands = lendian32 (emit_insn_operands (mem)); 1320 1321 if (addr->m_symbol) 1322 repr.segment = addr->m_symbol->m_segment; 1323 else 1324 repr.segment = BRIG_SEGMENT_FLAT; 1325 repr.modifier = 0; 1326 repr.equivClass = mem->m_equiv_class; 1327 repr.align = mem->m_align; 1328 if (mem->m_opcode == BRIG_OPCODE_LD) 1329 repr.width = BRIG_WIDTH_1; 1330 else 1331 repr.width = BRIG_WIDTH_NONE; 1332 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1333 brig_code.add (&repr, sizeof (repr)); 1334 brig_insn_count++; 1335 } 1336 1337 /* Emit an HSA signal memory instruction and all necessary directives, schedule 1338 necessary operands for writing. */ 1339 1340 static void 1341 emit_signal_insn (hsa_insn_signal *mem) 1342 { 1343 struct BrigInstSignal repr; 1344 1345 memset (&repr, 0, sizeof (repr)); 1346 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1347 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL); 1348 repr.base.opcode = lendian16 (mem->m_opcode); 1349 repr.base.type = lendian16 (mem->m_type); 1350 repr.base.operands = lendian32 (emit_insn_operands (mem)); 1351 1352 repr.memoryOrder = mem->m_memory_order; 1353 repr.signalOperation = mem->m_signalop; 1354 repr.signalType = hsa_machine_large_p () ? 
BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32; 1355 1356 brig_code.add (&repr, sizeof (repr)); 1357 brig_insn_count++; 1358 } 1359 1360 /* Emit an HSA atomic memory instruction and all necessary directives, schedule 1361 necessary operands for writing. */ 1362 1363 static void 1364 emit_atomic_insn (hsa_insn_atomic *mem) 1365 { 1366 struct BrigInstAtomic repr; 1367 1368 /* Either operand[0] or operand[1] must be an address operand. */ 1369 hsa_op_address *addr = NULL; 1370 if (is_a <hsa_op_address *> (mem->get_op (0))) 1371 addr = as_a <hsa_op_address *> (mem->get_op (0)); 1372 else 1373 addr = as_a <hsa_op_address *> (mem->get_op (1)); 1374 1375 memset (&repr, 0, sizeof (repr)); 1376 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1377 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC); 1378 repr.base.opcode = lendian16 (mem->m_opcode); 1379 repr.base.type = lendian16 (mem->m_type); 1380 repr.base.operands = lendian32 (emit_insn_operands (mem)); 1381 1382 if (addr->m_symbol) 1383 repr.segment = addr->m_symbol->m_segment; 1384 else 1385 repr.segment = BRIG_SEGMENT_FLAT; 1386 repr.memoryOrder = mem->m_memoryorder; 1387 repr.memoryScope = mem->m_memoryscope; 1388 repr.atomicOperation = mem->m_atomicop; 1389 1390 brig_code.add (&repr, sizeof (repr)); 1391 brig_insn_count++; 1392 } 1393 1394 /* Emit an HSA LDA instruction and all necessary directives, schedule 1395 necessary operands for writing. 
*/ 1396 1397 static void 1398 emit_addr_insn (hsa_insn_basic *insn) 1399 { 1400 struct BrigInstAddr repr; 1401 1402 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1)); 1403 1404 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1405 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR); 1406 repr.base.opcode = lendian16 (insn->m_opcode); 1407 repr.base.type = lendian16 (insn->m_type); 1408 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1409 1410 if (addr->m_symbol) 1411 repr.segment = addr->m_symbol->m_segment; 1412 else 1413 repr.segment = BRIG_SEGMENT_FLAT; 1414 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1415 1416 brig_code.add (&repr, sizeof (repr)); 1417 brig_insn_count++; 1418 } 1419 1420 /* Emit an HSA segment conversion instruction and all necessary directives, 1421 schedule necessary operands for writing. */ 1422 1423 static void 1424 emit_segment_insn (hsa_insn_seg *seg) 1425 { 1426 struct BrigInstSegCvt repr; 1427 1428 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1429 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT); 1430 repr.base.opcode = lendian16 (seg->m_opcode); 1431 repr.base.type = lendian16 (seg->m_type); 1432 repr.base.operands = lendian32 (emit_insn_operands (seg)); 1433 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type); 1434 repr.segment = seg->m_segment; 1435 repr.modifier = 0; 1436 1437 brig_code.add (&repr, sizeof (repr)); 1438 1439 brig_insn_count++; 1440 } 1441 1442 /* Emit an HSA alloca instruction and all necessary directives, 1443 schedule necessary operands for writing. 
*/ 1444 1445 static void 1446 emit_alloca_insn (hsa_insn_alloca *alloca) 1447 { 1448 struct BrigInstMem repr; 1449 gcc_checking_assert (alloca->operand_count () == 2); 1450 1451 memset (&repr, 0, sizeof (repr)); 1452 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1453 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); 1454 repr.base.opcode = lendian16 (alloca->m_opcode); 1455 repr.base.type = lendian16 (alloca->m_type); 1456 repr.base.operands = lendian32 (emit_insn_operands (alloca)); 1457 repr.segment = BRIG_SEGMENT_PRIVATE; 1458 repr.modifier = 0; 1459 repr.equivClass = 0; 1460 repr.align = alloca->m_align; 1461 repr.width = BRIG_WIDTH_NONE; 1462 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1463 brig_code.add (&repr, sizeof (repr)); 1464 brig_insn_count++; 1465 } 1466 1467 /* Emit an HSA comparison instruction and all necessary directives, 1468 schedule necessary operands for writing. */ 1469 1470 static void 1471 emit_cmp_insn (hsa_insn_cmp *cmp) 1472 { 1473 struct BrigInstCmp repr; 1474 1475 memset (&repr, 0, sizeof (repr)); 1476 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1477 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP); 1478 repr.base.opcode = lendian16 (cmp->m_opcode); 1479 repr.base.type = lendian16 (cmp->m_type); 1480 repr.base.operands = lendian32 (emit_insn_operands (cmp)); 1481 1482 if (is_a <hsa_op_reg *> (cmp->get_op (1))) 1483 repr.sourceType 1484 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type); 1485 else 1486 repr.sourceType 1487 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type); 1488 repr.modifier = 0; 1489 repr.compare = cmp->m_compare; 1490 repr.pack = 0; 1491 1492 brig_code.add (&repr, sizeof (repr)); 1493 brig_insn_count++; 1494 } 1495 1496 /* Emit an HSA generic branching/sycnronization instruction. 
*/ 1497 1498 static void 1499 emit_generic_branch_insn (hsa_insn_br *br) 1500 { 1501 struct BrigInstBr repr; 1502 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1503 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1504 repr.base.opcode = lendian16 (br->m_opcode); 1505 repr.width = br->m_width; 1506 repr.base.type = lendian16 (br->m_type); 1507 repr.base.operands = lendian32 (emit_insn_operands (br)); 1508 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1509 1510 brig_code.add (&repr, sizeof (repr)); 1511 brig_insn_count++; 1512 } 1513 1514 /* Emit an HSA conditional branching instruction and all necessary directives, 1515 schedule necessary operands for writing. */ 1516 1517 static void 1518 emit_cond_branch_insn (hsa_insn_cbr *br) 1519 { 1520 struct BrigInstBr repr; 1521 1522 basic_block target = NULL; 1523 edge_iterator ei; 1524 edge e; 1525 1526 /* At the moment we only handle direct conditional jumps. */ 1527 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR); 1528 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1529 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1530 repr.base.opcode = lendian16 (br->m_opcode); 1531 repr.width = br->m_width; 1532 /* For Conditional jumps the type is always B1. */ 1533 repr.base.type = lendian16 (BRIG_TYPE_B1); 1534 1535 FOR_EACH_EDGE (e, ei, br->m_bb->succs) 1536 if (e->flags & EDGE_TRUE_VALUE) 1537 { 1538 target = e->dest; 1539 break; 1540 } 1541 gcc_assert (target); 1542 1543 repr.base.operands 1544 = lendian32 (emit_operands (br->get_op (0), 1545 &hsa_bb_for_bb (target)->m_label_ref)); 1546 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1547 1548 brig_code.add (&repr, sizeof (repr)); 1549 brig_insn_count++; 1550 } 1551 1552 /* Emit an HSA unconditional jump branching instruction that points to 1553 a label REFERENCE. 
*/ 1554 1555 static void 1556 emit_unconditional_jump (hsa_op_code_ref *reference) 1557 { 1558 struct BrigInstBr repr; 1559 1560 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1561 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1562 repr.base.opcode = lendian16 (BRIG_OPCODE_BR); 1563 repr.base.type = lendian16 (BRIG_TYPE_NONE); 1564 /* Direct branches to labels must be width(all). */ 1565 repr.width = BRIG_WIDTH_ALL; 1566 1567 repr.base.operands = lendian32 (emit_operands (reference)); 1568 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1569 brig_code.add (&repr, sizeof (repr)); 1570 brig_insn_count++; 1571 } 1572 1573 /* Emit an HSA switch jump instruction that uses a jump table to 1574 jump to a destination label. */ 1575 1576 static void 1577 emit_switch_insn (hsa_insn_sbr *sbr) 1578 { 1579 struct BrigInstBr repr; 1580 1581 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR); 1582 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1583 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1584 repr.base.opcode = lendian16 (sbr->m_opcode); 1585 repr.width = BRIG_WIDTH_1; 1586 /* For Conditional jumps the type is always B1. */ 1587 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0)); 1588 repr.base.type = lendian16 (index->m_type); 1589 repr.base.operands 1590 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list)); 1591 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1592 1593 brig_code.add (&repr, sizeof (repr)); 1594 brig_insn_count++; 1595 } 1596 1597 /* Emit a HSA convert instruction and all necessary directives, schedule 1598 necessary operands for writing. 
*/ 1599 1600 static void 1601 emit_cvt_insn (hsa_insn_cvt *insn) 1602 { 1603 struct BrigInstCvt repr; 1604 BrigType16_t srctype; 1605 1606 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1607 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT); 1608 repr.base.opcode = lendian16 (insn->m_opcode); 1609 repr.base.type = lendian16 (insn->m_type); 1610 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1611 1612 if (is_a <hsa_op_reg *> (insn->get_op (1))) 1613 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type; 1614 else 1615 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type; 1616 repr.sourceType = lendian16 (srctype); 1617 repr.modifier = 0; 1618 /* float to smaller float requires a rounding setting (we default 1619 to 'near'. */ 1620 if (hsa_type_float_p (insn->m_type) 1621 && (!hsa_type_float_p (srctype) 1622 || ((insn->m_type & BRIG_TYPE_BASE_MASK) 1623 < (srctype & BRIG_TYPE_BASE_MASK)))) 1624 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; 1625 else if (hsa_type_integer_p (insn->m_type) && 1626 hsa_type_float_p (srctype)) 1627 repr.round = BRIG_ROUND_INTEGER_ZERO; 1628 else 1629 repr.round = BRIG_ROUND_NONE; 1630 brig_code.add (&repr, sizeof (repr)); 1631 brig_insn_count++; 1632 } 1633 1634 /* Emit call instruction INSN, where this instruction must be closed 1635 within a call block instruction. */ 1636 1637 static void 1638 emit_call_insn (hsa_insn_call *call) 1639 { 1640 struct BrigInstBr repr; 1641 1642 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1643 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1644 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL); 1645 repr.base.type = lendian16 (BRIG_TYPE_NONE); 1646 1647 repr.base.operands 1648 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func, 1649 call->m_args_code_list)); 1650 1651 /* Internal functions have not set m_called_function. 
*/ 1652 if (call->m_called_function) 1653 { 1654 function_linkage_pair pair (call->m_called_function, 1655 call->m_func.m_brig_op_offset); 1656 function_call_linkage.safe_push (pair); 1657 } 1658 else 1659 { 1660 hsa_internal_fn *slot 1661 = hsa_emitted_internal_decls->find (call->m_called_internal_fn); 1662 gcc_assert (slot); 1663 gcc_assert (slot->m_offset > 0); 1664 call->m_func.m_directive_offset = slot->m_offset; 1665 } 1666 1667 repr.width = BRIG_WIDTH_ALL; 1668 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1669 1670 brig_code.add (&repr, sizeof (repr)); 1671 brig_insn_count++; 1672 } 1673 1674 /* Emit argument block directive. */ 1675 1676 static void 1677 emit_arg_block_insn (hsa_insn_arg_block *insn) 1678 { 1679 switch (insn->m_kind) 1680 { 1681 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START: 1682 { 1683 struct BrigDirectiveArgBlock repr; 1684 repr.base.byteCount = lendian16 (sizeof (repr)); 1685 repr.base.kind = lendian16 (insn->m_kind); 1686 brig_code.add (&repr, sizeof (repr)); 1687 1688 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++) 1689 { 1690 insn->m_call_insn->m_args_code_list->m_offsets[i] 1691 = lendian32 (emit_directive_variable 1692 (insn->m_call_insn->m_input_args[i])); 1693 brig_insn_count++; 1694 } 1695 1696 if (insn->m_call_insn->m_output_arg) 1697 { 1698 insn->m_call_insn->m_result_code_list->m_offsets[0] 1699 = lendian32 (emit_directive_variable 1700 (insn->m_call_insn->m_output_arg)); 1701 brig_insn_count++; 1702 } 1703 1704 break; 1705 } 1706 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END: 1707 { 1708 struct BrigDirectiveArgBlock repr; 1709 repr.base.byteCount = lendian16 (sizeof (repr)); 1710 repr.base.kind = lendian16 (insn->m_kind); 1711 brig_code.add (&repr, sizeof (repr)); 1712 break; 1713 } 1714 default: 1715 gcc_unreachable (); 1716 } 1717 1718 brig_insn_count++; 1719 } 1720 1721 /* Emit comment directive. 
*/ 1722 1723 static void 1724 emit_comment_insn (hsa_insn_comment *insn) 1725 { 1726 struct BrigDirectiveComment repr; 1727 memset (&repr, 0, sizeof (repr)); 1728 1729 repr.base.byteCount = lendian16 (sizeof (repr)); 1730 repr.base.kind = lendian16 (insn->m_opcode); 1731 repr.name = brig_emit_string (insn->m_comment, '\0', false); 1732 brig_code.add (&repr, sizeof (repr)); 1733 } 1734 1735 /* Emit queue instruction INSN. */ 1736 1737 static void 1738 emit_queue_insn (hsa_insn_queue *insn) 1739 { 1740 BrigInstQueue repr; 1741 memset (&repr, 0, sizeof (repr)); 1742 1743 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1744 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE); 1745 repr.base.opcode = lendian16 (insn->m_opcode); 1746 repr.base.type = lendian16 (insn->m_type); 1747 repr.segment = insn->m_segment; 1748 repr.memoryOrder = insn->m_memory_order; 1749 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1750 brig_data.round_size_up (4); 1751 brig_code.add (&repr, sizeof (repr)); 1752 1753 brig_insn_count++; 1754 } 1755 1756 /* Emit source type instruction INSN. */ 1757 1758 static void 1759 emit_srctype_insn (hsa_insn_srctype *insn) 1760 { 1761 /* We assume that BrigInstMod has a BrigInstBasic prefix. */ 1762 struct BrigInstSourceType repr; 1763 unsigned operand_count = insn->operand_count (); 1764 gcc_checking_assert (operand_count >= 2); 1765 1766 memset (&repr, 0, sizeof (repr)); 1767 repr.sourceType = lendian16 (insn->m_source_type); 1768 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1769 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); 1770 repr.base.opcode = lendian16 (insn->m_opcode); 1771 repr.base.type = lendian16 (insn->m_type); 1772 1773 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1774 brig_code.add (&repr, sizeof (struct BrigInstSourceType)); 1775 brig_insn_count++; 1776 } 1777 1778 /* Emit packed instruction INSN. 
*/ 1779 1780 static void 1781 emit_packed_insn (hsa_insn_packed *insn) 1782 { 1783 /* We assume that BrigInstMod has a BrigInstBasic prefix. */ 1784 struct BrigInstSourceType repr; 1785 unsigned operand_count = insn->operand_count (); 1786 gcc_checking_assert (operand_count >= 2); 1787 1788 memset (&repr, 0, sizeof (repr)); 1789 repr.sourceType = lendian16 (insn->m_source_type); 1790 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1791 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); 1792 repr.base.opcode = lendian16 (insn->m_opcode); 1793 repr.base.type = lendian16 (insn->m_type); 1794 1795 if (insn->m_opcode == BRIG_OPCODE_COMBINE) 1796 { 1797 /* Create operand list for packed type. */ 1798 for (unsigned i = 1; i < operand_count; i++) 1799 { 1800 gcc_checking_assert (insn->get_op (i)); 1801 insn->m_operand_list->m_offsets[i - 1] 1802 = lendian32 (enqueue_op (insn->get_op (i))); 1803 } 1804 1805 repr.base.operands = lendian32 (emit_operands (insn->get_op (0), 1806 insn->m_operand_list)); 1807 } 1808 else if (insn->m_opcode == BRIG_OPCODE_EXPAND) 1809 { 1810 /* Create operand list for packed type. */ 1811 for (unsigned i = 0; i < operand_count - 1; i++) 1812 { 1813 gcc_checking_assert (insn->get_op (i)); 1814 insn->m_operand_list->m_offsets[i] 1815 = lendian32 (enqueue_op (insn->get_op (i))); 1816 } 1817 1818 unsigned ops = emit_operands (insn->m_operand_list, 1819 insn->get_op (insn->operand_count () - 1)); 1820 repr.base.operands = lendian32 (ops); 1821 } 1822 1823 1824 brig_code.add (&repr, sizeof (struct BrigInstSourceType)); 1825 brig_insn_count++; 1826 } 1827 1828 /* Emit a basic HSA instruction and all necessary directives, schedule 1829 necessary operands for writing. */ 1830 1831 static void 1832 emit_basic_insn (hsa_insn_basic *insn) 1833 { 1834 /* We assume that BrigInstMod has a BrigInstBasic prefix. 
*/ 1835 struct BrigInstMod repr; 1836 BrigType16_t type; 1837 1838 memset (&repr, 0, sizeof (repr)); 1839 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic)); 1840 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC); 1841 repr.base.opcode = lendian16 (insn->m_opcode); 1842 switch (insn->m_opcode) 1843 { 1844 /* And the bit-logical operations need bit types and whine about 1845 arithmetic types :-/ */ 1846 case BRIG_OPCODE_AND: 1847 case BRIG_OPCODE_OR: 1848 case BRIG_OPCODE_XOR: 1849 case BRIG_OPCODE_NOT: 1850 type = regtype_for_type (insn->m_type); 1851 break; 1852 default: 1853 type = insn->m_type; 1854 break; 1855 } 1856 repr.base.type = lendian16 (type); 1857 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1858 1859 if (hsa_type_packed_p (type)) 1860 { 1861 if (hsa_type_float_p (type) 1862 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode)) 1863 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; 1864 else 1865 repr.round = 0; 1866 /* We assume that destination and sources agree in packing layout. */ 1867 if (insn->num_used_ops () >= 2) 1868 repr.pack = BRIG_PACK_PP; 1869 else 1870 repr.pack = BRIG_PACK_P; 1871 repr.reserved = 0; 1872 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod)); 1873 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD); 1874 brig_code.add (&repr, sizeof (struct BrigInstMod)); 1875 } 1876 else 1877 brig_code.add (&repr, sizeof (struct BrigInstBasic)); 1878 brig_insn_count++; 1879 } 1880 1881 /* Emit an HSA instruction and all necessary directives, schedule necessary 1882 operands for writing. 
*/ 1883 1884 static void 1885 emit_insn (hsa_insn_basic *insn) 1886 { 1887 gcc_assert (!is_a <hsa_insn_phi *> (insn)); 1888 1889 insn->m_brig_offset = brig_code.total_size; 1890 1891 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn)) 1892 emit_signal_insn (signal); 1893 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn)) 1894 emit_atomic_insn (atom); 1895 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn)) 1896 emit_memory_insn (mem); 1897 else if (insn->m_opcode == BRIG_OPCODE_LDA) 1898 emit_addr_insn (insn); 1899 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn)) 1900 emit_segment_insn (seg); 1901 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn)) 1902 emit_cmp_insn (cmp); 1903 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn)) 1904 emit_cond_branch_insn (br); 1905 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn)) 1906 { 1907 if (switch_instructions == NULL) 1908 switch_instructions = new vec <hsa_insn_sbr *> (); 1909 1910 switch_instructions->safe_push (sbr); 1911 emit_switch_insn (sbr); 1912 } 1913 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn)) 1914 emit_generic_branch_insn (br); 1915 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn)) 1916 emit_arg_block_insn (block); 1917 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn)) 1918 emit_call_insn (call); 1919 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn)) 1920 emit_comment_insn (comment); 1921 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn)) 1922 emit_queue_insn (queue); 1923 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn)) 1924 emit_srctype_insn (srctype); 1925 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn)) 1926 emit_packed_insn (packed); 1927 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn)) 1928 emit_cvt_insn (cvt); 1929 else if (hsa_insn_alloca *alloca = 
dyn_cast <hsa_insn_alloca *> (insn)) 1930 emit_alloca_insn (alloca); 1931 else 1932 emit_basic_insn (insn); 1933 } 1934 1935 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL, 1936 or we are about to finish emitting code, if it is NULL. If the fall through 1937 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */ 1938 1939 static void 1940 perhaps_emit_branch (basic_block bb, basic_block next_bb) 1941 { 1942 basic_block t_bb = NULL, ff = NULL; 1943 1944 edge_iterator ei; 1945 edge e; 1946 1947 /* If the last instruction of BB is a switch, ignore emission of all 1948 edges. */ 1949 if (hsa_bb_for_bb (bb)->m_last_insn 1950 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn)) 1951 return; 1952 1953 FOR_EACH_EDGE (e, ei, bb->succs) 1954 if (e->flags & EDGE_TRUE_VALUE) 1955 { 1956 gcc_assert (!t_bb); 1957 t_bb = e->dest; 1958 } 1959 else 1960 { 1961 gcc_assert (!ff); 1962 ff = e->dest; 1963 } 1964 1965 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun)) 1966 return; 1967 1968 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref); 1969 } 1970 1971 /* Emit the a function with name NAME to the various brig sections. */ 1972 1973 void 1974 hsa_brig_emit_function (void) 1975 { 1976 basic_block bb, prev_bb; 1977 hsa_insn_basic *insn; 1978 BrigDirectiveExecutable *ptr_to_fndir; 1979 1980 brig_init (); 1981 1982 brig_insn_count = 0; 1983 memset (&op_queue, 0, sizeof (op_queue)); 1984 op_queue.projected_size = brig_operand.total_size; 1985 1986 if (!function_offsets) 1987 function_offsets = new hash_map<tree, BrigCodeOffset32_t> (); 1988 1989 if (!emitted_declarations) 1990 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> (); 1991 1992 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++) 1993 { 1994 tree called = hsa_cfun->m_called_functions[i]; 1995 1996 /* If the function has no definition, emit a declaration. 
*/ 1997 if (!emitted_declarations->get (called)) 1998 { 1999 BrigDirectiveExecutable *e = emit_function_declaration (called); 2000 emitted_declarations->put (called, e); 2001 } 2002 } 2003 2004 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++) 2005 { 2006 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i]; 2007 emit_internal_fn_decl (called); 2008 } 2009 2010 ptr_to_fndir = emit_function_directives (hsa_cfun, false); 2011 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn; 2012 insn; 2013 insn = insn->m_next) 2014 emit_insn (insn); 2015 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun); 2016 FOR_EACH_BB_FN (bb, cfun) 2017 { 2018 perhaps_emit_branch (prev_bb, bb); 2019 emit_bb_label_directive (hsa_bb_for_bb (bb)); 2020 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next) 2021 emit_insn (insn); 2022 prev_bb = bb; 2023 } 2024 perhaps_emit_branch (prev_bb, NULL); 2025 ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size); 2026 2027 /* Fill up label references for all sbr instructions. */ 2028 if (switch_instructions) 2029 { 2030 for (unsigned i = 0; i < switch_instructions->length (); i++) 2031 { 2032 hsa_insn_sbr *sbr = (*switch_instructions)[i]; 2033 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++) 2034 { 2035 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]); 2036 sbr->m_label_code_list->m_offsets[j] 2037 = hbb->m_label_ref.m_directive_offset; 2038 } 2039 } 2040 2041 switch_instructions->release (); 2042 delete switch_instructions; 2043 switch_instructions = NULL; 2044 } 2045 2046 if (dump_file) 2047 { 2048 fprintf (dump_file, "------- After BRIG emission: -------\n"); 2049 dump_hsa_cfun (dump_file); 2050 } 2051 2052 emit_queued_operands (); 2053 } 2054 2055 /* Emit all OMP symbols related to OMP. 
*/ 2056 2057 void 2058 hsa_brig_emit_omp_symbols (void) 2059 { 2060 brig_init (); 2061 emit_directive_variable (hsa_num_threads); 2062 } 2063 2064 /* Create and return __hsa_global_variables symbol that contains 2065 all informations consumed by libgomp to link global variables 2066 with their string names used by an HSA kernel. */ 2067 2068 static tree 2069 hsa_output_global_variables () 2070 { 2071 unsigned l = hsa_global_variable_symbols->elements (); 2072 2073 tree variable_info_type = make_node (RECORD_TYPE); 2074 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2075 get_identifier ("name"), ptr_type_node); 2076 DECL_CHAIN (id_f1) = NULL_TREE; 2077 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2078 get_identifier ("omp_data_size"), 2079 ptr_type_node); 2080 DECL_CHAIN (id_f2) = id_f1; 2081 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2, 2082 NULL_TREE); 2083 2084 tree int_num_of_global_vars; 2085 int_num_of_global_vars = build_int_cst (uint32_type_node, l); 2086 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars); 2087 tree global_vars_array_type = build_array_type (variable_info_type, 2088 global_vars_num_index_type); 2089 TYPE_ARTIFICIAL (global_vars_array_type) = 1; 2090 2091 vec<constructor_elt, va_gc> *global_vars_vec = NULL; 2092 2093 for (hash_table <hsa_noop_symbol_hasher>::iterator it 2094 = hsa_global_variable_symbols->begin (); 2095 it != hsa_global_variable_symbols->end (); ++it) 2096 { 2097 unsigned len = strlen ((*it)->m_name); 2098 char *copy = XNEWVEC (char, len + 2); 2099 copy[0] = '&'; 2100 memcpy (copy + 1, (*it)->m_name, len); 2101 copy[len + 1] = '\0'; 2102 len++; 2103 hsa_sanitize_name (copy); 2104 2105 tree var_name = build_string (len, copy); 2106 TREE_TYPE (var_name) 2107 = build_array_type (char_type_node, build_index_type (size_int (len))); 2108 free (copy); 2109 2110 vec<constructor_elt, va_gc> *variable_info_vec = NULL; 2111 CONSTRUCTOR_APPEND_ELT (variable_info_vec, 
NULL_TREE, 2112 build1 (ADDR_EXPR, 2113 build_pointer_type (TREE_TYPE (var_name)), 2114 var_name)); 2115 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE, 2116 build_fold_addr_expr ((*it)->m_decl)); 2117 2118 tree variable_info_ctor = build_constructor (variable_info_type, 2119 variable_info_vec); 2120 2121 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE, 2122 variable_info_ctor); 2123 } 2124 2125 tree global_vars_ctor = build_constructor (global_vars_array_type, 2126 global_vars_vec); 2127 2128 char tmp_name[64]; 2129 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1); 2130 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, 2131 get_identifier (tmp_name), 2132 global_vars_array_type); 2133 TREE_STATIC (global_vars_table) = 1; 2134 TREE_READONLY (global_vars_table) = 1; 2135 TREE_PUBLIC (global_vars_table) = 0; 2136 DECL_ARTIFICIAL (global_vars_table) = 1; 2137 DECL_IGNORED_P (global_vars_table) = 1; 2138 DECL_EXTERNAL (global_vars_table) = 0; 2139 TREE_CONSTANT (global_vars_table) = 1; 2140 DECL_INITIAL (global_vars_table) = global_vars_ctor; 2141 varpool_node::finalize_decl (global_vars_table); 2142 2143 return global_vars_table; 2144 } 2145 2146 /* Create __hsa_host_functions and __hsa_kernels that contain 2147 all informations consumed by libgomp to register all kernels 2148 in the BRIG binary. 
*/

/* On return *HOST_FUNC_TABLE holds the VAR_DECL of the array of host
   function addresses and *KERNELS holds the VAR_DECL of the array of
   __hsa_kernel_info records.  Both arrays are built from the
   decl-to-kernel mappings, in mapping index order.  */

static void
hsa_output_kernels (tree *host_func_table, tree *kernels)
{
  unsigned map_count = hsa_get_number_decl_kernel_mappings ();

  tree int_num_of_kernels;
  int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
  tree kernel_num_index_type = build_index_type (int_num_of_kernels);
  tree host_functions_array_type = build_array_type (ptr_type_node,
                                                     kernel_num_index_type);
  TYPE_ARTIFICIAL (host_functions_array_type) = 1;

  /* First table: the address of the host function of every mapped
     kernel.  */
  vec<constructor_elt, va_gc> *host_functions_vec = NULL;
  for (unsigned i = 0; i < map_count; ++i)
    {
      tree decl = hsa_get_decl_kernel_mapping_decl (i);
      tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
      CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
    }
  tree host_functions_ctor = build_constructor (host_functions_array_type,
                                                host_functions_vec);
  char tmp_name[64];
  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
  tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                         get_identifier (tmp_name),
                                         host_functions_array_type);
  TREE_STATIC (hsa_host_func_table) = 1;
  TREE_READONLY (hsa_host_func_table) = 1;
  TREE_PUBLIC (hsa_host_func_table) = 0;
  DECL_ARTIFICIAL (hsa_host_func_table) = 1;
  DECL_IGNORED_P (hsa_host_func_table) = 1;
  DECL_EXTERNAL (hsa_host_func_table) = 0;
  TREE_CONSTANT (hsa_host_func_table) = 1;
  DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
  varpool_node::finalize_decl (hsa_host_func_table);
  *host_func_table = hsa_host_func_table;

  /* The following code emits the list of kernel_info structures.  Fields
     are chained in reverse, so id_f5 (the last one created) is what
     finish_builtin_struct receives.  */

  tree kernel_info_type = make_node (RECORD_TYPE);
  tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                           get_identifier ("name"), ptr_type_node);
  DECL_CHAIN (id_f1) = NULL_TREE;
  tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                           get_identifier ("omp_data_size"),
                           unsigned_type_node);
  DECL_CHAIN (id_f2) = id_f1;
  tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                           get_identifier ("gridified_kernel_p"),
                           boolean_type_node);
  DECL_CHAIN (id_f3) = id_f2;
  tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                           get_identifier ("kernel_dependencies_count"),
                           unsigned_type_node);
  DECL_CHAIN (id_f4) = id_f3;
  tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                           get_identifier ("kernel_dependencies"),
                           build_pointer_type (build_pointer_type
                                               (char_type_node)));
  DECL_CHAIN (id_f5) = id_f4;
  finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
                         NULL_TREE);

  int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
  tree kernel_info_vector_type
    = build_array_type (kernel_info_type,
                        build_index_type (int_num_of_kernels));
  TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;

  vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;

  for (unsigned i = 0; i < map_count; ++i)
    {
      tree kernel = hsa_get_decl_kernel_mapping_decl (i);
      char *name = hsa_get_decl_kernel_mapping_name (i);

      /* Build "&<name>": one extra byte for the '&' prefix and one for
         the terminating NUL.  */
      unsigned len = strlen (name);
      char *copy = XNEWVEC (char, len + 2);
      copy[0] = '&';
      memcpy (copy + 1, name, len);
      copy[len + 1] = '\0';
      /* From here on LEN counts the '&' prefix as well.  */
      len++;

      tree kern_name = build_string (len, copy);
      TREE_TYPE (kern_name)
        = build_array_type (char_type_node, build_index_type (size_int (len)));
      free (copy);

      unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
      tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
      bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
      tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
                                                     gridified_kernel_p);

      /* Collect the names of the kernels this one depends on, if any
         were recorded for it.  The array type is per kernel, hence it is
         scoped to this iteration.  */
      unsigned count = 0;
      tree kernel_dependencies_vector_type = NULL;
      vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
      if (hsa_decl_kernel_dependencies)
        {
          vec<const char *> **slot;
          slot = hsa_decl_kernel_dependencies->get (kernel);
          if (slot)
            {
              vec <const char *> *dependencies = *slot;
              count = dependencies->length ();

              kernel_dependencies_vector_type
                = build_array_type (build_pointer_type (char_type_node),
                                    build_index_type (size_int (count)));
              TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;

              for (unsigned j = 0; j < count; j++)
                {
                  const char *d = (*dependencies)[j];
                  /* Dependency names are emitted as they are, without a
                     '&' prefix being added here.  */
                  unsigned dep_len = strlen (d);
                  tree dependency_name = build_string (dep_len, d);
                  TREE_TYPE (dependency_name)
                    = build_array_type (char_type_node,
                                        build_index_type (size_int (dep_len)));

                  CONSTRUCTOR_APPEND_ELT
                    (kernel_dependencies_vec, NULL_TREE,
                     build1 (ADDR_EXPR,
                             build_pointer_type (TREE_TYPE (dependency_name)),
                             dependency_name));
                }
            }
        }

      tree dependencies_count = build_int_cstu (unsigned_type_node, count);

      /* Initializer of one __hsa_kernel_info record; the elements are
         appended in field declaration order.  */
      vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
      CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
                              build1 (ADDR_EXPR,
                                      build_pointer_type (TREE_TYPE
                                                          (kern_name)),
                                      kern_name));
      CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
      CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
                              gridified_kernel_p_tree);
      CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);

      if (count > 0)
        {
          /* Emit the dependency list as its own static array, labelled
             with the kernel index, and point the last field at it.  */
          ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
          gcc_checking_assert (kernel_dependencies_vector_type);
          tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                               get_identifier (tmp_name),
                                               kernel_dependencies_vector_type);

          TREE_STATIC (dependencies_list) = 1;
          TREE_READONLY (dependencies_list) = 1;
          TREE_PUBLIC (dependencies_list) = 0;
          DECL_ARTIFICIAL (dependencies_list) = 1;
          DECL_IGNORED_P (dependencies_list) = 1;
          DECL_EXTERNAL (dependencies_list) = 0;
          TREE_CONSTANT (dependencies_list) = 1;
          DECL_INITIAL (dependencies_list)
            = build_constructor (kernel_dependencies_vector_type,
                                 kernel_dependencies_vec);
          varpool_node::finalize_decl (dependencies_list);

          CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
                                  build1 (ADDR_EXPR,
                                          build_pointer_type
                                            (TREE_TYPE (dependencies_list)),
                                          dependencies_list));
        }
      else
        {
          /* No dependencies: the list pointer is null.  */
          CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
                                  null_pointer_node);
        }

      tree kernel_info_ctor = build_constructor (kernel_info_type,
                                                 kernel_info_vec);

      CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
                              kernel_info_ctor);
    }

  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
  tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                 get_identifier (tmp_name),
                                 kernel_info_vector_type);

  TREE_STATIC (hsa_kernels) = 1;
  TREE_READONLY (hsa_kernels) = 1;
  TREE_PUBLIC (hsa_kernels) = 0;
  DECL_ARTIFICIAL (hsa_kernels) = 1;
  DECL_IGNORED_P (hsa_kernels) = 1;
  DECL_EXTERNAL (hsa_kernels) = 0;
  TREE_CONSTANT (hsa_kernels) = 1;
  DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
                                                  kernel_info_vector_vec);
  varpool_node::finalize_decl (hsa_kernels);
  *kernels = hsa_kernels;
}

/* Create a static constructor that will register our brig stuff with
   libgomp.
*/ 2346 2347 static void 2348 hsa_output_libgomp_mapping (tree brig_decl) 2349 { 2350 unsigned kernel_count = hsa_get_number_decl_kernel_mappings (); 2351 unsigned global_variable_count = hsa_global_variable_symbols->elements (); 2352 2353 tree kernels; 2354 tree host_func_table; 2355 2356 hsa_output_kernels (&host_func_table, &kernels); 2357 tree global_vars = hsa_output_global_variables (); 2358 2359 tree hsa_image_desc_type = make_node (RECORD_TYPE); 2360 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2361 get_identifier ("brig_module"), ptr_type_node); 2362 DECL_CHAIN (id_f1) = NULL_TREE; 2363 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2364 get_identifier ("kernel_count"), 2365 unsigned_type_node); 2366 2367 DECL_CHAIN (id_f2) = id_f1; 2368 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2369 get_identifier ("hsa_kernel_infos"), 2370 ptr_type_node); 2371 DECL_CHAIN (id_f3) = id_f2; 2372 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2373 get_identifier ("global_variable_count"), 2374 unsigned_type_node); 2375 DECL_CHAIN (id_f4) = id_f3; 2376 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2377 get_identifier ("hsa_global_variable_infos"), 2378 ptr_type_node); 2379 DECL_CHAIN (id_f5) = id_f4; 2380 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5, 2381 NULL_TREE); 2382 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1; 2383 2384 vec<constructor_elt, va_gc> *img_desc_vec = NULL; 2385 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2386 build_fold_addr_expr (brig_decl)); 2387 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2388 build_int_cstu (unsigned_type_node, kernel_count)); 2389 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2390 build1 (ADDR_EXPR, 2391 build_pointer_type (TREE_TYPE (kernels)), 2392 kernels)); 2393 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2394 build_int_cstu (unsigned_type_node, 2395 global_variable_count)); 2396 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2397 build1 
(ADDR_EXPR, 2398 build_pointer_type (TREE_TYPE (global_vars)), 2399 global_vars)); 2400 2401 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec); 2402 2403 char tmp_name[64]; 2404 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1); 2405 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL, 2406 get_identifier (tmp_name), 2407 hsa_image_desc_type); 2408 TREE_STATIC (hsa_img_descriptor) = 1; 2409 TREE_READONLY (hsa_img_descriptor) = 1; 2410 TREE_PUBLIC (hsa_img_descriptor) = 0; 2411 DECL_ARTIFICIAL (hsa_img_descriptor) = 1; 2412 DECL_IGNORED_P (hsa_img_descriptor) = 1; 2413 DECL_EXTERNAL (hsa_img_descriptor) = 0; 2414 TREE_CONSTANT (hsa_img_descriptor) = 1; 2415 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor; 2416 varpool_node::finalize_decl (hsa_img_descriptor); 2417 2418 /* Construct the "host_table" libgomp expects. */ 2419 tree index_type = build_index_type (build_int_cst (integer_type_node, 4)); 2420 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type); 2421 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1; 2422 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL; 2423 tree host_func_table_addr = build_fold_addr_expr (host_func_table); 2424 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, 2425 host_func_table_addr); 2426 offset_int func_table_size 2427 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count; 2428 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, 2429 fold_build2 (POINTER_PLUS_EXPR, 2430 TREE_TYPE (host_func_table_addr), 2431 host_func_table_addr, 2432 build_int_cst (size_type_node, 2433 func_table_size.to_uhwi 2434 ()))); 2435 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node); 2436 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node); 2437 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type, 2438 libgomp_host_table_vec); 2439 ASM_GENERATE_INTERNAL_LABEL (tmp_name, 
"__hsa_libgomp_host_table", 1); 2440 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, 2441 get_identifier (tmp_name), 2442 libgomp_host_table_type); 2443 2444 TREE_STATIC (hsa_libgomp_host_table) = 1; 2445 TREE_READONLY (hsa_libgomp_host_table) = 1; 2446 TREE_PUBLIC (hsa_libgomp_host_table) = 0; 2447 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1; 2448 DECL_IGNORED_P (hsa_libgomp_host_table) = 1; 2449 DECL_EXTERNAL (hsa_libgomp_host_table) = 0; 2450 TREE_CONSTANT (hsa_libgomp_host_table) = 1; 2451 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor; 2452 varpool_node::finalize_decl (hsa_libgomp_host_table); 2453 2454 /* Generate an initializer with a call to the registration routine. */ 2455 2456 tree offload_register 2457 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER); 2458 gcc_checking_assert (offload_register); 2459 2460 tree *hsa_ctor_stmts = hsa_get_ctor_statements (); 2461 append_to_statement_list 2462 (build_call_expr (offload_register, 4, 2463 build_int_cstu (unsigned_type_node, 2464 GOMP_VERSION_PACK (GOMP_VERSION, 2465 GOMP_VERSION_HSA)), 2466 build_fold_addr_expr (hsa_libgomp_host_table), 2467 build_int_cst (integer_type_node, GOMP_DEVICE_HSA), 2468 build_fold_addr_expr (hsa_img_descriptor)), 2469 hsa_ctor_stmts); 2470 2471 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY); 2472 2473 tree offload_unregister 2474 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER); 2475 gcc_checking_assert (offload_unregister); 2476 2477 tree *hsa_dtor_stmts = hsa_get_dtor_statements (); 2478 append_to_statement_list 2479 (build_call_expr (offload_unregister, 4, 2480 build_int_cstu (unsigned_type_node, 2481 GOMP_VERSION_PACK (GOMP_VERSION, 2482 GOMP_VERSION_HSA)), 2483 build_fold_addr_expr (hsa_libgomp_host_table), 2484 build_int_cst (integer_type_node, GOMP_DEVICE_HSA), 2485 build_fold_addr_expr (hsa_img_descriptor)), 2486 hsa_dtor_stmts); 2487 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, 
DEFAULT_INIT_PRIORITY); 2488 } 2489 2490 /* Emit the brig module we have compiled to a section in the final assembly and 2491 also create a compile unit static constructor that will register the brig 2492 module with libgomp. */ 2493 2494 void 2495 hsa_output_brig (void) 2496 { 2497 section *saved_section; 2498 2499 if (!brig_initialized) 2500 return; 2501 2502 for (unsigned i = 0; i < function_call_linkage.length (); i++) 2503 { 2504 function_linkage_pair p = function_call_linkage[i]; 2505 2506 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl); 2507 gcc_assert (*func_offset); 2508 BrigOperandCodeRef *code_ref 2509 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset)); 2510 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF); 2511 code_ref->ref = lendian32 (*func_offset); 2512 } 2513 2514 /* Iterate all function declarations and if we meet a function that should 2515 have module linkage and we are unable to emit HSAIL for the function, 2516 then change the linkage to program linkage. Doing so, we will emit 2517 a valid BRIG image. 
*/ 2518 if (hsa_failed_functions != NULL && emitted_declarations != NULL) 2519 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it 2520 = emitted_declarations->begin (); 2521 it != emitted_declarations->end (); 2522 ++it) 2523 { 2524 if (hsa_failed_functions->contains ((*it).first)) 2525 (*it).second->linkage = BRIG_LINKAGE_PROGRAM; 2526 } 2527 2528 saved_section = in_section; 2529 2530 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL)); 2531 char tmp_name[64]; 2532 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1); 2533 ASM_OUTPUT_LABEL (asm_out_file, tmp_name); 2534 tree brig_id = get_identifier (tmp_name); 2535 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id, 2536 char_type_node); 2537 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id); 2538 TREE_ADDRESSABLE (brig_decl) = 1; 2539 TREE_READONLY (brig_decl) = 1; 2540 DECL_ARTIFICIAL (brig_decl) = 1; 2541 DECL_IGNORED_P (brig_decl) = 1; 2542 TREE_STATIC (brig_decl) = 1; 2543 TREE_PUBLIC (brig_decl) = 0; 2544 TREE_USED (brig_decl) = 1; 2545 DECL_INITIAL (brig_decl) = brig_decl; 2546 TREE_ASM_WRITTEN (brig_decl) = 1; 2547 2548 BrigModuleHeader module_header; 2549 memcpy (&module_header.identification, "HSA BRIG", 2550 sizeof (module_header.identification)); 2551 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR); 2552 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR); 2553 uint64_t section_index[3]; 2554 2555 int data_padding, code_padding, operand_padding; 2556 data_padding = HSA_SECTION_ALIGNMENT 2557 - brig_data.total_size % HSA_SECTION_ALIGNMENT; 2558 code_padding = HSA_SECTION_ALIGNMENT 2559 - brig_code.total_size % HSA_SECTION_ALIGNMENT; 2560 operand_padding = HSA_SECTION_ALIGNMENT 2561 - brig_operand.total_size % HSA_SECTION_ALIGNMENT; 2562 2563 uint64_t module_size = sizeof (module_header) 2564 + sizeof (section_index) 2565 + brig_data.total_size 2566 + data_padding 2567 + brig_code.total_size 2568 + code_padding 2569 + 
brig_operand.total_size 2570 + operand_padding; 2571 gcc_assert ((module_size % 16) == 0); 2572 module_header.byteCount = lendian64 (module_size); 2573 memset (&module_header.hash, 0, sizeof (module_header.hash)); 2574 module_header.reserved = 0; 2575 module_header.sectionCount = lendian32 (3); 2576 module_header.sectionIndex = lendian64 (sizeof (module_header)); 2577 assemble_string ((const char *) &module_header, sizeof (module_header)); 2578 uint64_t off = sizeof (module_header) + sizeof (section_index); 2579 section_index[0] = lendian64 (off); 2580 off += brig_data.total_size + data_padding; 2581 section_index[1] = lendian64 (off); 2582 off += brig_code.total_size + code_padding; 2583 section_index[2] = lendian64 (off); 2584 assemble_string ((const char *) §ion_index, sizeof (section_index)); 2585 2586 char padding[HSA_SECTION_ALIGNMENT]; 2587 memset (padding, 0, sizeof (padding)); 2588 2589 brig_data.output (); 2590 assemble_string (padding, data_padding); 2591 brig_code.output (); 2592 assemble_string (padding, code_padding); 2593 brig_operand.output (); 2594 assemble_string (padding, operand_padding); 2595 2596 if (saved_section) 2597 switch_to_section (saved_section); 2598 2599 hsa_output_libgomp_mapping (brig_decl); 2600 2601 hsa_free_decl_kernel_mapping (); 2602 brig_release_data (); 2603 hsa_deinit_compilation_unit_data (); 2604 2605 delete emitted_declarations; 2606 emitted_declarations = NULL; 2607 delete function_offsets; 2608 function_offsets = NULL; 2609 } 2610