xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/hsa-brig.c (revision 33881f779a77dce6440bdc44610d94de75bebefe)
1 /* Producing binary form of HSA BRIG from our internal representation.
2    Copyright (C) 2013-2017 Free Software Foundation, Inc.
3    Contributed by Martin Jambor <mjambor@suse.cz> and
4    Martin Liska <mliska@suse.cz>.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12 
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "target.h"
27 #include "memmodel.h"
28 #include "tm_p.h"
29 #include "is-a.h"
30 #include "vec.h"
31 #include "hash-table.h"
32 #include "hash-map.h"
33 #include "tree.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
36 #include "output.h"
37 #include "cfg.h"
38 #include "function.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "gimple-pretty-print.h"
42 #include "diagnostic-core.h"
43 #include "cgraph.h"
44 #include "dumpfile.h"
45 #include "print-tree.h"
46 #include "symbol-summary.h"
47 #include "hsa-common.h"
48 #include "gomp-constants.h"
49 
/* Return VAL encoded as a 16-bit little-endian quantity.  The bytes are
   swapped only when the host is known to be big-endian; in every other
   configuration VAL is returned unchanged.  */

static uint16_t
lendian16 (uint16_t val)
{
  uint16_t le = val;
#if GCC_VERSION >= 4008
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Host order already matches, nothing to do.  */
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  le = __builtin_bswap16 (val);
# else
  /* __ORDER_PDP_ENDIAN__: the value is passed through unchanged.  */
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Portable fallback for compilers without the builtin.  */
  le = (uint16_t) ((val << 8) | (val >> 8));
#endif
  return le;
}
72 
/* Return VAL encoded as a 32-bit little-endian quantity.  Big-endian hosts
   get a full byte swap, PDP-endian hosts get their 16-bit halves exchanged,
   little-endian hosts get VAL back unchanged.  */

static uint32_t
lendian32 (uint32_t val)
{
  uint32_t le = val;
#if GCC_VERSION >= 4006
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Host order already matches, nothing to do.  */
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  le = __builtin_bswap32 (val);
# else
  /* __ORDER_PDP_ENDIAN__: exchange the two 16-bit halves.  */
  le = (val << 16) | (val >> 16);
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Portable fallback: swap the bytes inside each half, then the halves.  */
  le = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  le = (le << 16) | (le >> 16);
#endif
  return le;
}
96 
/* Return VAL encoded as a 64-bit little-endian quantity.  Big-endian hosts
   get a full byte swap, PDP-endian hosts get the order of their four 16-bit
   words reversed, little-endian hosts get VAL back unchanged.  */

static uint64_t
lendian64 (uint64_t val)
{
  uint64_t le = val;
#if GCC_VERSION >= 4006
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Host order already matches, nothing to do.  */
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  le = __builtin_bswap64 (val);
# else
  /* __ORDER_PDP_ENDIAN__: reverse the order of the 16-bit words.  */
  le = (((val & 0xffff000000000000ll) >> 48)
        | ((val & 0xffff00000000ll) >> 16)
        | ((val & 0xffff0000ll) << 16)
        | ((val & 0xffffll) << 48));
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Portable fallback: swap bytes within 16-bit words, 16-bit words within
     32-bit words, and finally the two 32-bit halves.  */
  le = (((le & 0xff00ff00ff00ff00ll) >> 8)
        | ((le & 0x00ff00ff00ff00ffll) << 8));
  le = (((le & 0xffff0000ffff0000ll) >> 16)
        | ((le & 0x0000ffff0000ffffll) << 16));
  le = (le << 32) | (le >> 32);
#endif
  return le;
}
126 
/* Names used when writing BRIG out.  The three BRIG_SECTION_*_NAME strings
   are the section names handed to hsa_brig_section::init by brig_init.
   NOTE(review): BRIG_ELF_SECTION_NAME and BRIG_LABEL_STRING are presumably
   used by the final output routine, which is not visible in this part of
   the file -- confirm.  */
#define BRIG_ELF_SECTION_NAME ".brig"
#define BRIG_LABEL_STRING "hsa_brig"
#define BRIG_SECTION_DATA_NAME    "hsa_data"
#define BRIG_SECTION_CODE_NAME    "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Capacity in bytes of each buffer allocated by
   hsa_brig_section::allocate_new_chunk; it is also the largest single
   piece of data hsa_brig_section::add accepts.  */
#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
138 
/* One buffer of BRIG binary data.  A section is stored as a sequence of
   these chunks.  */

struct hsa_brig_data_chunk
{
  /* Number of bytes of DATA that are already in use.  */
  unsigned int size;

  /* The buffer itself; allocated in hsa_brig_section::allocate_new_chunk
     and freed in hsa_brig_section::release.  */
  char *data;
};
149 
150 /* Structure representing a BRIG section, holding and writing its data.  */
151 
152 class hsa_brig_section
153 {
154 public:
155   /* Section name that will be output to the BRIG.  */
156   const char *section_name;
157   /* Size in bytes of all data stored in the section.  */
158   unsigned total_size;
159   /* The size of the header of the section including padding.  */
160   unsigned header_byte_count;
161   /* The size of the header of the section without any padding.  */
162   unsigned header_byte_delta;
163 
164   void init (const char *name);
165   void release ();
166   void output ();
167   unsigned add (const void *data, unsigned len, void **output = NULL);
168   void round_size_up (int factor);
169   void *get_ptr_by_offset (unsigned int offset);
170 
171 private:
172   void allocate_new_chunk ();
173 
174   /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes.  */
175   vec <struct hsa_brig_data_chunk> chunks;
176 
177   /* More convenient access to the last chunk from the vector above.  */
178   struct hsa_brig_data_chunk *cur_chunk;
179 };
180 
181 static struct hsa_brig_section brig_data, brig_code, brig_operand;
182 static uint32_t brig_insn_count;
183 static bool brig_initialized = false;
184 
185 /* Mapping between emitted HSA functions and their offset in code segment.  */
186 static hash_map<tree, BrigCodeOffset32_t> *function_offsets;
187 
188 /* Hash map of emitted function declarations.  */
189 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;
190 
191 /* Hash table of emitted internal function declaration offsets.  */
192 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;
193 
194 /* List of sbr instructions.  */
195 static vec <hsa_insn_sbr *> *switch_instructions;
196 
197 struct function_linkage_pair
198 {
199   function_linkage_pair (tree decl, unsigned int off)
200     : function_decl (decl), offset (off) {}
201 
202   /* Declaration of called function.  */
203   tree function_decl;
204 
205   /* Offset in operand section.  */
206   unsigned int offset;
207 };
208 
209 /* Vector of function calls where we need to resolve function offsets.  */
210 static auto_vec <function_linkage_pair> function_call_linkage;
211 
212 /* Add a new chunk, allocate data for it and initialize it.  */
213 
214 void
215 hsa_brig_section::allocate_new_chunk ()
216 {
217   struct hsa_brig_data_chunk new_chunk;
218 
219   new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
220   new_chunk.size = 0;
221   cur_chunk = chunks.safe_push (new_chunk);
222 }
223 
224 /* Initialize the brig section.  */
225 
226 void
227 hsa_brig_section::init (const char *name)
228 {
229   section_name = name;
230   /* While the following computation is basically wrong, because the intent
231      certainly wasn't to have the first character of name and padding, which
232      are a part of sizeof (BrigSectionHeader), included in the first addend,
233      this is what the disassembler expects.  */
234   total_size = sizeof (BrigSectionHeader) + strlen (section_name);
235   chunks.create (1);
236   allocate_new_chunk ();
237   header_byte_delta = total_size;
238   round_size_up (4);
239   header_byte_count = total_size;
240 }
241 
242 /* Free all data in the section.  */
243 
244 void
245 hsa_brig_section::release ()
246 {
247   for (unsigned i = 0; i < chunks.length (); i++)
248     free (chunks[i].data);
249   chunks.release ();
250   cur_chunk = NULL;
251 }
252 
253 /* Write the section to the output file to a section with the name given at
254    initialization.  Switches the output section and does not restore it.  */
255 
256 void
257 hsa_brig_section::output ()
258 {
259   struct BrigSectionHeader section_header;
260   char padding[8];
261 
262   section_header.byteCount = lendian64 (total_size);
263   section_header.headerByteCount = lendian32 (header_byte_count);
264   section_header.nameLength = lendian32 (strlen (section_name));
265   assemble_string ((const char *) &section_header, 16);
266   assemble_string (section_name, (section_header.nameLength));
267   memset (&padding, 0, sizeof (padding));
268   /* This is also a consequence of the wrong header size computation described
269      in a comment in hsa_brig_section::init.  */
270   assemble_string (padding, 8);
271   for (unsigned i = 0; i < chunks.length (); i++)
272     assemble_string (chunks[i].data, chunks[i].size);
273 }
274 
275 /* Add to the stream LEN bytes of opaque binary DATA.  Return the offset at
276    which it was stored.  If OUTPUT is not NULL, store into it the pointer to
277    the place where DATA was actually stored.  */
278 
279 unsigned
280 hsa_brig_section::add (const void *data, unsigned len, void **output)
281 {
282   unsigned offset = total_size;
283 
284   gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
285   if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
286     allocate_new_chunk ();
287 
288   char *dst = cur_chunk->data + cur_chunk->size;
289   memcpy (dst, data, len);
290   if (output)
291     *output = dst;
292   cur_chunk->size += len;
293   total_size += len;
294 
295   return offset;
296 }
297 
298 /* Add padding to section so that its size is divisible by FACTOR.  */
299 
300 void
301 hsa_brig_section::round_size_up (int factor)
302 {
303   unsigned padding, res = total_size % factor;
304 
305   if (res == 0)
306     return;
307 
308   padding = factor - res;
309   total_size += padding;
310   if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
311     {
312       padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
313       cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
314       allocate_new_chunk ();
315     }
316 
317   cur_chunk->size += padding;
318 }
319 
320 /* Return pointer to data by global OFFSET in the section.  */
321 
322 void *
323 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
324 {
325   gcc_assert (offset < total_size);
326   offset -= header_byte_delta;
327 
328   unsigned i;
329   for (i = 0; offset >= chunks[i].size; i++)
330     offset -= chunks[i].size;
331 
332   return chunks[i].data + offset;
333 }
334 
/* Entry of the table of strings already emitted to the data section.  */

struct brig_string_slot
{
  /* Text of the string, without the prefix character.  */
  const char *s;
  /* Character emitted just before the string, or zero for none.  */
  char prefix;
  /* Number of characters in S.  */
  int len;
  /* Offset of the emitted string in the data section.  */
  uint32_t offset;
};
344 
345 /* Hash table helpers.  */
346 
347 struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
348 {
349   static inline hashval_t hash (const value_type);
350   static inline bool equal (const value_type, const compare_type);
351   static inline void remove (value_type);
352 };
353 
354 /* Returns a hash code for DS.  Adapted from libiberty's htab_hash_string
355    to support strings that may not end in '\0'.  */
356 
357 inline hashval_t
358 brig_string_slot_hasher::hash (const value_type ds)
359 {
360   hashval_t r = ds->len;
361   int i;
362 
363   for (i = 0; i < ds->len; i++)
364      r = r * 67 + (unsigned) ds->s[i] - 113;
365   r = r * 67 + (unsigned) ds->prefix - 113;
366   return r;
367 }
368 
369 /* Returns nonzero if DS1 and DS2 are equal.  */
370 
371 inline bool
372 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
373 {
374   if (ds1->len == ds2->len)
375     return ds1->prefix == ds2->prefix
376       && memcmp (ds1->s, ds2->s, ds1->len) == 0;
377 
378   return 0;
379 }
380 
381 /* Deallocate memory for DS upon its removal.  */
382 
383 inline void
384 brig_string_slot_hasher::remove (value_type ds)
385 {
386   free (const_cast<char *> (ds->s));
387   free (ds);
388 }
389 
/* Hash for strings we output in order not to duplicate them needlessly.
   Created in brig_init, deleted in brig_release_data and consulted by
   brig_emit_string.  */

static hash_table<brig_string_slot_hasher> *brig_string_htab;
393 
394 /* Emit a null terminated string STR to the data section and return its
395    offset in it.  If PREFIX is non-zero, output it just before STR too.
396    Sanitize the string if SANITIZE option is set to true.  */
397 
398 static unsigned
399 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
400 {
401   unsigned slen = strlen (str);
402   unsigned offset, len = slen + (prefix ? 1 : 0);
403   uint32_t hdr_len = lendian32 (len);
404   brig_string_slot s_slot;
405   brig_string_slot **slot;
406   char *str2;
407 
408   str2 = xstrdup (str);
409 
410   if (sanitize)
411     hsa_sanitize_name (str2);
412   s_slot.s = str2;
413   s_slot.len = slen;
414   s_slot.prefix = prefix;
415   s_slot.offset = 0;
416 
417   slot = brig_string_htab->find_slot (&s_slot, INSERT);
418   if (*slot == NULL)
419     {
420       brig_string_slot *new_slot = XCNEW (brig_string_slot);
421 
422       /* In theory we should fill in BrigData but that would mean copying
423 	 the string to a buffer for no reason, so we just emulate it.  */
424       offset = brig_data.add (&hdr_len, sizeof (hdr_len));
425       if (prefix)
426 	brig_data.add (&prefix, 1);
427 
428       brig_data.add (str2, slen);
429       brig_data.round_size_up (4);
430 
431       /* TODO: could use the string we just copied into
432 	 brig_string->cur_chunk */
433       new_slot->s = str2;
434       new_slot->len = slen;
435       new_slot->prefix = prefix;
436       new_slot->offset = offset;
437       *slot = new_slot;
438     }
439   else
440     {
441       offset = (*slot)->offset;
442       free (str2);
443     }
444 
445   return offset;
446 }
447 
448 /* Linked list of queued operands.  */
449 
450 static struct operand_queue
451 {
452   /* First from the chain of queued operands.  */
453   hsa_op_base *first_op, *last_op;
454 
455   /* The offset at which the next operand will be enqueued.  */
456   unsigned projected_size;
457 
458 } op_queue;
459 
460 /* Unless already initialized, initialize infrastructure to produce BRIG.  */
461 
462 static void
463 brig_init (void)
464 {
465   brig_insn_count = 0;
466 
467   if (brig_initialized)
468     return;
469 
470   brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
471   brig_data.init (BRIG_SECTION_DATA_NAME);
472   brig_code.init (BRIG_SECTION_CODE_NAME);
473   brig_operand.init (BRIG_SECTION_OPERAND_NAME);
474   brig_initialized = true;
475 
476   struct BrigDirectiveModule moddir;
477   memset (&moddir, 0, sizeof (moddir));
478   moddir.base.byteCount = lendian16 (sizeof (moddir));
479 
480   char *modname;
481   if (main_input_filename && *main_input_filename != '\0')
482     {
483       const char *part = strrchr (main_input_filename, '/');
484       if (!part)
485 	part = main_input_filename;
486       else
487 	part++;
488       modname = concat ("&__hsa_module_", part, NULL);
489       char *extension = strchr (modname, '.');
490       if (extension)
491 	*extension = '\0';
492 
493       /* As in LTO mode, we have to emit a different module names.  */
494       if (flag_ltrans)
495 	{
496 	  part = strrchr (asm_file_name, '/');
497 	  if (!part)
498 	    part = asm_file_name;
499 	  else
500 	    part++;
501 	  char *modname2;
502 	  modname2 = xasprintf ("%s_%s", modname, part);
503 	  free (modname);
504 	  modname = modname2;
505 	}
506 
507       hsa_sanitize_name (modname);
508       moddir.name = brig_emit_string (modname);
509       free (modname);
510     }
511   else
512     moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
513   moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
514   moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
515   moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
516   moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
517   if (hsa_machine_large_p ())
518     moddir.machineModel = BRIG_MACHINE_LARGE;
519   else
520     moddir.machineModel = BRIG_MACHINE_SMALL;
521   moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
522   brig_code.add (&moddir, sizeof (moddir));
523 }
524 
525 /* Free all BRIG data.  */
526 
527 static void
528 brig_release_data (void)
529 {
530   delete brig_string_htab;
531   brig_data.release ();
532   brig_code.release ();
533   brig_operand.release ();
534 
535   brig_initialized = 0;
536 }
537 
538 /* Enqueue operation OP.  Return the offset at which it will be stored.  */
539 
540 static unsigned int
541 enqueue_op (hsa_op_base *op)
542 {
543   unsigned ret;
544 
545   if (op->m_brig_op_offset)
546     return op->m_brig_op_offset;
547 
548   ret = op_queue.projected_size;
549   op->m_brig_op_offset = op_queue.projected_size;
550 
551   if (!op_queue.first_op)
552     op_queue.first_op = op;
553   else
554     op_queue.last_op->m_next = op;
555   op_queue.last_op = op;
556 
557   if (is_a <hsa_op_immed *> (op))
558     op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
559   else if (is_a <hsa_op_reg *> (op))
560     op_queue.projected_size += sizeof (struct BrigOperandRegister);
561   else if (is_a <hsa_op_address *> (op))
562     op_queue.projected_size += sizeof (struct BrigOperandAddress);
563   else if (is_a <hsa_op_code_ref *> (op))
564     op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
565   else if (is_a <hsa_op_code_list *> (op))
566     op_queue.projected_size += sizeof (struct BrigOperandCodeList);
567   else if (is_a <hsa_op_operand_list *> (op))
568     op_queue.projected_size += sizeof (struct BrigOperandOperandList);
569   else
570     gcc_unreachable ();
571   return ret;
572 }
573 
574 static void emit_immediate_operand (hsa_op_immed *imm);
575 
576 /* Emit directive describing a symbol if it has not been emitted already.
577    Return the offset of the directive.  */
578 
579 static unsigned
580 emit_directive_variable (struct hsa_symbol *symbol)
581 {
582   struct BrigDirectiveVariable dirvar;
583   unsigned name_offset;
584   static unsigned res_name_offset;
585 
586   if (symbol->m_directive_offset)
587     return symbol->m_directive_offset;
588 
589   memset (&dirvar, 0, sizeof (dirvar));
590   dirvar.base.byteCount = lendian16 (sizeof (dirvar));
591   dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
592   dirvar.allocation = symbol->m_allocation;
593 
594   char prefix = symbol->m_global_scope_p ? '&' : '%';
595 
596   if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
597     {
598       if (res_name_offset == 0)
599 	res_name_offset = brig_emit_string (symbol->m_name, '%');
600       name_offset = res_name_offset;
601     }
602   else if (symbol->m_name)
603     name_offset = brig_emit_string (symbol->m_name, prefix);
604   else
605     {
606       char buf[64];
607       snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
608 		symbol->m_name_number);
609       name_offset = brig_emit_string (buf, prefix);
610     }
611 
612   dirvar.name = lendian32 (name_offset);
613 
614   if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL)
615     {
616       hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl));
617       dirvar.init = lendian32 (enqueue_op (tmp));
618     }
619   else
620     dirvar.init = 0;
621   dirvar.type = lendian16 (symbol->m_type);
622   dirvar.segment = symbol->m_segment;
623   dirvar.align = symbol->m_align;
624   dirvar.linkage = symbol->m_linkage;
625   dirvar.dim.lo = symbol->m_dim;
626   dirvar.dim.hi = symbol->m_dim >> 32;
627 
628   /* Global variables are just declared and linked via HSA runtime.  */
629   if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
630     dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
631   dirvar.reserved = 0;
632 
633   if (symbol->m_cst_value)
634     {
635       dirvar.modifier |= BRIG_VARIABLE_CONST;
636       dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
637     }
638 
639   symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
640   return symbol->m_directive_offset;
641 }
642 
643 /* Emit directives describing either a function declaration or definition F and
644    return the produced BrigDirectiveExecutable structure.  The function does
645    not take into account any instructions when calculating nextModuleEntry
646    field of the produced BrigDirectiveExecutable structure so when emitting
647    actual definitions, this field needs to be updated after all of the function
648    is actually added to the code section.  */
649 
650 static BrigDirectiveExecutable *
651 emit_function_directives (hsa_function_representation *f, bool is_declaration)
652 {
653   struct BrigDirectiveExecutable fndir;
654   unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
655   int count = 0;
656   void *ptr_to_fndir;
657   hsa_symbol *sym;
658 
659   if (!f->m_declaration_p)
660     for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
661       {
662 	gcc_assert (!sym->m_emitted_to_brig);
663 	sym->m_emitted_to_brig = true;
664 	emit_directive_variable (sym);
665 	brig_insn_count++;
666       }
667 
668   name_offset = brig_emit_string (f->m_name, '&');
669   inarg_off = brig_code.total_size + sizeof (fndir)
670     + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
671   scoped_off = inarg_off
672     + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);
673 
674   if (!f->m_declaration_p)
675     {
676       count += f->m_spill_symbols.length ();
677       count += f->m_private_variables.length ();
678     }
679 
680   next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
681 
682   memset (&fndir, 0, sizeof (fndir));
683   fndir.base.byteCount = lendian16 (sizeof (fndir));
684   fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
685 			       : BRIG_KIND_DIRECTIVE_FUNCTION);
686   fndir.name = lendian32 (name_offset);
687   fndir.inArgCount = lendian16 (f->m_input_args.length ());
688   fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
689   fndir.firstInArg = lendian32 (inarg_off);
690   fndir.firstCodeBlockEntry = lendian32 (scoped_off);
691   fndir.nextModuleEntry = lendian32 (next_toplev_off);
692   fndir.linkage = f->get_linkage ();
693   if (!f->m_declaration_p)
694     fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
695   memset (&fndir.reserved, 0, sizeof (fndir.reserved));
696 
697   /* Once we put a definition of function_offsets, we should not overwrite
698      it with a declaration of the function.  */
699   if (f->m_internal_fn == NULL)
700     {
701       if (!function_offsets->get (f->m_decl) || !is_declaration)
702 	function_offsets->put (f->m_decl, brig_code.total_size);
703     }
704   else
705     {
706       /* Internal function.  */
707       hsa_internal_fn **slot
708 	= hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
709       hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
710       int_fn->m_offset = brig_code.total_size;
711       *slot = int_fn;
712     }
713 
714   brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir);
715 
716   if (f->m_output_arg)
717     emit_directive_variable (f->m_output_arg);
718   for (unsigned i = 0; i < f->m_input_args.length (); i++)
719     emit_directive_variable (f->m_input_args[i]);
720 
721   if (!f->m_declaration_p)
722     {
723       for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
724 	{
725 	  emit_directive_variable (sym);
726 	  brig_insn_count++;
727 	}
728       for (unsigned i = 0; i < f->m_private_variables.length (); i++)
729 	{
730 	  emit_directive_variable (f->m_private_variables[i]);
731 	  brig_insn_count++;
732 	}
733     }
734 
735   return (BrigDirectiveExecutable *) ptr_to_fndir;
736 }
737 
738 /* Emit a label directive for the given HBB.  We assume it is about to start on
739    the current offset in the code section.  */
740 
741 static void
742 emit_bb_label_directive (hsa_bb *hbb)
743 {
744   struct BrigDirectiveLabel lbldir;
745 
746   lbldir.base.byteCount = lendian16 (sizeof (lbldir));
747   lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
748   char buf[32];
749   snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
750 	    hbb->m_index);
751   lbldir.name = lendian32 (brig_emit_string (buf, '@'));
752 
753   hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
754 						       sizeof (lbldir));
755   brig_insn_count++;
756 }
757 
758 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
759    holding such, for constants and registers.  */
760 
761 static BrigType16_t
762 regtype_for_type (BrigType16_t t)
763 {
764   switch (t)
765     {
766     case BRIG_TYPE_B1:
767       return BRIG_TYPE_B1;
768 
769     case BRIG_TYPE_U8:
770     case BRIG_TYPE_U16:
771     case BRIG_TYPE_U32:
772     case BRIG_TYPE_S8:
773     case BRIG_TYPE_S16:
774     case BRIG_TYPE_S32:
775     case BRIG_TYPE_B8:
776     case BRIG_TYPE_B16:
777     case BRIG_TYPE_B32:
778     case BRIG_TYPE_F16:
779     case BRIG_TYPE_F32:
780     case BRIG_TYPE_U8X4:
781     case BRIG_TYPE_U16X2:
782     case BRIG_TYPE_S8X4:
783     case BRIG_TYPE_S16X2:
784     case BRIG_TYPE_F16X2:
785       return BRIG_TYPE_B32;
786 
787     case BRIG_TYPE_U64:
788     case BRIG_TYPE_S64:
789     case BRIG_TYPE_F64:
790     case BRIG_TYPE_B64:
791     case BRIG_TYPE_U8X8:
792     case BRIG_TYPE_U16X4:
793     case BRIG_TYPE_U32X2:
794     case BRIG_TYPE_S8X8:
795     case BRIG_TYPE_S16X4:
796     case BRIG_TYPE_S32X2:
797     case BRIG_TYPE_F16X4:
798     case BRIG_TYPE_F32X2:
799       return BRIG_TYPE_B64;
800 
801     case BRIG_TYPE_B128:
802     case BRIG_TYPE_U8X16:
803     case BRIG_TYPE_U16X8:
804     case BRIG_TYPE_U32X4:
805     case BRIG_TYPE_U64X2:
806     case BRIG_TYPE_S8X16:
807     case BRIG_TYPE_S16X8:
808     case BRIG_TYPE_S32X4:
809     case BRIG_TYPE_S64X2:
810     case BRIG_TYPE_F16X8:
811     case BRIG_TYPE_F32X4:
812     case BRIG_TYPE_F64X2:
813       return BRIG_TYPE_B128;
814 
815     default:
816       gcc_unreachable ();
817     }
818 }
819 
820 /* Return the length of the BRIG type TYPE that is going to be streamed out as
821    an immediate constant (so it must not be B1).  */
822 
823 unsigned
824 hsa_get_imm_brig_type_len (BrigType16_t type)
825 {
826   BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
827   BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
828 
829   switch (pack_type)
830     {
831     case BRIG_TYPE_PACK_NONE:
832       break;
833     case BRIG_TYPE_PACK_32:
834       return 4;
835     case BRIG_TYPE_PACK_64:
836       return 8;
837     case BRIG_TYPE_PACK_128:
838       return 16;
839     default:
840       gcc_unreachable ();
841     }
842 
843   switch (base_type)
844     {
845     case BRIG_TYPE_U8:
846     case BRIG_TYPE_S8:
847     case BRIG_TYPE_B8:
848       return 1;
849     case BRIG_TYPE_U16:
850     case BRIG_TYPE_S16:
851     case BRIG_TYPE_F16:
852     case BRIG_TYPE_B16:
853       return 2;
854     case BRIG_TYPE_U32:
855     case BRIG_TYPE_S32:
856     case BRIG_TYPE_F32:
857     case BRIG_TYPE_B32:
858       return 4;
859     case BRIG_TYPE_U64:
860     case BRIG_TYPE_S64:
861     case BRIG_TYPE_F64:
862     case BRIG_TYPE_B64:
863       return 8;
864     case BRIG_TYPE_B128:
865       return 16;
866     default:
867       gcc_unreachable ();
868     }
869 }
870 
871 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
872    If NEED_LEN is not equal to zero, shrink or extend the value
873    to NEED_LEN bytes.  Return how many bytes were written.  */
874 
875 static int
876 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
877 {
878   union hsa_bytes bytes;
879 
880   memset (&bytes, 0, sizeof (bytes));
881   tree type = TREE_TYPE (value);
882   gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
883 
884   unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
885   if (INTEGRAL_TYPE_P (type)
886       || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
887     switch (data_len)
888       {
889       case 1:
890 	bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
891 	break;
892       case 2:
893 	bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
894 	break;
895       case 4:
896 	bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
897 	break;
898       case 8:
899 	bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
900 	break;
901       default:
902 	gcc_unreachable ();
903       }
904   else if (SCALAR_FLOAT_TYPE_P (type))
905     {
906       if (data_len == 2)
907 	{
908 	  sorry ("Support for HSA does not implement immediate 16 bit FPU "
909 		 "operands");
910 	  return 2;
911 	}
912       unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type));
913       /* There are always 32 bits in each long, no matter the size of
914 	 the hosts long.  */
915       long tmp[6];
916 
917       real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
918 
919       if (int_len == 4)
920 	bytes.b32 = (uint32_t) tmp[0];
921       else
922 	{
923 	  bytes.b64 = (uint64_t)(uint32_t) tmp[1];
924 	  bytes.b64 <<= 32;
925 	  bytes.b64 |= (uint32_t) tmp[0];
926 	}
927     }
928   else
929     gcc_unreachable ();
930 
931   int len;
932   if (need_len == 0)
933     len = data_len;
934   else
935     len = need_len;
936 
937   memcpy (data, &bytes, len);
938   return len;
939 }
940 
941 char *
942 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
943 {
944   char *brig_repr;
945   *brig_repr_size = hsa_get_imm_brig_type_len (m_type);
946 
947   if (m_tree_value != NULL_TREE)
948     {
949       /* Update brig_repr_size for special tree values.  */
950       if (TREE_CODE (m_tree_value) == STRING_CST)
951 	*brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
952       else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
953 	*brig_repr_size
954 	  = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
955 
956       unsigned total_len = *brig_repr_size;
957 
958       /* As we can have a constructor with fewer elements, fill the memory
959 	 with zeros.  */
960       brig_repr = XCNEWVEC (char, total_len);
961       char *p = brig_repr;
962 
963       if (TREE_CODE (m_tree_value) == VECTOR_CST)
964 	{
965 	  int i, num = VECTOR_CST_NELTS (m_tree_value);
966 	  for (i = 0; i < num; i++)
967 	    {
968 	      tree v = VECTOR_CST_ELT (m_tree_value, i);
969 	      unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
970 	      total_len -= actual;
971 	      p += actual;
972 	    }
973 	  /* Vectors should have the exact size.  */
974 	  gcc_assert (total_len == 0);
975 	}
976       else if (TREE_CODE (m_tree_value) == STRING_CST)
977 	memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
978 		TREE_STRING_LENGTH (m_tree_value));
979       else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
980 	{
981 	  gcc_assert (total_len % 2 == 0);
982 	  unsigned actual;
983 	  actual
984 	    = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
985 					       total_len / 2);
986 
987 	  gcc_assert (actual == total_len / 2);
988 	  p += actual;
989 
990 	  actual
991 	    = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
992 					       total_len / 2);
993 	  gcc_assert (actual == total_len / 2);
994 	}
995       else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
996 	{
997 	  unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
998 	  for (unsigned i = 0; i < len; i++)
999 	    {
1000 	      tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
1001 	      unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
1002 	      total_len -= actual;
1003 	      p += actual;
1004 	    }
1005 	}
1006       else
1007 	emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
1008     }
1009   else
1010     {
1011       hsa_bytes bytes;
1012 
1013       switch (*brig_repr_size)
1014 	{
1015 	case 1:
1016 	  bytes.b8 = (uint8_t) m_int_value;
1017 	  break;
1018 	case 2:
1019 	  bytes.b16 = (uint16_t) m_int_value;
1020 	  break;
1021 	case 4:
1022 	  bytes.b32 = (uint32_t) m_int_value;
1023 	  break;
1024 	case 8:
1025 	  bytes.b64 = (uint64_t) m_int_value;
1026 	  break;
1027 	default:
1028 	  gcc_unreachable ();
1029 	}
1030 
1031       brig_repr = XNEWVEC (char, *brig_repr_size);
1032       memcpy (brig_repr, &bytes, *brig_repr_size);
1033     }
1034 
1035   return brig_repr;
1036 }
1037 
1038 /* Emit an immediate BRIG operand IMM.  The BRIG type of the immediate might
1039    have been massaged to comply with various HSA/BRIG type requirements, so the
1040    only important aspect of that is the length (because HSAIL might expect
1041    smaller constants or become bit-data).  The data should be represented
1042    according to what is in the tree representation.  */
1043 
1044 static void
1045 emit_immediate_operand (hsa_op_immed *imm)
1046 {
1047   unsigned brig_repr_size;
1048   char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1049   struct BrigOperandConstantBytes out;
1050 
1051   memset (&out, 0, sizeof (out));
1052   out.base.byteCount = lendian16 (sizeof (out));
1053   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1054   uint32_t byteCount = lendian32 (brig_repr_size);
1055   out.type = lendian16 (imm->m_type);
1056   out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1057   brig_operand.add (&out, sizeof (out));
1058   brig_data.add (brig_repr, brig_repr_size);
1059   brig_data.round_size_up (4);
1060 
1061   free (brig_repr);
1062 }
1063 
1064 /* Emit a register BRIG operand REG.  */
1065 
1066 static void
1067 emit_register_operand (hsa_op_reg *reg)
1068 {
1069   struct BrigOperandRegister out;
1070 
1071   out.base.byteCount = lendian16 (sizeof (out));
1072   out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1073   out.regNum = lendian32 (reg->m_hard_num);
1074 
1075   switch (regtype_for_type (reg->m_type))
1076     {
1077     case BRIG_TYPE_B32:
1078       out.regKind = BRIG_REGISTER_KIND_SINGLE;
1079       break;
1080     case BRIG_TYPE_B64:
1081       out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1082       break;
1083     case BRIG_TYPE_B128:
1084       out.regKind = BRIG_REGISTER_KIND_QUAD;
1085       break;
1086     case BRIG_TYPE_B1:
1087       out.regKind = BRIG_REGISTER_KIND_CONTROL;
1088       break;
1089     default:
1090       gcc_unreachable ();
1091     }
1092 
1093   brig_operand.add (&out, sizeof (out));
1094 }
1095 
1096 /* Emit an address BRIG operand ADDR.  */
1097 
1098 static void
1099 emit_address_operand (hsa_op_address *addr)
1100 {
1101   struct BrigOperandAddress out;
1102 
1103   out.base.byteCount = lendian16 (sizeof (out));
1104   out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1105   out.symbol = addr->m_symbol
1106     ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1107   out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1108 
1109   if (sizeof (addr->m_imm_offset) == 8)
1110     {
1111       out.offset.lo = lendian32 (addr->m_imm_offset);
1112       out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1113     }
1114   else
1115     {
1116       gcc_assert (sizeof (addr->m_imm_offset) == 4);
1117       out.offset.lo = lendian32 (addr->m_imm_offset);
1118       out.offset.hi = 0;
1119     }
1120 
1121   brig_operand.add (&out, sizeof (out));
1122 }
1123 
1124 /* Emit a code reference operand REF.  */
1125 
1126 static void
1127 emit_code_ref_operand (hsa_op_code_ref *ref)
1128 {
1129   struct BrigOperandCodeRef out;
1130 
1131   out.base.byteCount = lendian16 (sizeof (out));
1132   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1133   out.ref = lendian32 (ref->m_directive_offset);
1134   brig_operand.add (&out, sizeof (out));
1135 }
1136 
1137 /* Emit a code list operand CODE_LIST.  */
1138 
1139 static void
1140 emit_code_list_operand (hsa_op_code_list *code_list)
1141 {
1142   struct BrigOperandCodeList out;
1143   unsigned args = code_list->m_offsets.length ();
1144 
1145   for (unsigned i = 0; i < args; i++)
1146     gcc_assert (code_list->m_offsets[i]);
1147 
1148   out.base.byteCount = lendian16 (sizeof (out));
1149   out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1150 
1151   uint32_t byteCount = lendian32 (4 * args);
1152 
1153   out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1154   brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1155   brig_data.round_size_up (4);
1156   brig_operand.add (&out, sizeof (out));
1157 }
1158 
1159 /* Emit an operand list operand OPERAND_LIST.  */
1160 
1161 static void
1162 emit_operand_list_operand (hsa_op_operand_list *operand_list)
1163 {
1164   struct BrigOperandOperandList out;
1165   unsigned args = operand_list->m_offsets.length ();
1166 
1167   for (unsigned i = 0; i < args; i++)
1168     gcc_assert (operand_list->m_offsets[i]);
1169 
1170   out.base.byteCount = lendian16 (sizeof (out));
1171   out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1172 
1173   uint32_t byteCount = lendian32 (4 * args);
1174 
1175   out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1176   brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1177   brig_data.round_size_up (4);
1178   brig_operand.add (&out, sizeof (out));
1179 }
1180 
1181 /* Emit all operands queued for writing.  */
1182 
1183 static void
1184 emit_queued_operands (void)
1185 {
1186   for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1187     {
1188       gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1189       if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1190 	emit_immediate_operand (imm);
1191       else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1192 	emit_register_operand (reg);
1193       else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1194 	emit_address_operand (addr);
1195       else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1196 	emit_code_ref_operand (ref);
1197       else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1198 	emit_code_list_operand (code_list);
1199       else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1200 	emit_operand_list_operand (l);
1201       else
1202 	gcc_unreachable ();
1203     }
1204 }
1205 
1206 /* Emit directives describing the function that is used for
1207    a function declaration.  */
1208 
1209 static BrigDirectiveExecutable *
1210 emit_function_declaration (tree decl)
1211 {
1212   hsa_function_representation *f = hsa_generate_function_declaration (decl);
1213 
1214   BrigDirectiveExecutable *e = emit_function_directives (f, true);
1215   emit_queued_operands ();
1216 
1217   delete f;
1218 
1219   return e;
1220 }
1221 
1222 /* Emit directives describing the function that is used for
1223    an internal function declaration.  */
1224 
1225 static BrigDirectiveExecutable *
1226 emit_internal_fn_decl (hsa_internal_fn *fn)
1227 {
1228   hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1229 
1230   BrigDirectiveExecutable *e = emit_function_directives (f, true);
1231   emit_queued_operands ();
1232 
1233   delete f;
1234 
1235   return e;
1236 }
1237 
1238 /* Enqueue all operands of INSN and return offset to BRIG data section
1239    to list of operand offsets.  */
1240 
1241 static unsigned
1242 emit_insn_operands (hsa_insn_basic *insn)
1243 {
1244   auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1245     operand_offsets;
1246 
1247   unsigned l = insn->operand_count ();
1248 
1249   /* We have N operands so use 4 * N for the byte_count.  */
1250   uint32_t byte_count = lendian32 (4 * l);
1251   unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1252   if (l > 0)
1253     {
1254       operand_offsets.safe_grow (l);
1255       for (unsigned i = 0; i < l; i++)
1256 	operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1257 
1258       brig_data.add (operand_offsets.address (),
1259 		     l * sizeof (BrigOperandOffset32_t));
1260     }
1261   brig_data.round_size_up (4);
1262   return offset;
1263 }
1264 
1265 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1266    to BRIG data section to list of operand offsets.  */
1267 
1268 static unsigned
1269 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1270 	       hsa_op_base *op2 = NULL)
1271 {
1272   auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1273     operand_offsets;
1274 
1275   gcc_checking_assert (op0 != NULL);
1276   operand_offsets.safe_push (enqueue_op (op0));
1277 
1278   if (op1 != NULL)
1279     {
1280       operand_offsets.safe_push (enqueue_op (op1));
1281       if (op2 != NULL)
1282 	operand_offsets.safe_push (enqueue_op (op2));
1283     }
1284 
1285   unsigned l = operand_offsets.length ();
1286 
1287   /* We have N operands so use 4 * N for the byte_count.  */
1288   uint32_t byte_count = lendian32 (4 * l);
1289 
1290   unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1291   brig_data.add (operand_offsets.address (),
1292 		 l * sizeof (BrigOperandOffset32_t));
1293 
1294   brig_data.round_size_up (4);
1295 
1296   return offset;
1297 }
1298 
1299 /* Emit an HSA memory instruction and all necessary directives, schedule
1300    necessary operands for writing.  */
1301 
1302 static void
1303 emit_memory_insn (hsa_insn_mem *mem)
1304 {
1305   struct BrigInstMem repr;
1306   gcc_checking_assert (mem->operand_count () == 2);
1307 
1308   hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1309 
1310   /* This is necessary because of the erroneous typedef of
1311      BrigMemoryModifier8_t which introduces padding which may then contain
1312      random stuff (which we do not want so that we can test things don't
1313      change).  */
1314   memset (&repr, 0, sizeof (repr));
1315   repr.base.base.byteCount = lendian16 (sizeof (repr));
1316   repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1317   repr.base.opcode = lendian16 (mem->m_opcode);
1318   repr.base.type = lendian16 (mem->m_type);
1319   repr.base.operands = lendian32 (emit_insn_operands (mem));
1320 
1321   if (addr->m_symbol)
1322     repr.segment = addr->m_symbol->m_segment;
1323   else
1324     repr.segment = BRIG_SEGMENT_FLAT;
1325   repr.modifier = 0;
1326   repr.equivClass = mem->m_equiv_class;
1327   repr.align = mem->m_align;
1328   if (mem->m_opcode == BRIG_OPCODE_LD)
1329     repr.width = BRIG_WIDTH_1;
1330   else
1331     repr.width = BRIG_WIDTH_NONE;
1332   memset (&repr.reserved, 0, sizeof (repr.reserved));
1333   brig_code.add (&repr, sizeof (repr));
1334   brig_insn_count++;
1335 }
1336 
1337 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1338    necessary operands for writing.  */
1339 
1340 static void
1341 emit_signal_insn (hsa_insn_signal *mem)
1342 {
1343   struct BrigInstSignal repr;
1344 
1345   memset (&repr, 0, sizeof (repr));
1346   repr.base.base.byteCount = lendian16 (sizeof (repr));
1347   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1348   repr.base.opcode = lendian16 (mem->m_opcode);
1349   repr.base.type = lendian16 (mem->m_type);
1350   repr.base.operands = lendian32 (emit_insn_operands (mem));
1351 
1352   repr.memoryOrder = mem->m_memory_order;
1353   repr.signalOperation = mem->m_signalop;
1354   repr.signalType = hsa_machine_large_p () ? BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32;
1355 
1356   brig_code.add (&repr, sizeof (repr));
1357   brig_insn_count++;
1358 }
1359 
1360 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1361    necessary operands for writing.  */
1362 
1363 static void
1364 emit_atomic_insn (hsa_insn_atomic *mem)
1365 {
1366   struct BrigInstAtomic repr;
1367 
1368   /* Either operand[0] or operand[1] must be an address operand.  */
1369   hsa_op_address *addr = NULL;
1370   if (is_a <hsa_op_address *> (mem->get_op (0)))
1371     addr = as_a <hsa_op_address *> (mem->get_op (0));
1372   else
1373     addr = as_a <hsa_op_address *> (mem->get_op (1));
1374 
1375   memset (&repr, 0, sizeof (repr));
1376   repr.base.base.byteCount = lendian16 (sizeof (repr));
1377   repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1378   repr.base.opcode = lendian16 (mem->m_opcode);
1379   repr.base.type = lendian16 (mem->m_type);
1380   repr.base.operands = lendian32 (emit_insn_operands (mem));
1381 
1382   if (addr->m_symbol)
1383     repr.segment = addr->m_symbol->m_segment;
1384   else
1385     repr.segment = BRIG_SEGMENT_FLAT;
1386   repr.memoryOrder = mem->m_memoryorder;
1387   repr.memoryScope = mem->m_memoryscope;
1388   repr.atomicOperation = mem->m_atomicop;
1389 
1390   brig_code.add (&repr, sizeof (repr));
1391   brig_insn_count++;
1392 }
1393 
1394 /* Emit an HSA LDA instruction and all necessary directives, schedule
1395    necessary operands for writing.  */
1396 
1397 static void
1398 emit_addr_insn (hsa_insn_basic *insn)
1399 {
1400   struct BrigInstAddr repr;
1401 
1402   hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1403 
1404   repr.base.base.byteCount = lendian16 (sizeof (repr));
1405   repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1406   repr.base.opcode = lendian16 (insn->m_opcode);
1407   repr.base.type = lendian16 (insn->m_type);
1408   repr.base.operands = lendian32 (emit_insn_operands (insn));
1409 
1410   if (addr->m_symbol)
1411     repr.segment = addr->m_symbol->m_segment;
1412   else
1413     repr.segment = BRIG_SEGMENT_FLAT;
1414   memset (&repr.reserved, 0, sizeof (repr.reserved));
1415 
1416   brig_code.add (&repr, sizeof (repr));
1417   brig_insn_count++;
1418 }
1419 
1420 /* Emit an HSA segment conversion instruction and all necessary directives,
1421    schedule necessary operands for writing.  */
1422 
1423 static void
1424 emit_segment_insn (hsa_insn_seg *seg)
1425 {
1426   struct BrigInstSegCvt repr;
1427 
1428   repr.base.base.byteCount = lendian16 (sizeof (repr));
1429   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1430   repr.base.opcode = lendian16 (seg->m_opcode);
1431   repr.base.type = lendian16 (seg->m_type);
1432   repr.base.operands = lendian32 (emit_insn_operands (seg));
1433   repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1434   repr.segment = seg->m_segment;
1435   repr.modifier = 0;
1436 
1437   brig_code.add (&repr, sizeof (repr));
1438 
1439   brig_insn_count++;
1440 }
1441 
1442 /* Emit an HSA alloca instruction and all necessary directives,
1443    schedule necessary operands for writing.  */
1444 
1445 static void
1446 emit_alloca_insn (hsa_insn_alloca *alloca)
1447 {
1448   struct BrigInstMem repr;
1449   gcc_checking_assert (alloca->operand_count () == 2);
1450 
1451   memset (&repr, 0, sizeof (repr));
1452   repr.base.base.byteCount = lendian16 (sizeof (repr));
1453   repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1454   repr.base.opcode = lendian16 (alloca->m_opcode);
1455   repr.base.type = lendian16 (alloca->m_type);
1456   repr.base.operands = lendian32 (emit_insn_operands (alloca));
1457   repr.segment = BRIG_SEGMENT_PRIVATE;
1458   repr.modifier = 0;
1459   repr.equivClass = 0;
1460   repr.align = alloca->m_align;
1461   repr.width = BRIG_WIDTH_NONE;
1462   memset (&repr.reserved, 0, sizeof (repr.reserved));
1463   brig_code.add (&repr, sizeof (repr));
1464   brig_insn_count++;
1465 }
1466 
1467 /* Emit an HSA comparison instruction and all necessary directives,
1468    schedule necessary operands for writing.  */
1469 
1470 static void
1471 emit_cmp_insn (hsa_insn_cmp *cmp)
1472 {
1473   struct BrigInstCmp repr;
1474 
1475   memset (&repr, 0, sizeof (repr));
1476   repr.base.base.byteCount = lendian16 (sizeof (repr));
1477   repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1478   repr.base.opcode = lendian16 (cmp->m_opcode);
1479   repr.base.type = lendian16 (cmp->m_type);
1480   repr.base.operands = lendian32 (emit_insn_operands (cmp));
1481 
1482   if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1483     repr.sourceType
1484       = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1485   else
1486     repr.sourceType
1487       = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1488   repr.modifier = 0;
1489   repr.compare = cmp->m_compare;
1490   repr.pack = 0;
1491 
1492   brig_code.add (&repr, sizeof (repr));
1493   brig_insn_count++;
1494 }
1495 
1496 /* Emit an HSA generic branching/sycnronization instruction.  */
1497 
1498 static void
1499 emit_generic_branch_insn (hsa_insn_br *br)
1500 {
1501   struct BrigInstBr repr;
1502   repr.base.base.byteCount = lendian16 (sizeof (repr));
1503   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1504   repr.base.opcode = lendian16 (br->m_opcode);
1505   repr.width = br->m_width;
1506   repr.base.type = lendian16 (br->m_type);
1507   repr.base.operands = lendian32 (emit_insn_operands (br));
1508   memset (&repr.reserved, 0, sizeof (repr.reserved));
1509 
1510   brig_code.add (&repr, sizeof (repr));
1511   brig_insn_count++;
1512 }
1513 
1514 /* Emit an HSA conditional branching instruction and all necessary directives,
1515    schedule necessary operands for writing.  */
1516 
1517 static void
1518 emit_cond_branch_insn (hsa_insn_cbr *br)
1519 {
1520   struct BrigInstBr repr;
1521 
1522   basic_block target = NULL;
1523   edge_iterator ei;
1524   edge e;
1525 
1526   /* At the moment we only handle direct conditional jumps.  */
1527   gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1528   repr.base.base.byteCount = lendian16 (sizeof (repr));
1529   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1530   repr.base.opcode = lendian16 (br->m_opcode);
1531   repr.width = br->m_width;
1532   /* For Conditional jumps the type is always B1.  */
1533   repr.base.type = lendian16 (BRIG_TYPE_B1);
1534 
1535   FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1536     if (e->flags & EDGE_TRUE_VALUE)
1537       {
1538 	target = e->dest;
1539 	break;
1540       }
1541   gcc_assert (target);
1542 
1543   repr.base.operands
1544     = lendian32 (emit_operands (br->get_op (0),
1545 				&hsa_bb_for_bb (target)->m_label_ref));
1546   memset (&repr.reserved, 0, sizeof (repr.reserved));
1547 
1548   brig_code.add (&repr, sizeof (repr));
1549   brig_insn_count++;
1550 }
1551 
1552 /* Emit an HSA unconditional jump branching instruction that points to
1553    a label REFERENCE.  */
1554 
1555 static void
1556 emit_unconditional_jump (hsa_op_code_ref *reference)
1557 {
1558   struct BrigInstBr repr;
1559 
1560   repr.base.base.byteCount = lendian16 (sizeof (repr));
1561   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1562   repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1563   repr.base.type = lendian16 (BRIG_TYPE_NONE);
1564   /* Direct branches to labels must be width(all).  */
1565   repr.width = BRIG_WIDTH_ALL;
1566 
1567   repr.base.operands = lendian32 (emit_operands (reference));
1568   memset (&repr.reserved, 0, sizeof (repr.reserved));
1569   brig_code.add (&repr, sizeof (repr));
1570   brig_insn_count++;
1571 }
1572 
1573 /* Emit an HSA switch jump instruction that uses a jump table to
1574    jump to a destination label.  */
1575 
1576 static void
1577 emit_switch_insn (hsa_insn_sbr *sbr)
1578 {
1579   struct BrigInstBr repr;
1580 
1581   gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1582   repr.base.base.byteCount = lendian16 (sizeof (repr));
1583   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1584   repr.base.opcode = lendian16 (sbr->m_opcode);
1585   repr.width = BRIG_WIDTH_1;
1586   /* For Conditional jumps the type is always B1.  */
1587   hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1588   repr.base.type = lendian16 (index->m_type);
1589   repr.base.operands
1590     = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1591   memset (&repr.reserved, 0, sizeof (repr.reserved));
1592 
1593   brig_code.add (&repr, sizeof (repr));
1594   brig_insn_count++;
1595 }
1596 
1597 /* Emit a HSA convert instruction and all necessary directives, schedule
1598    necessary operands for writing.  */
1599 
1600 static void
1601 emit_cvt_insn (hsa_insn_cvt *insn)
1602 {
1603   struct BrigInstCvt repr;
1604   BrigType16_t srctype;
1605 
1606   repr.base.base.byteCount = lendian16 (sizeof (repr));
1607   repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1608   repr.base.opcode = lendian16 (insn->m_opcode);
1609   repr.base.type = lendian16 (insn->m_type);
1610   repr.base.operands = lendian32 (emit_insn_operands (insn));
1611 
1612   if (is_a <hsa_op_reg *> (insn->get_op (1)))
1613     srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1614   else
1615     srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1616   repr.sourceType = lendian16 (srctype);
1617   repr.modifier = 0;
1618   /* float to smaller float requires a rounding setting (we default
1619      to 'near'.  */
1620   if (hsa_type_float_p (insn->m_type)
1621       && (!hsa_type_float_p (srctype)
1622 	  || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1623 	      < (srctype & BRIG_TYPE_BASE_MASK))))
1624     repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1625   else if (hsa_type_integer_p (insn->m_type) &&
1626 	   hsa_type_float_p (srctype))
1627     repr.round = BRIG_ROUND_INTEGER_ZERO;
1628   else
1629     repr.round = BRIG_ROUND_NONE;
1630   brig_code.add (&repr, sizeof (repr));
1631   brig_insn_count++;
1632 }
1633 
1634 /* Emit call instruction INSN, where this instruction must be closed
1635    within a call block instruction.  */
1636 
1637 static void
1638 emit_call_insn (hsa_insn_call *call)
1639 {
1640   struct BrigInstBr repr;
1641 
1642   repr.base.base.byteCount = lendian16 (sizeof (repr));
1643   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1644   repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1645   repr.base.type = lendian16 (BRIG_TYPE_NONE);
1646 
1647   repr.base.operands
1648     = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1649 				call->m_args_code_list));
1650 
1651   /* Internal functions have not set m_called_function.  */
1652   if (call->m_called_function)
1653     {
1654       function_linkage_pair pair (call->m_called_function,
1655 				  call->m_func.m_brig_op_offset);
1656       function_call_linkage.safe_push (pair);
1657     }
1658   else
1659     {
1660       hsa_internal_fn *slot
1661 	= hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1662       gcc_assert (slot);
1663       gcc_assert (slot->m_offset > 0);
1664       call->m_func.m_directive_offset = slot->m_offset;
1665     }
1666 
1667   repr.width = BRIG_WIDTH_ALL;
1668   memset (&repr.reserved, 0, sizeof (repr.reserved));
1669 
1670   brig_code.add (&repr, sizeof (repr));
1671   brig_insn_count++;
1672 }
1673 
1674 /* Emit argument block directive.  */
1675 
1676 static void
1677 emit_arg_block_insn (hsa_insn_arg_block *insn)
1678 {
1679   switch (insn->m_kind)
1680     {
1681     case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1682       {
1683 	struct BrigDirectiveArgBlock repr;
1684 	repr.base.byteCount = lendian16 (sizeof (repr));
1685 	repr.base.kind = lendian16 (insn->m_kind);
1686 	brig_code.add (&repr, sizeof (repr));
1687 
1688 	for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1689 	  {
1690 	    insn->m_call_insn->m_args_code_list->m_offsets[i]
1691 	      = lendian32 (emit_directive_variable
1692 			   (insn->m_call_insn->m_input_args[i]));
1693 	    brig_insn_count++;
1694 	  }
1695 
1696 	if (insn->m_call_insn->m_output_arg)
1697 	  {
1698 	    insn->m_call_insn->m_result_code_list->m_offsets[0]
1699 	      = lendian32 (emit_directive_variable
1700 			   (insn->m_call_insn->m_output_arg));
1701 	    brig_insn_count++;
1702 	  }
1703 
1704 	break;
1705       }
1706     case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1707       {
1708 	struct BrigDirectiveArgBlock repr;
1709 	repr.base.byteCount = lendian16 (sizeof (repr));
1710 	repr.base.kind = lendian16 (insn->m_kind);
1711 	brig_code.add (&repr, sizeof (repr));
1712 	break;
1713       }
1714     default:
1715       gcc_unreachable ();
1716     }
1717 
1718   brig_insn_count++;
1719 }
1720 
1721 /* Emit comment directive.  */
1722 
1723 static void
1724 emit_comment_insn (hsa_insn_comment *insn)
1725 {
1726   struct BrigDirectiveComment repr;
1727   memset (&repr, 0, sizeof (repr));
1728 
1729   repr.base.byteCount = lendian16 (sizeof (repr));
1730   repr.base.kind = lendian16 (insn->m_opcode);
1731   repr.name = brig_emit_string (insn->m_comment, '\0', false);
1732   brig_code.add (&repr, sizeof (repr));
1733 }
1734 
1735 /* Emit queue instruction INSN.  */
1736 
1737 static void
1738 emit_queue_insn (hsa_insn_queue *insn)
1739 {
1740   BrigInstQueue repr;
1741   memset (&repr, 0, sizeof (repr));
1742 
1743   repr.base.base.byteCount = lendian16 (sizeof (repr));
1744   repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1745   repr.base.opcode = lendian16 (insn->m_opcode);
1746   repr.base.type = lendian16 (insn->m_type);
1747   repr.segment = insn->m_segment;
1748   repr.memoryOrder = insn->m_memory_order;
1749   repr.base.operands = lendian32 (emit_insn_operands (insn));
1750   brig_data.round_size_up (4);
1751   brig_code.add (&repr, sizeof (repr));
1752 
1753   brig_insn_count++;
1754 }
1755 
1756 /* Emit source type instruction INSN.  */
1757 
1758 static void
1759 emit_srctype_insn (hsa_insn_srctype *insn)
1760 {
1761   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1762   struct BrigInstSourceType repr;
1763   unsigned operand_count = insn->operand_count ();
1764   gcc_checking_assert (operand_count >= 2);
1765 
1766   memset (&repr, 0, sizeof (repr));
1767   repr.sourceType = lendian16 (insn->m_source_type);
1768   repr.base.base.byteCount = lendian16 (sizeof (repr));
1769   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1770   repr.base.opcode = lendian16 (insn->m_opcode);
1771   repr.base.type = lendian16 (insn->m_type);
1772 
1773   repr.base.operands = lendian32 (emit_insn_operands (insn));
1774   brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1775   brig_insn_count++;
1776 }
1777 
1778 /* Emit packed instruction INSN.  */
1779 
1780 static void
1781 emit_packed_insn (hsa_insn_packed *insn)
1782 {
1783   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1784   struct BrigInstSourceType repr;
1785   unsigned operand_count = insn->operand_count ();
1786   gcc_checking_assert (operand_count >= 2);
1787 
1788   memset (&repr, 0, sizeof (repr));
1789   repr.sourceType = lendian16 (insn->m_source_type);
1790   repr.base.base.byteCount = lendian16 (sizeof (repr));
1791   repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1792   repr.base.opcode = lendian16 (insn->m_opcode);
1793   repr.base.type = lendian16 (insn->m_type);
1794 
1795   if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1796     {
1797       /* Create operand list for packed type.  */
1798       for (unsigned i = 1; i < operand_count; i++)
1799 	{
1800 	  gcc_checking_assert (insn->get_op (i));
1801 	  insn->m_operand_list->m_offsets[i - 1]
1802 	    = lendian32 (enqueue_op (insn->get_op (i)));
1803 	}
1804 
1805       repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1806 						     insn->m_operand_list));
1807     }
1808   else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1809     {
1810       /* Create operand list for packed type.  */
1811       for (unsigned i = 0; i < operand_count - 1; i++)
1812 	{
1813 	  gcc_checking_assert (insn->get_op (i));
1814 	  insn->m_operand_list->m_offsets[i]
1815 	    = lendian32 (enqueue_op (insn->get_op (i)));
1816 	}
1817 
1818       unsigned ops = emit_operands (insn->m_operand_list,
1819 				    insn->get_op (insn->operand_count () - 1));
1820       repr.base.operands = lendian32 (ops);
1821     }
1822 
1823 
1824   brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1825   brig_insn_count++;
1826 }
1827 
1828 /* Emit a basic HSA instruction and all necessary directives, schedule
1829    necessary operands for writing.  */
1830 
1831 static void
1832 emit_basic_insn (hsa_insn_basic *insn)
1833 {
1834   /* We assume that BrigInstMod has a BrigInstBasic prefix.  */
1835   struct BrigInstMod repr;
1836   BrigType16_t type;
1837 
1838   memset (&repr, 0, sizeof (repr));
1839   repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1840   repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1841   repr.base.opcode = lendian16 (insn->m_opcode);
1842   switch (insn->m_opcode)
1843     {
1844       /* And the bit-logical operations need bit types and whine about
1845 	 arithmetic types :-/  */
1846       case BRIG_OPCODE_AND:
1847       case BRIG_OPCODE_OR:
1848       case BRIG_OPCODE_XOR:
1849       case BRIG_OPCODE_NOT:
1850 	type = regtype_for_type (insn->m_type);
1851 	break;
1852       default:
1853 	type = insn->m_type;
1854 	break;
1855     }
1856   repr.base.type = lendian16 (type);
1857   repr.base.operands = lendian32 (emit_insn_operands (insn));
1858 
1859   if (hsa_type_packed_p (type))
1860     {
1861       if (hsa_type_float_p (type)
1862 	  && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1863 	repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1864       else
1865 	repr.round = 0;
1866       /* We assume that destination and sources agree in packing layout.  */
1867       if (insn->num_used_ops () >= 2)
1868 	repr.pack = BRIG_PACK_PP;
1869       else
1870 	repr.pack = BRIG_PACK_P;
1871       repr.reserved = 0;
1872       repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1873       repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1874       brig_code.add (&repr, sizeof (struct BrigInstMod));
1875     }
1876   else
1877     brig_code.add (&repr, sizeof (struct BrigInstBasic));
1878   brig_insn_count++;
1879 }
1880 
1881 /* Emit an HSA instruction and all necessary directives, schedule necessary
1882    operands for writing.  */
1883 
1884 static void
1885 emit_insn (hsa_insn_basic *insn)
1886 {
1887   gcc_assert (!is_a <hsa_insn_phi *> (insn));
1888 
1889   insn->m_brig_offset = brig_code.total_size;
1890 
1891   if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1892     emit_signal_insn (signal);
1893   else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1894     emit_atomic_insn (atom);
1895   else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1896     emit_memory_insn (mem);
1897   else if (insn->m_opcode == BRIG_OPCODE_LDA)
1898     emit_addr_insn (insn);
1899   else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1900     emit_segment_insn (seg);
1901   else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1902     emit_cmp_insn (cmp);
1903   else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
1904     emit_cond_branch_insn (br);
1905   else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1906     {
1907       if (switch_instructions == NULL)
1908 	switch_instructions = new vec <hsa_insn_sbr *> ();
1909 
1910       switch_instructions->safe_push (sbr);
1911       emit_switch_insn (sbr);
1912     }
1913   else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1914     emit_generic_branch_insn (br);
1915   else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1916     emit_arg_block_insn (block);
1917   else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1918     emit_call_insn (call);
1919   else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1920     emit_comment_insn (comment);
1921   else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1922     emit_queue_insn (queue);
1923   else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1924     emit_srctype_insn (srctype);
1925   else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1926     emit_packed_insn (packed);
1927   else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1928     emit_cvt_insn (cvt);
1929   else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1930     emit_alloca_insn (alloca);
1931   else
1932     emit_basic_insn (insn);
1933 }
1934 
1935 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1936    or we are about to finish emitting code, if it is NULL.  If the fall through
1937    edge from BB does not lead to NEXT_BB, emit an unconditional jump.  */
1938 
1939 static void
1940 perhaps_emit_branch (basic_block bb, basic_block next_bb)
1941 {
1942   basic_block t_bb = NULL, ff = NULL;
1943 
1944   edge_iterator ei;
1945   edge e;
1946 
1947   /* If the last instruction of BB is a switch, ignore emission of all
1948      edges.  */
1949   if (hsa_bb_for_bb (bb)->m_last_insn
1950       && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1951     return;
1952 
1953   FOR_EACH_EDGE (e, ei, bb->succs)
1954     if (e->flags & EDGE_TRUE_VALUE)
1955       {
1956 	gcc_assert (!t_bb);
1957 	t_bb = e->dest;
1958       }
1959     else
1960       {
1961 	gcc_assert (!ff);
1962 	ff = e->dest;
1963       }
1964 
1965   if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1966     return;
1967 
1968   emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1969 }
1970 
1971 /* Emit the a function with name NAME to the various brig sections.  */
1972 
1973 void
1974 hsa_brig_emit_function (void)
1975 {
1976   basic_block bb, prev_bb;
1977   hsa_insn_basic *insn;
1978   BrigDirectiveExecutable *ptr_to_fndir;
1979 
1980   brig_init ();
1981 
1982   brig_insn_count = 0;
1983   memset (&op_queue, 0, sizeof (op_queue));
1984   op_queue.projected_size = brig_operand.total_size;
1985 
1986   if (!function_offsets)
1987     function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1988 
1989   if (!emitted_declarations)
1990     emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1991 
1992   for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1993     {
1994       tree called = hsa_cfun->m_called_functions[i];
1995 
1996       /* If the function has no definition, emit a declaration.  */
1997       if (!emitted_declarations->get (called))
1998 	{
1999 	  BrigDirectiveExecutable *e = emit_function_declaration (called);
2000 	  emitted_declarations->put (called, e);
2001 	}
2002     }
2003 
2004   for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
2005     {
2006       hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
2007       emit_internal_fn_decl (called);
2008     }
2009 
2010   ptr_to_fndir = emit_function_directives (hsa_cfun, false);
2011   for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
2012        insn;
2013        insn = insn->m_next)
2014     emit_insn (insn);
2015   prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2016   FOR_EACH_BB_FN (bb, cfun)
2017     {
2018       perhaps_emit_branch (prev_bb, bb);
2019       emit_bb_label_directive (hsa_bb_for_bb (bb));
2020       for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2021 	emit_insn (insn);
2022       prev_bb = bb;
2023     }
2024   perhaps_emit_branch (prev_bb, NULL);
2025   ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size);
2026 
2027   /* Fill up label references for all sbr instructions.  */
2028   if (switch_instructions)
2029     {
2030       for (unsigned i = 0; i < switch_instructions->length (); i++)
2031 	{
2032 	  hsa_insn_sbr *sbr = (*switch_instructions)[i];
2033 	  for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2034 	    {
2035 	      hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2036 	      sbr->m_label_code_list->m_offsets[j]
2037 		= hbb->m_label_ref.m_directive_offset;
2038 	    }
2039 	}
2040 
2041       switch_instructions->release ();
2042       delete switch_instructions;
2043       switch_instructions = NULL;
2044     }
2045 
2046   if (dump_file)
2047     {
2048       fprintf (dump_file, "------- After BRIG emission: -------\n");
2049       dump_hsa_cfun (dump_file);
2050     }
2051 
2052   emit_queued_operands ();
2053 }
2054 
2055 /* Emit all OMP symbols related to OMP.  */
2056 
2057 void
2058 hsa_brig_emit_omp_symbols (void)
2059 {
2060   brig_init ();
2061   emit_directive_variable (hsa_num_threads);
2062 }
2063 
2064 /* Create and return __hsa_global_variables symbol that contains
2065    all informations consumed by libgomp to link global variables
2066    with their string names used by an HSA kernel.  */
2067 
2068 static tree
2069 hsa_output_global_variables ()
2070 {
2071   unsigned l = hsa_global_variable_symbols->elements ();
2072 
2073   tree variable_info_type = make_node (RECORD_TYPE);
2074   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2075 			   get_identifier ("name"), ptr_type_node);
2076   DECL_CHAIN (id_f1) = NULL_TREE;
2077   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2078 			   get_identifier ("omp_data_size"),
2079 			   ptr_type_node);
2080   DECL_CHAIN (id_f2) = id_f1;
2081   finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2082 			 NULL_TREE);
2083 
2084   tree int_num_of_global_vars;
2085   int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2086   tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2087   tree global_vars_array_type = build_array_type (variable_info_type,
2088 						  global_vars_num_index_type);
2089   TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2090 
2091   vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2092 
2093   for (hash_table <hsa_noop_symbol_hasher>::iterator it
2094        = hsa_global_variable_symbols->begin ();
2095        it != hsa_global_variable_symbols->end (); ++it)
2096     {
2097       unsigned len = strlen ((*it)->m_name);
2098       char *copy = XNEWVEC (char, len + 2);
2099       copy[0] = '&';
2100       memcpy (copy + 1, (*it)->m_name, len);
2101       copy[len + 1] = '\0';
2102       len++;
2103       hsa_sanitize_name (copy);
2104 
2105       tree var_name = build_string (len, copy);
2106       TREE_TYPE (var_name)
2107 	= build_array_type (char_type_node, build_index_type (size_int (len)));
2108       free (copy);
2109 
2110       vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2111       CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2112 			      build1 (ADDR_EXPR,
2113 				      build_pointer_type (TREE_TYPE (var_name)),
2114 				      var_name));
2115       CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2116 			      build_fold_addr_expr ((*it)->m_decl));
2117 
2118       tree variable_info_ctor = build_constructor (variable_info_type,
2119 						   variable_info_vec);
2120 
2121       CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2122 			      variable_info_ctor);
2123     }
2124 
2125   tree global_vars_ctor = build_constructor (global_vars_array_type,
2126 					     global_vars_vec);
2127 
2128   char tmp_name[64];
2129   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2130   tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2131 					   get_identifier (tmp_name),
2132 					   global_vars_array_type);
2133   TREE_STATIC (global_vars_table) = 1;
2134   TREE_READONLY (global_vars_table) = 1;
2135   TREE_PUBLIC (global_vars_table) = 0;
2136   DECL_ARTIFICIAL (global_vars_table) = 1;
2137   DECL_IGNORED_P (global_vars_table) = 1;
2138   DECL_EXTERNAL (global_vars_table) = 0;
2139   TREE_CONSTANT (global_vars_table) = 1;
2140   DECL_INITIAL (global_vars_table) = global_vars_ctor;
2141   varpool_node::finalize_decl (global_vars_table);
2142 
2143   return global_vars_table;
2144 }
2145 
2146 /* Create __hsa_host_functions and __hsa_kernels that contain
2147    all informations consumed by libgomp to register all kernels
2148    in the BRIG binary.  */
2149 
2150 static void
2151 hsa_output_kernels (tree *host_func_table, tree *kernels)
2152 {
2153   unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2154 
2155   tree int_num_of_kernels;
2156   int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2157   tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2158   tree host_functions_array_type = build_array_type (ptr_type_node,
2159 						     kernel_num_index_type);
2160   TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2161 
2162   vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2163   for (unsigned i = 0; i < map_count; ++i)
2164     {
2165       tree decl = hsa_get_decl_kernel_mapping_decl (i);
2166       tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2167       CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2168     }
2169   tree host_functions_ctor = build_constructor (host_functions_array_type,
2170 						host_functions_vec);
2171   char tmp_name[64];
2172   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2173   tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2174 					 get_identifier (tmp_name),
2175 					 host_functions_array_type);
2176   TREE_STATIC (hsa_host_func_table) = 1;
2177   TREE_READONLY (hsa_host_func_table) = 1;
2178   TREE_PUBLIC (hsa_host_func_table) = 0;
2179   DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2180   DECL_IGNORED_P (hsa_host_func_table) = 1;
2181   DECL_EXTERNAL (hsa_host_func_table) = 0;
2182   TREE_CONSTANT (hsa_host_func_table) = 1;
2183   DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2184   varpool_node::finalize_decl (hsa_host_func_table);
2185   *host_func_table = hsa_host_func_table;
2186 
2187   /* Following code emits list of kernel_info structures.  */
2188 
2189   tree kernel_info_type = make_node (RECORD_TYPE);
2190   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2191 			   get_identifier ("name"), ptr_type_node);
2192   DECL_CHAIN (id_f1) = NULL_TREE;
2193   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2194 			   get_identifier ("omp_data_size"),
2195 			   unsigned_type_node);
2196   DECL_CHAIN (id_f2) = id_f1;
2197   tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2198 			   get_identifier ("gridified_kernel_p"),
2199 			   boolean_type_node);
2200   DECL_CHAIN (id_f3) = id_f2;
2201   tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2202 			   get_identifier ("kernel_dependencies_count"),
2203 			   unsigned_type_node);
2204   DECL_CHAIN (id_f4) = id_f3;
2205   tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2206 			   get_identifier ("kernel_dependencies"),
2207 			   build_pointer_type (build_pointer_type
2208 					       (char_type_node)));
2209   DECL_CHAIN (id_f5) = id_f4;
2210   finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2211 			 NULL_TREE);
2212 
2213   int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2214   tree kernel_info_vector_type
2215     = build_array_type (kernel_info_type,
2216 			build_index_type (int_num_of_kernels));
2217   TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2218 
2219   vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2220   tree kernel_dependencies_vector_type = NULL;
2221 
2222   for (unsigned i = 0; i < map_count; ++i)
2223     {
2224       tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2225       char *name = hsa_get_decl_kernel_mapping_name (i);
2226       unsigned len = strlen (name);
2227       char *copy = XNEWVEC (char, len + 2);
2228       copy[0] = '&';
2229       memcpy (copy + 1, name, len);
2230       copy[len + 1] = '\0';
2231       len++;
2232 
2233       tree kern_name = build_string (len, copy);
2234       TREE_TYPE (kern_name)
2235 	= build_array_type (char_type_node, build_index_type (size_int (len)));
2236       free (copy);
2237 
2238       unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2239       tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2240       bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2241       tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2242 						     gridified_kernel_p);
2243       unsigned count = 0;
2244       vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2245       if (hsa_decl_kernel_dependencies)
2246 	{
2247 	  vec<const char *> **slot;
2248 	  slot = hsa_decl_kernel_dependencies->get (kernel);
2249 	  if (slot)
2250 	    {
2251 	      vec <const char *> *dependencies = *slot;
2252 	      count = dependencies->length ();
2253 
2254 	      kernel_dependencies_vector_type
2255 		= build_array_type (build_pointer_type (char_type_node),
2256 				    build_index_type (size_int (count)));
2257 	      TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2258 
2259 	      for (unsigned j = 0; j < count; j++)
2260 		{
2261 		  const char *d = (*dependencies)[j];
2262 		  len = strlen (d);
2263 		  tree dependency_name = build_string (len, d);
2264 		  TREE_TYPE (dependency_name)
2265 		    = build_array_type (char_type_node,
2266 					build_index_type (size_int (len)));
2267 
2268 		  CONSTRUCTOR_APPEND_ELT
2269 		    (kernel_dependencies_vec, NULL_TREE,
2270 		     build1 (ADDR_EXPR,
2271 			     build_pointer_type (TREE_TYPE (dependency_name)),
2272 			     dependency_name));
2273 		}
2274 	    }
2275 	}
2276 
2277       tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2278 
2279       vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2280       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2281 			      build1 (ADDR_EXPR,
2282 				      build_pointer_type (TREE_TYPE
2283 							  (kern_name)),
2284 				      kern_name));
2285       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2286       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2287 			      gridified_kernel_p_tree);
2288       CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2289 
2290       if (count > 0)
2291 	{
2292 	  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2293 	  gcc_checking_assert (kernel_dependencies_vector_type);
2294 	  tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2295 					       get_identifier (tmp_name),
2296 					       kernel_dependencies_vector_type);
2297 
2298 	  TREE_STATIC (dependencies_list) = 1;
2299 	  TREE_READONLY (dependencies_list) = 1;
2300 	  TREE_PUBLIC (dependencies_list) = 0;
2301 	  DECL_ARTIFICIAL (dependencies_list) = 1;
2302 	  DECL_IGNORED_P (dependencies_list) = 1;
2303 	  DECL_EXTERNAL (dependencies_list) = 0;
2304 	  TREE_CONSTANT (dependencies_list) = 1;
2305 	  DECL_INITIAL (dependencies_list)
2306 	    = build_constructor (kernel_dependencies_vector_type,
2307 				 kernel_dependencies_vec);
2308 	  varpool_node::finalize_decl (dependencies_list);
2309 
2310 	  CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2311 				  build1 (ADDR_EXPR,
2312 					  build_pointer_type
2313 					    (TREE_TYPE (dependencies_list)),
2314 					  dependencies_list));
2315 	}
2316       else
2317 	CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2318 
2319       tree kernel_info_ctor = build_constructor (kernel_info_type,
2320 						 kernel_info_vec);
2321 
2322       CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2323 			      kernel_info_ctor);
2324     }
2325 
2326   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2327   tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2328 				 get_identifier (tmp_name),
2329 				 kernel_info_vector_type);
2330 
2331   TREE_STATIC (hsa_kernels) = 1;
2332   TREE_READONLY (hsa_kernels) = 1;
2333   TREE_PUBLIC (hsa_kernels) = 0;
2334   DECL_ARTIFICIAL (hsa_kernels) = 1;
2335   DECL_IGNORED_P (hsa_kernels) = 1;
2336   DECL_EXTERNAL (hsa_kernels) = 0;
2337   TREE_CONSTANT (hsa_kernels) = 1;
2338   DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2339 						  kernel_info_vector_vec);
2340   varpool_node::finalize_decl (hsa_kernels);
2341   *kernels = hsa_kernels;
2342 }
2343 
2344 /* Create a static constructor that will register out brig stuff with
2345    libgomp.  */
2346 
2347 static void
2348 hsa_output_libgomp_mapping (tree brig_decl)
2349 {
2350   unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2351   unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2352 
2353   tree kernels;
2354   tree host_func_table;
2355 
2356   hsa_output_kernels (&host_func_table, &kernels);
2357   tree global_vars = hsa_output_global_variables ();
2358 
2359   tree hsa_image_desc_type = make_node (RECORD_TYPE);
2360   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2361 			   get_identifier ("brig_module"), ptr_type_node);
2362   DECL_CHAIN (id_f1) = NULL_TREE;
2363   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2364 			   get_identifier ("kernel_count"),
2365 			   unsigned_type_node);
2366 
2367   DECL_CHAIN (id_f2) = id_f1;
2368   tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2369 			   get_identifier ("hsa_kernel_infos"),
2370 			   ptr_type_node);
2371   DECL_CHAIN (id_f3) = id_f2;
2372   tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2373 			   get_identifier ("global_variable_count"),
2374 			   unsigned_type_node);
2375   DECL_CHAIN (id_f4) = id_f3;
2376   tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2377 			   get_identifier ("hsa_global_variable_infos"),
2378 			   ptr_type_node);
2379   DECL_CHAIN (id_f5) = id_f4;
2380   finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2381 			 NULL_TREE);
2382   TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2383 
2384   vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2385   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2386 			  build_fold_addr_expr (brig_decl));
2387   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2388 			  build_int_cstu (unsigned_type_node, kernel_count));
2389   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2390 			  build1 (ADDR_EXPR,
2391 				  build_pointer_type (TREE_TYPE (kernels)),
2392 				  kernels));
2393   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2394 			  build_int_cstu (unsigned_type_node,
2395 					  global_variable_count));
2396   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2397 			  build1 (ADDR_EXPR,
2398 				  build_pointer_type (TREE_TYPE (global_vars)),
2399 				  global_vars));
2400 
2401   tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2402 
2403   char tmp_name[64];
2404   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2405   tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2406 					get_identifier (tmp_name),
2407 					hsa_image_desc_type);
2408   TREE_STATIC (hsa_img_descriptor) = 1;
2409   TREE_READONLY (hsa_img_descriptor) = 1;
2410   TREE_PUBLIC (hsa_img_descriptor) = 0;
2411   DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2412   DECL_IGNORED_P (hsa_img_descriptor) = 1;
2413   DECL_EXTERNAL (hsa_img_descriptor) = 0;
2414   TREE_CONSTANT (hsa_img_descriptor) = 1;
2415   DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2416   varpool_node::finalize_decl (hsa_img_descriptor);
2417 
2418   /* Construct the "host_table" libgomp expects.  */
2419   tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2420   tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2421   TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2422   vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2423   tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2424   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2425 			  host_func_table_addr);
2426   offset_int func_table_size
2427     = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2428   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2429 			  fold_build2 (POINTER_PLUS_EXPR,
2430 				       TREE_TYPE (host_func_table_addr),
2431 				       host_func_table_addr,
2432 				       build_int_cst (size_type_node,
2433 						      func_table_size.to_uhwi
2434 						      ())));
2435   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2436   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2437   tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2438 						    libgomp_host_table_vec);
2439   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2440   tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2441 					    get_identifier (tmp_name),
2442 					    libgomp_host_table_type);
2443 
2444   TREE_STATIC (hsa_libgomp_host_table) = 1;
2445   TREE_READONLY (hsa_libgomp_host_table) = 1;
2446   TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2447   DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2448   DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2449   DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2450   TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2451   DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2452   varpool_node::finalize_decl (hsa_libgomp_host_table);
2453 
2454   /* Generate an initializer with a call to the registration routine.  */
2455 
2456   tree offload_register
2457     = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2458   gcc_checking_assert (offload_register);
2459 
2460   tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2461   append_to_statement_list
2462     (build_call_expr (offload_register, 4,
2463 		      build_int_cstu (unsigned_type_node,
2464 				      GOMP_VERSION_PACK (GOMP_VERSION,
2465 							 GOMP_VERSION_HSA)),
2466 		      build_fold_addr_expr (hsa_libgomp_host_table),
2467 		      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2468 		      build_fold_addr_expr (hsa_img_descriptor)),
2469      hsa_ctor_stmts);
2470 
2471   cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2472 
2473   tree offload_unregister
2474     = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2475   gcc_checking_assert (offload_unregister);
2476 
2477   tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2478   append_to_statement_list
2479     (build_call_expr (offload_unregister, 4,
2480 		      build_int_cstu (unsigned_type_node,
2481 				      GOMP_VERSION_PACK (GOMP_VERSION,
2482 							 GOMP_VERSION_HSA)),
2483 		      build_fold_addr_expr (hsa_libgomp_host_table),
2484 		      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2485 		      build_fold_addr_expr (hsa_img_descriptor)),
2486      hsa_dtor_stmts);
2487   cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2488 }
2489 
2490 /* Emit the brig module we have compiled to a section in the final assembly and
2491    also create a compile unit static constructor that will register the brig
2492    module with libgomp.  */
2493 
2494 void
2495 hsa_output_brig (void)
2496 {
2497   section *saved_section;
2498 
2499   if (!brig_initialized)
2500     return;
2501 
2502   for (unsigned i = 0; i < function_call_linkage.length (); i++)
2503     {
2504       function_linkage_pair p = function_call_linkage[i];
2505 
2506       BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2507       gcc_assert (*func_offset);
2508       BrigOperandCodeRef *code_ref
2509 	= (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2510       gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2511       code_ref->ref = lendian32 (*func_offset);
2512     }
2513 
2514   /* Iterate all function declarations and if we meet a function that should
2515      have module linkage and we are unable to emit HSAIL for the function,
2516      then change the linkage to program linkage.  Doing so, we will emit
2517      a valid BRIG image.  */
2518   if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2519     for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2520 	 = emitted_declarations->begin ();
2521 	 it != emitted_declarations->end ();
2522 	 ++it)
2523       {
2524 	if (hsa_failed_functions->contains ((*it).first))
2525 	  (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2526       }
2527 
2528   saved_section = in_section;
2529 
2530   switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2531   char tmp_name[64];
2532   ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2533   ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2534   tree brig_id = get_identifier (tmp_name);
2535   tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2536 			       char_type_node);
2537   SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2538   TREE_ADDRESSABLE (brig_decl) = 1;
2539   TREE_READONLY (brig_decl) = 1;
2540   DECL_ARTIFICIAL (brig_decl) = 1;
2541   DECL_IGNORED_P (brig_decl) = 1;
2542   TREE_STATIC (brig_decl) = 1;
2543   TREE_PUBLIC (brig_decl) = 0;
2544   TREE_USED (brig_decl) = 1;
2545   DECL_INITIAL (brig_decl) = brig_decl;
2546   TREE_ASM_WRITTEN (brig_decl) = 1;
2547 
2548   BrigModuleHeader module_header;
2549   memcpy (&module_header.identification, "HSA BRIG",
2550 	  sizeof (module_header.identification));
2551   module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2552   module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2553   uint64_t section_index[3];
2554 
2555   int data_padding, code_padding, operand_padding;
2556   data_padding = HSA_SECTION_ALIGNMENT
2557     - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2558   code_padding = HSA_SECTION_ALIGNMENT
2559     - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2560   operand_padding = HSA_SECTION_ALIGNMENT
2561     - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2562 
2563   uint64_t module_size = sizeof (module_header)
2564     + sizeof (section_index)
2565     + brig_data.total_size
2566     + data_padding
2567     + brig_code.total_size
2568     + code_padding
2569     + brig_operand.total_size
2570     + operand_padding;
2571   gcc_assert ((module_size % 16) == 0);
2572   module_header.byteCount = lendian64 (module_size);
2573   memset (&module_header.hash, 0, sizeof (module_header.hash));
2574   module_header.reserved = 0;
2575   module_header.sectionCount = lendian32 (3);
2576   module_header.sectionIndex = lendian64 (sizeof (module_header));
2577   assemble_string ((const char *) &module_header, sizeof (module_header));
2578   uint64_t off = sizeof (module_header) + sizeof (section_index);
2579   section_index[0] = lendian64 (off);
2580   off += brig_data.total_size + data_padding;
2581   section_index[1] = lendian64 (off);
2582   off += brig_code.total_size + code_padding;
2583   section_index[2] = lendian64 (off);
2584   assemble_string ((const char *) &section_index, sizeof (section_index));
2585 
2586   char padding[HSA_SECTION_ALIGNMENT];
2587   memset (padding, 0, sizeof (padding));
2588 
2589   brig_data.output ();
2590   assemble_string (padding, data_padding);
2591   brig_code.output ();
2592   assemble_string (padding, code_padding);
2593   brig_operand.output ();
2594   assemble_string (padding, operand_padding);
2595 
2596   if (saved_section)
2597     switch_to_section (saved_section);
2598 
2599   hsa_output_libgomp_mapping (brig_decl);
2600 
2601   hsa_free_decl_kernel_mapping ();
2602   brig_release_data ();
2603   hsa_deinit_compilation_unit_data ();
2604 
2605   delete emitted_declarations;
2606   emitted_declarations = NULL;
2607   delete function_offsets;
2608   function_offsets = NULL;
2609 }
2610