xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/gcn/gcn.c (revision a04395531661c5e8d314125d5ae77d4cbedd5d73)
1 /* Copyright (C) 2016-2019 Free Software Foundation, Inc.
2 
3    This file is free software; you can redistribute it and/or modify it under
4    the terms of the GNU General Public License as published by the Free
5    Software Foundation; either version 3 of the License, or (at your option)
6    any later version.
7 
8    This file is distributed in the hope that it will be useful, but WITHOUT
9    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11    for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with GCC; see the file COPYING3.  If not see
15    <http://www.gnu.org/licenses/>.  */
16 
17 /* {{{ Includes.  */
18 
19 /* We want GET_MODE_SIZE et al to return integers, please.  */
20 #define IN_TARGET_CODE 1
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "memmodel.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "diagnostic-core.h"
38 #include "insn-attr.h"
39 #include "fold-const.h"
40 #include "calls.h"
41 #include "explow.h"
42 #include "expr.h"
43 #include "output.h"
44 #include "cfgrtl.h"
45 #include "langhooks.h"
46 #include "builtins.h"
47 #include "omp-general.h"
48 #include "print-rtl.h"
49 #include "attribs.h"
50 #include "varasm.h"
51 #include "intl.h"
52 #include "rtl-iter.h"
53 
54 /* This file should be included last.  */
55 #include "target-def.h"
56 
57 /* }}}  */
58 /* {{{ Global variables.  */
59 
60 /* Constants used by FP instructions.  */
61 
62 static REAL_VALUE_TYPE dconst4, dconst1over2pi;
63 static bool ext_gcn_constants_init = 0;
64 
65 /* Holds the ISA variant, derived from the command line parameters.  */
66 
67 int gcn_isa = 3;		/* Default to GCN3.  */
68 
69 /* Reserve this much space for LDS (for propagating variables from
70    worker-single mode to worker-partitioned mode), per workgroup.  Global
71    analysis could calculate an exact bound, but we don't do that yet.
72 
73    We reserve the whole LDS, which also prevents any other workgroup
74    sharing the Compute Unit.  */
75 
76 #define LDS_SIZE 65536
77 
78 /* }}}  */
79 /* {{{ Initialization and options.  */
80 
81 /* Initialize machine_function.  */
82 
83 static struct machine_function *
84 gcn_init_machine_status (void)
85 {
86   struct machine_function *f;
87 
88   f = ggc_cleared_alloc<machine_function> ();
89 
90   /* Set up LDS allocation for broadcasting for this function.  */
91   f->lds_allocated = 32;
92   f->lds_allocs = hash_map<tree, int>::create_ggc (64);
93 
94   /* And LDS temporary decls for worker reductions.  */
95   vec_alloc (f->reduc_decls, 0);
96 
97   if (TARGET_GCN3)
98     f->use_flat_addressing = true;
99 
100   return f;
101 }
102 
103 /* Implement TARGET_OPTION_OVERRIDE.
104 
105    Override option settings where defaults are variable, or we have specific
106    needs to consider.  */
107 
108 static void
109 gcn_option_override (void)
110 {
111   init_machine_status = gcn_init_machine_status;
112 
113   /* The HSA runtime does not respect ELF load addresses, so force PIE.  */
114   if (!flag_pie)
115     flag_pie = 2;
116   if (!flag_pic)
117     flag_pic = flag_pie;
118 
119   gcn_isa = gcn_arch == PROCESSOR_VEGA ? 5 : 3;
120 
121   /* The default stack size needs to be small for offload kernels because
122      there may be many, many threads.  Also, a smaller stack gives a
123      measurable performance boost.  But, a small stack is insufficient for
124      running the testsuite, so we use a larger default for the stand-alone
125      case.  */
126   if (stack_size_opt == -1)
127     {
128       if (flag_openacc || flag_openmp)
129 	/* 512 bytes per work item = 32kB total.  */
130 	stack_size_opt = 512 * 64;
131       else
132 	/* 1MB total.  */
133 	stack_size_opt = 1048576;
134     }
135 }
136 
137 /* }}}  */
138 /* {{{ Attributes.  */
139 
140 /* This table defines the arguments that are permitted in
141    __attribute__ ((amdgpu_hsa_kernel (...))).
142 
143    The names and values correspond to the HSA metadata that is encoded
144    into the assembler file and binary.  */
145 
146 static const struct gcn_kernel_arg_type
147 {
148   const char *name;
149   const char *header_pseudo;
150   machine_mode mode;
151 
152   /* This should be set to -1 or -2 for a dynamically allocated register
153      number.  Use -1 if this argument contributes to the user_sgpr_count,
154      -2 otherwise.  */
155   int fixed_regno;
156 } gcn_kernel_arg_types[] = {
157   {"exec", NULL, DImode, EXEC_REG},
158 #define PRIVATE_SEGMENT_BUFFER_ARG 1
159   {"private_segment_buffer",
160     "enable_sgpr_private_segment_buffer", TImode, -1},
161 #define DISPATCH_PTR_ARG 2
162   {"dispatch_ptr", "enable_sgpr_dispatch_ptr", DImode, -1},
163 #define QUEUE_PTR_ARG 3
164   {"queue_ptr", "enable_sgpr_queue_ptr", DImode, -1},
165 #define KERNARG_SEGMENT_PTR_ARG 4
166   {"kernarg_segment_ptr", "enable_sgpr_kernarg_segment_ptr", DImode, -1},
167   {"dispatch_id", "enable_sgpr_dispatch_id", DImode, -1},
168 #define FLAT_SCRATCH_INIT_ARG 6
169   {"flat_scratch_init", "enable_sgpr_flat_scratch_init", DImode, -1},
170 #define FLAT_SCRATCH_SEGMENT_SIZE_ARG 7
171   {"private_segment_size", "enable_sgpr_private_segment_size", SImode, -1},
172   {"grid_workgroup_count_X",
173     "enable_sgpr_grid_workgroup_count_x", SImode, -1},
174   {"grid_workgroup_count_Y",
175     "enable_sgpr_grid_workgroup_count_y", SImode, -1},
176   {"grid_workgroup_count_Z",
177     "enable_sgpr_grid_workgroup_count_z", SImode, -1},
178 #define WORKGROUP_ID_X_ARG 11
179   {"workgroup_id_X", "enable_sgpr_workgroup_id_x", SImode, -2},
180   {"workgroup_id_Y", "enable_sgpr_workgroup_id_y", SImode, -2},
181   {"workgroup_id_Z", "enable_sgpr_workgroup_id_z", SImode, -2},
182   {"workgroup_info", "enable_sgpr_workgroup_info", SImode, -1},
183 #define PRIVATE_SEGMENT_WAVE_OFFSET_ARG 15
184   {"private_segment_wave_offset",
185     "enable_sgpr_private_segment_wave_byte_offset", SImode, -2},
186 #define WORK_ITEM_ID_X_ARG 16
187   {"work_item_id_X", NULL, V64SImode, FIRST_VGPR_REG},
188 #define WORK_ITEM_ID_Y_ARG 17
189   {"work_item_id_Y", NULL, V64SImode, FIRST_VGPR_REG + 1},
190 #define WORK_ITEM_ID_Z_ARG 18
191   {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
192 };
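
/* As an illustrative sketch (the function name and argument selection here
   are hypothetical, not taken from this file), a kernel entry point could be
   declared as:

     void my_kern (void)
       __attribute__ ((amdgpu_hsa_kernel ("dispatch_ptr", "queue_ptr")));

   where each string names an entry of gcn_kernel_arg_types above.  The
   parser below adds further arguments that are always required.  */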
193 
194 /* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
195    This function also sets the default values for some arguments.
196 
197    Return true on error; on success ARGS has been populated.  */
198 
199 static bool
200 gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
201 				       tree list)
202 {
203   bool err = false;
204   args->requested = ((1 << PRIVATE_SEGMENT_BUFFER_ARG)
205 		     | (1 << QUEUE_PTR_ARG)
206 		     | (1 << KERNARG_SEGMENT_PTR_ARG)
207 		     | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG));
208   args->nargs = 0;
209 
210   for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
211     args->reg[a] = -1;
212 
213   for (; list; list = TREE_CHAIN (list))
214     {
215       const char *str;
216       if (TREE_CODE (TREE_VALUE (list)) != STRING_CST)
217 	{
218 	  error ("amdgpu_hsa_kernel attribute requires string constant "
219 		 "arguments");
220 	  break;
221 	}
222       str = TREE_STRING_POINTER (TREE_VALUE (list));
223       int a;
224       for (a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
225 	{
226 	  if (!strcmp (str, gcn_kernel_arg_types[a].name))
227 	    break;
228 	}
229       if (a == GCN_KERNEL_ARG_TYPES)
230 	{
231 	  error ("unknown specifier %s in amdgpu_hsa_kernel attribute", str);
232 	  err = true;
233 	  break;
234 	}
235       if (args->requested & (1 << a))
236 	{
237 	  error ("duplicated parameter specifier %s in amdgpu_hsa_kernel "
238 		 "attribute", str);
239 	  err = true;
240 	  break;
241 	}
242       args->requested |= (1 << a);
243       args->order[args->nargs++] = a;
244     }
245   args->requested |= (1 << WORKGROUP_ID_X_ARG);
246   args->requested |= (1 << WORK_ITEM_ID_Z_ARG);
247 
248   /* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
249      WORK_ITEM_ID_Y_ARG.  Similarly, requesting WORK_ITEM_ID_Y_ARG implies
250      requesting WORK_ITEM_ID_X_ARG.  */
251   if (args->requested & (1 << WORK_ITEM_ID_Z_ARG))
252     args->requested |= (1 << WORK_ITEM_ID_Y_ARG);
253   if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
254     args->requested |= (1 << WORK_ITEM_ID_X_ARG);
255 
256   /* Always enable this so that kernargs is in a predictable place for
257      gomp_print, etc.  */
258   args->requested |= (1 << DISPATCH_PTR_ARG);
259 
260   int sgpr_regno = FIRST_SGPR_REG;
261   args->nsgprs = 0;
262   for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
263     {
264       if (!(args->requested & (1 << a)))
265 	continue;
266 
267       if (gcn_kernel_arg_types[a].fixed_regno >= 0)
268 	args->reg[a] = gcn_kernel_arg_types[a].fixed_regno;
269       else
270 	{
271 	  int reg_count;
272 
273 	  switch (gcn_kernel_arg_types[a].mode)
274 	    {
275 	    case E_SImode:
276 	      reg_count = 1;
277 	      break;
278 	    case E_DImode:
279 	      reg_count = 2;
280 	      break;
281 	    case E_TImode:
282 	      reg_count = 4;
283 	      break;
284 	    default:
285 	      gcc_unreachable ();
286 	    }
287 	  args->reg[a] = sgpr_regno;
288 	  sgpr_regno += reg_count;
289 	  if (gcn_kernel_arg_types[a].fixed_regno == -1)
290 	    args->nsgprs += reg_count;
291 	}
292     }
293   if (sgpr_regno > FIRST_SGPR_REG + 16)
294     {
295       error ("too many arguments passed in sgpr registers");
296     }
297   return err;
298 }
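
/* For the default argument set requested above, the assignment loop yields
   (derived from the table and code here, using the usual sN/vN register
   naming purely for illustration): private_segment_buffer in s[0:3],
   dispatch_ptr in s[4:5], queue_ptr in s[6:7], kernarg_segment_ptr in
   s[8:9], workgroup_id_X in s10, private_segment_wave_offset in s11, and
   the work-item IDs in v0-v2.  */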
299 
300 /* Referenced by TARGET_ATTRIBUTE_TABLE.
301 
302    Validates target-specific attributes.  */
303 
304 static tree
305 gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name,
306 					tree args, int, bool *no_add_attrs)
307 {
308   if (!FUNC_OR_METHOD_TYPE_P (*node)
309       && TREE_CODE (*node) != FIELD_DECL
310       && TREE_CODE (*node) != TYPE_DECL)
311     {
312       warning (OPT_Wattributes, "%qE attribute only applies to functions",
313 	       name);
314       *no_add_attrs = true;
315       return NULL_TREE;
316     }
317 
318   /* Can combine regparm with all attributes but fastcall, and thiscall.  */
319   if (is_attribute_p ("gcnhsa_kernel", name))
320     {
321       struct gcn_kernel_args kernelarg;
322 
323       if (gcn_parse_amdgpu_hsa_kernel_attribute (&kernelarg, args))
324 	*no_add_attrs = true;
325 
326       return NULL_TREE;
327     }
328 
329   return NULL_TREE;
330 }
331 
332 /* Implement TARGET_ATTRIBUTE_TABLE.
333 
334    Create target-specific __attribute__ types.  */
335 
336 static const struct attribute_spec gcn_attribute_table[] = {
337   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
338      affects_type_identity } */
339   {"amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true,
340    true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL},
341   /* End element.  */
342   {NULL, 0, 0, false, false, false, false, NULL, NULL}
343 };
344 
345 /* }}}  */
346 /* {{{ Registers and modes.  */
347 
348 /* Implement TARGET_CLASS_MAX_NREGS.
349 
350    Return the number of hard registers needed to hold a value of MODE in
351    a register of class RCLASS.  */
352 
353 static unsigned char
354 gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
355 {
356   /* Scalar registers are 32bit, vector registers are in fact tuples of
357      64 lanes.  */
358   if (rclass == VGPR_REGS)
359     {
360       if (vgpr_1reg_mode_p (mode))
361 	return 1;
362       if (vgpr_2reg_mode_p (mode))
363 	return 2;
364       /* TImode is used by DImode compare_and_swap.  */
365       if (mode == TImode)
366 	return 4;
367     }
368   else if (rclass == VCC_CONDITIONAL_REG && mode == BImode)
369     return 2;
370   return CEIL (GET_MODE_SIZE (mode), 4);
371 }
372 
373 /* Implement TARGET_HARD_REGNO_NREGS.
374 
375    Return the number of hard registers needed to hold a value of MODE in
376    REGNO.  */
377 
378 unsigned int
379 gcn_hard_regno_nregs (unsigned int regno, machine_mode mode)
380 {
381   return gcn_class_max_nregs (REGNO_REG_CLASS (regno), mode);
382 }
383 
384 /* Implement TARGET_HARD_REGNO_MODE_OK.
385 
386    Return true if REGNO can hold value in MODE.  */
387 
388 bool
389 gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
390 {
391   /* Treat a complex mode as if it were a scalar mode of the same overall
392      size for the purposes of allocating hard registers.  */
393   if (COMPLEX_MODE_P (mode))
394     switch (mode)
395       {
396       case E_CQImode:
397       case E_CHImode:
398 	mode = SImode;
399 	break;
400       case E_CSImode:
401 	mode = DImode;
402 	break;
403       case E_CDImode:
404 	mode = TImode;
405 	break;
406       case E_HCmode:
407 	mode = SFmode;
408 	break;
409       case E_SCmode:
410 	mode = DFmode;
411 	break;
412       default:
413 	/* Not supported.  */
414 	return false;
415       }
416 
417   switch (regno)
418     {
419     case FLAT_SCRATCH_LO_REG:
420     case XNACK_MASK_LO_REG:
421     case TBA_LO_REG:
422     case TMA_LO_REG:
423       return (mode == SImode || mode == DImode);
424     case VCC_LO_REG:
425     case EXEC_LO_REG:
426       return (mode == BImode || mode == SImode || mode == DImode);
427     case M0_REG:
428     case FLAT_SCRATCH_HI_REG:
429     case XNACK_MASK_HI_REG:
430     case TBA_HI_REG:
431     case TMA_HI_REG:
432       return mode == SImode;
433     case VCC_HI_REG:
434       return false;
435     case EXEC_HI_REG:
436       return mode == SImode /*|| mode == V32BImode */ ;
437     case SCC_REG:
438     case VCCZ_REG:
439     case EXECZ_REG:
440       return mode == BImode;
441     }
442   if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
443     return true;
444   if (SGPR_REGNO_P (regno))
445     /* We restrict double register values to aligned registers.  */
446     return (sgpr_1reg_mode_p (mode)
447 	    || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
448 	    || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
449   if (VGPR_REGNO_P (regno))
450     return (vgpr_1reg_mode_p (mode) || vgpr_2reg_mode_p (mode)
451 	    /* TImode is used by DImode compare_and_swap.  */
452 	    || mode == TImode);
453   return false;
454 }
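
/* For example, assuming the first SGPR is s0, a DImode value may occupy
   s[0:1] or s[2:3] but not s[1:2], because two-register modes must start on
   an even SGPR, and TImode additionally requires a multiple-of-four start.  */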
455 
456 /* Implement REGNO_REG_CLASS via gcn.h.
457 
458    Return smallest class containing REGNO.  */
459 
460 enum reg_class
461 gcn_regno_reg_class (int regno)
462 {
463   switch (regno)
464     {
465     case SCC_REG:
466       return SCC_CONDITIONAL_REG;
467     case VCCZ_REG:
468       return VCCZ_CONDITIONAL_REG;
469     case EXECZ_REG:
470       return EXECZ_CONDITIONAL_REG;
471     case EXEC_LO_REG:
472     case EXEC_HI_REG:
473       return EXEC_MASK_REG;
474     }
475   if (VGPR_REGNO_P (regno))
476     return VGPR_REGS;
477   if (SGPR_REGNO_P (regno))
478     return SGPR_REGS;
479   if (regno < FIRST_VGPR_REG)
480     return GENERAL_REGS;
481   if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
482     return AFP_REGS;
483   return ALL_REGS;
484 }
485 
486 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
487 
488    GCC assumes that the lowpart contains the first part of the value as
489    stored in memory.  This is not the case for vector registers.  */
490 
491 bool
492 gcn_can_change_mode_class (machine_mode from, machine_mode to,
493 			   reg_class_t regclass)
494 {
495   if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to))
496     return true;
497   return (gcn_class_max_nregs (regclass, from)
498 	  == gcn_class_max_nregs (regclass, to));
499 }
500 
501 /* Implement TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P.
502 
503    When this hook returns true for MODE, the compiler allows
504    registers explicitly used in the rtl to be used as spill registers
505    but prevents the compiler from extending the lifetime of these
506    registers.  */
507 
508 bool
509 gcn_small_register_classes_for_mode_p (machine_mode mode)
510 {
511   /* We allocate into exec and vcc regs.  Those form a small register class.  */
512   return mode == DImode || mode == SImode;
513 }
514 
515 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
516 
517    Returns true if pseudos that have been assigned to registers of class RCLASS
518    would likely be spilled because registers of RCLASS are needed for spill
519    registers.  */
520 
521 static bool
522 gcn_class_likely_spilled_p (reg_class_t rclass)
523 {
524   return (rclass == EXEC_MASK_REG
525 	  || reg_classes_intersect_p (ALL_CONDITIONAL_REGS, rclass));
526 }
527 
528 /* Implement TARGET_MODES_TIEABLE_P.
529 
530    Returns true if a value of MODE1 is accessible in MODE2 without
531    copying.  */
532 
533 bool
534 gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2)
535 {
536   return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
537 	  && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
538 }
539 
540 /* Implement TARGET_TRULY_NOOP_TRUNCATION.
541 
542    Returns true if it is safe to “convert” a value of INPREC bits to one of
543    OUTPREC bits (where OUTPREC is smaller than INPREC) by merely operating on
544    it as if it had only OUTPREC bits.  */
545 
546 bool
547 gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
548 {
549   return ((inprec <= 32) && (outprec <= inprec));
550 }
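
/* For instance, truncating SImode to QImode is a no-op here, as both live in
   the low bits of a single 32-bit register, whereas DImode to SImode is not
   accepted, presumably because the wider value spans a register pair.  */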
551 
552 /* Return N-th part of value occupying multiple registers.  */
553 
554 rtx
555 gcn_operand_part (machine_mode mode, rtx op, int n)
556 {
557   if (GET_MODE_SIZE (mode) >= 256)
558     {
559       /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0);  */
560 
561       if (REG_P (op))
562 	{
563 	  gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
564 	  return gen_rtx_REG (V64SImode, REGNO (op) + n);
565 	}
566       if (GET_CODE (op) == CONST_VECTOR)
567 	{
568 	  int units = GET_MODE_NUNITS (mode);
569 	  rtvec v = rtvec_alloc (units);
570 
571 	  for (int i = 0; i < units; ++i)
572 	    RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode),
573 						 CONST_VECTOR_ELT (op, i), n);
574 
575 	  return gen_rtx_CONST_VECTOR (V64SImode, v);
576 	}
577       if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
578 	return gcn_gen_undef (V64SImode);
579       gcc_unreachable ();
580     }
581   else if (GET_MODE_SIZE (mode) == 8 && REG_P (op))
582     {
583       gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
584       return gen_rtx_REG (SImode, REGNO (op) + n);
585     }
586   else
587     {
588       if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
589 	return gcn_gen_undef (SImode);
590 
591       /* If it's a constant then let's assume it is of the largest mode
592 	 available, otherwise simplify_gen_subreg will fail.  */
593       if (mode == VOIDmode && CONST_INT_P (op))
594 	mode = DImode;
595       return simplify_gen_subreg (SImode, op, mode, n * 4);
596     }
597 }
598 
599 /* Return N-th part of value occupying multiple registers.  */
600 
601 rtx
602 gcn_operand_doublepart (machine_mode mode, rtx op, int n)
603 {
604   return simplify_gen_subreg (DImode, op, mode, n * 8);
605 }
606 
607 /* Return true if OP can be split into subregs or high/low parts.
608    This is always true for scalars, but not normally true for vectors.
609    However, for vectors in hardregs we can use the low and high registers.  */
610 
611 bool
612 gcn_can_split_p (machine_mode, rtx op)
613 {
614   if (vgpr_vector_mode_p (GET_MODE (op)))
615     {
616       if (GET_CODE (op) == SUBREG)
617 	op = SUBREG_REG (op);
618       if (!REG_P (op))
619 	return true;
620       return REGNO (op) <= FIRST_PSEUDO_REGISTER;
621     }
622   return true;
623 }
624 
625 /* Implement TARGET_SPILL_CLASS.
626 
627    Return class of registers which could be used for pseudo of MODE
628    and of class RCLASS for spilling instead of memory.  Return NO_REGS
629    if it is not possible or non-profitable.  */
630 
631 static reg_class_t
632 gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
633 {
634   if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c))
635     return SGPR_REGS;
636   else
637     return NO_REGS;
638 }
639 
640 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
641 
642    Change allocno class for given pseudo from allocno and best class
643    calculated by IRA.  */
644 
645 static reg_class_t
646 gcn_ira_change_pseudo_allocno_class (int regno, reg_class_t cl,
647 				     reg_class_t best_cl)
648 {
649   /* Avoid returning classes that contain both vgpr and sgpr registers.  */
650   if (cl != ALL_REGS && cl != SRCDST_REGS && cl != ALL_GPR_REGS)
651     return cl;
652   if (best_cl != ALL_REGS && best_cl != SRCDST_REGS
653       && best_cl != ALL_GPR_REGS)
654     return best_cl;
655 
656   machine_mode mode = PSEUDO_REGNO_MODE (regno);
657   if (vgpr_vector_mode_p (mode))
658     return VGPR_REGS;
659 
660   return GENERAL_REGS;
661 }
662 
663 /* Create a new DImode pseudo reg and emit an instruction to initialize
664    it to VAL.  */
665 
666 static rtx
667 get_exec (int64_t val)
668 {
669   rtx reg = gen_reg_rtx (DImode);
670   emit_insn (gen_rtx_SET (reg, gen_int_mode (val, DImode)));
671   return reg;
672 }
673 
674 /* Return value of scalar exec register.  */
675 
676 rtx
677 gcn_scalar_exec ()
678 {
679   return const1_rtx;
680 }
681 
682 /* Return pseudo holding scalar exec register.  */
683 
684 rtx
685 gcn_scalar_exec_reg ()
686 {
687   return get_exec (1);
688 }
689 
690 /* Return value of full exec register.  */
691 
692 rtx
693 gcn_full_exec ()
694 {
695   return constm1_rtx;
696 }
697 
698 /* Return pseudo holding full exec register.  */
699 
700 rtx
701 gcn_full_exec_reg ()
702 {
703   return get_exec (-1);
704 }
705 
706 /* }}}  */
707 /* {{{ Immediate constants.  */
708 
709 /* Initialize shared numeric constants.  */
710 
711 static void
712 init_ext_gcn_constants (void)
713 {
714   real_from_integer (&dconst4, DFmode, 4, SIGNED);
715 
716   /* FIXME: this constant probably does not match what hardware really loads.
717      Reality check it eventually.  */
718   real_from_string (&dconst1over2pi,
719 		    "0.1591549430918953357663423455968866839");
720   real_convert (&dconst1over2pi, SFmode, &dconst1over2pi);
721 
722   ext_gcn_constants_init = 1;
723 }
724 
725 /* Return non-zero if X is a constant that can appear as an inline operand.
726    This is 0, 0.5, -0.5, 1, -1, 2, -2, 4, -4, or 1/(2*pi),
727    or a vector of those.
728    The value returned should be the encoding of this constant.  */
729 
730 int
731 gcn_inline_fp_constant_p (rtx x, bool allow_vector)
732 {
733   machine_mode mode = GET_MODE (x);
734 
735   if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
736       && allow_vector)
737     {
738       int n;
739       if (GET_CODE (x) != CONST_VECTOR)
740 	return 0;
741       n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
742       if (!n)
743 	return 0;
744       for (int i = 1; i < 64; i++)
745 	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
746 	  return 0;
747       return 1;
748     }
749 
750   if (mode != HFmode && mode != SFmode && mode != DFmode)
751     return 0;
752 
753   const REAL_VALUE_TYPE *r;
754 
755   if (x == CONST0_RTX (mode))
756     return 128;
757   if (x == CONST1_RTX (mode))
758     return 242;
759 
760   r = CONST_DOUBLE_REAL_VALUE (x);
761 
762   if (real_identical (r, &dconstm1))
763     return 243;
764 
765   if (real_identical (r, &dconsthalf))
766     return 240;
767   if (real_identical (r, &dconst2))
768     return 244;
769   if (!ext_gcn_constants_init)
770     init_ext_gcn_constants ();
771   if (real_identical (r, &dconst4))
772     return 246;
773   if (real_identical (r, &dconst1over2pi))
774     return 248;
775   /* real_value_negate returns the negated value rather than modifying its
776      argument in place, so compare against the returned copy.  */
777   REAL_VALUE_TYPE rneg = real_value_negate (r);
778   if (real_identical (&rneg, &dconsthalf))
779     return 241;
780   if (real_identical (&rneg, &dconst2))
781     return 245;
782   if (real_identical (&rneg, &dconst4))
783     return 247;
784 
785   /* FIXME: add 4, -4 and 1/(2*PI).  */
786 
787   return 0;
788 }
789 
790 /* Return true if X is a constant that can appear as an immediate operand.
791    This includes the inline constants 0, 0.5, -0.5, 1, -1, 2, -2, 4, -4,
792    1/(2*pi), or a vector of those; HFmode and SFmode constants are also
793    accepted as 32-bit immediates (see the FIXME below).  */
794 
795 bool
796 gcn_fp_constant_p (rtx x, bool allow_vector)
797 {
798   machine_mode mode = GET_MODE (x);
799 
800   if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
801       && allow_vector)
802     {
803       int n;
804       if (GET_CODE (x) != CONST_VECTOR)
805 	return false;
806       n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
807       if (!n)
808 	return false;
809       for (int i = 1; i < 64; i++)
810 	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
811 	  return false;
812       return true;
813     }
814   if (mode != HFmode && mode != SFmode && mode != DFmode)
815     return false;
816 
817   if (gcn_inline_fp_constant_p (x, false))
818     return true;
819   /* FIXME: It is not clear how 32bit immediates are interpreted here.  */
820   return (mode != DFmode);
821 }
822 
823 /* Return true if X is a constant representable as an inline immediate
824    constant in a 32-bit instruction encoding.  */
825 
826 bool
827 gcn_inline_constant_p (rtx x)
828 {
829   if (GET_CODE (x) == CONST_INT)
830     return INTVAL (x) >= -16 && INTVAL (x) < 64;
831   if (GET_CODE (x) == CONST_DOUBLE)
832     return gcn_inline_fp_constant_p (x, false);
833   if (GET_CODE (x) == CONST_VECTOR)
834     {
835       int n;
836       if (!vgpr_vector_mode_p (GET_MODE (x)))
837 	return false;
838       n = gcn_inline_constant_p (CONST_VECTOR_ELT (x, 0));
839       if (!n)
840 	return false;
841       for (int i = 1; i < 64; i++)
842 	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
843 	  return false;
844       return 1;
845     }
846   return false;
847 }
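
/* For example, (const_int 63) and (const_int -16) satisfy the test above,
   while (const_int 64) and (const_int -17) do not and would need the full
   32-bit immediate encoding.  */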
848 
849 /* Return true if X is a constant representable as an immediate constant
850    in a 32 or 64-bit instruction encoding.  */
851 
852 bool
853 gcn_constant_p (rtx x)
854 {
855   switch (GET_CODE (x))
856     {
857     case CONST_INT:
858       return true;
859 
860     case CONST_DOUBLE:
861       return gcn_fp_constant_p (x, false);
862 
863     case CONST_VECTOR:
864       {
865 	int n;
866 	if (!vgpr_vector_mode_p (GET_MODE (x)))
867 	  return false;
868 	n = gcn_constant_p (CONST_VECTOR_ELT (x, 0));
869 	if (!n)
870 	  return false;
871 	for (int i = 1; i < 64; i++)
872 	  if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
873 	    return false;
874 	return true;
875       }
876 
877     case SYMBOL_REF:
878     case LABEL_REF:
879       return true;
880 
881     default:
882       ;
883     }
884 
885   return false;
886 }
887 
888 /* Return true if X is a constant representable as two inline immediate
889    constants in a 64-bit instruction that is split into two 32-bit
890    instructions.  */
891 
892 bool
893 gcn_inline_constant64_p (rtx x)
894 {
895   if (GET_CODE (x) == CONST_VECTOR)
896     {
897       if (!vgpr_vector_mode_p (GET_MODE (x)))
898 	return false;
899       if (!gcn_inline_constant64_p (CONST_VECTOR_ELT (x, 0)))
900 	return false;
901       for (int i = 1; i < 64; i++)
902 	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
903 	  return false;
904 
905       return true;
906     }
907 
908   if (GET_CODE (x) != CONST_INT)
909     return false;
910 
911   rtx val_lo = gcn_operand_part (DImode, x, 0);
912   rtx val_hi = gcn_operand_part (DImode, x, 1);
913   return gcn_inline_constant_p (val_lo) && gcn_inline_constant_p (val_hi);
914 }
915 
916 /* Return true if X is a constant representable as an immediate constant
917    in a 32 or 64-bit instruction encoding where the hardware will
918    extend the immediate to 64-bits.  */
919 
920 bool
921 gcn_constant64_p (rtx x)
922 {
923   if (!gcn_constant_p (x))
924     return false;
925 
926   if (GET_CODE (x) != CONST_INT)
927     return true;
928 
929   /* Negative numbers are only allowed if they can be encoded within src0,
930      because the 32-bit immediates do not get sign-extended.
931      Unsigned numbers must not be encodable as 32-bit -1..-16, because the
932      assembler will use a src0 inline immediate and that will get
933      sign-extended.  */
934   HOST_WIDE_INT val = INTVAL (x);
935   return (((val & 0xffffffff) == val	/* Positive 32-bit.  */
936 	   && (val & 0xfffffff0) != 0xfffffff0)	/* Not -1..-16.  */
937 	  || gcn_inline_constant_p (x));	/* Src0.  */
938 }
939 
940 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
941 
942    Returns true if X is a legitimate constant for a MODE immediate operand.  */
943 
944 bool
945 gcn_legitimate_constant_p (machine_mode, rtx x)
946 {
947   return gcn_constant_p (x);
948 }
949 
950 /* Return true if X is a CONST_VECTOR duplicating a single constant.  */
951 
952 static bool
953 single_cst_vector_p (rtx x)
954 {
955   if (GET_CODE (x) != CONST_VECTOR)
956     return false;
957   for (int i = 1; i < 64; i++)
958     if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
959       return false;
960   return true;
961 }
962 
963 /* Create a CONST_VECTOR of duplicated value A.  */
964 
965 rtx
966 gcn_vec_constant (machine_mode mode, int a)
967 {
968   /*if (!a)
969     return CONST0_RTX (mode);
970   if (a == -1)
971     return CONSTM1_RTX (mode);
972   if (a == 1)
973     return CONST1_RTX (mode);
974   if (a == 2)
975     return CONST2_RTX (mode);*/
976 
977   int units = GET_MODE_NUNITS (mode);
978   rtx tem = gen_int_mode (a, GET_MODE_INNER (mode));
979   rtvec v = rtvec_alloc (units);
980 
981   for (int i = 0; i < units; ++i)
982     RTVEC_ELT (v, i) = tem;
983 
984   return gen_rtx_CONST_VECTOR (mode, v);
985 }
986 
987 /* Create a CONST_VECTOR of duplicated value A.  */
988 
989 rtx
990 gcn_vec_constant (machine_mode mode, rtx a)
991 {
992   int units = GET_MODE_NUNITS (mode);
993   rtvec v = rtvec_alloc (units);
994 
995   for (int i = 0; i < units; ++i)
996     RTVEC_ELT (v, i) = a;
997 
998   return gen_rtx_CONST_VECTOR (mode, v);
999 }
1000 
1001 /* Create an undefined vector value, used where an insn operand is
1002    optional.  */
1003 
1004 rtx
1005 gcn_gen_undef (machine_mode mode)
1006 {
1007   return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR);
1008 }
1009 
1010 /* }}}  */
1011 /* {{{ Addresses, pointers and moves.  */
1012 
1013 /* Return true if REG is a valid place to store a pointer,
1014    for instructions that require an SGPR.
1015    FIXME rename. */
1016 
1017 static bool
1018 gcn_address_register_p (rtx reg, machine_mode mode, bool strict)
1019 {
1020   if (GET_CODE (reg) == SUBREG)
1021     reg = SUBREG_REG (reg);
1022 
1023   if (!REG_P (reg))
1024     return false;
1025 
1026   if (GET_MODE (reg) != mode)
1027     return false;
1028 
1029   int regno = REGNO (reg);
1030 
1031   if (regno >= FIRST_PSEUDO_REGISTER)
1032     {
1033       if (!strict)
1034 	return true;
1035 
1036       if (!reg_renumber)
1037 	return false;
1038 
1039       regno = reg_renumber[regno];
1040     }
1041 
1042   return (SGPR_REGNO_P (regno) || regno == M0_REG
1043 	  || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1044 }
1045 
1046 /* Return true if REG is a valid place to store a pointer,
1047    for instructions that require a VGPR.  */
1048 
1049 static bool
1050 gcn_vec_address_register_p (rtx reg, machine_mode mode, bool strict)
1051 {
1052   if (GET_CODE (reg) == SUBREG)
1053     reg = SUBREG_REG (reg);
1054 
1055   if (!REG_P (reg))
1056     return false;
1057 
1058   if (GET_MODE (reg) != mode)
1059     return false;
1060 
1061   int regno = REGNO (reg);
1062 
1063   if (regno >= FIRST_PSEUDO_REGISTER)
1064     {
1065       if (!strict)
1066 	return true;
1067 
1068       if (!reg_renumber)
1069 	return false;
1070 
1071       regno = reg_renumber[regno];
1072     }
1073 
1074   return VGPR_REGNO_P (regno);
1075 }
1076 
1077 /* Return true if X would be valid inside a MEM using the Flat address
1078    space.  */
1079 
1080 bool
1081 gcn_flat_address_p (rtx x, machine_mode mode)
1082 {
1083   bool vec_mode = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1084 		   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1085 
1086   if (vec_mode && gcn_address_register_p (x, DImode, false))
1087     return true;
1088 
1089   if (!vec_mode && gcn_vec_address_register_p (x, DImode, false))
1090     return true;
1091 
1092   if (TARGET_GCN5_PLUS
1093       && GET_CODE (x) == PLUS
1094       && gcn_vec_address_register_p (XEXP (x, 0), DImode, false)
1095       && CONST_INT_P (XEXP (x, 1)))
1096     return true;
1097 
1098   return false;
1099 }
1100 
1101 /* Return true if X would be valid inside a MEM using the Scalar Flat
1102    address space.  */
1103 
1104 bool
1105 gcn_scalar_flat_address_p (rtx x)
1106 {
1107   if (gcn_address_register_p (x, DImode, false))
1108     return true;
1109 
1110   if (GET_CODE (x) == PLUS
1111       && gcn_address_register_p (XEXP (x, 0), DImode, false)
1112       && CONST_INT_P (XEXP (x, 1)))
1113     return true;
1114 
1115   return false;
1116 }
1117 
1118 /* Return true if MEM X would be valid for the Scalar Flat address space.  */
1119 
1120 bool
1121 gcn_scalar_flat_mem_p (rtx x)
1122 {
1123   if (!MEM_P (x))
1124     return false;
1125 
1126   if (GET_MODE_SIZE (GET_MODE (x)) < 4)
1127     return false;
1128 
1129   return gcn_scalar_flat_address_p (XEXP (x, 0));
1130 }
1131 
1132 /* Return true if X would be valid inside a MEM using the LDS or GDS
1133    address spaces.  */
1134 
1135 bool
1136 gcn_ds_address_p (rtx x)
1137 {
1138   if (gcn_vec_address_register_p (x, SImode, false))
1139     return true;
1140 
1141   if (GET_CODE (x) == PLUS
1142       && gcn_vec_address_register_p (XEXP (x, 0), SImode, false)
1143       && CONST_INT_P (XEXP (x, 1)))
1144     return true;
1145 
1146   return false;
1147 }
1148 
1149 /* Return true if ADDR would be valid inside a MEM using the Global
1150    address space.  */
1151 
1152 bool
1153 gcn_global_address_p (rtx addr)
1154 {
1155   if (gcn_address_register_p (addr, DImode, false)
1156       || gcn_vec_address_register_p (addr, DImode, false))
1157     return true;
1158 
1159   if (GET_CODE (addr) == PLUS)
1160     {
1161       rtx base = XEXP (addr, 0);
1162       rtx offset = XEXP (addr, 1);
1163       bool immediate_p = (CONST_INT_P (offset)
1164 			  && INTVAL (offset) >= -(1 << 12)
1165 			  && INTVAL (offset) < (1 << 12));
1166 
1167       if ((gcn_address_register_p (base, DImode, false)
1168 	   || gcn_vec_address_register_p (base, DImode, false))
1169 	  && immediate_p)
1170 	/* SGPR + CONST or VGPR + CONST  */
1171 	return true;
1172 
1173       if (gcn_address_register_p (base, DImode, false)
1174 	  && gcn_vgpr_register_operand (offset, SImode))
1175 	/* SPGR + VGPR  */
1176 	return true;
1177 
1178       if (GET_CODE (base) == PLUS
1179 	  && gcn_address_register_p (XEXP (base, 0), DImode, false)
1180 	  && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
1181 	  && immediate_p)
1182 	/* (SGPR + VGPR) + CONST  */
1183 	return true;
1184     }
1185 
1186   return false;
1187 }
1188 
1189 /* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P.
1190 
1191    Recognizes RTL expressions that are valid memory addresses for an
1192    instruction.  The MODE argument is the machine mode for the MEM
1193    expression that wants to use this address.
1194 
1195    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
1196    convert common non-canonical forms to canonical form so that they will
1197    be recognized.  */
1198 
1199 static bool
1200 gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
1201 				     addr_space_t as)
1202 {
1203   /* All vector instructions need to work on addresses in registers.  */
1204   if (!TARGET_GCN5_PLUS && (vgpr_vector_mode_p (mode) && !REG_P (x)))
1205     return false;
1206 
1207   if (AS_SCALAR_FLAT_P (as))
1208     {
1209       if (mode == QImode || mode == HImode)
1210 	return 0;
1211 
1212       switch (GET_CODE (x))
1213 	{
1214 	case REG:
1215 	  return gcn_address_register_p (x, DImode, strict);
1216 	/* Addresses are in the form BASE+OFFSET, where OFFSET is either a
1217 	   20-bit unsigned immediate, an SGPR or M0.  Writes and atomics do
1218 	   not accept an SGPR.  */
1219 	case PLUS:
1220 	  {
1221 	    rtx x0 = XEXP (x, 0);
1222 	    rtx x1 = XEXP (x, 1);
1223 	    if (!gcn_address_register_p (x0, DImode, strict))
1224 	      return false;
1225 	    /* FIXME: This is disabled because of the mode mismatch between
1226 	       SImode (for the address or m0 register) and the DImode PLUS.
1227 	       We'll need a zero_extend or similar.
1228 
1229 	    if (gcn_m0_register_p (x1, SImode, strict)
1230 		|| gcn_address_register_p (x1, SImode, strict))
1231 	      return true;
1232 	    else*/
1233 	    if (GET_CODE (x1) == CONST_INT)
1234 	      {
1235 		if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20)
1236 		    /* The low bits of the offset are ignored, even when
1237 		       they're meant to realign the pointer.  */
1238 		    && !(INTVAL (x1) & 0x3))
1239 		  return true;
1240 	      }
1241 	    return false;
1242 	  }
1243 
1244 	default:
1245 	  break;
1246 	}
1247     }
1248   else if (AS_SCRATCH_P (as))
1249     return gcn_address_register_p (x, SImode, strict);
1250   else if (AS_FLAT_P (as) || AS_FLAT_SCRATCH_P (as))
1251     {
1252       if (TARGET_GCN3 || GET_CODE (x) == REG)
1253        return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1254 		|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1255 	       ? gcn_address_register_p (x, DImode, strict)
1256 	       : gcn_vec_address_register_p (x, DImode, strict));
1257       else
1258 	{
1259 	  gcc_assert (TARGET_GCN5_PLUS);
1260 
1261 	  if (GET_CODE (x) == PLUS)
1262 	    {
1263 	      rtx x1 = XEXP (x, 1);
1264 
1265 	      if (VECTOR_MODE_P (mode)
1266 		  ? !gcn_address_register_p (x, DImode, strict)
1267 		  : !gcn_vec_address_register_p (x, DImode, strict))
1268 		return false;
1269 
1270 	      if (GET_CODE (x1) == CONST_INT)
1271 		{
1272 		  if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 12)
1273 		      /* The low bits of the offset are ignored, even when
1274 		         they're meant to realign the pointer.  */
1275 		      && !(INTVAL (x1) & 0x3))
1276 		    return true;
1277 		}
1278 	    }
1279 	  return false;
1280 	}
1281     }
1282   else if (AS_GLOBAL_P (as))
1283     {
1284       gcc_assert (TARGET_GCN5_PLUS);
1285 
1286       if (GET_CODE (x) == REG)
1287        return (gcn_address_register_p (x, DImode, strict)
1288 	       || (!VECTOR_MODE_P (mode)
1289 		   && gcn_vec_address_register_p (x, DImode, strict)));
1290       else if (GET_CODE (x) == PLUS)
1291 	{
1292 	  rtx base = XEXP (x, 0);
1293 	  rtx offset = XEXP (x, 1);
1294 
1295 	  bool immediate_p = (GET_CODE (offset) == CONST_INT
1296 			      /* Signed 13-bit immediate.  */
1297 			      && INTVAL (offset) >= -(1 << 12)
1298 			      && INTVAL (offset) < (1 << 12)
1299 			      /* The low bits of the offset are ignored, even
1300 			         when they're meant to realign the pointer.  */
1301 			      && !(INTVAL (offset) & 0x3));
1302 
1303 	  if (!VECTOR_MODE_P (mode))
1304 	    {
1305 	      if ((gcn_address_register_p (base, DImode, strict)
1306 		   || gcn_vec_address_register_p (base, DImode, strict))
1307 		  && immediate_p)
1308 		/* SGPR + CONST or VGPR + CONST  */
1309 		return true;
1310 
1311 	      if (gcn_address_register_p (base, DImode, strict)
1312 		  && gcn_vgpr_register_operand (offset, SImode))
1313 		/* SGPR + VGPR  */
1314 		return true;
1315 
1316 	      if (GET_CODE (base) == PLUS
1317 		  && gcn_address_register_p (XEXP (base, 0), DImode, strict)
1318 		  && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
1319 		  && immediate_p)
1320 		/* (SGPR + VGPR) + CONST  */
1321 		return true;
1322 	    }
1323 	  else
1324 	    {
1325 	      if (gcn_address_register_p (base, DImode, strict)
1326 		  && immediate_p)
1327 		/* SGPR + CONST  */
1328 		return true;
1329 	    }
1330 	}
1331       else
1332 	return false;
1333     }
1334   else if (AS_ANY_DS_P (as))
1335     switch (GET_CODE (x))
1336       {
1337       case REG:
1338 	return (VECTOR_MODE_P (mode)
1339 		? gcn_address_register_p (x, SImode, strict)
1340 		: gcn_vec_address_register_p (x, SImode, strict));
1341       /* Addresses are in the form BASE+OFFSET, where OFFSET is either a
1342 	 20-bit unsigned immediate, an SGPR or M0.  Writes and atomics do
1343 	 not accept an SGPR.  */
1344       case PLUS:
1345 	{
1346 	  rtx x0 = XEXP (x, 0);
1347 	  rtx x1 = XEXP (x, 1);
1348 	  if (!gcn_vec_address_register_p (x0, DImode, strict))
1349 	    return false;
1350 	  if (GET_CODE (x1) == REG)
1351 	    {
1352 	      if (GET_CODE (x1) != REG
1353 		  || (REGNO (x1) <= FIRST_PSEUDO_REGISTER
1354 		      && !gcn_ssrc_register_operand (x1, DImode)))
1355 		return false;
1356 	    }
1357 	  else if (GET_CODE (x1) == CONST_VECTOR
1358 		   && GET_CODE (CONST_VECTOR_ELT (x1, 0)) == CONST_INT
1359 		   && single_cst_vector_p (x1))
1360 	    {
1361 	      x1 = CONST_VECTOR_ELT (x1, 0);
1362 	      if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20))
1363 		return true;
1364 	    }
1365 	  return false;
1366 	}
1367 
1368       default:
1369 	break;
1370       }
1371   else
1372     gcc_unreachable ();
1373   return false;
1374 }
1375 
1376 /* Implement TARGET_ADDR_SPACE_POINTER_MODE.
1377 
1378    Return the appropriate mode for a named address pointer.  */
1379 
1380 static scalar_int_mode
1381 gcn_addr_space_pointer_mode (addr_space_t addrspace)
1382 {
1383   switch (addrspace)
1384     {
1385     case ADDR_SPACE_SCRATCH:
1386     case ADDR_SPACE_LDS:
1387     case ADDR_SPACE_GDS:
1388       return SImode;
1389     case ADDR_SPACE_DEFAULT:
1390     case ADDR_SPACE_FLAT:
1391     case ADDR_SPACE_FLAT_SCRATCH:
1392     case ADDR_SPACE_SCALAR_FLAT:
1393       return DImode;
1394     default:
1395       gcc_unreachable ();
1396     }
1397 }
1398 
1399 /* Implement TARGET_ADDR_SPACE_ADDRESS_MODE.
1400 
1401    Return the appropriate mode for a named address space address.  */
1402 
1403 static scalar_int_mode
1404 gcn_addr_space_address_mode (addr_space_t addrspace)
1405 {
1406   return gcn_addr_space_pointer_mode (addrspace);
1407 }
1408 
1409 /* Implement TARGET_ADDR_SPACE_SUBSET_P.
1410 
1411    Determine if one named address space is a subset of another.  */
1412 
1413 static bool
1414 gcn_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
1415 {
1416   if (subset == superset)
1417     return true;
1418   /* FIXME is this true?  */
1419   if (AS_FLAT_P (superset) || AS_SCALAR_FLAT_P (superset))
1420     return true;
1421   return false;
1422 }
1423 
1424 /* Convert from one address space to another.  */
1425 
1426 static rtx
1427 gcn_addr_space_convert (rtx op, tree from_type, tree to_type)
1428 {
1429   gcc_assert (POINTER_TYPE_P (from_type));
1430   gcc_assert (POINTER_TYPE_P (to_type));
1431 
1432   addr_space_t as_from = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
1433   addr_space_t as_to = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
1434 
1435   if (AS_LDS_P (as_from) && AS_FLAT_P (as_to))
1436     {
1437       rtx queue = gen_rtx_REG (DImode,
1438 			       cfun->machine->args.reg[QUEUE_PTR_ARG]);
1439       rtx group_seg_aperture_hi = gen_rtx_MEM (SImode,
1440 				     gen_rtx_PLUS (DImode, queue,
1441 						   gen_int_mode (64, SImode)));
1442       rtx tmp = gen_reg_rtx (DImode);
1443 
1444       emit_move_insn (gen_lowpart (SImode, tmp), op);
1445       emit_move_insn (gen_highpart_mode (SImode, DImode, tmp),
1446 		      group_seg_aperture_hi);
1447 
1448       return tmp;
1449     }
1450   else if (as_from == as_to)
1451     return op;
1452   else
1453     gcc_unreachable ();
1454 }
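
/* A sketch of the conversion above: the flat pointer is assembled as
   (aperture_hi << 32) | lds_offset, where aperture_hi is the 32-bit word
   loaded from the queue object at byte offset 64.  The queue layout is
   assumed here from the code rather than from separate documentation.  */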
1455 
1456 
1457 /* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h.
1458 
1459    Return true if REGNO is OK for memory addressing.  */
1460 
1461 bool
1462 gcn_regno_mode_code_ok_for_base_p (int regno,
1463 				   machine_mode, addr_space_t as, int, int)
1464 {
1465   if (regno >= FIRST_PSEUDO_REGISTER)
1466     {
1467       if (reg_renumber)
1468 	regno = reg_renumber[regno];
1469       else
1470 	return true;
1471     }
1472   if (AS_FLAT_P (as))
1473     return (VGPR_REGNO_P (regno)
1474 	    || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1475   else if (AS_SCALAR_FLAT_P (as))
1476     return (SGPR_REGNO_P (regno)
1477 	    || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1478   else if (AS_GLOBAL_P (as))
1479     {
1480       return (SGPR_REGNO_P (regno)
1481 	      || VGPR_REGNO_P (regno)
1482 	      || regno == ARG_POINTER_REGNUM
1483 	      || regno == FRAME_POINTER_REGNUM);
1484     }
1485   else
1486     /* For now.  */
1487     return false;
1488 }
1489 
1490 /* Implement MODE_CODE_BASE_REG_CLASS via gcn.h.
1491 
1492    Return a suitable register class for memory addressing.  */
1493 
1494 reg_class
1495 gcn_mode_code_base_reg_class (machine_mode mode, addr_space_t as, int oc,
1496 			      int ic)
1497 {
1498   switch (as)
1499     {
1500     case ADDR_SPACE_DEFAULT:
1501       return gcn_mode_code_base_reg_class (mode, DEFAULT_ADDR_SPACE, oc, ic);
1502     case ADDR_SPACE_SCALAR_FLAT:
1503     case ADDR_SPACE_SCRATCH:
1504       return SGPR_REGS;
1505       break;
1506     case ADDR_SPACE_FLAT:
1507     case ADDR_SPACE_FLAT_SCRATCH:
1508     case ADDR_SPACE_LDS:
1509     case ADDR_SPACE_GDS:
1510       return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1511 	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1512 	      ? SGPR_REGS : VGPR_REGS);
1513     case ADDR_SPACE_GLOBAL:
1514       return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1515 	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1516 	      ? SGPR_REGS : ALL_GPR_REGS);
1517     }
1518   gcc_unreachable ();
1519 }
1520 
1521 /* Implement REGNO_OK_FOR_INDEX_P via gcn.h.
1522 
1523    Return true if REGNO is OK for index of memory addressing.  */
1524 
1525 bool
1526 regno_ok_for_index_p (int regno)
1527 {
1528   if (regno >= FIRST_PSEUDO_REGISTER)
1529     {
1530       if (reg_renumber)
1531 	regno = reg_renumber[regno];
1532       else
1533 	return true;
1534     }
1535   return regno == M0_REG || VGPR_REGNO_P (regno);
1536 }
1537 
1538 /* Generate move which uses the exec flags.  If EXEC is NULL, then it is
1539    assumed that all lanes normally relevant to the mode of the move are
1540    affected.  If PREV is NULL, then a sensible default is supplied for
1541    the inactive lanes.  */
1542 
1543 static rtx
1544 gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL)
1545 {
1546   machine_mode mode = GET_MODE (op0);
1547 
1548   if (vgpr_vector_mode_p (mode))
1549     {
1550       if (exec && exec != CONSTM1_RTX (DImode))
1551 	{
1552 	  if (!prev)
1553 	    prev = op0;
1554 	}
1555       else
1556 	{
1557 	  if (!prev)
1558 	    prev = gcn_gen_undef (mode);
1559 	  exec = gcn_full_exec_reg ();
1560 	}
1561 
1562       rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec));
1563 
1564       return gen_rtx_PARALLEL (VOIDmode,
1565 	       gen_rtvec (2, set,
1566 			 gen_rtx_CLOBBER (VOIDmode,
1567 					  gen_rtx_SCRATCH (V64DImode))));
1568     }
1569 
1570   return (gen_rtx_PARALLEL
1571 	  (VOIDmode,
1572 	   gen_rtvec (2, gen_rtx_SET (op0, op1),
1573 		      gen_rtx_USE (VOIDmode,
1574 				   exec ? exec : gcn_scalar_exec ()))));
1575 }
1576 
1577 /* Generate a broadcast (vec_duplicate) of OP1 into OP0; if EXEC is given,
     the inactive lanes keep OP2.  */
1578 
1579 static rtx
1580 gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL)
1581 {
1582   if (exec)
1583     return (gen_rtx_SET (op0,
1584 			 gen_rtx_VEC_MERGE (GET_MODE (op0),
1585 					    gen_rtx_VEC_DUPLICATE (GET_MODE
1586 								   (op0), op1),
1587 					    op2, exec)));
1588   else
1589     return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1)));
1590 }
1591 
1592 /* Expand vector init of OP0 by VEC.
1593    Implements vec_init instruction pattern.  */
1594 
1595 void
1596 gcn_expand_vector_init (rtx op0, rtx vec)
1597 {
1598   int64_t initialized_mask = 0;
1599   int64_t curr_mask = 1;
1600   machine_mode mode = GET_MODE (op0);
1601 
1602   rtx val = XVECEXP (vec, 0, 0);
1603 
1604   for (int i = 1; i < 64; i++)
1605     if (rtx_equal_p (val, XVECEXP (vec, 0, i)))
1606       curr_mask |= (int64_t) 1 << i;
1607 
1608   if (gcn_constant_p (val))
1609     emit_move_insn (op0, gcn_vec_constant (mode, val));
1610   else
1611     {
1612       val = force_reg (GET_MODE_INNER (mode), val);
1613       emit_insn (gen_duplicate_load (op0, val));
1614     }
1615   initialized_mask |= curr_mask;
1616   for (int i = 1; i < 64; i++)
1617     if (!(initialized_mask & ((int64_t) 1 << i)))
1618       {
1619 	curr_mask = (int64_t) 1 << i;
1620 	rtx val = XVECEXP (vec, 0, i);
1621 
1622 	for (int j = i + 1; j < 64; j++)
1623 	  if (rtx_equal_p (val, XVECEXP (vec, 0, j)))
1624 	    curr_mask |= (int64_t) 1 << j;
1625 	if (gcn_constant_p (val))
1626 	  emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val),
1627 					get_exec (curr_mask)));
1628 	else
1629 	  {
1630 	    val = force_reg (GET_MODE_INNER (mode), val);
1631 	    emit_insn (gen_duplicate_load (op0, val, op0,
1632 					   get_exec (curr_mask)));
1633 	  }
1634 	initialized_mask |= curr_mask;
1635       }
1636 }
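
/* Illustrative example: initializing {5, 7, 5, 7, ...} first moves the
   constant 5 into all lanes, then performs a single exec-masked move of 7
   restricted to the odd lanes, rather than 64 per-lane writes.  */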
1637 
1638 /* Load vector constant where n-th lane contains BASE+n*VAL.  */
1639 
1640 static rtx
1641 strided_constant (machine_mode mode, int base, int val)
1642 {
1643   rtx x = gen_reg_rtx (mode);
1644   emit_move_insn (x, gcn_vec_constant (mode, base));
1645   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32),
1646 				 x, get_exec (0xffffffff00000000)));
1647   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16),
1648 				 x, get_exec (0xffff0000ffff0000)));
1649   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8),
1650 				 x, get_exec (0xff00ff00ff00ff00)));
1651   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 4),
1652 				 x, get_exec (0xf0f0f0f0f0f0f0f0)));
1653   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2),
1654 				 x, get_exec (0xcccccccccccccccc)));
1655   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1),
1656 				 x, get_exec (0xaaaaaaaaaaaaaaaa)));
1657   return x;
1658 }
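
/* Worked example (values illustrative): with BASE=0 and VAL=4 the lanes
   start as all zeros; the first masked add gives lanes 32-63 the value 128,
   the next adds 64 to lanes 16-31 and 48-63, and so on, so that lane n
   finally holds 4*n.  Each step doubles the number of distinct values with
   one exec-masked vector add.  */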
1659 
1660 /* Implement TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS.  */
1661 
1662 static rtx
1663 gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
1664 				   addr_space_t as)
1665 {
1666   switch (as)
1667     {
1668     case ADDR_SPACE_DEFAULT:
1669       return gcn_addr_space_legitimize_address (x, old, mode,
1670 						DEFAULT_ADDR_SPACE);
1671     case ADDR_SPACE_SCALAR_FLAT:
1672     case ADDR_SPACE_SCRATCH:
1673       /* Instructions working on vectors need the address to be in
1674          a register.  */
1675       if (vgpr_vector_mode_p (mode))
1676 	return force_reg (GET_MODE (x), x);
1677 
1678       return x;
1679     case ADDR_SPACE_FLAT:
1680     case ADDR_SPACE_FLAT_SCRATCH:
1681     case ADDR_SPACE_GLOBAL:
1682       return TARGET_GCN3 ? force_reg (DImode, x) : x;
1683     case ADDR_SPACE_LDS:
1684     case ADDR_SPACE_GDS:
1685       /* FIXME: LDS supports offsets; handle them!  */
1686       if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode)
1687 	{
1688 	  rtx addrs = gen_reg_rtx (V64SImode);
1689 	  rtx base = force_reg (SImode, x);
1690 	  rtx offsets = strided_constant (V64SImode, 0,
1691 					  GET_MODE_UNIT_SIZE (mode));
1692 
1693 	  emit_insn (gen_vec_duplicatev64si (addrs, base));
1694 	  emit_insn (gen_addv64si3 (addrs, offsets, addrs));
1695 	  return addrs;
1696 	}
1697       return x;
1698     }
1699   gcc_unreachable ();
1700 }
1701 
1702 /* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:V64DI)) with the
1703    proper vector of stepped addresses.
1704 
1705    MEM will be a DImode address of a vector in an SGPR.
1706    TMP will be a V64DImode VGPR pair or (scratch:V64DI).  */
1707 
1708 rtx
1709 gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
1710 				     rtx tmp)
1711 {
1712   gcc_assert (MEM_P (mem));
1713   rtx mem_base = XEXP (mem, 0);
1714   rtx mem_index = NULL_RTX;
1715 
1716   if (!TARGET_GCN5_PLUS)
1717     {
1718       /* gcn_addr_space_legitimize_address should have put the address in a
1719          register.  If not, it is too late to do anything about it.  */
1720       gcc_assert (REG_P (mem_base));
1721     }
1722 
1723   if (GET_CODE (mem_base) == PLUS)
1724     {
1725       mem_index = XEXP (mem_base, 1);
1726       mem_base = XEXP (mem_base, 0);
1727     }
1728 
1729   /* RF and RM base registers for vector modes should always be an SGPR.  */
1730   gcc_assert (SGPR_REGNO_P (REGNO (mem_base))
1731 	      || REGNO (mem_base) >= FIRST_PSEUDO_REGISTER);
1732 
1733   machine_mode inner = GET_MODE_INNER (mode);
1734   int shift = exact_log2 (GET_MODE_SIZE (inner));
1735   rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
1736   rtx undef_v64si = gcn_gen_undef (V64SImode);
1737   rtx new_base = NULL_RTX;
1738   addr_space_t as = MEM_ADDR_SPACE (mem);
1739 
1740   rtx tmplo = (REG_P (tmp)
1741 	       ? gcn_operand_part (V64DImode, tmp, 0)
1742 	       : gen_reg_rtx (V64SImode));
1743 
1744   /* tmplo[:] = ramp[:] << shift  */
1745   if (exec)
1746     emit_insn (gen_ashlv64si3_exec (tmplo, ramp,
1747 				    gen_int_mode (shift, SImode),
1748 				    undef_v64si, exec));
1749   else
1750     emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode)));
1751 
1752   if (AS_FLAT_P (as))
1753     {
1754       if (REG_P (tmp))
1755 	{
1756 	  rtx vcc = gen_rtx_REG (DImode, CC_SAVE_REG);
1757 	  rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0);
1758 	  rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1);
1759 	  rtx tmphi = gcn_operand_part (V64DImode, tmp, 1);
1760 
1761 	  /* tmphi[:] = mem_base_hi  */
1762 	  if (exec)
1763 	    emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi,
1764 						    undef_v64si, exec));
1765 	  else
1766 	    emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi));
1767 
1768 	  /* tmp[:] += zext (mem_base)  */
1769 	  if (exec)
1770 	    {
1771 	      rtx undef_di = gcn_gen_undef (DImode);
1772 	      emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo,
1773 						     vcc, undef_v64si, exec));
1774 	      emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx,
1775 					      vcc, vcc, undef_v64si, exec));
1776 	    }
1777 	  else
1778 	    emit_insn (gen_addv64di3_zext_dup (tmp, mem_base_lo, tmp));
1779 	}
1780       else
1781 	{
1782 	  tmp = gen_reg_rtx (V64DImode);
1783 	  if (exec)
1784 	    emit_insn (gen_addv64di3_zext_dup2_exec (tmp, tmplo, mem_base,
1785 						     gcn_gen_undef (V64DImode),
1786 						     exec));
1787 	  else
1788 	    emit_insn (gen_addv64di3_zext_dup2 (tmp, tmplo, mem_base));
1789 	}
1790 
1791       new_base = tmp;
1792     }
1793   else if (AS_ANY_DS_P (as))
1794     {
1795       if (!exec)
1796 	emit_insn (gen_addv64si3_dup (tmplo, tmplo, mem_base));
1797       else
1798         emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base,
1799 					   gcn_gen_undef (V64SImode), exec));
1800       new_base = tmplo;
1801     }
1802   else
1803     {
1804       mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base);
1805       new_base = gen_rtx_PLUS (V64DImode, mem_base,
1806 			       gen_rtx_SIGN_EXTEND (V64DImode, tmplo));
1807     }
1808 
1809   return gen_rtx_PLUS (GET_MODE (new_base), new_base,
1810 		       gen_rtx_VEC_DUPLICATE (GET_MODE (new_base),
1811 					      (mem_index ? mem_index
1812 					       : const0_rtx)));
1813 }
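
/* Per lane N, the computation above amounts to (a sketch, not verified
   against the instruction descriptions):

     addr[N] = mem_base + (ramp[N] << log2 (element_size)) + mem_index

   where ramp is the fixed V64SImode register at VGPR 1, which the rest of
   the backend is assumed to have initialized with lane IDs.  */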
1814 
1815 /* Convert a BASE address, a vector of OFFSETS, and a SCALE, to addresses
1816    suitable for the given address space.  This is intended for use in
1817    gather/scatter patterns.
1818 
1819    The offsets may be signed or unsigned, according to UNSIGNED_P.
1820    If EXEC is set then _exec patterns will be used, otherwise plain.
1821 
1822    Return values.
1823      ADDR_SPACE_FLAT   - return V64DImode vector of absolute addresses.
1824      ADDR_SPACE_GLOBAL - return V64SImode vector of offsets.  */
1825 
1826 rtx
1827 gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
1828 			   bool unsigned_p, rtx exec)
1829 {
1830   /* Convert the offsets to V64SImode.
1831      TODO: more conversions will be needed when more types are vectorized. */
1832   if (GET_MODE (offsets) == V64DImode)
1833     {
1834       rtx tmp = gen_reg_rtx (V64SImode);
1835       emit_insn (gen_vec_truncatev64div64si (tmp, offsets));
1836       offsets = tmp;
1837     }
1838 
1839   rtx tmpsi = gen_reg_rtx (V64SImode);
1840   rtx tmpdi = gen_reg_rtx (V64DImode);
1841   rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL;
1842   rtx undefdi = exec ? gcn_gen_undef (V64DImode) : NULL;
1843 
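  /* A constant power-of-two scale can be applied with a shift; e.g. a
     scale of 8 becomes "offsets << 3".  Anything else falls back to a
     (possibly predicated) multiply.  */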
1844   if (CONST_INT_P (scale)
1845       && INTVAL (scale) > 0
1846       && exact_log2 (INTVAL (scale)) >= 0)
1847     emit_insn (gen_ashlv64si3 (tmpsi, offsets,
1848 			       GEN_INT (exact_log2 (INTVAL (scale)))));
1849   else
1850     (exec
1851      ? emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi,
1852 					  exec))
1853      : emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale)));
1854 
1855   /* "Global" instructions do not support negative register offsets.  */
1856   if (as == ADDR_SPACE_FLAT || !unsigned_p)
1857     {
1858       if (unsigned_p)
1859 	(exec
1860 	 ?  emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base,
1861 						    undefdi, exec))
1862 	 :  emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base)));
1863       else
1864 	(exec
1865 	 ?  emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base,
1866 						     undefdi, exec))
1867 	 :  emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base)));
1868       return tmpdi;
1869     }
1870   else if (as == ADDR_SPACE_GLOBAL)
1871     return tmpsi;
1872 
1873   gcc_unreachable ();
1874 }
1875 
1876 /* Return true if move from OP0 to OP1 is known to be executed in vector
1877    unit.  */
1878 
1879 bool
1880 gcn_vgpr_move_p (rtx op0, rtx op1)
1881 {
1882   if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
1883     return true;
1884   if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
1885     return true;
1886   return ((REG_P (op0) && VGPR_REGNO_P (REGNO (op0)))
1887 	  || (REG_P (op1) && VGPR_REGNO_P (REGNO (op1)))
1888 	  || vgpr_vector_mode_p (GET_MODE (op0)));
1889 }
1890 
1891 /* Return true if move from OP0 to OP1 is known to be executed in scalar
1892    unit.  Used in the machine description.  */
1893 
1894 bool
1895 gcn_sgpr_move_p (rtx op0, rtx op1)
1896 {
1897   if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
1898     return true;
1899   if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
1900     return true;
1901   if (!REG_P (op0) || REGNO (op0) >= FIRST_PSEUDO_REGISTER
1902       || VGPR_REGNO_P (REGNO (op0)))
1903     return false;
1904   if (REG_P (op1)
1905       && REGNO (op1) < FIRST_PSEUDO_REGISTER
1906       && !VGPR_REGNO_P (REGNO (op1)))
1907     return true;
1908   return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode);
1909 }
1910 
1911 /* Implement TARGET_SECONDARY_RELOAD.
1912 
1913    The address space determines which registers can be used for loads and
1914    stores.  */
1915 
1916 static reg_class_t
1917 gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
1918 		      machine_mode reload_mode, secondary_reload_info *sri)
1919 {
1920   reg_class_t result = NO_REGS;
1921   bool spilled_pseudo =
1922     (REG_P (x) || GET_CODE (x) == SUBREG) && true_regnum (x) == -1;
1923 
1924   if (dump_file && (dump_flags & TDF_DETAILS))
1925     {
1926       fprintf (dump_file, "gcn_secondary_reload: ");
1927       dump_value_slim (dump_file, x, 1);
1928       fprintf (dump_file, " %s %s:%s", (in_p ? "->" : "<-"),
1929 	       reg_class_names[rclass], GET_MODE_NAME (reload_mode));
1930       if (REG_P (x) || GET_CODE (x) == SUBREG)
1931 	fprintf (dump_file, " (true regnum: %d \"%s\")", true_regnum (x),
1932 		 (true_regnum (x) >= 0
1933 		  && true_regnum (x) < FIRST_PSEUDO_REGISTER
1934 		  ? reg_names[true_regnum (x)]
1935 		  : (spilled_pseudo ? "stack spill" : "??")));
1936       fprintf (dump_file, "\n");
1937     }
1938 
1939   /* Some callers don't use or initialize icode.  */
1940   sri->icode = CODE_FOR_nothing;
1941 
1942   if (MEM_P (x) || spilled_pseudo)
1943     {
1944       addr_space_t as = DEFAULT_ADDR_SPACE;
1945 
1946       /* If we have a spilled pseudo, we can't find the address space
1947 	 directly, but we know it's in ADDR_SPACE_FLAT space for GCN3 or
1948 	 ADDR_SPACE_GLOBAL for GCN5.  */
1949       if (MEM_P (x))
1950 	as = MEM_ADDR_SPACE (x);
1951 
1952       if (as == ADDR_SPACE_DEFAULT)
1953 	as = DEFAULT_ADDR_SPACE;
1954 
1955       switch (as)
1956 	{
1957 	case ADDR_SPACE_SCALAR_FLAT:
1958 	  result =
1959 	    ((!MEM_P (x) || rclass == SGPR_REGS) ? NO_REGS : SGPR_REGS);
1960 	  break;
1961 	case ADDR_SPACE_FLAT:
1962 	case ADDR_SPACE_FLAT_SCRATCH:
1963 	case ADDR_SPACE_GLOBAL:
1964 	  if (GET_MODE_CLASS (reload_mode) == MODE_VECTOR_INT
1965 	      || GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT)
1966 	    {
1967 	      if (in_p)
1968 		switch (reload_mode)
1969 		  {
1970 		  case E_V64SImode:
1971 		    sri->icode = CODE_FOR_reload_inv64si;
1972 		    break;
1973 		  case E_V64SFmode:
1974 		    sri->icode = CODE_FOR_reload_inv64sf;
1975 		    break;
1976 		  case E_V64HImode:
1977 		    sri->icode = CODE_FOR_reload_inv64hi;
1978 		    break;
1979 		  case E_V64HFmode:
1980 		    sri->icode = CODE_FOR_reload_inv64hf;
1981 		    break;
1982 		  case E_V64QImode:
1983 		    sri->icode = CODE_FOR_reload_inv64qi;
1984 		    break;
1985 		  case E_V64DImode:
1986 		    sri->icode = CODE_FOR_reload_inv64di;
1987 		    break;
1988 		  case E_V64DFmode:
1989 		    sri->icode = CODE_FOR_reload_inv64df;
1990 		    break;
1991 		  default:
1992 		    gcc_unreachable ();
1993 		  }
1994 	      else
1995 		switch (reload_mode)
1996 		  {
1997 		  case E_V64SImode:
1998 		    sri->icode = CODE_FOR_reload_outv64si;
1999 		    break;
2000 		  case E_V64SFmode:
2001 		    sri->icode = CODE_FOR_reload_outv64sf;
2002 		    break;
2003 		  case E_V64HImode:
2004 		    sri->icode = CODE_FOR_reload_outv64hi;
2005 		    break;
2006 		  case E_V64HFmode:
2007 		    sri->icode = CODE_FOR_reload_outv64hf;
2008 		    break;
2009 		  case E_V64QImode:
2010 		    sri->icode = CODE_FOR_reload_outv64qi;
2011 		    break;
2012 		  case E_V64DImode:
2013 		    sri->icode = CODE_FOR_reload_outv64di;
2014 		    break;
2015 		  case E_V64DFmode:
2016 		    sri->icode = CODE_FOR_reload_outv64df;
2017 		    break;
2018 		  default:
2019 		    gcc_unreachable ();
2020 		  }
2021 	      break;
2022 	    }
2023 	  /* Fallthrough.  */
2024 	case ADDR_SPACE_LDS:
2025 	case ADDR_SPACE_GDS:
2026 	case ADDR_SPACE_SCRATCH:
2027 	  result = (rclass == VGPR_REGS ? NO_REGS : VGPR_REGS);
2028 	  break;
2029 	}
2030     }
2031 
2032   if (dump_file && (dump_flags & TDF_DETAILS))
2033     fprintf (dump_file, "   <= %s (icode: %s)\n", reg_class_names[result],
2034 	     get_insn_name (sri->icode));
2035 
2036   return result;
2037 }
2038 
2039 /* Update register usage after having seen the compiler flags and kernel
2040    attributes.  We typically want to fix registers that contain values
2041    set by the HSA runtime.  */
2042 
2043 static void
2044 gcn_conditional_register_usage (void)
2045 {
2046   int i;
2047 
2048   /* FIXME: Do we need to reset fixed_regs?  */
2049 
2050   /* Limit ourselves to 1/16 the register file for maximum-sized workgroups.
2051      There are enough SGPRs not to limit those.
2052      TODO: Adjust this more dynamically.  */
2053   for (i = FIRST_VGPR_REG + 64; i <= LAST_VGPR_REG; i++)
2054     fixed_regs[i] = 1, call_used_regs[i] = 1;
2055 
2056   if (!cfun || !cfun->machine || cfun->machine->normal_function)
2057     {
2058       /* Normal functions can't know what kernel argument registers are
2059          live, so just fix the bottom 16 SGPRs, and bottom 3 VGPRs.  */
2060       for (i = 0; i < 16; i++)
2061 	fixed_regs[FIRST_SGPR_REG + i] = 1;
2062       for (i = 0; i < 3; i++)
2063 	fixed_regs[FIRST_VGPR_REG + i] = 1;
2064       return;
2065     }
2066 
2067   /* Fix the runtime argument registers containing values that may be
2068      needed later.  DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
2069      needed after the prologue, so there's no need to fix them.  */
2070   if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0)
2071     fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
2072   if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
2073     {
2074       fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
2075       fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1;
2076       fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 2] = 1;
2077       fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 3] = 1;
2078     }
2079   if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
2080     {
2081       fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG]] = 1;
2082       fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] + 1] = 1;
2083     }
2084   if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0)
2085     {
2086       fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG]] = 1;
2087       fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG] + 1] = 1;
2088     }
2089   if (cfun->machine->args.reg[WORKGROUP_ID_X_ARG] >= 0)
2090     fixed_regs[cfun->machine->args.reg[WORKGROUP_ID_X_ARG]] = 1;
2091   if (cfun->machine->args.reg[WORK_ITEM_ID_X_ARG] >= 0)
2092     fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_X_ARG]] = 1;
2093   if (cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG] >= 0)
2094     fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG]] = 1;
2095   if (cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG] >= 0)
2096     fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
2097 
2098   if (TARGET_GCN5_PLUS)
2099     /* v0 is always zero, for global nul-offsets.  */
2100     fixed_regs[VGPR_REGNO (0)] = 1;
2101 }
2102 
2103 /* Determine if a load or store is valid, according to the register classes
2104    and address space.  Used primarily by the machine description to decide
2105    when to split a move into two steps.  */
2106 
2107 bool
2108 gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
2109 {
2110   if (!MEM_P (dest) && !MEM_P (src))
2111     return true;
2112 
2113   if (MEM_P (dest)
2114       && AS_FLAT_P (MEM_ADDR_SPACE (dest))
2115       && (gcn_flat_address_p (XEXP (dest, 0), mode)
2116 	  || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2117 	  || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2118       && gcn_vgpr_register_operand (src, mode))
2119     return true;
2120   else if (MEM_P (src)
2121 	   && AS_FLAT_P (MEM_ADDR_SPACE (src))
2122 	   && (gcn_flat_address_p (XEXP (src, 0), mode)
2123 	       || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2124 	       || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2125 	   && gcn_vgpr_register_operand (dest, mode))
2126     return true;
2127 
2128   if (MEM_P (dest)
2129       && AS_GLOBAL_P (MEM_ADDR_SPACE (dest))
2130       && (gcn_global_address_p (XEXP (dest, 0))
2131 	  || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2132 	  || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2133       && gcn_vgpr_register_operand (src, mode))
2134     return true;
2135   else if (MEM_P (src)
2136 	   && AS_GLOBAL_P (MEM_ADDR_SPACE (src))
2137 	   && (gcn_global_address_p (XEXP (src, 0))
2138 	       || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2139 	       || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2140 	   && gcn_vgpr_register_operand (dest, mode))
2141     return true;
2142 
2143   if (MEM_P (dest)
2144       && MEM_ADDR_SPACE (dest) == ADDR_SPACE_SCALAR_FLAT
2145       && (gcn_scalar_flat_address_p (XEXP (dest, 0))
2146 	  || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2147 	  || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2148       && gcn_ssrc_register_operand (src, mode))
2149     return true;
2150   else if (MEM_P (src)
2151 	   && MEM_ADDR_SPACE (src) == ADDR_SPACE_SCALAR_FLAT
2152 	   && (gcn_scalar_flat_address_p (XEXP (src, 0))
2153 	       || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2154 	       || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2155 	   && gcn_sdst_register_operand (dest, mode))
2156     return true;
2157 
2158   if (MEM_P (dest)
2159       && AS_ANY_DS_P (MEM_ADDR_SPACE (dest))
2160       && gcn_ds_address_p (XEXP (dest, 0))
2161       && gcn_vgpr_register_operand (src, mode))
2162     return true;
2163   else if (MEM_P (src)
2164 	   && AS_ANY_DS_P (MEM_ADDR_SPACE (src))
2165 	   && gcn_ds_address_p (XEXP (src, 0))
2166 	   && gcn_vgpr_register_operand (dest, mode))
2167     return true;
2168 
2169   return false;
2170 }
2171 
2172 /* }}}  */
2173 /* {{{ Functions and ABI.  */
2174 
2175 /* Implement TARGET_FUNCTION_VALUE.
2176 
2177    Define how to find the value returned by a function.
2178    The register location is always the same, but the mode depends on
2179    VALTYPE.  */
2180 
2181 static rtx
2182 gcn_function_value (const_tree valtype, const_tree, bool)
2183 {
2184   machine_mode mode = TYPE_MODE (valtype);
2185 
2186   if (INTEGRAL_TYPE_P (valtype)
2187       && GET_MODE_CLASS (mode) == MODE_INT
2188       && GET_MODE_SIZE (mode) < 4)
2189     mode = SImode;
2190 
2191   return gen_rtx_REG (mode, SGPR_REGNO (RETURN_VALUE_REG));
2192 }
2193 
2194 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.
2195 
2196    Return true if N is a possible register number for the function return
2197    value.  */
2198 
2199 static bool
2200 gcn_function_value_regno_p (const unsigned int n)
2201 {
2202   return n == RETURN_VALUE_REG;
2203 }
2204 
2205 /* Calculate the number of registers required to hold a function argument
2206    of MODE and TYPE.  */
2207 
2208 static int
2209 num_arg_regs (machine_mode mode, const_tree type)
2210 {
2211   int size;
2212 
2213   if (targetm.calls.must_pass_in_stack (mode, type))
2214     return 0;
2215 
2216   if (type && mode == BLKmode)
2217     size = int_size_in_bytes (type);
2218   else
2219     size = GET_MODE_SIZE (mode);
2220 
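  /* Round up to whole words (assuming 4-byte words); e.g. a 12-byte
     struct needs (12 + 3) / 4 = 3 registers.  */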
2221   return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2222 }
2223 
2224 /* Implement TARGET_STRICT_ARGUMENT_NAMING.
2225 
2226    Return true if the location where a function argument is passed
2227    depends on whether or not it is a named argument.
2228 
2229    For gcn, we know how to handle functions declared as stdarg: by
2230    passing an extra pointer to the unnamed arguments.  However, the
2231    Fortran frontend can produce a different situation, where a
2232    function pointer is declared with no arguments, but the actual
2233    function and calls to it take more arguments.  In that case, we
2234    want to ensure the call matches the definition of the function.  */
2235 
2236 static bool
2237 gcn_strict_argument_naming (cumulative_args_t cum_v)
2238 {
2239   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2240 
2241   return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
2242 }
2243 
2244 /* Implement TARGET_PRETEND_OUTGOING_VARARGS_NAMED.
2245 
2246    See comment on gcn_strict_argument_naming.  */
2247 
2248 static bool
2249 gcn_pretend_outgoing_varargs_named (cumulative_args_t cum_v)
2250 {
2251   return !gcn_strict_argument_naming (cum_v);
2252 }
2253 
2254 /* Implement TARGET_FUNCTION_ARG.
2255 
2256    Return an RTX indicating whether a function argument is passed in a register
2257    and if so, which register.  */
2258 
2259 static rtx
2260 gcn_function_arg (cumulative_args_t cum_v, machine_mode mode, const_tree type,
2261 		  bool named)
2262 {
2263   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2264   if (cum->normal_function)
2265     {
2266       if (!named || mode == VOIDmode)
2267 	return 0;
2268 
2269       if (targetm.calls.must_pass_in_stack (mode, type))
2270 	return 0;
2271 
2272       int reg_num = FIRST_PARM_REG + cum->num;
2273       int num_regs = num_arg_regs (mode, type);
2274       if (num_regs > 0)
2275 	while (reg_num % num_regs != 0)
2276 	  reg_num++;
2277       if (reg_num + num_regs <= FIRST_PARM_REG + NUM_PARM_REGS)
2278 	return gen_rtx_REG (mode, reg_num);
2279     }
2280   else
2281     {
2282       if (cum->num >= cum->args.nargs)
2283 	{
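	  /* Round the kernarg offset up to this argument's alignment;
	     e.g. an offset of 6 with 8-byte alignment becomes
	     (6 + 7) & -8 == 8.  */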
2284 	  cum->offset = (cum->offset + TYPE_ALIGN (type) / 8 - 1)
2285 	    & -(TYPE_ALIGN (type) / 8);
2286 	  cfun->machine->kernarg_segment_alignment
2287 	    = MAX ((unsigned) cfun->machine->kernarg_segment_alignment,
2288 		   TYPE_ALIGN (type) / 8);
2289 	  rtx addr = gen_rtx_REG (DImode,
2290 				  cum->args.reg[KERNARG_SEGMENT_PTR_ARG]);
2291 	  if (cum->offset)
2292 	    addr = gen_rtx_PLUS (DImode, addr,
2293 				 gen_int_mode (cum->offset, DImode));
2294 	  rtx mem = gen_rtx_MEM (mode, addr);
2295 	  set_mem_attributes (mem, const_cast<tree>(type), 1);
2296 	  set_mem_addr_space (mem, ADDR_SPACE_SCALAR_FLAT);
2297 	  MEM_READONLY_P (mem) = 1;
2298 	  return mem;
2299 	}
2300 
2301       int a = cum->args.order[cum->num];
2302       if (mode != gcn_kernel_arg_types[a].mode)
2303 	{
2304 	  error ("wrong type of argument %s", gcn_kernel_arg_types[a].name);
2305 	  return 0;
2306 	}
2307       return gen_rtx_REG ((machine_mode) gcn_kernel_arg_types[a].mode,
2308 			  cum->args.reg[a]);
2309     }
2310   return 0;
2311 }
2312 
2313 /* Implement TARGET_FUNCTION_ARG_ADVANCE.
2314 
2315    Updates the summarizer variable pointed to by CUM_V to advance past an
2316    argument in the argument list.  */
2317 
2318 static void
2319 gcn_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
2320 			  const_tree type, bool named)
2321 {
2322   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2323 
2324   if (cum->normal_function)
2325     {
2326       if (!named)
2327 	return;
2328 
2329       int num_regs = num_arg_regs (mode, type);
2330       if (num_regs > 0)
2331 	while ((FIRST_PARM_REG + cum->num) % num_regs != 0)
2332 	  cum->num++;
2333       cum->num += num_regs;
2334     }
2335   else
2336     {
2337       if (cum->num < cum->args.nargs)
2338 	cum->num++;
2339       else
2340 	{
2341 	  cum->offset += tree_to_uhwi (TYPE_SIZE_UNIT (type));
2342 	  cfun->machine->kernarg_segment_byte_size = cum->offset;
2343 	}
2344     }
2345 }
2346 
2347 /* Implement TARGET_ARG_PARTIAL_BYTES.
2348 
2349    Returns the number of bytes at the beginning of an argument that must be put
2350    in registers.  The value must be zero for arguments that are passed entirely
2351    in registers or that are entirely pushed on the stack.  */
2352 
2353 static int
2354 gcn_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode, tree type,
2355 		       bool named)
2356 {
2357   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2358 
2359   if (!named)
2360     return 0;
2361 
2362   if (targetm.calls.must_pass_in_stack (mode, type))
2363     return 0;
2364 
2365   if (cum->num >= NUM_PARM_REGS)
2366     return 0;
2367 
2368   /* If the argument fits entirely in registers, return 0.  */
2369   if (cum->num + num_arg_regs (mode, type) <= NUM_PARM_REGS)
2370     return 0;
2371 
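  /* The argument is split between registers and the stack: with 4-byte
     words, if only two parameter registers remain free then the first
     8 bytes go in registers and the remainder on the stack.  */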
2372   return (NUM_PARM_REGS - cum->num) * UNITS_PER_WORD;
2373 }
2374 
2375 /* A normal function which takes a pointer argument (to a scalar) may be
2376    passed a pointer to LDS space (via a high-bits-set aperture), and that only
2377    works with FLAT addressing, not GLOBAL.  Force FLAT addressing if the
2378    function has an incoming pointer-to-scalar parameter.  */
2379 
2380 static void
2381 gcn_detect_incoming_pointer_arg (tree fndecl)
2382 {
2383   gcc_assert (cfun && cfun->machine);
2384 
2385   for (tree arg = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
2386        arg;
2387        arg = TREE_CHAIN (arg))
2388     if (POINTER_TYPE_P (TREE_VALUE (arg))
2389 	&& !AGGREGATE_TYPE_P (TREE_TYPE (TREE_VALUE (arg))))
2390       cfun->machine->use_flat_addressing = true;
2391 }
2392 
2393 /* Implement INIT_CUMULATIVE_ARGS, via gcn.h.
2394 
2395    Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function
2396    whose data type is FNTYPE.  For a library call, FNTYPE is 0.  */
2397 
2398 void
2399 gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */ ,
2400 			  tree fntype /* tree ptr for function decl */ ,
2401 			  rtx libname /* SYMBOL_REF of library name or 0 */ ,
2402 			  tree fndecl, int caller)
2403 {
2404   memset (cum, 0, sizeof (*cum));
2405   cum->fntype = fntype;
2406   if (libname)
2407     {
2408       gcc_assert (cfun && cfun->machine);
2409       cum->normal_function = true;
2410       if (!caller)
2411 	{
2412 	  cfun->machine->normal_function = true;
2413 	  gcn_detect_incoming_pointer_arg (fndecl);
2414 	}
2415       return;
2416     }
2417   tree attr = NULL;
2418   if (fndecl)
2419     attr = lookup_attribute ("amdgpu_hsa_kernel", DECL_ATTRIBUTES (fndecl));
2420   if (fndecl && !attr)
2421     attr = lookup_attribute ("amdgpu_hsa_kernel",
2422 			     TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
2423   if (!attr && fntype)
2424     attr = lookup_attribute ("amdgpu_hsa_kernel", TYPE_ATTRIBUTES (fntype));
2425   /* Handle main () as a kernel, so we can run the testsuite.
2426      Handle OpenACC kernels similarly to main.  */
2427   if (!attr && !caller && fndecl
2428       && (MAIN_NAME_P (DECL_NAME (fndecl))
2429 	  || lookup_attribute ("omp target entrypoint",
2430 			       DECL_ATTRIBUTES (fndecl)) != NULL_TREE))
2431     gcn_parse_amdgpu_hsa_kernel_attribute (&cum->args, NULL_TREE);
2432   else
2433     {
2434       if (!attr || caller)
2435 	{
2436 	  gcc_assert (cfun && cfun->machine);
2437 	  cum->normal_function = true;
2438 	  if (!caller)
2439 	    cfun->machine->normal_function = true;
2440 	}
2441       gcn_parse_amdgpu_hsa_kernel_attribute
2442 	(&cum->args, attr ? TREE_VALUE (attr) : NULL_TREE);
2443     }
2444   cfun->machine->args = cum->args;
2445   if (!caller && cfun->machine->normal_function)
2446     gcn_detect_incoming_pointer_arg (fndecl);
2447 }
2448 
2449 static bool
2450 gcn_return_in_memory (const_tree type, const_tree ARG_UNUSED (fntype))
2451 {
2452   machine_mode mode = TYPE_MODE (type);
2453   HOST_WIDE_INT size = int_size_in_bytes (type);
2454 
2455   if (AGGREGATE_TYPE_P (type))
2456     return true;
2457 
2458   if (mode == BLKmode)
2459     return true;
2460 
2461   if (size > 2 * UNITS_PER_WORD)
2462     return true;
2463 
2464   return false;
2465 }
2466 
2467 /* Implement TARGET_PROMOTE_FUNCTION_MODE.
2468 
2469    Return the mode to use for outgoing function arguments.  */
2470 
2471 machine_mode
2472 gcn_promote_function_mode (const_tree ARG_UNUSED (type), machine_mode mode,
2473 			   int *ARG_UNUSED (punsignedp),
2474 			   const_tree ARG_UNUSED (funtype),
2475 			   int ARG_UNUSED (for_return))
2476 {
2477   if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4)
2478     return SImode;
2479 
2480   return mode;
2481 }
2482 
2483 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.
2484 
2485    Derived from hppa_gimplify_va_arg_expr.  The generic routine doesn't handle
2486    ARGS_GROW_DOWNWARDS.  */
2487 
2488 static tree
2489 gcn_gimplify_va_arg_expr (tree valist, tree type,
2490 			  gimple_seq *ARG_UNUSED (pre_p),
2491 			  gimple_seq *ARG_UNUSED (post_p))
2492 {
2493   tree ptr = build_pointer_type (type);
2494   tree valist_type;
2495   tree t, u;
2496   bool indirect;
2497 
2498   indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
2499   if (indirect)
2500     {
2501       type = ptr;
2502       ptr = build_pointer_type (type);
2503     }
2504   valist_type = TREE_TYPE (valist);
2505 
2506   /* Args grow down.  Not handled by generic routines.  */
2507 
2508   u = fold_convert (sizetype, size_in_bytes (type));
2509   u = fold_build1 (NEGATE_EXPR, sizetype, u);
2510   t = fold_build_pointer_plus (valist, u);
2511 
2512   /* Align to 8 byte boundary.  */
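  /* ANDing with -8 (i.e. ~7) rounds the downward-growing address down;
     for example, 0x1017 & -8 == 0x1010.  */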
2513 
2514   u = build_int_cst (TREE_TYPE (t), -8);
2515   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
2516   t = fold_convert (valist_type, t);
2517 
2518   t = build2 (MODIFY_EXPR, valist_type, valist, t);
2519 
2520   t = fold_convert (ptr, t);
2521   t = build_va_arg_indirect_ref (t);
2522 
2523   if (indirect)
2524     t = build_va_arg_indirect_ref (t);
2525 
2526   return t;
2527 }
2528 
2529 /* Calculate stack offsets needed to create prologues and epilogues.  */
2530 
2531 static struct machine_function *
2532 gcn_compute_frame_offsets (void)
2533 {
2534   machine_function *offsets = cfun->machine;
2535 
2536   if (reload_completed)
2537     return offsets;
2538 
2539   offsets->need_frame_pointer = frame_pointer_needed;
2540 
2541   offsets->outgoing_args_size = crtl->outgoing_args_size;
2542   offsets->pretend_size = crtl->args.pretend_args_size;
2543 
2544   offsets->local_vars = get_frame_size ();
2545 
2546   offsets->lr_needs_saving = (!leaf_function_p ()
2547 			      || df_regs_ever_live_p (LR_REGNUM)
2548 			      || df_regs_ever_live_p (LR_REGNUM + 1));
2549 
2550   offsets->callee_saves = offsets->lr_needs_saving ? 8 : 0;
2551 
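  /* Each saved scalar register takes 4 bytes; a vector register is saved
     across all 64 lanes, so it takes 64 * 4 = 256 bytes.  */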
2552   for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2553     if ((df_regs_ever_live_p (regno) && !call_used_regs[regno])
2554 	|| ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
2555 	    && frame_pointer_needed))
2556       offsets->callee_saves += (VGPR_REGNO_P (regno) ? 256 : 4);
2557 
2558   /* Round up to 64-bit boundary to maintain stack alignment.  */
2559   offsets->callee_saves = (offsets->callee_saves + 7) & ~7;
2560 
2561   return offsets;
2562 }
2563 
2564 /* Insert code into the prologue or epilogue to store or load any
2565    callee-save register to/from the stack.
2566 
2567    Helper function for gcn_expand_prologue and gcn_expand_epilogue.  */
2568 
2569 static void
2570 move_callee_saved_registers (rtx sp, machine_function *offsets,
2571 			     bool prologue)
2572 {
2573   int regno, offset, saved_scalars;
2574   rtx exec = gen_rtx_REG (DImode, EXEC_REG);
2575   rtx vcc = gen_rtx_REG (DImode, VCC_LO_REG);
2576   rtx offreg = gen_rtx_REG (SImode, SGPR_REGNO (22));
2577   rtx as = gen_rtx_CONST_INT (VOIDmode, STACK_ADDR_SPACE);
2578   HOST_WIDE_INT exec_set = 0;
2579   int offreg_set = 0;
2580 
2581   start_sequence ();
2582 
2583   /* Move scalars into two vector registers.  */
2584   for (regno = 0, saved_scalars = 0; regno < FIRST_VGPR_REG; regno++)
2585     if ((df_regs_ever_live_p (regno) && !call_used_regs[regno])
2586 	|| ((regno & ~1) == LINK_REGNUM && offsets->lr_needs_saving)
2587 	|| ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
2588 	    && offsets->need_frame_pointer))
2589       {
2590 	rtx reg = gen_rtx_REG (SImode, regno);
2591 	rtx vreg = gen_rtx_REG (V64SImode,
2592 				VGPR_REGNO (6 + (saved_scalars / 64)));
2593 	int lane = saved_scalars % 64;
2594 
2595 	if (prologue)
2596 	  emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane)));
2597 	else
2598 	  emit_insn (gen_vec_extractv64sisi (reg, vreg, GEN_INT (lane)));
2599 
2600 	saved_scalars++;
2601       }
2602 
2603   rtx move_scalars = get_insns ();
2604   end_sequence ();
2605   start_sequence ();
2606 
2607   /* Ensure that all vector lanes are moved.  */
2608   exec_set = -1;
2609   emit_move_insn (exec, GEN_INT (exec_set));
2610 
2611   /* Set up a vector stack pointer.  */
2612   rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2613   rtx _0_4_8_12 = gen_rtx_REG (V64SImode, VGPR_REGNO (3));
2614   emit_insn (gen_ashlv64si3_exec (_0_4_8_12, _0_1_2_3, GEN_INT (2),
2615 				  gcn_gen_undef (V64SImode), exec));
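  /* _0_1_2_3 is assumed to hold the lane numbers 0, 1, 2, ...; shifting
     left by 2 turns them into per-lane byte offsets 0, 4, 8, 12, ... so
     that each lane addresses its own 32-bit stack slot.  */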
2616   rtx vsp = gen_rtx_REG (V64DImode, VGPR_REGNO (4));
2617   emit_insn (gen_vec_duplicatev64di_exec (vsp, sp, gcn_gen_undef (V64DImode),
2618 					  exec));
2619   emit_insn (gen_addv64si3_vcc_exec (gcn_operand_part (V64SImode, vsp, 0),
2620 				     gcn_operand_part (V64SImode, vsp, 0),
2621 				     _0_4_8_12, vcc, gcn_gen_undef (V64SImode),
2622 				     exec));
2623   emit_insn (gen_addcv64si3_exec (gcn_operand_part (V64SImode, vsp, 1),
2624 				  gcn_operand_part (V64SImode, vsp, 1),
2625 				  const0_rtx, vcc, vcc,
2626 				  gcn_gen_undef (V64SImode), exec));
2627 
2628   /* Move vectors.  */
2629   for (regno = FIRST_VGPR_REG, offset = offsets->pretend_size;
2630        regno < FIRST_PSEUDO_REGISTER; regno++)
2631     if ((df_regs_ever_live_p (regno) && !call_used_regs[regno])
2632 	|| (regno == VGPR_REGNO (6) && saved_scalars > 0)
2633 	|| (regno == VGPR_REGNO (7) && saved_scalars > 63))
2634       {
2635 	rtx reg = gen_rtx_REG (V64SImode, regno);
2636 	int size = 256;
2637 
2638 	if (regno == VGPR_REGNO (6) && saved_scalars < 64)
2639 	  size = saved_scalars * 4;
2640 	else if (regno == VGPR_REGNO (7) && saved_scalars < 128)
2641 	  size = (saved_scalars - 64) * 4;
2642 
2643 	if (size != 256 || exec_set != -1)
2644 	  {
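	    /* Enable only the lanes that hold saved data: one lane per
	       32-bit word, so e.g. 40 bytes of saved scalars gives
	       exec_set = (1 << 10) - 1 == 0x3ff.  */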
2645 	    exec_set = ((unsigned HOST_WIDE_INT) 1 << (size / 4)) - 1;
2646 	    emit_move_insn (exec, gen_int_mode (exec_set, DImode));
2647 	  }
2648 
2649 	if (prologue)
2650 	  emit_insn (gen_scatterv64si_insn_1offset_exec (vsp, const0_rtx, reg,
2651 							 as, const0_rtx, exec));
2652 	else
2653 	  emit_insn (gen_gatherv64si_insn_1offset_exec
2654 		     (reg, vsp, const0_rtx, as, const0_rtx,
2655 		      gcn_gen_undef (V64SImode), exec));
2656 
2657 	/* Move our VSP to the next stack entry.  */
2658 	if (offreg_set != size)
2659 	  {
2660 	    offreg_set = size;
2661 	    emit_move_insn (offreg, GEN_INT (size));
2662 	  }
2663 	if (exec_set != -1)
2664 	  {
2665 	    exec_set = -1;
2666 	    emit_move_insn (exec, GEN_INT (exec_set));
2667 	  }
2668 	emit_insn (gen_addv64si3_vcc_dup_exec
2669 		   (gcn_operand_part (V64SImode, vsp, 0),
2670 		    offreg, gcn_operand_part (V64SImode, vsp, 0),
2671 		    vcc, gcn_gen_undef (V64SImode), exec));
2672 	emit_insn (gen_addcv64si3_exec
2673 		   (gcn_operand_part (V64SImode, vsp, 1),
2674 		    gcn_operand_part (V64SImode, vsp, 1),
2675 		    const0_rtx, vcc, vcc, gcn_gen_undef (V64SImode), exec));
2676 
2677 	offset += size;
2678       }
2679 
2680   rtx move_vectors = get_insns ();
2681   end_sequence ();
2682 
2683   if (prologue)
2684     {
2685       emit_insn (move_scalars);
2686       emit_insn (move_vectors);
2687     }
2688   else
2689     {
2690       emit_insn (move_vectors);
2691       emit_insn (move_scalars);
2692     }
2693 }
2694 
2695 /* Generate prologue.  Called from gen_prologue during pro_and_epilogue pass.
2696 
2697    For a non-kernel function, the stack layout looks like this (interim),
2698    growing *upwards*:
2699 
2700  hi | + ...
2701     |__________________| <-- current SP
2702     | outgoing args    |
2703     |__________________|
2704     | (alloca space)   |
2705     |__________________|
2706     | local vars       |
2707     |__________________| <-- FP/hard FP
2708     | callee-save regs |
2709     |__________________| <-- soft arg pointer
2710     | pretend args     |
2711     |__________________| <-- incoming SP
2712     | incoming args    |
2713  lo |..................|
2714 
2715    This implies arguments (beyond the first N in registers) must grow
2716    downwards (as, apparently, PA has them do).
2717 
2718    For a kernel function we have the simpler:
2719 
2720  hi | + ...
2721     |__________________| <-- current SP
2722     | outgoing args    |
2723     |__________________|
2724     | (alloca space)   |
2725     |__________________|
2726     | local vars       |
2727  lo |__________________| <-- FP/hard FP
2728 
2729 */
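
/* A hypothetical example, to illustrate the arithmetic below: with no
   pretend args, 8 bytes of callee saves, 16 bytes of local vars and 32
   bytes of outgoing args, the prologue advances SP by 8 + 16 + 32 = 56
   and, when a frame pointer is needed, sets FP to SP - (16 + 32), i.e.
   just above the callee-save area.  */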
2730 
2731 void
2732 gcn_expand_prologue ()
2733 {
2734   machine_function *offsets = gcn_compute_frame_offsets ();
2735 
2736   if (!cfun || !cfun->machine || cfun->machine->normal_function)
2737     {
2738       rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2739       rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2740 
2741       start_sequence ();
2742 
2743       if (offsets->pretend_size > 0)
2744 	{
2745 	  /* FIXME: Do the actual saving of register pretend args to the stack.
2746 	     Register order needs consideration.  */
2747 	}
2748 
2749       /* Save callee-save regs.  */
2750       move_callee_saved_registers (sp, offsets, true);
2751 
2752       HOST_WIDE_INT sp_adjust = offsets->pretend_size
2753 	+ offsets->callee_saves
2754 	+ offsets->local_vars + offsets->outgoing_args_size;
2755       if (sp_adjust > 0)
2756 	emit_insn (gen_adddi3_scc (sp, sp, gen_int_mode (sp_adjust, DImode)));
2757 
2758       if (offsets->need_frame_pointer)
2759 	emit_insn (gen_adddi3_scc (fp, sp,
2760 				   gen_int_mode
2761 				   (-(offsets->local_vars +
2762 				      offsets->outgoing_args_size),
2763 				    DImode)));
2764 
2765       rtx_insn *seq = get_insns ();
2766       end_sequence ();
2767 
2768       /* FIXME: Prologue insns should have this flag set for debug output, etc.
2769 	 but it causes issues for now.
2770       for (insn = seq; insn; insn = NEXT_INSN (insn))
2771         if (INSN_P (insn))
2772 	  RTX_FRAME_RELATED_P (insn) = 1;*/
2773 
2774       emit_insn (seq);
2775     }
2776   else
2777     {
2778       rtx wave_offset = gen_rtx_REG (SImode,
2779 				     cfun->machine->args.
2780 				     reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]);
2781 
2782       if (TARGET_GCN5_PLUS)
2783 	{
2784 	  /* v0 is reserved for constant zero so that "global"
2785 	     memory instructions can have a nul-offset without
2786 	     causing reloads.  */
2787 	  emit_insn (gen_vec_duplicatev64si
2788 		     (gen_rtx_REG (V64SImode, VGPR_REGNO (0)), const0_rtx));
2789 	}
2790 
2791       if (cfun->machine->args.requested & (1 << FLAT_SCRATCH_INIT_ARG))
2792 	{
2793 	  rtx fs_init_lo =
2794 	    gen_rtx_REG (SImode,
2795 			 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG]);
2796 	  rtx fs_init_hi =
2797 	    gen_rtx_REG (SImode,
2798 			 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG] + 1);
2799 	  rtx fs_reg_lo = gen_rtx_REG (SImode, FLAT_SCRATCH_REG);
2800 	  rtx fs_reg_hi = gen_rtx_REG (SImode, FLAT_SCRATCH_REG + 1);
2801 
2802 	  /*rtx queue = gen_rtx_REG(DImode,
2803 				  cfun->machine->args.reg[QUEUE_PTR_ARG]);
2804 	  rtx aperture = gen_rtx_MEM (SImode,
2805 				      gen_rtx_PLUS (DImode, queue,
2806 						    gen_int_mode (68, SImode)));
2807 	  set_mem_addr_space (aperture, ADDR_SPACE_SCALAR_FLAT);*/
2808 
2809 	  /* Set up flat_scratch.  */
2810 	  emit_insn (gen_addsi3_scc (fs_reg_hi, fs_init_lo, wave_offset));
2811 	  emit_insn (gen_lshrsi3_scc (fs_reg_hi, fs_reg_hi,
2812 				      gen_int_mode (8, SImode)));
2813 	  emit_move_insn (fs_reg_lo, fs_init_hi);
2814 	}
2815 
2816       /* Set up frame pointer and stack pointer.  */
2817       rtx sp = gen_rtx_REG (DImode, STACK_POINTER_REGNUM);
2818       rtx fp = gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM);
2819       rtx fp_hi = simplify_gen_subreg (SImode, fp, DImode, 4);
2820       rtx fp_lo = simplify_gen_subreg (SImode, fp, DImode, 0);
2821 
2822       HOST_WIDE_INT sp_adjust = (offsets->local_vars
2823 				 + offsets->outgoing_args_size);
2824 
2825       /* Initialise FP and SP from the buffer descriptor in s[0:3].  */
2826       emit_move_insn (fp_lo, gen_rtx_REG (SImode, 0));
2827       emit_insn (gen_andsi3_scc (fp_hi, gen_rtx_REG (SImode, 1),
2828 				 gen_int_mode (0xffff, SImode)));
2829       rtx scc = gen_rtx_REG (BImode, SCC_REG);
2830       emit_insn (gen_addsi3_scalar_carry (fp_lo, fp_lo, wave_offset, scc));
2831       emit_insn (gen_addcsi3_scalar_zero (fp_hi, fp_hi, scc));
2832 
2833       if (sp_adjust > 0)
2834 	emit_insn (gen_adddi3_scc (sp, fp, gen_int_mode (sp_adjust, DImode)));
2835       else
2836 	emit_move_insn (sp, fp);
2837 
2838       /* Make sure the flat scratch reg doesn't get optimised away.  */
2839       emit_insn (gen_prologue_use (gen_rtx_REG (DImode, FLAT_SCRATCH_REG)));
2840     }
2841 
2842   /* Ensure that the scheduler doesn't do anything unexpected.  */
2843   emit_insn (gen_blockage ());
2844 
2845   emit_move_insn (gen_rtx_REG (SImode, M0_REG),
2846 		  gen_int_mode (LDS_SIZE, SImode));
2847 
2848   emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
2849   if (TARGET_GCN5_PLUS)
2850     emit_insn (gen_prologue_use (gen_rtx_REG (SImode, VGPR_REGNO (0))));
2851 
2852   if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
2853     {
2854       /* OpenMP kernels have an implicit call to gomp_gcn_enter_kernel.  */
2855       rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
2856       emit_move_insn (fn_reg, gen_rtx_SYMBOL_REF (Pmode,
2857 						  "gomp_gcn_enter_kernel"));
2858       emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
2859     }
2860 }
2861 
2862 /* Generate epilogue.  Called from gen_epilogue during pro_and_epilogue pass.
2863 
2864    See gcn_expand_prologue for stack details.  */
2865 
2866 void
2867 gcn_expand_epilogue (void)
2868 {
2869   /* Ensure that the scheduler doesn't do anything unexpected.  */
2870   emit_insn (gen_blockage ());
2871 
2872   if (!cfun || !cfun->machine || cfun->machine->normal_function)
2873     {
2874       machine_function *offsets = gcn_compute_frame_offsets ();
2875       rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2876       rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2877 
2878       HOST_WIDE_INT sp_adjust = offsets->callee_saves + offsets->pretend_size;
2879 
2880       if (offsets->need_frame_pointer)
2881 	{
2882 	  /* Restore old SP from the frame pointer.  */
2883 	  if (sp_adjust > 0)
2884 	    emit_insn (gen_subdi3 (sp, fp, gen_int_mode (sp_adjust, DImode)));
2885 	  else
2886 	    emit_move_insn (sp, fp);
2887 	}
2888       else
2889 	{
2890 	  /* Restore old SP from current SP.  */
2891 	  sp_adjust += offsets->outgoing_args_size + offsets->local_vars;
2892 
2893 	  if (sp_adjust > 0)
2894 	    emit_insn (gen_subdi3 (sp, sp, gen_int_mode (sp_adjust, DImode)));
2895 	}
2896 
2897       move_callee_saved_registers (sp, offsets, false);
2898 
2899       /* There's no explicit use of the link register on the return insn.  Emit
2900          one here instead.  */
2901       if (offsets->lr_needs_saving)
2902 	emit_use (gen_rtx_REG (DImode, LINK_REGNUM));
2903 
2904       /* Similar for frame pointer.  */
2905       if (offsets->need_frame_pointer)
2906 	emit_use (gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM));
2907     }
2908   else if (flag_openmp)
2909     {
2910       /* OpenMP kernels have an implicit call to gomp_gcn_exit_kernel.  */
2911       rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
2912       emit_move_insn (fn_reg,
2913 		      gen_rtx_SYMBOL_REF (Pmode, "gomp_gcn_exit_kernel"));
2914       emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
2915     }
2916   else if (TREE_CODE (TREE_TYPE (DECL_RESULT (cfun->decl))) != VOID_TYPE)
2917     {
2918       /* Assume that an exit value compatible with gcn-run is expected.
2919          That is, the third input parameter is an int*.
2920 
2921          We can't allocate any new registers, but the kernarg_reg is
2922          dead after this, so we'll use that.  */
2923       rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
2924 				     [KERNARG_SEGMENT_PTR_ARG]);
2925       rtx retptr_mem = gen_rtx_MEM (DImode,
2926 				    gen_rtx_PLUS (DImode, kernarg_reg,
2927 						  GEN_INT (16)));
2928       set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
2929       emit_move_insn (kernarg_reg, retptr_mem);
2930 
2931       rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
2932       set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
2933       emit_move_insn (retval_mem,
2934 		      gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
2935     }
2936 
2937   emit_jump_insn (gen_gcn_return ());
2938 }
2939 
2940 /* Implement TARGET_CAN_ELIMINATE.
2941 
2942    Return true if the compiler is allowed to try to replace register number
2943    FROM_REG with register number TO_REG.
2944 
2945    FIXME: is the default "true" not enough? Should this be a negative set?  */
2946 
2947 bool
2948 gcn_can_eliminate_p (int /*from_reg */ , int to_reg)
2949 {
2950   return (to_reg == HARD_FRAME_POINTER_REGNUM
2951 	  || to_reg == STACK_POINTER_REGNUM);
2952 }
2953 
2954 /* Implement INITIAL_ELIMINATION_OFFSET.
2955 
2956    Returns the initial difference between the specified pair of registers, in
2957    terms of stack position.  */
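
/* For instance, with the hypothetical frame used above (8 bytes of callee
   saves, 16 of local vars, 32 of outgoing args), eliminating the argument
   pointer to the stack pointer yields -(8 + 16 + 32) = -56, and to the
   frame pointer -8, since the stack grows upwards.  */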
2958 
2959 HOST_WIDE_INT
2960 gcn_initial_elimination_offset (int from, int to)
2961 {
2962   machine_function *offsets = gcn_compute_frame_offsets ();
2963 
2964   switch (from)
2965     {
2966     case ARG_POINTER_REGNUM:
2967       if (to == STACK_POINTER_REGNUM)
2968 	return -(offsets->callee_saves + offsets->local_vars
2969 		 + offsets->outgoing_args_size);
2970       else if (to == FRAME_POINTER_REGNUM || to == HARD_FRAME_POINTER_REGNUM)
2971 	return -offsets->callee_saves;
2972       else
2973 	gcc_unreachable ();
2974       break;
2975 
2976     case FRAME_POINTER_REGNUM:
2977       if (to == STACK_POINTER_REGNUM)
2978 	return -(offsets->local_vars + offsets->outgoing_args_size);
2979       else if (to == HARD_FRAME_POINTER_REGNUM)
2980 	return 0;
2981       else
2982 	gcc_unreachable ();
2983       break;
2984 
2985     default:
2986       gcc_unreachable ();
2987     }
2988 }
2989 
2990 /* Implement HARD_REGNO_RENAME_OK.
2991 
2992    Return true if it is permissible to rename a hard register from
2993    FROM_REG to TO_REG.  */
2994 
2995 bool
2996 gcn_hard_regno_rename_ok (unsigned int from_reg, unsigned int to_reg)
2997 {
2998   if (from_reg == SCC_REG
2999       || from_reg == VCC_LO_REG || from_reg == VCC_HI_REG
3000       || from_reg == EXEC_LO_REG || from_reg == EXEC_HI_REG
3001       || to_reg == SCC_REG
3002       || to_reg == VCC_LO_REG || to_reg == VCC_HI_REG
3003       || to_reg == EXEC_LO_REG || to_reg == EXEC_HI_REG)
3004     return false;
3005 
3006   /* Allow the link register to be used if it was saved.  */
3007   if ((to_reg & ~1) == LINK_REGNUM)
3008     return !cfun || cfun->machine->lr_needs_saving;
3009 
3010   /* Allow the registers used for the static chain to be used if the chain is
3011      not in active use.  */
3012   if ((to_reg & ~1) == STATIC_CHAIN_REGNUM)
3013     return !cfun
3014 	|| !(cfun->static_chain_decl
3015 	     && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
3016 	     && df_regs_ever_live_p (STATIC_CHAIN_REGNUM + 1));
3017 
3018   return true;
3019 }
3020 
3021 /* Implement HARD_REGNO_CALLER_SAVE_MODE.
3022 
3023    Which mode is required for saving NREGS of a pseudo-register in
3024    call-clobbered hard register REGNO.  */
3025 
3026 machine_mode
3027 gcn_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
3028 				 machine_mode regmode)
3029 {
3030   machine_mode result = choose_hard_reg_mode (regno, nregs, false);
3031 
3032   if (VECTOR_MODE_P (result) && !VECTOR_MODE_P (regmode))
3033     result = (nregs == 1 ? SImode : DImode);
3034 
3035   return result;
3036 }
3037 
3038 /* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.
3039 
3040    Output assembler code for a block containing the constant parts
3041    of a trampoline, leaving space for the variable parts.  */
3042 
3043 static void
3044 gcn_asm_trampoline_template (FILE *f)
3045 {
3046   /* The source operand of the move instructions must be a 32-bit
3047      constant following the opcode.  */
3048   asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM);
3049   asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM + 1);
3050   asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG);
3051   asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG + 1);
3052   asm_fprintf (f, "\ts_setpc_b64\ts[%i:%i]\n", CC_SAVE_REG, CC_SAVE_REG + 1);
3053 }
3054 
3055 /* Implement TARGET_TRAMPOLINE_INIT.
3056 
3057    Emit RTL insns to initialize the variable parts of a trampoline.
3058    FNDECL is the decl of the target address, M_TRAMP is a MEM for
3059    the trampoline, and CHAIN_VALUE is an RTX for the static chain
3060    to be passed to the target function.  */
3061 
3062 static void
3063 gcn_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3064 {
3065   if (TARGET_GCN5_PLUS)
3066     sorry ("nested function trampolines not supported on GCN5 due to"
3067            " non-executable stacks");
3068 
3069   emit_block_move (m_tramp, assemble_trampoline_template (),
3070 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3071 
3072   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3073   rtx chain_value_reg = copy_to_reg (chain_value);
3074   rtx fnaddr_reg = copy_to_reg (fnaddr);
3075 
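  /* Each s_mov_b32 in the template is a 4-byte opcode followed by a
     4-byte literal (the 0xffff placeholder), so the word to patch lives
     at offset i * 8 + 4.  The first two slots receive the static chain,
     the last two the target function's address.  */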
3076   for (int i = 0; i < 4; i++)
3077     {
3078       rtx mem = adjust_address (m_tramp, SImode, i * 8 + 4);
3079       rtx reg = i < 2 ? chain_value_reg : fnaddr_reg;
3080       emit_move_insn (mem, gen_rtx_SUBREG (SImode, reg, (i % 2) * 4));
3081     }
3082 
3083   rtx tramp_addr = XEXP (m_tramp, 0);
3084   emit_insn (gen_clear_icache (tramp_addr,
3085 			       plus_constant (ptr_mode, tramp_addr,
3086 					      TRAMPOLINE_SIZE)));
3087 }
3088 
3089 /* }}}  */
3090 /* {{{ Miscellaneous.  */
3091 
3092 /* Implement TARGET_CANNOT_COPY_INSN_P.
3093 
3094    Return true if INSN must not be duplicated.  */
3095 
3096 static bool
3097 gcn_cannot_copy_insn_p (rtx_insn *insn)
3098 {
3099   if (recog_memoized (insn) == CODE_FOR_gcn_wavefront_barrier)
3100     return true;
3101 
3102   return false;
3103 }
3104 
3105 /* Implement TARGET_DEBUG_UNWIND_INFO.
3106 
3107    Defines the mechanism that will be used for describing frame unwind
3108    information to the debugger.  */
3109 
3110 static enum unwind_info_type
3111 gcn_debug_unwind_info ()
3112 {
3113   /* No support for debug info, yet.  */
3114   return UI_NONE;
3115 }
3116 
3117 /* Determine if there is a suitable hardware conversion instruction.
3118    Used primarily by the machine description.  */
3119 
3120 bool
3121 gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op)
3122 {
3123   if (VECTOR_MODE_P (from) != VECTOR_MODE_P (to))
3124     return false;
3125 
3126   if (VECTOR_MODE_P (from))
3127     {
3128       from = GET_MODE_INNER (from);
3129       to = GET_MODE_INNER (to);
3130     }
3131 
3132   switch (op)
3133     {
3134     case fix_trunc_cvt:
3135     case fixuns_trunc_cvt:
3136       if (GET_MODE_CLASS (from) != MODE_FLOAT
3137 	  || GET_MODE_CLASS (to) != MODE_INT)
3138 	return false;
3139       break;
3140     case float_cvt:
3141     case floatuns_cvt:
3142       if (GET_MODE_CLASS (from) != MODE_INT
3143 	  || GET_MODE_CLASS (to) != MODE_FLOAT)
3144 	return false;
3145       break;
3146     case extend_cvt:
3147       if (GET_MODE_CLASS (from) != MODE_FLOAT
3148 	  || GET_MODE_CLASS (to) != MODE_FLOAT
3149 	  || GET_MODE_SIZE (from) >= GET_MODE_SIZE (to))
3150 	return false;
3151       break;
3152     case trunc_cvt:
3153       if (GET_MODE_CLASS (from) != MODE_FLOAT
3154 	  || GET_MODE_CLASS (to) != MODE_FLOAT
3155 	  || GET_MODE_SIZE (from) <= GET_MODE_SIZE (to))
3156 	return false;
3157       break;
3158     }
3159 
3160   return ((to == HImode && from == HFmode)
3161 	  || (to == SImode && (from == SFmode || from == DFmode))
3162 	  || (to == HFmode && (from == HImode || from == SFmode))
3163 	  || (to == SFmode && (from == SImode || from == HFmode
3164 			       || from == DFmode))
3165 	  || (to == DFmode && (from == SImode || from == SFmode)));
3166 }
3167 
3168 /* Implement both TARGET_ASM_CONSTRUCTOR and TARGET_ASM_DESTRUCTOR.
3169 
3170    The current loader does not support running code outside "main".  This
3171    hook implementation can be replaced or removed when that changes.  */
3172 
3173 void
3174 gcn_disable_constructors (rtx symbol, int priority __attribute__ ((unused)))
3175 {
3176   tree d = SYMBOL_REF_DECL (symbol);
3177   location_t l = d ? DECL_SOURCE_LOCATION (d) : UNKNOWN_LOCATION;
3178 
3179   sorry_at (l, "GCN does not support static constructors or destructors");
3180 }
3181 
3182 /* }}}  */
3183 /* {{{ Costs.  */
3184 
3185 /* Implement TARGET_RTX_COSTS.
3186 
3187    Compute a (partial) cost for rtx X.  Return true if the complete
3188    cost has been computed, and false if subexpressions should be
3189    scanned.  In either case, *TOTAL contains the cost result.  */
3190 
3191 static bool
3192 gcn_rtx_costs (rtx x, machine_mode, int, int, int *total, bool)
3193 {
3194   enum rtx_code code = GET_CODE (x);
3195   switch (code)
3196     {
3197     case CONST:
3198     case CONST_DOUBLE:
3199     case CONST_VECTOR:
3200     case CONST_INT:
3201       if (gcn_inline_constant_p (x))
3202 	*total = 0;
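      /* A value that fits a signed 16-bit immediate, i.e. one in
	 [-0x8000, 0x7fff]; e.g. 0x1234 + 0x8000 == 0x9234 < 0x10000,
	 whereas 0x8000 itself is rejected.  */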
3203       else if (code == CONST_INT
3204 	  && ((unsigned HOST_WIDE_INT) INTVAL (x) + 0x8000) < 0x10000)
3205 	*total = 1;
3206       else if (gcn_constant_p (x))
3207 	*total = 2;
3208       else
3209 	*total = vgpr_vector_mode_p (GET_MODE (x)) ? 64 : 4;
3210       return true;
3211 
3212     case DIV:
3213       *total = 100;
3214       return false;
3215 
3216     default:
3217       *total = 3;
3218       return false;
3219     }
3220 }
3221 
3222 /* Implement TARGET_MEMORY_MOVE_COST.
3223 
3224    Return the cost of moving data of mode M between a
3225    register and memory.  A value of 2 is the default; this cost is
3226    relative to those in `REGISTER_MOVE_COST'.
3227 
3228    This function is used extensively by register_move_cost, which is used to
3229    build tables at startup.  Make it inline in this case.
3230    When IN is 2, return the maximum of the in and out move costs.
3231 
3232    If moving between registers and memory is more expensive than
3233    between two registers, you should define this macro to express the
3234    relative cost.
3235 
3236    Also model the increased cost of moving QImode registers in
3237    non-Q_REGS classes.  */
3238 
3239 #define LOAD_COST  32
3240 #define STORE_COST 32
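/* For example, loading a V64SImode value (256 bytes, i.e. 64 words of 4
   bytes each) into VGPRs costs (LOAD_COST + 2) * 64 under the table
   below.  */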
3241 static int
3242 gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
3243 {
3244   int nregs = CEIL (GET_MODE_SIZE (mode), 4);
3245   switch (regclass)
3246     {
3247     case SCC_CONDITIONAL_REG:
3248     case VCCZ_CONDITIONAL_REG:
3249     case VCC_CONDITIONAL_REG:
3250     case EXECZ_CONDITIONAL_REG:
3251     case ALL_CONDITIONAL_REGS:
3252     case SGPR_REGS:
3253     case SGPR_EXEC_REGS:
3254     case EXEC_MASK_REG:
3255     case SGPR_VOP_SRC_REGS:
3256     case SGPR_MEM_SRC_REGS:
3257     case SGPR_SRC_REGS:
3258     case SGPR_DST_REGS:
3259     case GENERAL_REGS:
3260     case AFP_REGS:
3261       if (!in)
3262 	return (STORE_COST + 2) * nregs;
3263       return LOAD_COST * nregs;
3264     case VGPR_REGS:
3265       if (in)
3266 	return (LOAD_COST + 2) * nregs;
3267       return STORE_COST * nregs;
3268     case ALL_REGS:
3269     case ALL_GPR_REGS:
3270     case SRCDST_REGS:
3271       if (in)
3272 	return (LOAD_COST + 2) * nregs;
3273       return (STORE_COST + 2) * nregs;
3274     default:
3275       gcc_unreachable ();
3276     }
3277 }
3278 
3279 /* Implement TARGET_REGISTER_MOVE_COST.
3280 
3281    Return the cost of moving data from a register in class CLASS1 to
3282    one in class CLASS2.  Base value is 2.  */
3283 
3284 static int
3285 gcn_register_move_cost (machine_mode, reg_class_t dst, reg_class_t src)
3286 {
3287   /* Increase cost of moving from and to vector registers.  While this is
3288      fast in hardware (I think), it has a hidden cost of setting up the exec
3289      flags.  */
3290   if ((src < VGPR_REGS) != (dst < VGPR_REGS))
3291     return 4;
3292   return 2;
3293 }
3294 
3295 /* }}}  */
3296 /* {{{ Builtins.  */
3297 
3298 /* Type codes used by GCN built-in definitions.  */
3299 
3300 enum gcn_builtin_type_index
3301 {
3302   GCN_BTI_END_OF_PARAMS,
3303 
3304   GCN_BTI_VOID,
3305   GCN_BTI_BOOL,
3306   GCN_BTI_INT,
3307   GCN_BTI_UINT,
3308   GCN_BTI_SIZE_T,
3309   GCN_BTI_LLINT,
3310   GCN_BTI_LLUINT,
3311   GCN_BTI_EXEC,
3312 
3313   GCN_BTI_SF,
3314   GCN_BTI_V64SI,
3315   GCN_BTI_V64SF,
3316   GCN_BTI_V64PTR,
3317   GCN_BTI_SIPTR,
3318   GCN_BTI_SFPTR,
3319   GCN_BTI_VOIDPTR,
3320 
3321   GCN_BTI_LDS_VOIDPTR,
3322 
3323   GCN_BTI_MAX
3324 };
3325 
3326 static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX];
3327 
3328 #define exec_type_node (gcn_builtin_types[GCN_BTI_EXEC])
3329 #define sf_type_node (gcn_builtin_types[GCN_BTI_SF])
3330 #define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI])
3331 #define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF])
3332 #define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR])
3333 #define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR])
3334 #define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR])
3335 #define voidptr_type_node (gcn_builtin_types[GCN_BTI_VOIDPTR])
3336 #define size_t_type_node (gcn_builtin_types[GCN_BTI_SIZE_T])
3337 
3338 static rtx gcn_expand_builtin_1 (tree, rtx, rtx, machine_mode, int,
3339 				 struct gcn_builtin_description *);
3340 static rtx gcn_expand_builtin_binop (tree, rtx, rtx, machine_mode, int,
3341 				     struct gcn_builtin_description *);
3342 
3343 struct gcn_builtin_description;
3344 typedef rtx (*gcn_builtin_expander) (tree, rtx, rtx, machine_mode, int,
3345 				     struct gcn_builtin_description *);
3346 
3347 enum gcn_builtin_type
3348 {
3349   B_UNIMPLEMENTED,		/* Emit a "sorry" diagnostic */
3350   B_INSN,			/* Emit a pattern */
3351   B_OVERLOAD			/* Placeholder for an overloaded function */
3352 };
3353 
3354 struct gcn_builtin_description
3355 {
3356   int fcode;
3357   int icode;
3358   const char *name;
3359   enum gcn_builtin_type type;
3360   /* The first element of parm is always the return type.  The rest
3361      are a zero-terminated list of parameters.  */
3362   int parm[6];
3363   gcn_builtin_expander expander;
3364 };
3365 
3366 /* Read in the GCN builtins from gcn-builtins.def.  */
3367 
3368 extern GTY(()) struct gcn_builtin_description gcn_builtins[GCN_BUILTIN_MAX];
3369 
3370 struct gcn_builtin_description gcn_builtins[] = {
3371 #define DEF_BUILTIN(fcode, icode, name, type, params, expander)	\
3372   {GCN_BUILTIN_ ## fcode, icode, name, type, params, expander},
3373 
3374 #define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name)			\
3375   {GCN_BUILTIN_ ## fcode ## _V64SI,					\
3376    CODE_FOR_ ## ic ##v64si3_exec, name "_v64int", B_INSN,		\
3377    {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI,		\
3378     GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop},	\
3379   {GCN_BUILTIN_ ## fcode ## _V64SI_unspec,				\
3380    CODE_FOR_ ## ic ##v64si3_exec, name "_v64int_unspec", B_INSN, 	\
3381    {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI,		\
3382     GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop},
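
/* As a hypothetical illustration, DEF_BUILTIN_BINOP_INT_FP (ADD, add, "add")
   would expand to two entries that map __builtin_gcn_add_v64int and
   __builtin_gcn_add_v64int_unspec onto CODE_FOR_addv64si3_exec.  */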
3383 
3384 #include "gcn-builtins.def"
3385 #undef DEF_BUILTIN_BINOP_INT_FP
3386 #undef DEF_BUILTIN
3387 };
3388 
3389 static GTY(()) tree gcn_builtin_decls[GCN_BUILTIN_MAX];
3390 
3391 /* Implement TARGET_BUILTIN_DECL.
3392 
3393    Return the GCN builtin for CODE.  */
3394 
3395 tree
3396 gcn_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p))
3397 {
3398   if (code >= GCN_BUILTIN_MAX)
3399     return error_mark_node;
3400 
3401   return gcn_builtin_decls[code];
3402 }
3403 
3404 /* Helper function for gcn_init_builtins.  */
3405 
3406 static void
3407 gcn_init_builtin_types (void)
3408 {
3409   gcn_builtin_types[GCN_BTI_VOID] = void_type_node;
3410   gcn_builtin_types[GCN_BTI_BOOL] = boolean_type_node;
3411   gcn_builtin_types[GCN_BTI_INT] = intSI_type_node;
3412   gcn_builtin_types[GCN_BTI_UINT] = unsigned_type_for (intSI_type_node);
3413   gcn_builtin_types[GCN_BTI_SIZE_T] = size_type_node;
3414   gcn_builtin_types[GCN_BTI_LLINT] = intDI_type_node;
3415   gcn_builtin_types[GCN_BTI_LLUINT] = unsigned_type_for (intDI_type_node);
3416 
3417   exec_type_node = unsigned_intDI_type_node;
3418   sf_type_node = float32_type_node;
3419   v64si_type_node = build_vector_type (intSI_type_node, 64);
3420   v64sf_type_node = build_vector_type (float_type_node, 64);
3421   v64ptr_type_node = build_vector_type (unsigned_intDI_type_node
3422 					/*build_pointer_type
3423 					  (integer_type_node) */
3424 					, 64);
3425   tree tmp = build_distinct_type_copy (intSI_type_node);
3426   TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3427   siptr_type_node = build_pointer_type (tmp);
3428 
3429   tmp = build_distinct_type_copy (float_type_node);
3430   TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3431   sfptr_type_node = build_pointer_type (tmp);
3432 
3433   tmp = build_distinct_type_copy (void_type_node);
3434   TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3435   voidptr_type_node = build_pointer_type (tmp);
3436 
3437   tmp = build_distinct_type_copy (void_type_node);
3438   TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_LDS;
3439   gcn_builtin_types[GCN_BTI_LDS_VOIDPTR] = build_pointer_type (tmp);
3440 }
3441 
3442 /* Implement TARGET_INIT_BUILTINS.
3443 
3444    Set up all builtin functions for this target.  */
3445 
3446 static void
3447 gcn_init_builtins (void)
3448 {
3449   gcn_init_builtin_types ();
3450 
3451   struct gcn_builtin_description *d;
3452   unsigned int i;
3453   for (i = 0, d = gcn_builtins; i < GCN_BUILTIN_MAX; i++, d++)
3454     {
3455       tree p;
3456       char name[64];		/* build_function will make a copy.  */
3457       int parm;
3458 
3459       /* FIXME: Is this necessary/useful? */
3460       if (d->name == 0)
3461 	continue;
3462 
3463       /* Find last parm.  */
3464       for (parm = 1; d->parm[parm] != GCN_BTI_END_OF_PARAMS; parm++)
3465 	;
3466 
3467       p = void_list_node;
3468       while (parm > 1)
3469 	p = tree_cons (NULL_TREE, gcn_builtin_types[d->parm[--parm]], p);
3470 
3471       p = build_function_type (gcn_builtin_types[d->parm[0]], p);
3472 
3473       sprintf (name, "__builtin_gcn_%s", d->name);
3474       gcn_builtin_decls[i]
3475 	= add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
3476 
3477       /* These builtins don't throw.  */
3478       TREE_NOTHROW (gcn_builtin_decls[i]) = 1;
3479     }
3480 
3481 /* FIXME: remove the ifdef once OpenACC support is merged upstream.  */
3482 #ifdef BUILT_IN_GOACC_SINGLE_START
3483   /* These builtins need to take/return an LDS pointer: override the generic
3484      versions here.  */
3485 
3486   set_builtin_decl (BUILT_IN_GOACC_SINGLE_START,
3487 		    gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_START], false);
3488 
3489   set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_START,
3490 		    gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_START],
3491 		    false);
3492 
3493   set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_END,
3494 		    gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_END],
3495 		    false);
3496 
3497   set_builtin_decl (BUILT_IN_GOACC_BARRIER,
3498 		    gcn_builtin_decls[GCN_BUILTIN_ACC_BARRIER], false);
3499 #endif
3500 }
3501 
3502 /* Expand the CMP_SWAP GCN builtins.  We have our own versions that do
3503    not require taking the address of any object, other than the memory
3504    cell being operated on.
3505 
3506    Helper function for gcn_expand_builtin_1.  */
3507 
3508 static rtx
3509 gcn_expand_cmp_swap (tree exp, rtx target)
3510 {
3511   machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
3512   addr_space_t as
3513     = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (CALL_EXPR_ARG (exp, 0))));
3514   machine_mode as_mode = gcn_addr_space_address_mode (as);
3515 
3516   if (!target)
3517     target = gen_reg_rtx (mode);
3518 
3519   rtx addr = expand_expr (CALL_EXPR_ARG (exp, 0),
3520 			  NULL_RTX, as_mode, EXPAND_NORMAL);
3521   rtx cmp = expand_expr (CALL_EXPR_ARG (exp, 1),
3522 			 NULL_RTX, mode, EXPAND_NORMAL);
3523   rtx src = expand_expr (CALL_EXPR_ARG (exp, 2),
3524 			 NULL_RTX, mode, EXPAND_NORMAL);
3525   rtx pat;
3526 
3527   rtx mem = gen_rtx_MEM (mode, force_reg (as_mode, addr));
3528   set_mem_addr_space (mem, as);
3529 
3530   if (!REG_P (cmp))
3531     cmp = copy_to_mode_reg (mode, cmp);
3532   if (!REG_P (src))
3533     src = copy_to_mode_reg (mode, src);
3534 
3535   if (mode == SImode)
3536     pat = gen_sync_compare_and_swapsi (target, mem, cmp, src);
3537   else
3538     pat = gen_sync_compare_and_swapdi (target, mem, cmp, src);
3539 
3540   emit_insn (pat);
3541 
3542   return target;
3543 }
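/* In effect this behaves like __sync_val_compare_and_swap: the memory cell
   addressed by argument 0 is compared with argument 1 and overwritten with
   argument 2 if they match, and the original contents of the cell are
   returned.  The address space comes from the pointed-to type of
   argument 0.  */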
3544 
3545 /* Expand many different builtins.
3546 
3547    Intended for use in gcn-builtins.def.  */
3548 
3549 static rtx
3550 gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
3551 		      machine_mode /*mode */ , int ignore,
3552 		      struct gcn_builtin_description *)
3553 {
3554   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3555   switch (DECL_FUNCTION_CODE (fndecl))
3556     {
3557     case GCN_BUILTIN_FLAT_LOAD_INT32:
3558       {
3559 	if (ignore)
3560 	  return target;
3561 	/*rtx exec = */
3562 	force_reg (DImode,
3563 		   expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
3564 				EXPAND_NORMAL));
3565 	/*rtx ptr = */
3566 	force_reg (V64DImode,
3567 		   expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode,
3568 				EXPAND_NORMAL));
3569 	/*emit_insn (gen_vector_flat_loadv64si
3570 		     (target, gcn_gen_undef (V64SImode), ptr, exec)); */
3571 	return target;
3572       }
3573     case GCN_BUILTIN_FLAT_LOAD_PTR_INT32:
3574     case GCN_BUILTIN_FLAT_LOAD_PTR_FLOAT:
3575       {
3576 	if (ignore)
3577 	  return target;
3578 	rtx exec = force_reg (DImode,
3579 			      expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3580 					   DImode,
3581 					   EXPAND_NORMAL));
3582 	rtx ptr = force_reg (DImode,
3583 			     expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
3584 					  V64DImode,
3585 					  EXPAND_NORMAL));
3586 	rtx offsets = force_reg (V64SImode,
3587 				 expand_expr (CALL_EXPR_ARG (exp, 2),
3588 					      NULL_RTX, V64DImode,
3589 					      EXPAND_NORMAL));
3590 	rtx addrs = gen_reg_rtx (V64DImode);
3591 	rtx tmp = gen_reg_rtx (V64SImode);
3592 	emit_insn (gen_ashlv64si3_exec (tmp, offsets,
3593 					  GEN_INT (2),
3594 					  gcn_gen_undef (V64SImode), exec));
3595 	emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
3596 						 gcn_gen_undef (V64DImode),
3597 						 exec));
3598 	rtx mem = gen_rtx_MEM (GET_MODE (target), addrs);
3599 	/*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
3600 	/* FIXME: set attributes.  */
3601 	emit_insn (gen_mov_with_exec (target, mem, exec));
3602 	return target;
3603       }
3604     case GCN_BUILTIN_FLAT_STORE_PTR_INT32:
3605     case GCN_BUILTIN_FLAT_STORE_PTR_FLOAT:
3606       {
3607 	rtx exec = force_reg (DImode,
3608 			      expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3609 					   DImode,
3610 					   EXPAND_NORMAL));
3611 	rtx ptr = force_reg (DImode,
3612 			     expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
3613 					  V64DImode,
3614 					  EXPAND_NORMAL));
3615 	rtx offsets = force_reg (V64SImode,
3616 				 expand_expr (CALL_EXPR_ARG (exp, 2),
3617 					      NULL_RTX, V64DImode,
3618 					      EXPAND_NORMAL));
3619 	machine_mode vmode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp,
3620 								       3)));
3621 	rtx val = force_reg (vmode,
3622 			     expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
3623 					  vmode,
3624 					  EXPAND_NORMAL));
3625 	rtx addrs = gen_reg_rtx (V64DImode);
3626 	rtx tmp = gen_reg_rtx (V64SImode);
3627 	emit_insn (gen_ashlv64si3_exec (tmp, offsets,
3628 					  GEN_INT (2),
3629 					  gcn_gen_undef (V64SImode), exec));
3630 	emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
3631 						 gcn_gen_undef (V64DImode),
3632 						 exec));
3633 	rtx mem = gen_rtx_MEM (vmode, addrs);
3634 	/*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
3635 	/* FIXME: set attributes.  */
3636 	emit_insn (gen_mov_with_exec (mem, val, exec));
3637 	return target;
3638       }
3639     case GCN_BUILTIN_SQRTVF:
3640       {
3641 	if (ignore)
3642 	  return target;
3643 	rtx exec = gcn_full_exec_reg ();
3644 	rtx arg = force_reg (V64SFmode,
3645 			     expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3646 					  V64SFmode,
3647 					  EXPAND_NORMAL));
3648 	emit_insn (gen_sqrtv64sf2_exec
3649 		   (target, arg, gcn_gen_undef (V64SFmode), exec));
3650 	return target;
3651       }
3652     case GCN_BUILTIN_SQRTF:
3653       {
3654 	if (ignore)
3655 	  return target;
3656 	rtx arg = force_reg (SFmode,
3657 			     expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3658 					  SFmode,
3659 					  EXPAND_NORMAL));
3660 	emit_insn (gen_sqrtsf2 (target, arg));
3661 	return target;
3662       }
3663     case GCN_BUILTIN_OMP_DIM_SIZE:
3664       {
3665 	if (ignore)
3666 	  return target;
3667 	emit_insn (gen_oacc_dim_size (target,
3668 				      expand_expr (CALL_EXPR_ARG (exp, 0),
3669 						   NULL_RTX, SImode,
3670 						   EXPAND_NORMAL)));
3671 	return target;
3672       }
3673     case GCN_BUILTIN_OMP_DIM_POS:
3674       {
3675 	if (ignore)
3676 	  return target;
3677 	emit_insn (gen_oacc_dim_pos (target,
3678 				     expand_expr (CALL_EXPR_ARG (exp, 0),
3679 						  NULL_RTX, SImode,
3680 						  EXPAND_NORMAL)));
3681 	return target;
3682       }
3683     case GCN_BUILTIN_CMP_SWAP:
3684     case GCN_BUILTIN_CMP_SWAPLL:
3685       return gcn_expand_cmp_swap (exp, target);
3686 
3687     case GCN_BUILTIN_ACC_SINGLE_START:
3688       {
3689 	if (ignore)
3690 	  return target;
3691 
3692 	rtx wavefront = gcn_oacc_dim_pos (1);
3693 	rtx cond = gen_rtx_EQ (VOIDmode, wavefront, const0_rtx);
3694 	rtx cc = (target && REG_P (target)) ? target : gen_reg_rtx (BImode);
3695 	emit_insn (gen_cstoresi4 (cc, cond, wavefront, const0_rtx));
3696 	return cc;
3697       }
3698 
3699     case GCN_BUILTIN_ACC_SINGLE_COPY_START:
3700       {
3701 	rtx blk = force_reg (SImode,
3702 			     expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3703 					  SImode, EXPAND_NORMAL));
3704 	rtx wavefront = gcn_oacc_dim_pos (1);
3705 	rtx cond = gen_rtx_NE (VOIDmode, wavefront, const0_rtx);
3706 	rtx not_zero = gen_label_rtx ();
3707 	emit_insn (gen_cbranchsi4 (cond, wavefront, const0_rtx, not_zero));
3708 	emit_move_insn (blk, const0_rtx);
3709 	emit_label (not_zero);
3710 	return blk;
3711       }
3712 
3713     case GCN_BUILTIN_ACC_SINGLE_COPY_END:
3714       return target;
3715 
3716     case GCN_BUILTIN_ACC_BARRIER:
3717       emit_insn (gen_gcn_wavefront_barrier ());
3718       return target;
3719 
3720     default:
3721       gcc_unreachable ();
3722     }
3723 }
3724 
3725 /* Expansion of simple arithmetic and bit binary operation builtins.
3726 
3727    Intended for use with gcn_builtins table.  */
3728 
3729 static rtx
3730 gcn_expand_builtin_binop (tree exp, rtx target, rtx /*subtarget */ ,
3731 			  machine_mode /*mode */ , int ignore,
3732 			  struct gcn_builtin_description *d)
3733 {
3734   int icode = d->icode;
3735   if (ignore)
3736     return target;
3737 
3738   rtx exec = force_reg (DImode,
3739 			expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
3740 				     EXPAND_NORMAL));
3741 
3742   machine_mode m1 = insn_data[icode].operand[1].mode;
3743   rtx arg1 = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, m1,
3744 			  EXPAND_NORMAL);
3745   if (!insn_data[icode].operand[1].predicate (arg1, m1))
3746     arg1 = force_reg (m1, arg1);
3747 
3748   machine_mode m2 = insn_data[icode].operand[2].mode;
3749   rtx arg2 = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, m2,
3750 			  EXPAND_NORMAL);
3751   if (!insn_data[icode].operand[2].predicate (arg2, m2))
3752     arg2 = force_reg (m2, arg2);
3753 
3754   rtx arg_prev;
3755   if (call_expr_nargs (exp) == 4)
3756     {
3757       machine_mode m_prev = insn_data[icode].operand[4].mode;
3758       arg_prev = force_reg (m_prev,
3759 			    expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
3760 					 m_prev, EXPAND_NORMAL));
3761     }
3762   else
3763     arg_prev = gcn_gen_undef (GET_MODE (target));
3764 
3765   rtx pat = GEN_FCN (icode) (target, arg1, arg2, exec, arg_prev);
3766   emit_insn (pat);
3767   return target;
3768 }
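/* For example, a call to one of the binop builtins declared by
   DEF_BUILTIN_BINOP_INT_FP above passes the EXEC mask as argument 0 and the
   two vector operands as arguments 1 and 2; the optional argument 3 (the
   four-argument variant) provides the values used for masked-out lanes,
   which are otherwise left undefined.  */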
3769 
3770 /* Implement TARGET_EXPAND_BUILTIN.
3771 
3772    Expand an expression EXP that calls a built-in function, with result going
3773    to TARGET if that's convenient (and in mode MODE if that's convenient).
3774    SUBTARGET may be used as the target for computing one of EXP's operands.
3775    IGNORE is nonzero if the value is to be ignored.  */
3776 
3777 rtx
3778 gcn_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
3779 		    int ignore)
3780 {
3781   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3782   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
3783   struct gcn_builtin_description *d;
3784 
3785   gcc_assert (fcode < GCN_BUILTIN_MAX);
3786   d = &gcn_builtins[fcode];
3787 
3788   if (d->type == B_UNIMPLEMENTED)
3789     sorry ("Builtin not implemented");
3790 
3791   return d->expander (exp, target, subtarget, mode, ignore, d);
3792 }
3793 
3794 /* }}}  */
3795 /* {{{ Vectorization.  */
3796 
3797 /* Implement TARGET_VECTORIZE_GET_MASK_MODE.
3798 
3799    A vector mask is a value that holds one boolean result for every element in
3800    a vector.  */
3801 
3802 opt_machine_mode
3803 gcn_vectorize_get_mask_mode (poly_uint64 ARG_UNUSED (nunits),
3804 			     poly_uint64 ARG_UNUSED (length))
3805 {
3806   /* GCN uses a DImode bit-mask.  */
3807   return DImode;
3808 }
3809 
3810 /* Return an RTX that references a vector with the i-th lane containing
3811    PERM[i]*4.
3812 
3813    Helper function for gcn_vectorize_vec_perm_const.  */
3814 
3815 static rtx
3816 gcn_make_vec_perm_address (unsigned int *perm)
3817 {
3818   rtx x = gen_reg_rtx (V64SImode);
3819   emit_move_insn (x, gcn_vec_constant (V64SImode, 0));
3820 
3821   /* Permutation addresses use byte addressing.  With each vector lane being
3822      4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant,
3823      so only set those.
3824 
3825      The permutation given to the vec_perm* patterns ranges from 0 to 2N-1 to
3826      select between lanes in two vectors, but as the DS_BPERMUTE* instructions
3827      only take one source vector, the most-significant bit can be ignored
3828      here.  Instead, we can use EXEC masking to select the relevant part of
3829      each source vector after they are permuted separately.  */
3830   uint64_t bit_mask = 1 << 2;
3831   for (int i = 2; i < 8; i++, bit_mask <<= 1)
3832     {
3833       uint64_t exec_mask = 0;
3834       uint64_t lane_mask = 1;
3835       for (int j = 0; j < 64; j++, lane_mask <<= 1)
3836 	if ((perm[j] * 4) & bit_mask)
3837 	  exec_mask |= lane_mask;
3838 
3839       if (exec_mask)
3840 	emit_insn (gen_addv64si3_exec (x, x,
3841 				       gcn_vec_constant (V64SImode,
3842 							 bit_mask),
3843 				       x, get_exec (exec_mask)));
3844     }
3845 
3846   return x;
3847 }
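/* For instance, with the identity permutation (PERM[i] == i) lane i of the
   returned vector holds i*4, and a permutation that broadcasts lane 5
   leaves every lane holding 20.  Only byte offsets 0..252, in steps of 4,
   can be produced, matching the 64 4-byte lanes addressed by
   DS_BPERMUTE.  */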
3848 
3849 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.
3850 
3851    Return true if permutation with SEL is possible.
3852 
3853    If DST/SRC0/SRC1 are non-null, emit the instructions to perform the
3854    permutations.  */
3855 
3856 static bool
3857 gcn_vectorize_vec_perm_const (machine_mode vmode, rtx dst,
3858 			      rtx src0, rtx src1,
3859 			      const vec_perm_indices & sel)
3860 {
3861   unsigned int nelt = GET_MODE_NUNITS (vmode);
3862 
3863   gcc_assert (VECTOR_MODE_P (vmode));
3864   gcc_assert (nelt <= 64);
3865   gcc_assert (sel.length () == nelt);
3866 
3867   if (!dst)
3868     {
3869       /* All vector permutations are possible on this architecture,
3870          with varying degrees of efficiency depending on the permutation. */
3871       return true;
3872     }
3873 
3874   unsigned int perm[64];
3875   for (unsigned int i = 0; i < nelt; ++i)
3876     perm[i] = sel[i] & (2 * nelt - 1);
3877 
3878   /* Make life a bit easier by swapping operands if necessary so that
3879      the first element always comes from src0.  */
3880   if (perm[0] >= nelt)
3881     {
3882       rtx temp = src0;
3883       src0 = src1;
3884       src1 = temp;
3885 
3886       for (unsigned int i = 0; i < nelt; ++i)
3887 	if (perm[i] < nelt)
3888 	  perm[i] += nelt;
3889 	else
3890 	  perm[i] -= nelt;
3891     }
3892 
3893   /* TODO: There are more efficient ways to implement certain permutations
3894      using ds_swizzle_b32 and/or DPP.  Test for and expand them here, before
3895      this more inefficient generic approach is used.  */
3896 
3897   int64_t src1_lanes = 0;
3898   int64_t lane_bit = 1;
3899 
3900   for (unsigned int i = 0; i < nelt; ++i, lane_bit <<= 1)
3901     {
3902       /* Set the bits for lanes from src1.  */
3903       if (perm[i] >= nelt)
3904 	src1_lanes |= lane_bit;
3905     }
3906 
3907   rtx addr = gcn_make_vec_perm_address (perm);
3908   rtx (*ds_bpermute) (rtx, rtx, rtx, rtx);
3909 
3910   switch (vmode)
3911     {
3912     case E_V64QImode:
3913       ds_bpermute = gen_ds_bpermutev64qi;
3914       break;
3915     case E_V64HImode:
3916       ds_bpermute = gen_ds_bpermutev64hi;
3917       break;
3918     case E_V64SImode:
3919       ds_bpermute = gen_ds_bpermutev64si;
3920       break;
3921     case E_V64HFmode:
3922       ds_bpermute = gen_ds_bpermutev64hf;
3923       break;
3924     case E_V64SFmode:
3925       ds_bpermute = gen_ds_bpermutev64sf;
3926       break;
3927     case E_V64DImode:
3928       ds_bpermute = gen_ds_bpermutev64di;
3929       break;
3930     case E_V64DFmode:
3931       ds_bpermute = gen_ds_bpermutev64df;
3932       break;
3933     default:
3934       gcc_assert (false);
3935     }
3936 
3937   /* Load elements from src0 to dst.  */
3938   gcc_assert (~src1_lanes);
3939   emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ()));
3940 
3941   /* Load elements from src1 to dst.  */
3942   if (src1_lanes)
3943     {
3944       /* Masking a lane masks both the destination and source lanes for
3945          DS_BPERMUTE, so we need to have all lanes enabled for the permute,
3946          then add an extra masked move to merge the results of permuting
3947          the two source vectors together.
3948        */
3949       rtx tmp = gen_reg_rtx (vmode);
3950       emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ()));
3951       emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes)));
3952     }
3953 
3954   return true;
3955 }
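/* As an example, reversing a single V64SImode vector (SEL[i] == 63 - i)
   leaves src1_lanes zero, so only the first ds_bpermute is emitted; an
   interleave that draws half of its lanes from SRC1 additionally emits the
   second ds_bpermute and the masked move that merges the two results.  */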
3956 
3957 /* Implements TARGET_VECTOR_MODE_SUPPORTED_P.
3958 
3959    Return nonzero if vector MODE is supported with at least move
3960    instructions.  */
3961 
3962 static bool
3963 gcn_vector_mode_supported_p (machine_mode mode)
3964 {
3965   /* FIXME: Enable V64QImode and V64HImode.
3966 	    We should support these modes, but vector operations are usually
3967 	    assumed to automatically truncate types, and GCN does not.  We
3968 	    need to add explicit truncates and/or use SDWA for QI/HI insns.  */
3969   return (/* mode == V64QImode || mode == V64HImode
3970 	  ||*/ mode == V64SImode || mode == V64DImode
3971 	  || mode == V64SFmode || mode == V64DFmode);
3972 }
3973 
3974 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
3975 
3976    Enables autovectorization for all supported modes.  */
3977 
3978 static machine_mode
3979 gcn_vectorize_preferred_simd_mode (scalar_mode mode)
3980 {
3981   switch (mode)
3982     {
3983     case E_QImode:
3984       return V64QImode;
3985     case E_HImode:
3986       return V64HImode;
3987     case E_SImode:
3988       return V64SImode;
3989     case E_DImode:
3990       return V64DImode;
3991     case E_SFmode:
3992       return V64SFmode;
3993     case E_DFmode:
3994       return V64DFmode;
3995     default:
3996       return word_mode;
3997     }
3998 }
3999 
4000 /* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.
4001 
4002    Returns the preferred alignment in bits for accesses to vectors of type TYPE
4003    in vectorized code. This might be less than or greater than the ABI-defined
4004    value returned by TARGET_VECTOR_ALIGNMENT. It can be equal to the alignment
4005    of a single element, in which case the vectorizer will not try to optimize
4006    for alignment.  */
4007 
4008 static poly_uint64
4009 gcn_preferred_vector_alignment (const_tree type)
4010 {
4011   return TYPE_ALIGN (TREE_TYPE (type));
4012 }
4013 
4014 /* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT.
4015 
4016    Return true if the target supports misaligned vector store/load of a
4017    specific factor denoted in the misalignment parameter.  */
4018 
4019 static bool
4020 gcn_vectorize_support_vector_misalignment (machine_mode ARG_UNUSED (mode),
4021 					   const_tree type, int misalignment,
4022 					   bool is_packed)
4023 {
4024   if (is_packed)
4025     return false;
4026 
4027   /* If the misalignment is unknown, we should be able to handle the access
4028      so long as it is not to a member of a packed data structure.  */
4029   if (misalignment == -1)
4030     return true;
4031 
4032   /* Return true if the misalignment is a multiple of the natural alignment
4033      of the vector's element type.  This is probably always going to be
4034      true in practice, since we've already established that this isn't a
4035      packed access.  */
4036   return misalignment % TYPE_ALIGN_UNIT (type) == 0;
4037 }
4038 
4039 /* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.
4040 
4041    Return true if vector alignment is reachable (by peeling N iterations) for
4042    the given scalar type TYPE.  */
4043 
4044 static bool
4045 gcn_vector_alignment_reachable (const_tree ARG_UNUSED (type), bool is_packed)
4046 {
4047   /* Vectors which aren't in packed structures will not be less aligned than
4048      the natural alignment of their element type, so this is safe.  */
4049   return !is_packed;
4050 }
4051 
4052 /* Generate DPP instructions used for vector reductions.
4053 
4054    The opcode is given by INSN.
4055    The first operand of the operation is shifted right by SHIFT vector lanes.
4056    SHIFT must be a power of 2.  If SHIFT is 16, the 15th lane of each row is
4057    broadcast to the next row (thereby acting like a shift of 16 for the end of
4058    each row).  If SHIFT is 32, lane 31 is broadcast to all the
4059    following lanes (thereby acting like a shift of 32 for lane 63).  */
4060 
4061 char *
4062 gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
4063 			 int unspec, int shift)
4064 {
4065   static char buf[64];
4066   const char *dpp;
4067   const char *vcc_in = "";
4068   const char *vcc_out = "";
4069 
4070   /* Add the vcc operand if needed.  */
4071   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
4072     {
4073       if (unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
4074 	vcc_in = ", vcc";
4075 
4076       if (unspec == UNSPEC_PLUS_CARRY_DPP_SHR
4077 	  || unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
4078 	vcc_out = ", vcc";
4079     }
4080 
4081   /* Add the DPP modifiers.  */
4082   switch (shift)
4083     {
4084     case 1:
4085       dpp = "row_shr:1 bound_ctrl:0";
4086       break;
4087     case 2:
4088       dpp = "row_shr:2 bound_ctrl:0";
4089       break;
4090     case 4:
4091       dpp = "row_shr:4 bank_mask:0xe";
4092       break;
4093     case 8:
4094       dpp = "row_shr:8 bank_mask:0xc";
4095       break;
4096     case 16:
4097       dpp = "row_bcast:15 row_mask:0xa";
4098       break;
4099     case 32:
4100       dpp = "row_bcast:31 row_mask:0xc";
4101       break;
4102     default:
4103       gcc_unreachable ();
4104     }
4105 
4106   sprintf (buf, "%s\t%%0%s, %%1, %%2%s %s", insn, vcc_out, vcc_in, dpp);
4107 
4108   return buf;
4109 }
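/* For example, with SHIFT 1 and no carry involved the returned template has
   the form "<insn>\t%0, %1, %2 row_shr:1 bound_ctrl:0"; for an integer mode
   with UNSPEC_PLUS_CARRY_DPP_SHR it becomes
   "<insn>\t%0, vcc, %1, %2 row_shr:1 bound_ctrl:0".  The mnemonic itself is
   whatever the caller passes in INSN.  */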
4110 
4111 /* Generate vector reductions in terms of DPP instructions.
4112 
4113    The vector register SRC of mode MODE is reduced using the operation given
4114    by UNSPEC, and the scalar result is returned in lane 63 of a vector
4115    register.  */
4116 
4117 rtx
4118 gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
4119 {
4120   rtx tmp = gen_reg_rtx (mode);
4121   bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
4122 			&& GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4123 			&& (TARGET_GCN3 || mode == V64DImode);
4124 
4125   if (use_plus_carry)
4126     unspec = UNSPEC_PLUS_CARRY_DPP_SHR;
4127 
4128   /* Perform reduction by first performing the reduction operation on every
4129      pair of lanes, then on every pair of results from the previous
4130      iteration (thereby effectively reducing every 4 lanes) and so on until
4131      all lanes are reduced.  */
4132   for (int i = 0, shift = 1; i < 6; i++, shift <<= 1)
4133     {
4134       rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
4135       rtx insn = gen_rtx_SET (tmp,
4136 			      gen_rtx_UNSPEC (mode,
4137 					      gen_rtvec (3,
4138 							 src, src, shift_val),
4139 					      unspec));
4140 
4141       /* Add clobber for instructions that set the carry flags.  */
4142       if (use_plus_carry)
4143 	{
4144 	  rtx clobber = gen_rtx_CLOBBER (VOIDmode,
4145 					 gen_rtx_REG (DImode, VCC_REG));
4146 	  insn = gen_rtx_PARALLEL (VOIDmode,
4147 				   gen_rtvec (2, insn, clobber));
4148 	}
4149 
4150       emit_insn (insn);
4151 
4152       /* The source operands for every iteration after the first
4153 	   should be TMP.  */
4154       src = tmp;
4155     }
4156 
4157   return tmp;
4158 }
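/* Concretely, the loop emits six DPP operations with shifts of 1, 2, 4, 8,
   16 and 32 lanes: after the first step lane i combines lanes i-1 and i,
   after the second it also combines the partial result two lanes below, and
   so on, until lane 63 holds the reduction of all 64 lanes.  */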
4159 
4160 /* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST.  */
4161 
4162 int
4163 gcn_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost),
4164 			tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign))
4165 {
4166   /* Always vectorize.  */
4167   return 1;
4168 }
4169 
4170 /* }}}  */
4171 /* {{{ md_reorg pass.  */
4172 
4173 /* Identify VMEM instructions from their "type" attribute.  */
4174 
4175 static bool
4176 gcn_vmem_insn_p (attr_type type)
4177 {
4178   switch (type)
4179     {
4180     case TYPE_MUBUF:
4181     case TYPE_MTBUF:
4182     case TYPE_FLAT:
4183       return true;
4184     case TYPE_UNKNOWN:
4185     case TYPE_SOP1:
4186     case TYPE_SOP2:
4187     case TYPE_SOPK:
4188     case TYPE_SOPC:
4189     case TYPE_SOPP:
4190     case TYPE_SMEM:
4191     case TYPE_DS:
4192     case TYPE_VOP2:
4193     case TYPE_VOP1:
4194     case TYPE_VOPC:
4195     case TYPE_VOP3A:
4196     case TYPE_VOP3B:
4197     case TYPE_VOP_SDWA:
4198     case TYPE_VOP_DPP:
4199     case TYPE_MULT:
4200     case TYPE_VMULT:
4201       return false;
4202     }
4203   gcc_unreachable ();
4204   return false;
4205 }
4206 
4207 /* If INSN sets the EXEC register to a constant value, return the value,
4208    otherwise return zero.  */
4209 
4210 static int64_t
4211 gcn_insn_exec_value (rtx_insn *insn)
4212 {
4213   if (!NONDEBUG_INSN_P (insn))
4214     return 0;
4215 
4216   rtx pattern = PATTERN (insn);
4217 
4218   if (GET_CODE (pattern) == SET)
4219     {
4220       rtx dest = XEXP (pattern, 0);
4221       rtx src = XEXP (pattern, 1);
4222 
4223       if (GET_MODE (dest) == DImode
4224 	  && REG_P (dest) && REGNO (dest) == EXEC_REG
4225 	  && CONST_INT_P (src))
4226 	return INTVAL (src);
4227     }
4228 
4229   return 0;
4230 }
4231 
4232 /* Sets the EXEC register before INSN to the value that it had after
4233    LAST_EXEC_DEF.  The constant value of the EXEC register is returned if
4234    known, otherwise it returns zero.  */
4235 
4236 static int64_t
4237 gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec,
4238 		  bool curr_exec_known, bool &last_exec_def_saved)
4239 {
4240   rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
4241   rtx exec;
4242 
4243   int64_t exec_value = gcn_insn_exec_value (last_exec_def);
4244 
4245   if (exec_value)
4246     {
4247       /* If the EXEC value is a constant and it happens to be the same as the
4248          current EXEC value, the restore can be skipped.  */
4249       if (curr_exec_known && exec_value == curr_exec)
4250 	return exec_value;
4251 
4252       exec = GEN_INT (exec_value);
4253     }
4254   else
4255     {
4256       /* If the EXEC value is not a constant, save it in a register after the
4257 	 point of definition.  */
4258       rtx exec_save_reg = gen_rtx_REG (DImode, EXEC_SAVE_REG);
4259 
4260       if (!last_exec_def_saved)
4261 	{
4262 	  start_sequence ();
4263 	  emit_move_insn (exec_save_reg, exec_reg);
4264 	  rtx_insn *seq = get_insns ();
4265 	  end_sequence ();
4266 
4267 	  emit_insn_after (seq, last_exec_def);
4268 	  if (dump_file && (dump_flags & TDF_DETAILS))
4269 	    fprintf (dump_file, "Saving EXEC after insn %d.\n",
4270 		     INSN_UID (last_exec_def));
4271 
4272 	  last_exec_def_saved = true;
4273 	}
4274 
4275       exec = exec_save_reg;
4276     }
4277 
4278   /* Restore EXEC register before the usage.  */
4279   start_sequence ();
4280   emit_move_insn (exec_reg, exec);
4281   rtx_insn *seq = get_insns ();
4282   end_sequence ();
4283   emit_insn_before (seq, insn);
4284 
4285   if (dump_file && (dump_flags & TDF_DETAILS))
4286     {
4287       if (exec_value)
4288 	fprintf (dump_file, "Restoring EXEC to %ld before insn %d.\n",
4289 		 exec_value, INSN_UID (insn));
4290       else
4291 	fprintf (dump_file,
4292 		 "Restoring EXEC from saved value before insn %d.\n",
4293 		 INSN_UID (insn));
4294     }
4295 
4296   return exec_value;
4297 }
4298 
4299 /* Implement TARGET_MACHINE_DEPENDENT_REORG.
4300 
4301    Ensure that pipeline dependencies and lane masking are set correctly.  */
4302 
4303 static void
4304 gcn_md_reorg (void)
4305 {
4306   basic_block bb;
4307   rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
4308   rtx exec_lo_reg = gen_rtx_REG (SImode, EXEC_LO_REG);
4309   rtx exec_hi_reg = gen_rtx_REG (SImode, EXEC_HI_REG);
4310   regset_head live;
4311 
4312   INIT_REG_SET (&live);
4313 
4314   compute_bb_for_insn ();
4315 
4316   if (!optimize)
4317     {
4318       split_all_insns ();
4319       if (dump_file && (dump_flags & TDF_DETAILS))
4320 	{
4321 	  fprintf (dump_file, "After split:\n");
4322 	  print_rtl_with_bb (dump_file, get_insns (), dump_flags);
4323 	}
4324 
4325       /* Update data-flow information for split instructions.  */
4326       df_insn_rescan_all ();
4327     }
4328 
4329   df_analyze ();
4330 
4331   /* This pass ensures that the EXEC register is set correctly, according
4332      to the "exec" attribute.  However, care must be taken so that the
4333      value that reaches explicit uses of the EXEC register remains the
4334      same as before.
4335    */
4336 
4337   FOR_EACH_BB_FN (bb, cfun)
4338     {
4339       if (dump_file && (dump_flags & TDF_DETAILS))
4340 	fprintf (dump_file, "BB %d:\n", bb->index);
4341 
4342       rtx_insn *insn, *curr;
4343       rtx_insn *last_exec_def = BB_HEAD (bb);
4344       bool last_exec_def_saved = false;
4345       bool curr_exec_explicit = true;
4346       bool curr_exec_known = true;
4347       int64_t curr_exec = 0;	/* 0 here means 'the value is that of EXEC
4348 				   after last_exec_def is executed'.  */
4349 
4350       FOR_BB_INSNS_SAFE (bb, insn, curr)
4351 	{
4352 	  if (!NONDEBUG_INSN_P (insn))
4353 	    continue;
4354 
4355 	  if (GET_CODE (PATTERN (insn)) == USE
4356 	      || GET_CODE (PATTERN (insn)) == CLOBBER)
4357 	    continue;
4358 
4359 	  HARD_REG_SET defs, uses;
4360 	  CLEAR_HARD_REG_SET (defs);
4361 	  CLEAR_HARD_REG_SET (uses);
4362 	  note_stores (PATTERN (insn), record_hard_reg_sets, &defs);
4363 	  note_uses (&PATTERN (insn), record_hard_reg_uses, &uses);
4364 
4365 	  bool exec_lo_def_p = TEST_HARD_REG_BIT (defs, EXEC_LO_REG);
4366 	  bool exec_hi_def_p = TEST_HARD_REG_BIT (defs, EXEC_HI_REG);
4367 	  bool exec_used = (hard_reg_set_intersect_p
4368 			    (uses, reg_class_contents[(int) EXEC_MASK_REG])
4369 			    || TEST_HARD_REG_BIT (uses, EXECZ_REG));
4370 
4371 	  /* Check the instruction for implicit setting of EXEC via an
4372 	     attribute.  */
4373 	  attr_exec exec_attr = get_attr_exec (insn);
4374 	  int64_t new_exec;
4375 
4376 	  switch (exec_attr)
4377 	    {
4378 	    case EXEC_NONE:
4379 	      new_exec = 0;
4380 	      break;
4381 
4382 	    case EXEC_SINGLE:
4383 	      /* Instructions that do not involve memory accesses only require
4384 		 bit 0 of EXEC to be set.  */
4385 	      if (gcn_vmem_insn_p (get_attr_type (insn))
4386 		  || get_attr_type (insn) == TYPE_DS)
4387 		new_exec = 1;
4388 	      else
4389 		new_exec = curr_exec | 1;
4390 	      break;
4391 
4392 	    case EXEC_FULL:
4393 	      new_exec = -1;
4394 	      break;
4395 
4396 	    default:  /* Auto-detect what setting is appropriate.  */
4397 	      {
4398 	        new_exec = 0;
4399 
4400 		/* If EXEC is referenced explicitly then we don't need to do
4401 		   anything to set it, so we're done.  */
4402 		if (exec_used)
4403 		  break;
4404 
4405 		/* Scan the insn for VGPRs defs or uses.  The mode determines
4406 		   what kind of exec is needed.  */
4407 		subrtx_iterator::array_type array;
4408 		FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
4409 		  {
4410 		    const_rtx x = *iter;
4411 		    if (REG_P (x) && VGPR_REGNO_P (REGNO (x)))
4412 		      {
4413 			if (VECTOR_MODE_P (GET_MODE (x)))
4414 			  {
4415 			    new_exec = -1;
4416 			    break;
4417 			  }
4418 			else
4419 			  new_exec = 1;
4420 		      }
4421 		  }
4422 	        }
4423 	      break;
4424 	    }
4425 
4426 	  if (new_exec && (!curr_exec_known || new_exec != curr_exec))
4427 	    {
4428 	      start_sequence ();
4429 	      emit_move_insn (exec_reg, GEN_INT (new_exec));
4430 	      rtx_insn *seq = get_insns ();
4431 	      end_sequence ();
4432 	      emit_insn_before (seq, insn);
4433 
4434 	      if (dump_file && (dump_flags & TDF_DETAILS))
4435 		fprintf (dump_file, "Setting EXEC to %ld before insn %d.\n",
4436 			 new_exec, INSN_UID (insn));
4437 
4438 	      curr_exec = new_exec;
4439 	      curr_exec_explicit = false;
4440 	      curr_exec_known = true;
4441 	    }
4442 	  else if (new_exec && dump_file && (dump_flags & TDF_DETAILS))
4443 	    {
4444 	      fprintf (dump_file, "Exec already is %ld before insn %d.\n",
4445 		       new_exec, INSN_UID (insn));
4446 	    }
4447 
4448 	  /* The state of the EXEC register is unknown after a
4449 	     function call.  */
4450 	  if (CALL_P (insn))
4451 	    curr_exec_known = false;
4452 
4453 	  /* Handle explicit uses of EXEC.  If the instruction is a partial
4454 	     explicit definition of EXEC, then treat it as an explicit use of
4455 	     EXEC as well.  */
4456 	  if (exec_used || exec_lo_def_p != exec_hi_def_p)
4457 	    {
4458 	      /* An instruction that explicitly uses EXEC should not also
4459 		 implicitly define it.  */
4460 	      gcc_assert (!exec_used || !new_exec);
4461 
4462 	      if (!curr_exec_known || !curr_exec_explicit)
4463 		{
4464 		  /* Restore the previous explicitly defined value.  */
4465 		  curr_exec = gcn_restore_exec (insn, last_exec_def,
4466 						curr_exec, curr_exec_known,
4467 						last_exec_def_saved);
4468 		  curr_exec_explicit = true;
4469 		  curr_exec_known = true;
4470 		}
4471 	    }
4472 
4473 	  /* Handle explicit definitions of EXEC.  */
4474 	  if (exec_lo_def_p || exec_hi_def_p)
4475 	    {
4476 	      last_exec_def = insn;
4477 	      last_exec_def_saved = false;
4478 	      curr_exec = gcn_insn_exec_value (insn);
4479 	      curr_exec_explicit = true;
4480 	      curr_exec_known = true;
4481 
4482 	      if (dump_file && (dump_flags & TDF_DETAILS))
4483 		fprintf (dump_file,
4484 			 "Found %s definition of EXEC at insn %d.\n",
4485 			 exec_lo_def_p == exec_hi_def_p ? "full" : "partial",
4486 			 INSN_UID (insn));
4487 	    }
4488 	}
4489 
4490       COPY_REG_SET (&live, DF_LR_OUT (bb));
4491       df_simulate_initialize_backwards (bb, &live);
4492 
4493       /* If EXEC is live after the basic block, restore the value of EXEC
4494 	 at the end of the block.  */
4495       if ((REGNO_REG_SET_P (&live, EXEC_LO_REG)
4496 	   || REGNO_REG_SET_P (&live, EXEC_HI_REG))
4497 	  && (!curr_exec_known || !curr_exec_explicit))
4498 	{
4499 	  rtx_insn *end_insn = BB_END (bb);
4500 
4501 	  /* If the instruction is not a jump instruction, do the restore
4502 	     after the last instruction in the basic block.  */
4503 	  if (NONJUMP_INSN_P (end_insn))
4504 	    end_insn = NEXT_INSN (end_insn);
4505 
4506 	  gcn_restore_exec (end_insn, last_exec_def, curr_exec,
4507 			    curr_exec_known, last_exec_def_saved);
4508 	}
4509     }
4510 
4511   CLEAR_REG_SET (&live);
4512 
4513   /* "Manually Inserted Wait States (NOPs)."
4514 
4515      GCN hardware detects most kinds of register dependencies, but there
4516      are some exceptions documented in the ISA manual.  This pass
4517      detects the missed cases, and inserts the documented number of NOPs
4518      required for correct execution.  */
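  /* For example, if a VALU instruction writes an SGPR and the very next
     instruction is a FLAT load reading that SGPR, nothing has aged the
     dependency, so the scan below inserts the full five NOPs before the
     load.  */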
4519 
4520   const int max_waits = 5;
4521   struct ilist
4522   {
4523     rtx_insn *insn;
4524     attr_unit unit;
4525     HARD_REG_SET writes;
4526     int age;
4527   } back[max_waits];
4528   int oldest = 0;
4529   for (int i = 0; i < max_waits; i++)
4530     back[i].insn = NULL;
4531 
4532   rtx_insn *insn, *last_insn = NULL;
4533   for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
4534     {
4535       if (!NONDEBUG_INSN_P (insn))
4536 	continue;
4537 
4538       if (GET_CODE (PATTERN (insn)) == USE
4539 	  || GET_CODE (PATTERN (insn)) == CLOBBER)
4540 	continue;
4541 
4542       attr_type itype = get_attr_type (insn);
4543       attr_unit iunit = get_attr_unit (insn);
4544       HARD_REG_SET ireads, iwrites;
4545       CLEAR_HARD_REG_SET (ireads);
4546       CLEAR_HARD_REG_SET (iwrites);
4547       note_stores (PATTERN (insn), record_hard_reg_sets, &iwrites);
4548       note_uses (&PATTERN (insn), record_hard_reg_uses, &ireads);
4549 
4550       /* Scan recent previous instructions for dependencies not handled in
4551          hardware.  */
4552       int nops_rqd = 0;
4553       for (int i = oldest; i < oldest + max_waits; i++)
4554 	{
4555 	  struct ilist *prev_insn = &back[i % max_waits];
4556 
4557 	  if (!prev_insn->insn)
4558 	    continue;
4559 
4560 	  /* VALU writes SGPR followed by VMEM reading the same SGPR
4561 	     requires 5 wait states.  */
4562 	  if ((prev_insn->age + nops_rqd) < 5
4563 	      && prev_insn->unit == UNIT_VECTOR
4564 	      && gcn_vmem_insn_p (itype))
4565 	    {
4566 	      HARD_REG_SET regs;
4567 	      COPY_HARD_REG_SET (regs, prev_insn->writes);
4568 	      AND_HARD_REG_SET (regs, ireads);
4569 	      if (hard_reg_set_intersect_p
4570 		  (regs, reg_class_contents[(int) SGPR_REGS]))
4571 		nops_rqd = 5 - prev_insn->age;
4572 	    }
4573 
4574 	  /* VALU sets VCC/EXEC followed by VALU uses VCCZ/EXECZ
4575 	     requires 5 wait states.  */
4576 	  if ((prev_insn->age + nops_rqd) < 5
4577 	      && prev_insn->unit == UNIT_VECTOR
4578 	      && iunit == UNIT_VECTOR
4579 	      && ((hard_reg_set_intersect_p
4580 		   (prev_insn->writes,
4581 		    reg_class_contents[(int) EXEC_MASK_REG])
4582 		   && TEST_HARD_REG_BIT (ireads, EXECZ_REG))
4583 		  ||
4584 		  (hard_reg_set_intersect_p
4585 		   (prev_insn->writes,
4586 		    reg_class_contents[(int) VCC_CONDITIONAL_REG])
4587 		   && TEST_HARD_REG_BIT (ireads, VCCZ_REG))))
4588 	    nops_rqd = 5 - prev_insn->age;
4589 
4590 	  /* VALU writes SGPR/VCC followed by v_{read,write}lane using
4591 	     SGPR/VCC as lane select requires 4 wait states.  */
4592 	  if ((prev_insn->age + nops_rqd) < 4
4593 	      && prev_insn->unit == UNIT_VECTOR
4594 	      && get_attr_laneselect (insn) == LANESELECT_YES)
4595 	    {
4596 	      HARD_REG_SET regs;
4597 	      COPY_HARD_REG_SET (regs, prev_insn->writes);
4598 	      AND_HARD_REG_SET (regs, ireads);
4599 	      if (hard_reg_set_intersect_p
4600 		  (regs, reg_class_contents[(int) SGPR_REGS])
4601 		  || hard_reg_set_intersect_p
4602 		     (regs, reg_class_contents[(int) VCC_CONDITIONAL_REG]))
4603 		nops_rqd = 4 - prev_insn->age;
4604 	    }
4605 
4606 	  /* VALU writes VGPR followed by VALU_DPP reading that VGPR
4607 	     requires 2 wait states.  */
4608 	  if ((prev_insn->age + nops_rqd) < 2
4609 	      && prev_insn->unit == UNIT_VECTOR
4610 	      && itype == TYPE_VOP_DPP)
4611 	    {
4612 	      HARD_REG_SET regs;
4613 	      COPY_HARD_REG_SET (regs, prev_insn->writes);
4614 	      AND_HARD_REG_SET (regs, ireads);
4615 	      if (hard_reg_set_intersect_p
4616 		  (regs, reg_class_contents[(int) VGPR_REGS]))
4617 		nops_rqd = 2 - prev_insn->age;
4618 	    }
4619 	}
4620 
4621       /* Insert the required number of NOPs.  */
4622       for (int i = nops_rqd; i > 0; i--)
4623 	emit_insn_after (gen_nop (), last_insn);
4624 
4625       /* Age the previous instructions.  We can also ignore writes to
4626          registers subsequently overwritten.  */
4627       HARD_REG_SET written;
4628       CLEAR_HARD_REG_SET (written);
4629       for (int i = oldest + max_waits - 1; i > oldest; i--)
4630 	{
4631 	  struct ilist *prev_insn = &back[i % max_waits];
4632 
4633 	  /* Assume all instructions are equivalent to one "wait", the same
4634 	     as s_nop.  This is probably true for SALU, but not VALU (which
4635 	     may take longer), so this is not optimal.  However, AMD do
4636 	     not publish the cycle times for instructions.  */
4637 	  prev_insn->age += 1 + nops_rqd;
4638 
4639 	  IOR_HARD_REG_SET (written, iwrites);
4640 	  AND_COMPL_HARD_REG_SET (prev_insn->writes, written);
4641 	}
4642 
4643       /* Track the current instruction as a previous instruction.  */
4644       back[oldest].insn = insn;
4645       back[oldest].unit = iunit;
4646       COPY_HARD_REG_SET (back[oldest].writes, iwrites);
4647       back[oldest].age = 0;
4648       oldest = (oldest + 1) % max_waits;
4649 
4650       last_insn = insn;
4651     }
4652 }
4653 
4654 /* }}}  */
4655 /* {{{ OpenACC / OpenMP.  */
4656 
4657 #define GCN_DEFAULT_GANGS 0	/* Choose at runtime.  */
4658 #define GCN_DEFAULT_WORKERS 0	/* Choose at runtime.  */
4659 #define GCN_DEFAULT_VECTORS 1	/* Use autovectorization only, for now.  */
4660 
4661 /* Implement TARGET_GOACC_VALIDATE_DIMS.
4662 
4663    Check the launch dimensions provided for an OpenACC compute
4664    region, or routine.  */
4665 
4666 static bool
4667 gcn_goacc_validate_dims (tree decl, int dims[], int fn_level,
4668 			 unsigned /*used*/)
4669 {
4670   bool changed = false;
4671 
4672   /* FIXME: remove -facc-experimental-workers when they're ready.  */
4673   int max_workers = flag_worker_partitioning ? 16 : 1;
4674 
4675   /* The vector size must appear to the user to be 64, unless this is a
4676      SEQ routine.  The real, internal value is always 1, which means use
4677      autovectorization, but the user should not see that.  */
4678   if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1
4679       && dims[GOMP_DIM_VECTOR] >= 0)
4680     {
4681       if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0
4682 	  && dims[GOMP_DIM_VECTOR] != 64)
4683 	warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
4684 		    OPT_Wopenacc_dims,
4685 		    (dims[GOMP_DIM_VECTOR]
4686 		     ? G_("using vector_length (64), ignoring %d")
4687 		     : G_("using vector_length (64), "
4688 			  "ignoring runtime setting")),
4689 		    dims[GOMP_DIM_VECTOR]);
4690       dims[GOMP_DIM_VECTOR] = 1;
4691       changed = true;
4692     }
4693 
4694   /* Check the num workers is not too large.  */
4695   if (dims[GOMP_DIM_WORKER] > max_workers)
4696     {
4697       warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
4698 		  OPT_Wopenacc_dims,
4699 		  "using num_workers (%d), ignoring %d",
4700 		  max_workers, dims[GOMP_DIM_WORKER]);
4701       dims[GOMP_DIM_WORKER] = max_workers;
4702       changed = true;
4703     }
4704 
4705   /* Set global defaults.  */
4706   if (!decl)
4707     {
4708       dims[GOMP_DIM_VECTOR] = GCN_DEFAULT_VECTORS;
4709       if (dims[GOMP_DIM_WORKER] < 0)
4710 	dims[GOMP_DIM_WORKER] = (flag_worker_partitioning
4711 				 ? GCN_DEFAULT_WORKERS : 1);
4712       if (dims[GOMP_DIM_GANG] < 0)
4713 	dims[GOMP_DIM_GANG] = GCN_DEFAULT_GANGS;
4714       changed = true;
4715     }
4716 
4717   return changed;
4718 }
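/* For instance, an OpenACC compute region with an explicit
   vector_length(32) clause receives the "using vector_length (64),
   ignoring 32" warning and has its vector dimension forced to the internal
   value 1, while num_workers(32) without -facc-experimental-workers is
   clamped to a single worker.  */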
4719 
4720 /* Helper function for oacc_dim_size instruction.
4721    Also used for OpenMP, via builtin_gcn_dim_size, and the omp_gcn pass.  */
4722 
4723 rtx
4724 gcn_oacc_dim_size (int dim)
4725 {
4726   if (dim < 0 || dim > 2)
4727     error ("offload dimension out of range (%d)", dim);
4728 
4729   /* Vectors are a special case.  */
4730   if (dim == 2)
4731     return const1_rtx;		/* Think of this as 1 times 64.  */
4732 
4733   static int offset[] = {
4734     /* Offsets into dispatch packet.  */
4735     12,				/* X dim = Gang / Team / Work-group.  */
4736     20,				/* Z dim = Worker / Thread / Wavefront.  */
4737     16				/* Y dim = Vector / SIMD / Work-item.  */
4738   };
4739   rtx addr = gen_rtx_PLUS (DImode,
4740 			   gen_rtx_REG (DImode,
4741 					cfun->machine->args.
4742 					reg[DISPATCH_PTR_ARG]),
4743 			   GEN_INT (offset[dim]));
4744   return gen_rtx_MEM (SImode, addr);
4745 }
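/* For example, gcn_oacc_dim_size (0) yields a SImode MEM that reads the
   32-bit field at byte offset 12 of the dispatch packet addressed by the
   DISPATCH_PTR argument register, whereas dimension 2 is simply the
   constant 1 (one vector of 64 lanes, handled by autovectorization).  */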
4746 
4747 /* Helper function for oacc_dim_pos instruction.
4748    Also used for OpenMP, via builtin_gcn_dim_pos, and the omp_gcn pass.  */
4749 
4750 rtx
4751 gcn_oacc_dim_pos (int dim)
4752 {
4753   if (dim < 0 || dim > 2)
4754     error ("offload dimension out of range (%d)", dim);
4755 
4756   static const int reg[] = {
4757     WORKGROUP_ID_X_ARG,		/* Gang / Team / Work-group.  */
4758     WORK_ITEM_ID_Z_ARG,		/* Worker / Thread / Wavefront.  */
4759     WORK_ITEM_ID_Y_ARG		/* Vector / SIMD / Work-item.  */
4760   };
4761 
4762   int reg_num = cfun->machine->args.reg[reg[dim]];
4763 
4764   /* The information must have been requested by the kernel.  */
4765   gcc_assert (reg_num >= 0);
4766 
4767   return gen_rtx_REG (SImode, reg_num);
4768 }
4769 
4770 /* Implement TARGET_GOACC_FORK_JOIN.  */
4771 
4772 static bool
4773 gcn_fork_join (gcall *ARG_UNUSED (call), const int *ARG_UNUSED (dims),
4774 	       bool ARG_UNUSED (is_fork))
4775 {
4776   /* GCN does not use the fork/join concept invented for NVPTX.
4777      Instead we use standard autovectorization.  */
4778   return false;
4779 }
4780 
4781 /* Implement ???????
4782    FIXME make this a real hook.
4783 
4784    Adjust FNDECL such that options inherited from the host compiler
4785    are made appropriate for the accelerator compiler.  */
4786 
4787 void
4788 gcn_fixup_accel_lto_options (tree fndecl)
4789 {
4790   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4791   if (!func_optimize)
4792     return;
4793 
4794   tree old_optimize = build_optimization_node (&global_options);
4795   tree new_optimize;
4796 
4797   /* If the function changed the optimization levels as well as
4798      setting target options, start with the optimizations
4799      specified.  */
4800   if (func_optimize != old_optimize)
4801     cl_optimization_restore (&global_options,
4802 			     TREE_OPTIMIZATION (func_optimize));
4803 
4804   gcn_option_override ();
4805 
4806   /* The target attributes may also change some optimization flags,
4807      so update the optimization options if necessary.  */
4808   new_optimize = build_optimization_node (&global_options);
4809 
4810   if (old_optimize != new_optimize)
4811     {
4812       DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4813       cl_optimization_restore (&global_options,
4814 			       TREE_OPTIMIZATION (old_optimize));
4815     }
4816 }
4817 
4818 /* }}}  */
4819 /* {{{ ASM Output.  */
4820 
4821 /*  Implement TARGET_ASM_FILE_START.
4822 
4823     Print assembler file header text.  */
4824 
4825 static void
4826 output_file_start (void)
4827 {
4828   fprintf (asm_out_file, "\t.text\n");
4829   fprintf (asm_out_file, "\t.hsa_code_object_version 2,0\n");
4830   fprintf (asm_out_file, "\t.hsa_code_object_isa\n");	/* Autodetect.  */
4831   fprintf (asm_out_file, "\t.section\t.AMDGPU.config\n");
4832   fprintf (asm_out_file, "\t.text\n");
4833 }
4834 
4835 /* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.
4836 
4837    Print the initial definition of a function name.
4838 
4839    For GCN kernel entry points this includes all the HSA meta-data, special
4840    alignment constraints that don't apply to regular functions, and magic
4841    comments that pass information to mkoffload.  */
4842 
4843 void
4844 gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
4845 {
4846   int sgpr, vgpr;
4847   bool xnack_enabled = false;
4848   int extra_regs = 0;
4849 
4850   if (cfun && cfun->machine && cfun->machine->normal_function)
4851     {
4852       fputs ("\t.type\t", file);
4853       assemble_name (file, name);
4854       fputs (",@function\n", file);
4855       assemble_name (file, name);
4856       fputs (":\n", file);
4857       return;
4858     }
4859 
4860   /* Determine count of sgpr/vgpr registers by looking for last
4861      one used.  */
4862   for (sgpr = 101; sgpr >= 0; sgpr--)
4863     if (df_regs_ever_live_p (FIRST_SGPR_REG + sgpr))
4864       break;
4865   sgpr++;
4866   for (vgpr = 255; vgpr >= 0; vgpr--)
4867     if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
4868       break;
4869   vgpr++;
4870 
4871   if (xnack_enabled)
4872     extra_regs = 6;
4873   if (df_regs_ever_live_p (FLAT_SCRATCH_LO_REG)
4874       || df_regs_ever_live_p (FLAT_SCRATCH_HI_REG))
4875     extra_regs = 4;
4876   else if (df_regs_ever_live_p (VCC_LO_REG)
4877 	   || df_regs_ever_live_p (VCC_HI_REG))
4878     extra_regs = 2;
4879 
4880   if (!leaf_function_p ())
4881     {
4882       /* We can't know how many registers function calls might use.  */
4883       if (vgpr < 64)
4884 	vgpr = 64;
4885       if (sgpr + extra_regs < 102)
4886 	sgpr = 102 - extra_regs;
4887     }
4888 
4889   fputs ("\t.align\t256\n", file);
4890   fputs ("\t.type\t", file);
4891   assemble_name (file, name);
4892   fputs (",@function\n\t.amdgpu_hsa_kernel\t", file);
4893   assemble_name (file, name);
4894   fputs ("\n", file);
4895   assemble_name (file, name);
4896   fputs (":\n", file);
4897   fprintf (file, "\t.amd_kernel_code_t\n"
4898 	   "\t\tkernel_code_version_major = 1\n"
4899 	   "\t\tkernel_code_version_minor = 0\n" "\t\tmachine_kind = 1\n"
4900 	   /* "\t\tmachine_version_major = 8\n"
4901 	      "\t\tmachine_version_minor = 0\n"
4902 	      "\t\tmachine_version_stepping = 1\n" */
4903 	   "\t\tkernel_code_entry_byte_offset = 256\n"
4904 	   "\t\tkernel_code_prefetch_byte_size = 0\n"
4905 	   "\t\tmax_scratch_backing_memory_byte_size = 0\n"
4906 	   "\t\tcompute_pgm_rsrc1_vgprs = %i\n"
4907 	   "\t\tcompute_pgm_rsrc1_sgprs = %i\n"
4908 	   "\t\tcompute_pgm_rsrc1_priority = 0\n"
4909 	   "\t\tcompute_pgm_rsrc1_float_mode = 192\n"
4910 	   "\t\tcompute_pgm_rsrc1_priv = 0\n"
4911 	   "\t\tcompute_pgm_rsrc1_dx10_clamp = 1\n"
4912 	   "\t\tcompute_pgm_rsrc1_debug_mode = 0\n"
4913 	   "\t\tcompute_pgm_rsrc1_ieee_mode = 1\n"
4914 	   /* We enable scratch memory.  */
4915 	   "\t\tcompute_pgm_rsrc2_scratch_en = 1\n"
4916 	   "\t\tcompute_pgm_rsrc2_user_sgpr = %i\n"
4917 	   "\t\tcompute_pgm_rsrc2_tgid_x_en = 1\n"
4918 	   "\t\tcompute_pgm_rsrc2_tgid_y_en = 0\n"
4919 	   "\t\tcompute_pgm_rsrc2_tgid_z_en = 0\n"
4920 	   "\t\tcompute_pgm_rsrc2_tg_size_en = 0\n"
4921 	   "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = 0\n"
4922 	   "\t\tcompute_pgm_rsrc2_excp_en_msb = 0\n"
4923 	   "\t\tcompute_pgm_rsrc2_lds_size = 0\n"	/* Set at runtime.  */
4924 	   "\t\tcompute_pgm_rsrc2_excp_en = 0\n",
4925 	   (vgpr - 1) / 4,
4926 	   /* Must match wavefront_sgpr_count */
4927 	   (sgpr + extra_regs + 7) / 8 - 1,
4928 	   /* The total number of SGPR user data registers requested.  This
4929 	      number must match the number of user data registers enabled.  */
4930 	   cfun->machine->args.nsgprs);
4931   int reg = FIRST_SGPR_REG;
4932   for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
4933     {
4934       int reg_first = -1;
4935       int reg_last;
4936       if ((cfun->machine->args.requested & (1 << a))
4937 	  && (gcn_kernel_arg_types[a].fixed_regno < 0))
4938 	{
4939 	  reg_first = reg;
4940 	  reg_last = (reg_first
4941 		      + (GET_MODE_SIZE (gcn_kernel_arg_types[a].mode)
4942 			 / UNITS_PER_WORD) - 1);
4943 	  reg = reg_last + 1;
4944 	}
4945 
4946       if (gcn_kernel_arg_types[a].header_pseudo)
4947 	{
4948 	  fprintf (file, "\t\t%s = %i",
4949 		   gcn_kernel_arg_types[a].header_pseudo,
4950 		   (cfun->machine->args.requested & (1 << a)) != 0);
4951 	  if (reg_first != -1)
4952 	    {
4953 	      fprintf (file, " ; (");
4954 	      for (int i = reg_first; i <= reg_last; ++i)
4955 		{
4956 		  if (i != reg_first)
4957 		    fprintf (file, ", ");
4958 		  fprintf (file, "%s", reg_names[i]);
4959 		}
4960 	      fprintf (file, ")");
4961 	    }
4962 	  fprintf (file, "\n");
4963 	}
4964       else if (gcn_kernel_arg_types[a].fixed_regno >= 0
4965 	       && cfun->machine->args.requested & (1 << a))
4966 	fprintf (file, "\t\t; %s = %i (%s)\n",
4967 		 gcn_kernel_arg_types[a].name,
4968 		 (cfun->machine->args.requested & (1 << a)) != 0,
4969 		 reg_names[gcn_kernel_arg_types[a].fixed_regno]);
4970     }
4971   fprintf (file, "\t\tenable_vgpr_workitem_id = %i\n",
4972 	   (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Z_ARG))
4973 	   ? 2
4974 	   : cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG)
4975 	   ? 1 : 0);
4976   fprintf (file, "\t\tenable_ordered_append_gds = 0\n"
4977 	   "\t\tprivate_element_size = 1\n"
4978 	   "\t\tis_ptr64 = 1\n"
4979 	   "\t\tis_dynamic_callstack = 0\n"
4980 	   "\t\tis_debug_enabled = 0\n"
4981 	   "\t\tis_xnack_enabled = %i\n"
4982 	   "\t\tworkitem_private_segment_byte_size = %i\n"
4983 	   "\t\tworkgroup_group_segment_byte_size = %u\n"
4984 	   "\t\tgds_segment_byte_size = 0\n"
4985 	   "\t\tkernarg_segment_byte_size = %i\n"
4986 	   "\t\tworkgroup_fbarrier_count = 0\n"
4987 	   "\t\twavefront_sgpr_count = %i\n"
4988 	   "\t\tworkitem_vgpr_count = %i\n"
4989 	   "\t\treserved_vgpr_first = 0\n"
4990 	   "\t\treserved_vgpr_count = 0\n"
4991 	   "\t\treserved_sgpr_first = 0\n"
4992 	   "\t\treserved_sgpr_count = 0\n"
4993 	   "\t\tdebug_wavefront_private_segment_offset_sgpr = 0\n"
4994 	   "\t\tdebug_private_segment_buffer_sgpr = 0\n"
4995 	   "\t\tkernarg_segment_alignment = %i\n"
4996 	   "\t\tgroup_segment_alignment = 4\n"
4997 	   "\t\tprivate_segment_alignment = %i\n"
4998 	   "\t\twavefront_size = 6\n"
4999 	   "\t\tcall_convention = 0\n"
5000 	   "\t\truntime_loader_kernel_symbol = 0\n"
5001 	   "\t.end_amd_kernel_code_t\n", xnack_enabled,
5002 	   /* workitem_private_segment_byte_size needs to be
5003 	      one 64th the wave-front stack size.  */
5004 	   stack_size_opt / 64,
5005 	   LDS_SIZE, cfun->machine->kernarg_segment_byte_size,
5006 	   /* Number of scalar registers used by a wavefront.  This
5007 	      includes the special SGPRs for VCC, Flat Scratch (Base,
5008 	      Size) and XNACK (for GFX8 (VI)+).  It does not include the
5009 	      16 SGPR added if a trap handler is enabled.  Must match
5010 	      compute_pgm_rsrc1.sgprs.  */
5011 	   sgpr + extra_regs, vgpr,
5012 	   cfun->machine->kernarg_segment_alignment,
5013 	   crtl->stack_alignment_needed / 8);
5014 
5015   /* This comment is read by mkoffload.  */
5016   if (flag_openacc)
5017     fprintf (file, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n",
5018 	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_GANG),
5019 	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_WORKER),
5020 	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_VECTOR), name);
5021 }
5022 
5023 /* Implement TARGET_ASM_SELECT_SECTION.
5024 
5025    Return the section into which EXP should be placed.  */
5026 
5027 static section *
5028 gcn_asm_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
5029 {
5030   if (TREE_TYPE (exp) != error_mark_node
5031       && TYPE_ADDR_SPACE (TREE_TYPE (exp)) == ADDR_SPACE_LDS)
5032     {
5033       if (!DECL_P (exp))
5034 	return get_section (".lds_bss",
5035 			    SECTION_WRITE | SECTION_BSS | SECTION_DEBUG,
5036 			    NULL);
5037 
5038       return get_named_section (exp, ".lds_bss", reloc);
5039     }
5040 
5041   return default_elf_select_section (exp, reloc, align);
5042 }
5043 
5044 /* Implement TARGET_ASM_FUNCTION_PROLOGUE.
5045 
5046    Emits custom text into the assembler file at the head of each function.  */
5047 
5048 static void
5049 gcn_target_asm_function_prologue (FILE *file)
5050 {
5051   machine_function *offsets = gcn_compute_frame_offsets ();
5052 
5053   asm_fprintf (file, "\t; using %s addressing in function\n",
5054 	       offsets->use_flat_addressing ? "flat" : "global");
5055 
5056   if (offsets->normal_function)
5057     {
5058       asm_fprintf (file, "\t; frame pointer needed: %s\n",
5059 		   offsets->need_frame_pointer ? "true" : "false");
5060       asm_fprintf (file, "\t; lr needs saving: %s\n",
5061 		   offsets->lr_needs_saving ? "true" : "false");
5062       asm_fprintf (file, "\t; outgoing args size: %wd\n",
5063 		   offsets->outgoing_args_size);
5064       asm_fprintf (file, "\t; pretend size: %wd\n", offsets->pretend_size);
5065       asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
5066       asm_fprintf (file, "\t; callee save size: %wd\n",
5067 		   offsets->callee_saves);
5068     }
5069   else
5070     {
5071       asm_fprintf (file, "\t; HSA kernel entry point\n");
5072       asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
5073       asm_fprintf (file, "\t; outgoing args size: %wd\n",
5074 		   offsets->outgoing_args_size);
5075 
5076       /* Enable denorms.  */
5077       asm_fprintf (file, "\n\t; Set MODE[FP_DENORM]: allow single and double"
5078 		   " input and output denorms\n");
5079       asm_fprintf (file, "\ts_setreg_imm32_b32\thwreg(1, 4, 4), 0xf\n\n");
5080     }
5081 }
5082 
5083 /* Helper function for print_operand and print_operand_address.
5084 
5085    Print a register as the assembler requires, according to mode and name.  */
5086 
5087 static void
5088 print_reg (FILE *file, rtx x)
5089 {
5090   machine_mode mode = GET_MODE (x);
5091   if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
5092       || mode == HFmode || mode == SFmode
5093       || mode == V64SFmode || mode == V64SImode
5094       || mode == V64QImode || mode == V64HImode)
5095     fprintf (file, "%s", reg_names[REGNO (x)]);
5096   else if (mode == DImode || mode == V64DImode
5097 	   || mode == DFmode || mode == V64DFmode)
5098     {
5099       if (SGPR_REGNO_P (REGNO (x)))
5100 	fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
5101 		 REGNO (x) - FIRST_SGPR_REG + 1);
5102       else if (VGPR_REGNO_P (REGNO (x)))
5103 	fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
5104 		 REGNO (x) - FIRST_VGPR_REG + 1);
5105       else if (REGNO (x) == FLAT_SCRATCH_REG)
5106 	fprintf (file, "flat_scratch");
5107       else if (REGNO (x) == EXEC_REG)
5108 	fprintf (file, "exec");
5109       else if (REGNO (x) == VCC_LO_REG)
5110 	fprintf (file, "vcc");
5111       else
5112 	fprintf (file, "[%s:%s]",
5113 		 reg_names[REGNO (x)], reg_names[REGNO (x) + 1]);
5114     }
5115   else if (mode == TImode)
5116     {
5117       if (SGPR_REGNO_P (REGNO (x)))
5118 	fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
5119 		 REGNO (x) - FIRST_SGPR_REG + 3);
5120       else if (VGPR_REGNO_P (REGNO (x)))
5121 	fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
5122 		 REGNO (x) - FIRST_VGPR_REG + 3);
5123       else
5124 	gcc_unreachable ();
5125     }
5126   else
5127     gcc_unreachable ();
5128 }
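
/* Illustration only (assuming the usual "sN"/"vN" entries in reg_names): a
   DImode or DFmode value in the SGPR pair starting at s4 prints as "s[4:5]",
   the same value in a VGPR pair starting at v6 as "v[6:7]", and a TImode
   value spanning v8..v11 as "v[8:11]".  Single-register modes simply print
   the register's name from reg_names[].  */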
5129 
5130 /* Implement TARGET_SECTION_TYPE_FLAGS.
5131 
5132    Return a set of section attributes for use by TARGET_ASM_NAMED_SECTION.  */
5133 
5134 static unsigned int
5135 gcn_section_type_flags (tree decl, const char *name, int reloc)
5136 {
5137   if (strcmp (name, ".lds_bss") == 0)
5138     return SECTION_WRITE | SECTION_BSS | SECTION_DEBUG;
5139 
5140   return default_section_type_flags (decl, name, reloc);
5141 }
5142 
5143 /* Helper function for gcn_asm_output_symbol_ref.
5144 
5145    FIXME: If we want to have propagation blocks allocated separately and
5146    statically like this, it would be better done via symbol refs and the
5147    assembler/linker.  This is a temporary hack.  */
5148 
5149 static void
5150 gcn_print_lds_decl (FILE *f, tree var)
5151 {
5152   int *offset;
5153   machine_function *machfun = cfun->machine;
5154 
5155   if ((offset = machfun->lds_allocs->get (var)))
5156     fprintf (f, "%u", (unsigned) *offset);
5157   else
5158     {
5159       unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (var);
5160       tree type = TREE_TYPE (var);
5161       unsigned HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
5162       if (size > align && size > 4 && align < 8)
5163 	align = 8;
5164 
5165       machfun->lds_allocated = ((machfun->lds_allocated + align - 1)
5166 				& ~(align - 1));
5167 
5168       machfun->lds_allocs->put (var, machfun->lds_allocated);
5169       fprintf (f, "%u", machfun->lds_allocated);
5170       machfun->lds_allocated += size;
5171       if (machfun->lds_allocated > LDS_SIZE)
5172 	error ("local data-share memory exhausted");
5173     }
5174 }
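
/* Worked example of the allocation above (numbers are hypothetical): with
   lds_allocated == 33 and an 8-byte-aligned, 12-byte variable, the round-up
   (33 + 8 - 1) & ~(8 - 1) gives offset 40; "40" is printed for the symbol
   and lds_allocated advances to 52 for the next allocation.  */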
5175 
5176 /* Implement ASM_OUTPUT_SYMBOL_REF via gcn-hsa.h.  */
5177 
5178 void
5179 gcn_asm_output_symbol_ref (FILE *file, rtx x)
5180 {
5181   tree decl;
5182   if ((decl = SYMBOL_REF_DECL (x)) != 0
5183       && TREE_CODE (decl) == VAR_DECL
5184       && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
5185     {
5186       /* LDS symbols (emitted using this hook) are only used at present
5187          to propagate worker values from an active thread to neutered
5188          threads.  Use the same offset for each such block, but don't
5189          use zero because null pointers are used to identify the active
5190          thread in GOACC_single_copy_start calls.  */
5191       gcn_print_lds_decl (file, decl);
5192     }
5193   else
5194     {
5195       assemble_name (file, XSTR (x, 0));
5196       /* FIXME: See above -- this condition is unreachable.  */
5197       if ((decl = SYMBOL_REF_DECL (x)) != 0
5198 	  && TREE_CODE (decl) == VAR_DECL
5199 	  && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
5200 	fputs ("@abs32", file);
5201     }
5202 }
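
/* The net effect is that an LDS variable reference is emitted as its numeric
   byte offset within the workgroup's LDS (computed by gcn_print_lds_decl
   above) rather than as a symbol name; all other symbols go through
   assemble_name as usual.  */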
5203 
5204 /* Implement TARGET_CONSTANT_ALIGNMENT.
5205 
5206    Returns the alignment in bits of a constant that is being placed in memory.
5207    CONSTANT is the constant and BASIC_ALIGN is the alignment that the object
5208    would ordinarily have.  */
5209 
5210 static HOST_WIDE_INT
5211 gcn_constant_alignment (const_tree ARG_UNUSED (constant),
5212 			HOST_WIDE_INT basic_align)
5213 {
5214   return basic_align > 128 ? basic_align : 128;
5215 }
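
/* For example, a constant that would ordinarily get 32- or 64-bit alignment
   is promoted to 128 bits here, while a request for 256 bits or more is
   returned unchanged.  */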
5216 
5217 /* Implement PRINT_OPERAND_ADDRESS via gcn.h.  */
5218 
5219 void
5220 print_operand_address (FILE *file, rtx mem)
5221 {
5222   gcc_assert (MEM_P (mem));
5223 
5224   rtx reg;
5225   rtx offset;
5226   addr_space_t as = MEM_ADDR_SPACE (mem);
5227   rtx addr = XEXP (mem, 0);
5228   gcc_assert (REG_P (addr) || GET_CODE (addr) == PLUS);
5229 
5230   if (AS_SCRATCH_P (as))
5231     switch (GET_CODE (addr))
5232       {
5233       case REG:
5234 	print_reg (file, addr);
5235 	break;
5236 
5237       case PLUS:
5238 	reg = XEXP (addr, 0);
5239 	offset = XEXP (addr, 1);
5240 	print_reg (file, reg);
5241 	if (GET_CODE (offset) == CONST_INT)
5242 	  fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5243 	else
5244 	  abort ();
5245 	break;
5246 
5247       default:
5248 	debug_rtx (addr);
5249 	abort ();
5250       }
5251   else if (AS_ANY_FLAT_P (as))
5252     {
5253       if (GET_CODE (addr) == REG)
5254 	print_reg (file, addr);
5255       else
5256 	{
5257 	  gcc_assert (TARGET_GCN5_PLUS);
5258 	  print_reg (file, XEXP (addr, 0));
5259 	}
5260     }
5261   else if (AS_GLOBAL_P (as))
5262     {
5263       gcc_assert (TARGET_GCN5_PLUS);
5264 
5265       rtx base = addr;
5266       rtx vgpr_offset = NULL_RTX;
5267 
5268       if (GET_CODE (addr) == PLUS)
5269 	{
5270 	  base = XEXP (addr, 0);
5271 
5272 	  if (GET_CODE (base) == PLUS)
5273 	    {
5274 	      /* (SGPR + VGPR) + CONST  */
5275 	      vgpr_offset = XEXP (base, 1);
5276 	      base = XEXP (base, 0);
5277 	    }
5278 	  else
5279 	    {
5280 	      rtx offset = XEXP (addr, 1);
5281 
5282 	      if (REG_P (offset))
5283 		/* SGPR + VGPR  */
5284 		vgpr_offset = offset;
5285 	      else if (CONST_INT_P (offset))
5286 		/* VGPR + CONST or SGPR + CONST  */
5287 		;
5288 	      else
5289 		output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5290 	    }
5291 	}
5292 
5293       if (REG_P (base))
5294 	{
5295 	  if (VGPR_REGNO_P (REGNO (base)))
5296 	    print_reg (file, base);
5297 	  else if (SGPR_REGNO_P (REGNO (base)))
5298 	    {
5299 	      /* The assembler requires a 64-bit VGPR pair here, even though
5300 	         the offset should be only 32-bit.  */
5301 	      if (vgpr_offset == NULL_RTX)
5302 		/* In this case, the vector offset is zero, so we use v0,
5303 		   which is initialized by the kernel prologue to zero.  */
5304 		fprintf (file, "v[0:1]");
5305 	      else if (REG_P (vgpr_offset)
5306 		       && VGPR_REGNO_P (REGNO (vgpr_offset)))
5307 		{
5308 		  fprintf (file, "v[%d:%d]",
5309 			   REGNO (vgpr_offset) - FIRST_VGPR_REG,
5310 			   REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
5311 		}
5312 	      else
5313 		output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5314 	    }
5315 	}
5316       else
5317 	output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5318     }
5319   else if (AS_ANY_DS_P (as))
5320     switch (GET_CODE (addr))
5321       {
5322       case REG:
5323 	print_reg (file, addr);
5324 	break;
5325 
5326       case PLUS:
5327 	reg = XEXP (addr, 0);
5328 	print_reg (file, reg);
5329 	break;
5330 
5331       default:
5332 	debug_rtx (addr);
5333 	abort ();
5334       }
5335   else
5336     switch (GET_CODE (addr))
5337       {
5338       case REG:
5339 	print_reg (file, addr);
5340 	fprintf (file, ", 0");
5341 	break;
5342 
5343       case PLUS:
5344 	reg = XEXP (addr, 0);
5345 	offset = XEXP (addr, 1);
5346 	print_reg (file, reg);
5347 	fprintf (file, ", ");
5348 	if (GET_CODE (offset) == REG)
5349 	  print_reg (file, offset);
5350 	else if (GET_CODE (offset) == CONST_INT)
5351 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5352 	else
5353 	  abort ();
5354 	break;
5355 
5356       default:
5357 	debug_rtx (addr);
5358 	abort ();
5359       }
5360 }
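
/* Illustrative outputs (register numbers are hypothetical): a scratch
   address (plus (reg s6) (const_int 16)) prints as "s6 offset:16"; a global
   address with an SGPR base and no VGPR offset prints the placeholder pair
   "v[0:1]" (v0 is zeroed in the kernel prologue); an LDS/GDS address prints
   just its address register, with any constant offset emitted separately by
   the %O code in print_operand.  */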
5361 
5362 /* Implement PRINT_OPERAND via gcn.h.
5363 
5364    b - print operand size as untyped operand (b8/b16/b32/b64)
5365    B - print operand size as SI/DI untyped operand (b32/b32/b32/b64)
5366    i - print operand size as signed integer operand (i8/i16/i32/i64)
5367    u - print operand size as unsigned integer operand (u8/u16/u32/u64)
5368    o - print operand size as memory access size for loads
5369        (ubyte/ushort/dword/dwordx2/dwordx3/dwordx4)
5370    s - print operand size as memory access size for stores
5371        (byte/short/dword/dwordx2/dwordx3/dwordx4/dwordx8/dwordx16)
5372    C - print conditional code for s_cbranch (_sccz/_sccnz/_vccz/_vccnz...)
5373    c - print inverse conditional code for s_cbranch
5374    D - print conditional code for s_cmp (eq_u64/lg_u64...)
5375    E - print conditional code for v_cmp (eq_u64/ne_u64...)
5376    A - print the address in a format suitable for its address space.
5377    O - print offset:n for data share and global memory operations.
5378    ^ - print "_co" suffix for GCN5 mnemonics
5379    g - print "glc", if appropriate for given MEM
5380  */
5381 
5382 void
5383 print_operand (FILE *file, rtx x, int code)
5384 {
5385   int xcode = x ? GET_CODE (x) : 0;
5386   bool invert = false;
5387   switch (code)
5388     {
5389       /* Instructions have the following suffixes.
5390          If there are two suffixes, the first is the destination type,
5391 	 and the second is the source type.
5392 
5393          B32 Bitfield (untyped data) 32-bit
5394          B64 Bitfield (untyped data) 64-bit
5395          F16 floating-point 16-bit
5396          F32 floating-point 32-bit (IEEE 754 single-precision float)
5397          F64 floating-point 64-bit (IEEE 754 double-precision float)
5398          I16 signed 16-bit integer
5399          I32 signed 32-bit integer
5400          I64 signed 64-bit integer
5401          U16 unsigned 16-bit integer
5402          U32 unsigned 32-bit integer
5403          U64 unsigned 64-bit integer  */
5404 
5405       /* Print operand size as untyped suffix.  */
5406     case 'b':
5407       {
5408 	const char *s = "";
5409 	machine_mode mode = GET_MODE (x);
5410 	if (VECTOR_MODE_P (mode))
5411 	  mode = GET_MODE_INNER (mode);
5412 	switch (GET_MODE_SIZE (mode))
5413 	  {
5414 	  case 1:
5415 	    s = "_b8";
5416 	    break;
5417 	  case 2:
5418 	    s = "_b16";
5419 	    break;
5420 	  case 4:
5421 	    s = "_b32";
5422 	    break;
5423 	  case 8:
5424 	    s = "_b64";
5425 	    break;
5426 	  default:
5427 	    output_operand_lossage ("invalid operand %%xn code");
5428 	    return;
5429 	  }
5430 	fputs (s, file);
5431       }
5432       return;
5433     case 'B':
5434       {
5435 	const char *s = "";
5436 	machine_mode mode = GET_MODE (x);
5437 	if (VECTOR_MODE_P (mode))
5438 	  mode = GET_MODE_INNER (mode);
5439 	switch (GET_MODE_SIZE (mode))
5440 	  {
5441 	  case 1:
5442 	  case 2:
5443 	  case 4:
5444 	    s = "_b32";
5445 	    break;
5446 	  case 8:
5447 	    s = "_b64";
5448 	    break;
5449 	  default:
5450 	    output_operand_lossage ("invalid operand %%xn code");
5451 	    return;
5452 	  }
5453 	fputs (s, file);
5454       }
5455       return;
5456     case 'e':
5457       fputs ("sext(", file);
5458       print_operand (file, x, 0);
5459       fputs (")", file);
5460       return;
5461     case 'i':
5462     case 'u':
5463       {
5464 	bool signed_p = code == 'i';
5465 	const char *s = "";
5466 	machine_mode mode = GET_MODE (x);
5467 	if (VECTOR_MODE_P (mode))
5468 	  mode = GET_MODE_INNER (mode);
5469 	if (mode == VOIDmode)
5470 	  switch (GET_CODE (x))
5471 	    {
5472 	    case CONST_INT:
5473 	      s = signed_p ? "_i32" : "_u32";
5474 	      break;
5475 	    case CONST_DOUBLE:
5476 	      s = "_f64";
5477 	      break;
5478 	    default:
5479 	      output_operand_lossage ("invalid operand %%xn code");
5480 	      return;
5481 	    }
5482 	else if (FLOAT_MODE_P (mode))
5483 	  switch (GET_MODE_SIZE (mode))
5484 	    {
5485 	    case 2:
5486 	      s = "_f16";
5487 	      break;
5488 	    case 4:
5489 	      s = "_f32";
5490 	      break;
5491 	    case 8:
5492 	      s = "_f64";
5493 	      break;
5494 	    default:
5495 	      output_operand_lossage ("invalid operand %%xn code");
5496 	      return;
5497 	    }
5498 	else
5499 	  switch (GET_MODE_SIZE (mode))
5500 	    {
5501 	    case 1:
5502 	      s = signed_p ? "_i8" : "_u8";
5503 	      break;
5504 	    case 2:
5505 	      s = signed_p ? "_i16" : "_u16";
5506 	      break;
5507 	    case 4:
5508 	      s = signed_p ? "_i32" : "_u32";
5509 	      break;
5510 	    case 8:
5511 	      s = signed_p ? "_i64" : "_u64";
5512 	      break;
5513 	    default:
5514 	      output_operand_lossage ("invalid operand %%xn code");
5515 	      return;
5516 	    }
5517 	fputs (s, file);
5518       }
5519       return;
5520       /* Print operand size as memory access size for loads.  */
5521     case 'o':
5522       {
5523 	const char *s = 0;
5524 	switch (GET_MODE_SIZE (GET_MODE (x)))
5525 	  {
5526 	  case 1:
5527 	    s = "_ubyte";
5528 	    break;
5529 	  case 2:
5530 	    s = "_ushort";
5531 	    break;
5532 	  /* The following are full-vector variants.  */
5533 	  case 64:
5534 	    s = "_ubyte";
5535 	    break;
5536 	  case 128:
5537 	    s = "_ushort";
5538 	    break;
5539 	  }
5540 
5541 	if (s)
5542 	  {
5543 	    fputs (s, file);
5544 	    return;
5545 	  }
5546 
5547 	/* Fall-through - the other cases for 'o' are the same as for 's'.  */
5548 	gcc_fallthrough ();
5549       }
5550     case 's':
5551       {
5552 	const char *s = "";
5553 	switch (GET_MODE_SIZE (GET_MODE (x)))
5554 	  {
5555 	  case 1:
5556 	    s = "_byte";
5557 	    break;
5558 	  case 2:
5559 	    s = "_short";
5560 	    break;
5561 	  case 4:
5562 	    s = "_dword";
5563 	    break;
5564 	  case 8:
5565 	    s = "_dwordx2";
5566 	    break;
5567 	  case 12:
5568 	    s = "_dwordx3";
5569 	    break;
5570 	  case 16:
5571 	    s = "_dwordx4";
5572 	    break;
5573 	  case 32:
5574 	    s = "_dwordx8";
5575 	    break;
5576 	  case 64:
5577 	    s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16";
5578 	    break;
5579 	  /* The following are full-vector variants.  */
5580 	  case 128:
5581 	    s = "_short";
5582 	    break;
5583 	  case 256:
5584 	    s = "_dword";
5585 	    break;
5586 	  case 512:
5587 	    s = "_dwordx2";
5588 	    break;
5589 	  default:
5590 	    output_operand_lossage ("invalid operand %%xn code");
5591 	    return;
5592 	  }
5593 	fputs (s, file);
5594       }
5595       return;
5596     case 'A':
5597       if (xcode != MEM)
5598 	{
5599 	  output_operand_lossage ("invalid %%xn code");
5600 	  return;
5601 	}
5602       print_operand_address (file, x);
5603       return;
5604     case 'O':
5605       {
5606 	if (xcode != MEM)
5607 	  {
5608 	    output_operand_lossage ("invalid %%xn code");
5609 	    return;
5610 	  }
5611 	if (AS_GDS_P (MEM_ADDR_SPACE (x)))
5612 	  fprintf (file, " gds");
5613 
5614 	rtx x0 = XEXP (x, 0);
5615 	if (AS_GLOBAL_P (MEM_ADDR_SPACE (x)))
5616 	  {
5617 	    gcc_assert (TARGET_GCN5_PLUS);
5618 
5619 	    fprintf (file, ", ");
5620 
5621 	    rtx base = x0;
5622 	    rtx const_offset = NULL_RTX;
5623 
5624 	    if (GET_CODE (base) == PLUS)
5625 	      {
5626 		rtx offset = XEXP (x0, 1);
5627 		base = XEXP (x0, 0);
5628 
5629 		if (GET_CODE (base) == PLUS)
5630 		  /* (SGPR + VGPR) + CONST  */
5631 		  /* Ignore the VGPR offset for this operand.  */
5632 		  base = XEXP (base, 0);
5633 
5634 		if (CONST_INT_P (offset))
5635 		  const_offset = XEXP (x0, 1);
5636 		else if (REG_P (offset))
5637 		  /* SGPR + VGPR  */
5638 		  /* Ignore the VGPR offset for this operand.  */
5639 		  ;
5640 		else
5641 		  output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5642 	      }
5643 
5644 	    if (REG_P (base))
5645 	      {
5646 		if (VGPR_REGNO_P (REGNO (base)))
5647 		  /* The VGPR address is specified in the %A operand.  */
5648 		  fprintf (file, "off");
5649 		else if (SGPR_REGNO_P (REGNO (base)))
5650 		  print_reg (file, base);
5651 		else
5652 		  output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5653 	      }
5654 	    else
5655 	      output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5656 
5657 	    if (const_offset != NULL_RTX)
5658 	      fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC,
5659 		       INTVAL (const_offset));
5660 
5661 	    return;
5662 	  }
5663 
5664 	if (GET_CODE (x0) == REG)
5665 	  return;
5666 	if (GET_CODE (x0) != PLUS)
5667 	  {
5668 	    output_operand_lossage ("invalid %%xn code");
5669 	    return;
5670 	  }
5671 	rtx val = XEXP (x0, 1);
5672 	if (GET_CODE (val) == CONST_VECTOR)
5673 	  val = CONST_VECTOR_ELT (val, 0);
5674 	if (GET_CODE (val) != CONST_INT)
5675 	  {
5676 	    output_operand_lossage ("invalid %%xn code");
5677 	    return;
5678 	  }
5679 	fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (val));
5680 
5681       }
5682       return;
5683     case 'c':
5684       invert = true;
5685       /* Fall through.  */
5686     case 'C':
5687       {
5688 	const char *s;
5689 	bool num = false;
5690 	if ((xcode != EQ && xcode != NE) || !REG_P (XEXP (x, 0)))
5691 	  {
5692 	    output_operand_lossage ("invalid %%xn code");
5693 	    return;
5694 	  }
5695 	switch (REGNO (XEXP (x, 0)))
5696 	  {
5697 	  case VCC_REG:
5698 	  case VCCZ_REG:
5699 	    s = "_vcc";
5700 	    break;
5701 	  case SCC_REG:
5702 	    /* For some reason llvm-mc insists on scc0 instead of sccz.  */
5703 	    num = true;
5704 	    s = "_scc";
5705 	    break;
5706 	  case EXECZ_REG:
5707 	    s = "_exec";
5708 	    break;
5709 	  default:
5710 	    output_operand_lossage ("invalid %%xn code");
5711 	    return;
5712 	  }
5713 	fputs (s, file);
5714 	if (xcode == (invert ? NE : EQ))
5715 	  fputc (num ? '0' : 'z', file);
5716 	else
5717 	  fputs (num ? "1" : "nz", file);
5718 	return;
5719       }
5720     case 'D':
5721       {
5722 	const char *s;
5723 	bool cmp_signed = false;
5724 	switch (xcode)
5725 	  {
5726 	  case EQ:
5727 	    s = "_eq_";
5728 	    break;
5729 	  case NE:
5730 	    s = "_lg_";
5731 	    break;
5732 	  case LT:
5733 	    s = "_lt_";
5734 	    cmp_signed = true;
5735 	    break;
5736 	  case LE:
5737 	    s = "_le_";
5738 	    cmp_signed = true;
5739 	    break;
5740 	  case GT:
5741 	    s = "_gt_";
5742 	    cmp_signed = true;
5743 	    break;
5744 	  case GE:
5745 	    s = "_ge_";
5746 	    cmp_signed = true;
5747 	    break;
5748 	  case LTU:
5749 	    s = "_lt_";
5750 	    break;
5751 	  case LEU:
5752 	    s = "_le_";
5753 	    break;
5754 	  case GTU:
5755 	    s = "_gt_";
5756 	    break;
5757 	  case GEU:
5758 	    s = "_ge_";
5759 	    break;
5760 	  default:
5761 	    output_operand_lossage ("invalid %%xn code");
5762 	    return;
5763 	  }
5764 	fputs (s, file);
5765 	fputc (cmp_signed ? 'i' : 'u', file);
5766 
5767 	machine_mode mode = GET_MODE (XEXP (x, 0));
5768 
5769 	if (mode == VOIDmode)
5770 	  mode = GET_MODE (XEXP (x, 1));
5771 
5772 	/* If both sides are constants, then assume the instruction is in
5773 	   SImode since s_cmp can only do integer compares.  */
5774 	if (mode == VOIDmode)
5775 	  mode = SImode;
5776 
5777 	switch (GET_MODE_SIZE (mode))
5778 	  {
5779 	  case 4:
5780 	    s = "32";
5781 	    break;
5782 	  case 8:
5783 	    s = "64";
5784 	    break;
5785 	  default:
5786 	    output_operand_lossage ("invalid operand %%xn code");
5787 	    return;
5788 	  }
5789 	fputs (s, file);
5790 	return;
5791       }
5792     case 'E':
5793       {
5794 	const char *s;
5795 	bool cmp_signed = false;
5796 	machine_mode mode = GET_MODE (XEXP (x, 0));
5797 
5798 	if (mode == VOIDmode)
5799 	  mode = GET_MODE (XEXP (x, 1));
5800 
5801 	/* If both sides are constants, assume the instruction is in SFmode
5802 	   if either operand is floating point, otherwise assume SImode.  */
5803 	if (mode == VOIDmode)
5804 	  {
5805 	    if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
5806 		|| GET_CODE (XEXP (x, 1)) == CONST_DOUBLE)
5807 	      mode = SFmode;
5808 	    else
5809 	      mode = SImode;
5810 	  }
5811 
5812 	/* Use the same format code for vector comparisons.  */
5813 	if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5814 	    || GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
5815 	  mode = GET_MODE_INNER (mode);
5816 
5817 	bool float_p = GET_MODE_CLASS (mode) == MODE_FLOAT;
5818 
5819 	switch (xcode)
5820 	  {
5821 	  case EQ:
5822 	    s = "_eq_";
5823 	    break;
5824 	  case NE:
5825 	    s = float_p ? "_neq_" : "_ne_";
5826 	    break;
5827 	  case LT:
5828 	    s = "_lt_";
5829 	    cmp_signed = true;
5830 	    break;
5831 	  case LE:
5832 	    s = "_le_";
5833 	    cmp_signed = true;
5834 	    break;
5835 	  case GT:
5836 	    s = "_gt_";
5837 	    cmp_signed = true;
5838 	    break;
5839 	  case GE:
5840 	    s = "_ge_";
5841 	    cmp_signed = true;
5842 	    break;
5843 	  case LTU:
5844 	    s = "_lt_";
5845 	    break;
5846 	  case LEU:
5847 	    s = "_le_";
5848 	    break;
5849 	  case GTU:
5850 	    s = "_gt_";
5851 	    break;
5852 	  case GEU:
5853 	    s = "_ge_";
5854 	    break;
5855 	  case ORDERED:
5856 	    s = "_o_";
5857 	    break;
5858 	  case UNORDERED:
5859 	    s = "_u_";
5860 	    break;
5861 	  default:
5862 	    output_operand_lossage ("invalid %%xn code");
5863 	    return;
5864 	  }
5865 	fputs (s, file);
5866 	fputc (float_p ? 'f' : cmp_signed ? 'i' : 'u', file);
5867 
5868 	switch (GET_MODE_SIZE (mode))
5869 	  {
5870 	  case 1:
5871 	    s = "32";
5872 	    break;
5873 	  case 2:
5874 	    s = float_p ? "16" : "32";
5875 	    break;
5876 	  case 4:
5877 	    s = "32";
5878 	    break;
5879 	  case 8:
5880 	    s = "64";
5881 	    break;
5882 	  default:
5883 	    output_operand_lossage ("invalid operand %%xn code");
5884 	    return;
5885 	  }
5886 	fputs (s, file);
5887 	return;
5888       }
5889     case 'L':
5890       print_operand (file, gcn_operand_part (GET_MODE (x), x, 0), 0);
5891       return;
5892     case 'H':
5893       print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0);
5894       return;
5895     case 'R':
5896       /* Print a scalar register number as an integer.  Temporary hack.  */
5897       gcc_assert (REG_P (x));
5898       fprintf (file, "%u", (int) REGNO (x));
5899       return;
5900     case 'V':
5901       /* Print a vector register number as an integer.  Temporary hack.  */
5902       gcc_assert (REG_P (x));
5903       fprintf (file, "%u", (int) REGNO (x) - FIRST_VGPR_REG);
5904       return;
5905     case 0:
5906       if (xcode == REG)
5907 	print_reg (file, x);
5908       else if (xcode == MEM)
5909 	output_address (GET_MODE (x), x);
5910       else if (xcode == CONST_INT)
5911 	fprintf (file, "%i", (int) INTVAL (x));
5912       else if (xcode == CONST_VECTOR)
5913 	print_operand (file, CONST_VECTOR_ELT (x, 0), code);
5914       else if (xcode == CONST_DOUBLE)
5915 	{
5916 	  const char *str;
5917 	  switch (gcn_inline_fp_constant_p (x, false))
5918 	    {
5919 	    case 240:
5920 	      str = "0.5";
5921 	      break;
5922 	    case 241:
5923 	      str = "-0.5";
5924 	      break;
5925 	    case 242:
5926 	      str = "1.0";
5927 	      break;
5928 	    case 243:
5929 	      str = "-1.0";
5930 	      break;
5931 	    case 244:
5932 	      str = "2.0";
5933 	      break;
5934 	    case 245:
5935 	      str = "-2.0";
5936 	      break;
5937 	    case 246:
5938 	      str = "4.0";
5939 	      break;
5940 	    case 247:
5941 	      str = "-4.0";
5942 	      break;
5943 	    case 248:
5944 	      str = "1/pi";
5945 	      break;
5946 	    default:
5947 	      rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode
5948 					    ? DImode : SImode,
5949 					    x, GET_MODE (x), 0);
5950 	      if (ix)
5951 		print_operand (file, ix, code);
5952 	      else
5953 		output_operand_lossage ("invalid fp constant");
5954 	      return;
5955 	      break;
5956 	    }
5957 	  fprintf (file, str);
5958 	  return;
5959 	}
5960       else
5961 	output_addr_const (file, x);
5962       return;
5963     case '^':
5964       if (TARGET_GCN5_PLUS)
5965 	fputs ("_co", file);
5966       return;
5967     case 'g':
5968       gcc_assert (xcode == MEM);
5969       if (MEM_VOLATILE_P (x))
5970 	fputs (" glc", file);
5971       return;
5972     default:
5973       output_operand_lossage ("invalid %%xn code");
5974     }
5975   gcc_unreachable ();
5976 }
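
/* A hypothetical output template combining several of the codes above, say
   "global_load%o0\t%0, %A1%O1%g1" for an SImode load from a global address,
   would expand roughly to "global_load_dword  v1, v[2:3], s[4:5] offset:16
   glc": %o0 supplies "_dword", %A1 the VGPR address pair, %O1 the SGPR base
   plus constant offset, and %g1 " glc" for a volatile MEM.  The real
   templates live in gcn.md; this sketch only shows how the pieces compose.  */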
5977 
5978 /* }}}  */
5979 /* {{{ TARGET hook overrides.  */
5980 
5981 #undef  TARGET_ADDR_SPACE_ADDRESS_MODE
5982 #define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode
5983 #undef  TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
5984 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
5985   gcn_addr_space_legitimate_address_p
5986 #undef  TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
5987 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS gcn_addr_space_legitimize_address
5988 #undef  TARGET_ADDR_SPACE_POINTER_MODE
5989 #define TARGET_ADDR_SPACE_POINTER_MODE gcn_addr_space_pointer_mode
5990 #undef  TARGET_ADDR_SPACE_SUBSET_P
5991 #define TARGET_ADDR_SPACE_SUBSET_P gcn_addr_space_subset_p
5992 #undef  TARGET_ADDR_SPACE_CONVERT
5993 #define TARGET_ADDR_SPACE_CONVERT gcn_addr_space_convert
5994 #undef  TARGET_ARG_PARTIAL_BYTES
5995 #define TARGET_ARG_PARTIAL_BYTES gcn_arg_partial_bytes
5996 #undef  TARGET_ASM_ALIGNED_DI_OP
5997 #define TARGET_ASM_ALIGNED_DI_OP "\t.8byte\t"
5998 #undef  TARGET_ASM_CONSTRUCTOR
5999 #define TARGET_ASM_CONSTRUCTOR gcn_disable_constructors
6000 #undef  TARGET_ASM_DESTRUCTOR
6001 #define TARGET_ASM_DESTRUCTOR gcn_disable_constructors
6002 #undef  TARGET_ASM_FILE_START
6003 #define TARGET_ASM_FILE_START output_file_start
6004 #undef  TARGET_ASM_FUNCTION_PROLOGUE
6005 #define TARGET_ASM_FUNCTION_PROLOGUE gcn_target_asm_function_prologue
6006 #undef  TARGET_ASM_SELECT_SECTION
6007 #define TARGET_ASM_SELECT_SECTION gcn_asm_select_section
6008 #undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
6009 #define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template
6010 #undef  TARGET_ATTRIBUTE_TABLE
6011 #define TARGET_ATTRIBUTE_TABLE gcn_attribute_table
6012 #undef  TARGET_BUILTIN_DECL
6013 #define TARGET_BUILTIN_DECL gcn_builtin_decl
6014 #undef  TARGET_CAN_CHANGE_MODE_CLASS
6015 #define TARGET_CAN_CHANGE_MODE_CLASS gcn_can_change_mode_class
6016 #undef  TARGET_CAN_ELIMINATE
6017 #define TARGET_CAN_ELIMINATE gcn_can_eliminate_p
6018 #undef  TARGET_CANNOT_COPY_INSN_P
6019 #define TARGET_CANNOT_COPY_INSN_P gcn_cannot_copy_insn_p
6020 #undef  TARGET_CLASS_LIKELY_SPILLED_P
6021 #define TARGET_CLASS_LIKELY_SPILLED_P gcn_class_likely_spilled_p
6022 #undef  TARGET_CLASS_MAX_NREGS
6023 #define TARGET_CLASS_MAX_NREGS gcn_class_max_nregs
6024 #undef  TARGET_CONDITIONAL_REGISTER_USAGE
6025 #define TARGET_CONDITIONAL_REGISTER_USAGE gcn_conditional_register_usage
6026 #undef  TARGET_CONSTANT_ALIGNMENT
6027 #define TARGET_CONSTANT_ALIGNMENT gcn_constant_alignment
6028 #undef  TARGET_DEBUG_UNWIND_INFO
6029 #define TARGET_DEBUG_UNWIND_INFO gcn_debug_unwind_info
6030 #undef  TARGET_EXPAND_BUILTIN
6031 #define TARGET_EXPAND_BUILTIN gcn_expand_builtin
6032 #undef  TARGET_FUNCTION_ARG
6033 #define TARGET_FUNCTION_ARG gcn_function_arg
6034 #undef  TARGET_FUNCTION_ARG_ADVANCE
6035 #define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance
6036 #undef  TARGET_FUNCTION_VALUE
6037 #define TARGET_FUNCTION_VALUE gcn_function_value
6038 #undef  TARGET_FUNCTION_VALUE_REGNO_P
6039 #define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p
6040 #undef  TARGET_GIMPLIFY_VA_ARG_EXPR
6041 #define TARGET_GIMPLIFY_VA_ARG_EXPR gcn_gimplify_va_arg_expr
6042 #undef  TARGET_GOACC_ADJUST_PROPAGATION_RECORD
6043 #define TARGET_GOACC_ADJUST_PROPAGATION_RECORD \
6044   gcn_goacc_adjust_propagation_record
6045 #undef  TARGET_GOACC_ADJUST_GANGPRIVATE_DECL
6046 #define TARGET_GOACC_ADJUST_GANGPRIVATE_DECL gcn_goacc_adjust_gangprivate_decl
6047 #undef  TARGET_GOACC_FORK_JOIN
6048 #define TARGET_GOACC_FORK_JOIN gcn_fork_join
6049 #undef  TARGET_GOACC_REDUCTION
6050 #define TARGET_GOACC_REDUCTION gcn_goacc_reduction
6051 #undef  TARGET_GOACC_VALIDATE_DIMS
6052 #define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims
6053 #undef  TARGET_GOACC_WORKER_PARTITIONING
6054 #define TARGET_GOACC_WORKER_PARTITIONING true
6055 #undef  TARGET_HARD_REGNO_MODE_OK
6056 #define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok
6057 #undef  TARGET_HARD_REGNO_NREGS
6058 #define TARGET_HARD_REGNO_NREGS gcn_hard_regno_nregs
6059 #undef  TARGET_HAVE_SPECULATION_SAFE_VALUE
6060 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
6061 #undef  TARGET_INIT_BUILTINS
6062 #define TARGET_INIT_BUILTINS gcn_init_builtins
6063 #undef  TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
6064 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
6065   gcn_ira_change_pseudo_allocno_class
6066 #undef  TARGET_LEGITIMATE_CONSTANT_P
6067 #define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
6068 #undef  TARGET_LRA_P
6069 #define TARGET_LRA_P hook_bool_void_true
6070 #undef  TARGET_MACHINE_DEPENDENT_REORG
6071 #define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg
6072 #undef  TARGET_MEMORY_MOVE_COST
6073 #define TARGET_MEMORY_MOVE_COST gcn_memory_move_cost
6074 #undef  TARGET_MODES_TIEABLE_P
6075 #define TARGET_MODES_TIEABLE_P gcn_modes_tieable_p
6076 #undef  TARGET_OPTION_OVERRIDE
6077 #define TARGET_OPTION_OVERRIDE gcn_option_override
6078 #undef  TARGET_PRETEND_OUTGOING_VARARGS_NAMED
6079 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED \
6080   gcn_pretend_outgoing_varargs_named
6081 #undef  TARGET_PROMOTE_FUNCTION_MODE
6082 #define TARGET_PROMOTE_FUNCTION_MODE gcn_promote_function_mode
6083 #undef  TARGET_REGISTER_MOVE_COST
6084 #define TARGET_REGISTER_MOVE_COST gcn_register_move_cost
6085 #undef  TARGET_RETURN_IN_MEMORY
6086 #define TARGET_RETURN_IN_MEMORY gcn_return_in_memory
6087 #undef  TARGET_RTX_COSTS
6088 #define TARGET_RTX_COSTS gcn_rtx_costs
6089 #undef  TARGET_SECONDARY_RELOAD
6090 #define TARGET_SECONDARY_RELOAD gcn_secondary_reload
6091 #undef  TARGET_SECTION_TYPE_FLAGS
6092 #define TARGET_SECTION_TYPE_FLAGS gcn_section_type_flags
6093 #undef  TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
6094 #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
6095   gcn_small_register_classes_for_mode_p
6096 #undef  TARGET_SPILL_CLASS
6097 #define TARGET_SPILL_CLASS gcn_spill_class
6098 #undef  TARGET_STRICT_ARGUMENT_NAMING
6099 #define TARGET_STRICT_ARGUMENT_NAMING gcn_strict_argument_naming
6100 #undef  TARGET_TRAMPOLINE_INIT
6101 #define TARGET_TRAMPOLINE_INIT gcn_trampoline_init
6102 #undef  TARGET_TRULY_NOOP_TRUNCATION
6103 #define TARGET_TRULY_NOOP_TRUNCATION gcn_truly_noop_truncation
6104 #undef  TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
6105 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST gcn_vectorization_cost
6106 #undef  TARGET_VECTORIZE_GET_MASK_MODE
6107 #define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
6108 #undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6109 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
6110 #undef  TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
6111 #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
6112   gcn_preferred_vector_alignment
6113 #undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
6114 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
6115   gcn_vectorize_support_vector_misalignment
6116 #undef  TARGET_VECTORIZE_VEC_PERM_CONST
6117 #define TARGET_VECTORIZE_VEC_PERM_CONST gcn_vectorize_vec_perm_const
6118 #undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
6119 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
6120   gcn_vector_alignment_reachable
6121 #undef  TARGET_VECTOR_MODE_SUPPORTED_P
6122 #define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p
6123 
6124 struct gcc_target targetm = TARGET_INITIALIZER;
6125 
6126 #include "gt-gcn.h"
6127 /* }}}  */
6128