xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/spu/spu.c (revision c38e7cc395b1472a774ff828e46123de44c628e9)
1 /* Copyright (C) 2006-2015 Free Software Foundation, Inc.
2 
3    This file is free software; you can redistribute it and/or modify it under
4    the terms of the GNU General Public License as published by the Free
5    Software Foundation; either version 3 of the License, or (at your option)
6    any later version.
7 
8    This file is distributed in the hope that it will be useful, but WITHOUT
9    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11    for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with GCC; see the file COPYING3.  If not see
15    <http://www.gnu.org/licenses/>.  */
16 
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
27 #include "flags.h"
28 #include "recog.h"
29 #include "obstack.h"
30 #include "hash-set.h"
31 #include "machmode.h"
32 #include "vec.h"
33 #include "double-int.h"
34 #include "input.h"
35 #include "alias.h"
36 #include "symtab.h"
37 #include "wide-int.h"
38 #include "inchash.h"
39 #include "tree.h"
40 #include "fold-const.h"
41 #include "stringpool.h"
42 #include "stor-layout.h"
43 #include "calls.h"
44 #include "varasm.h"
45 #include "hashtab.h"
46 #include "function.h"
47 #include "statistics.h"
48 #include "real.h"
49 #include "fixed-value.h"
50 #include "expmed.h"
51 #include "dojump.h"
52 #include "explow.h"
53 #include "emit-rtl.h"
54 #include "stmt.h"
55 #include "expr.h"
56 #include "insn-codes.h"
57 #include "optabs.h"
58 #include "except.h"
59 #include "output.h"
60 #include "predict.h"
61 #include "dominance.h"
62 #include "cfg.h"
63 #include "cfgrtl.h"
64 #include "cfganal.h"
65 #include "lcm.h"
66 #include "cfgbuild.h"
67 #include "cfgcleanup.h"
68 #include "basic-block.h"
69 #include "diagnostic-core.h"
70 #include "ggc.h"
71 #include "tm_p.h"
72 #include "target.h"
73 #include "target-def.h"
74 #include "langhooks.h"
75 #include "reload.h"
76 #include "sched-int.h"
77 #include "params.h"
78 #include "hash-table.h"
79 #include "tree-ssa-alias.h"
80 #include "internal-fn.h"
81 #include "gimple-fold.h"
82 #include "tree-eh.h"
83 #include "gimple-expr.h"
84 #include "is-a.h"
85 #include "gimple.h"
86 #include "gimplify.h"
87 #include "tm-constrs.h"
88 #include "sbitmap.h"
89 #include "df.h"
90 #include "ddg.h"
91 #include "timevar.h"
92 #include "dumpfile.h"
93 #include "cfgloop.h"
94 #include "builtins.h"
95 #include "rtl-iter.h"
96 
97 /* Builtin types, data and prototypes. */
98 
99 enum spu_builtin_type_index
100 {
101   SPU_BTI_END_OF_PARAMS,
102 
103   /* We create new type nodes for these. */
104   SPU_BTI_V16QI,
105   SPU_BTI_V8HI,
106   SPU_BTI_V4SI,
107   SPU_BTI_V2DI,
108   SPU_BTI_V4SF,
109   SPU_BTI_V2DF,
110   SPU_BTI_UV16QI,
111   SPU_BTI_UV8HI,
112   SPU_BTI_UV4SI,
113   SPU_BTI_UV2DI,
114 
115   /* A 16-byte type. (Implemented with V16QI_type_node) */
116   SPU_BTI_QUADWORD,
117 
118   /* These all correspond to intSI_type_node */
119   SPU_BTI_7,
120   SPU_BTI_S7,
121   SPU_BTI_U7,
122   SPU_BTI_S10,
123   SPU_BTI_S10_4,
124   SPU_BTI_U14,
125   SPU_BTI_16,
126   SPU_BTI_S16,
127   SPU_BTI_S16_2,
128   SPU_BTI_U16,
129   SPU_BTI_U16_2,
130   SPU_BTI_U18,
131 
132   /* These correspond to the standard types */
133   SPU_BTI_INTQI,
134   SPU_BTI_INTHI,
135   SPU_BTI_INTSI,
136   SPU_BTI_INTDI,
137 
138   SPU_BTI_UINTQI,
139   SPU_BTI_UINTHI,
140   SPU_BTI_UINTSI,
141   SPU_BTI_UINTDI,
142 
143   SPU_BTI_FLOAT,
144   SPU_BTI_DOUBLE,
145 
146   SPU_BTI_VOID,
147   SPU_BTI_PTR,
148 
149   SPU_BTI_MAX
150 };
151 
152 #define V16QI_type_node               (spu_builtin_types[SPU_BTI_V16QI])
153 #define V8HI_type_node                (spu_builtin_types[SPU_BTI_V8HI])
154 #define V4SI_type_node                (spu_builtin_types[SPU_BTI_V4SI])
155 #define V2DI_type_node                (spu_builtin_types[SPU_BTI_V2DI])
156 #define V4SF_type_node                (spu_builtin_types[SPU_BTI_V4SF])
157 #define V2DF_type_node                (spu_builtin_types[SPU_BTI_V2DF])
158 #define unsigned_V16QI_type_node      (spu_builtin_types[SPU_BTI_UV16QI])
159 #define unsigned_V8HI_type_node       (spu_builtin_types[SPU_BTI_UV8HI])
160 #define unsigned_V4SI_type_node       (spu_builtin_types[SPU_BTI_UV4SI])
161 #define unsigned_V2DI_type_node       (spu_builtin_types[SPU_BTI_UV2DI])
162 
163 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
164 
165 struct spu_builtin_range
166 {
167   int low, high;
168 };
169 
170 static struct spu_builtin_range spu_builtin_range[] = {
171   {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
172   {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
173   {0ll, 0x7fll},		/* SPU_BTI_U7    */
174   {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
175   {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
176   {0ll, 0x3fffll},		/* SPU_BTI_U14   */
177   {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
178   {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
179   {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
180   {0ll, 0xffffll},		/* SPU_BTI_U16   */
181   {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
182   {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
183 };
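
/* Illustrative reading of the table above: each entry is the [low, high]
   range accepted for the corresponding SPU_BTI_* immediate operand.  For
   example, SPU_BTI_S10 is a signed 10-bit field, so it accepts
   -0x200 .. 0x1ff, i.e. -512 .. 511, and SPU_BTI_U7 is an unsigned 7-bit
   field, 0 .. 127.  SPU_BTI_7 and SPU_BTI_16 deliberately accept the
   union of the signed and unsigned interpretations of the field
   (e.g. -0x8000 .. 0xffff for SPU_BTI_16).  */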
184 
185 
186 /*  Target specific attribute specifications.  */
187 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
188 
189 /*  Prototypes and external defs.  */
190 static int get_pipe (rtx_insn *insn);
191 static int spu_naked_function_p (tree func);
192 static int mem_is_padded_component_ref (rtx x);
193 static void fix_range (const char *);
194 static rtx spu_expand_load (rtx, rtx, rtx, int);
195 
196 /* Which instruction set architecture to use.  */
197 int spu_arch;
198 /* Which cpu are we tuning for.  */
199 int spu_tune;
200 
201 /* The hardware requires 8 insns between a hint and the branch it
202    affects.  This variable describes how many rtl instructions the
203    compiler needs to see before inserting a hint, and then the compiler
204    will insert enough nops to make it at least 8 insns.  The default is
205    for the compiler to allow up to 2 nops to be emitted.  The nops are
206    inserted in pairs, so we round down. */
207 int spu_hint_dist = (8*4) - (2*4);
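
/* Worked example of the default above: a hint must be followed by 8
   instructions (8 * 4 = 32 bytes) before the branch it targets, and
   allowing up to 2 nops (2 * 4 = 8 bytes) of padding leaves
   spu_hint_dist = 32 - 8 = 24 bytes of real instructions the compiler
   must see before it will insert a hint.  spu_option_override below
   recomputes this from the spu_max_nops option.  */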
208 
209 enum spu_immediate {
210   SPU_NONE,
211   SPU_IL,
212   SPU_ILA,
213   SPU_ILH,
214   SPU_ILHU,
215   SPU_ORI,
216   SPU_ORHI,
217   SPU_ORBI,
218   SPU_IOHL
219 };
220 enum immediate_class
221 {
222   IC_POOL,			/* constant pool */
223   IC_IL1,			/* one il* instruction */
224   IC_IL2,			/* both ilhu and iohl instructions */
225   IC_IL1s,			/* one il* instruction */
226   IC_IL2s,			/* both ilhu and iohl instructions */
227   IC_FSMBI,			/* the fsmbi instruction */
228   IC_CPAT,			/* one of the c*d instructions */
229   IC_FSMBI2			/* fsmbi plus 1 other instruction */
230 };
231 
232 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
233 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
234 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
235 static enum immediate_class classify_immediate (rtx op,
236 						machine_mode mode);
237 
238 /* Pointer mode for __ea references.  */
239 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
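
/* Illustrative reading of the macro above: pointers qualified with the
   __ea address space use EAmode, which is SImode (32 bits) only when
   spu_ea_model is 32, and DImode (64 bits) otherwise.  Hypothetically,
   a declaration such as

       __ea char *p;

   gives p a pointer in EAmode rather than the local-store Pmode.  */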
240 
241 
242 /* Define the structure for the machine field in struct function.  */
243 struct GTY(()) machine_function
244 {
245   /* Register to use for PIC accesses.  */
246   rtx pic_reg;
247 };
248 
249 /* How to allocate a 'struct machine_function'.  */
250 static struct machine_function *
251 spu_init_machine_status (void)
252 {
253   return ggc_cleared_alloc<machine_function> ();
254 }
255 
256 /* Implement TARGET_OPTION_OVERRIDE.  */
257 static void
258 spu_option_override (void)
259 {
260   /* Set up function hooks.  */
261   init_machine_status = spu_init_machine_status;
262 
263   /* Small loops will be completely peeled (unrolled) at -O3.  For SPU it
264      is more important to keep code small by default.  */
265   if (!flag_unroll_loops && !flag_peel_loops)
266     maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
267 			   global_options.x_param_values,
268 			   global_options_set.x_param_values);
269 
270   flag_omit_frame_pointer = 1;
271 
272   /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
273   if (align_functions < 8)
274     align_functions = 8;
275 
276   spu_hint_dist = 8*4 - spu_max_nops*4;
277   if (spu_hint_dist < 0)
278     spu_hint_dist = 0;
279 
280   if (spu_fixed_range_string)
281     fix_range (spu_fixed_range_string);
282 
283   /* Determine processor architectural level.  */
284   if (spu_arch_string)
285     {
286       if (strcmp (&spu_arch_string[0], "cell") == 0)
287         spu_arch = PROCESSOR_CELL;
288       else if (strcmp (&spu_arch_string[0], "celledp") == 0)
289         spu_arch = PROCESSOR_CELLEDP;
290       else
291         error ("bad value (%s) for -march= switch", spu_arch_string);
292     }
293 
294   /* Determine processor to tune for.  */
295   if (spu_tune_string)
296     {
297       if (strcmp (&spu_tune_string[0], "cell") == 0)
298         spu_tune = PROCESSOR_CELL;
299       else if (strcmp (&spu_tune_string[0], "celledp") == 0)
300         spu_tune = PROCESSOR_CELLEDP;
301       else
302         error ("bad value (%s) for -mtune= switch", spu_tune_string);
303     }
304 
305   /* Change defaults according to the processor architecture.  */
306   if (spu_arch == PROCESSOR_CELLEDP)
307     {
308       /* If no command line option has been otherwise specified, change
309 	 the default to -mno-safe-hints on celledp -- only the original
310 	 Cell/B.E. processors require this workaround.  */
311       if (!(target_flags_explicit & MASK_SAFE_HINTS))
312 	target_flags &= ~MASK_SAFE_HINTS;
313     }
314 
315   REAL_MODE_FORMAT (SFmode) = &spu_single_format;
316 }
317 
318 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
319    struct attribute_spec.handler.  */
320 
321 /* True if MODE is valid for the target.  By "valid", we mean able to
322    be manipulated in non-trivial ways.  In particular, this means all
323    the arithmetic is supported.  */
324 static bool
325 spu_scalar_mode_supported_p (machine_mode mode)
326 {
327   switch (mode)
328     {
329     case QImode:
330     case HImode:
331     case SImode:
332     case SFmode:
333     case DImode:
334     case TImode:
335     case DFmode:
336       return true;
337 
338     default:
339       return false;
340     }
341 }
342 
343 /* Similarly for vector modes.  "Supported" here is less strict.  At
344    least some operations are supported; need to check optabs or builtins
345    for further details.  */
346 static bool
347 spu_vector_mode_supported_p (machine_mode mode)
348 {
349   switch (mode)
350     {
351     case V16QImode:
352     case V8HImode:
353     case V4SImode:
354     case V2DImode:
355     case V4SFmode:
356     case V2DFmode:
357       return true;
358 
359     default:
360       return false;
361     }
362 }
363 
364 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
365    least significant bytes of the outer mode.  This function returns
366    TRUE for the SUBREG's where this is correct.  */
367 int
368 valid_subreg (rtx op)
369 {
370   machine_mode om = GET_MODE (op);
371   machine_mode im = GET_MODE (SUBREG_REG (op));
372   return om != VOIDmode && im != VOIDmode
373     && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
374 	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
375 	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
376 }
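
/* Illustrative cases for the rule above (sizes in bytes):
     (subreg:HI (reg:SI) ...)    -- 2 within 4:  both <= 4, valid
     (subreg:V4SI (reg:TI) ...)  -- 16 and 16:   equal size, valid
     (subreg:SI (reg:DI) ...)    -- 4 within 8:  none of the cases hold,
                                    so valid_subreg returns false.  */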
377 
378 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
379    and adjust the start offset.  */
380 static rtx
381 adjust_operand (rtx op, HOST_WIDE_INT * start)
382 {
383   machine_mode mode;
384   int op_size;
385   /* Strip any paradoxical SUBREG.  */
386   if (GET_CODE (op) == SUBREG
387       && (GET_MODE_BITSIZE (GET_MODE (op))
388 	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
389     {
390       if (start)
391 	*start -=
392 	  GET_MODE_BITSIZE (GET_MODE (op)) -
393 	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
394       op = SUBREG_REG (op);
395     }
396   /* If it is smaller than SI, widen it; an SImode SUBREG is added below. */
397   op_size = GET_MODE_BITSIZE (GET_MODE (op));
398   if (op_size < 32)
399     {
400       if (start)
401 	*start += 32 - op_size;
402       op_size = 32;
403     }
404   /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
405   mode = mode_for_size (op_size, MODE_INT, 0);
406   if (mode != GET_MODE (op))
407     op = gen_rtx_SUBREG (mode, op, 0);
408   return op;
409 }
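
/* A small worked example of the adjustment above: given a QImode operand
   with *START == 5, op_size is 8, so the operand is widened to SImode and
   *START becomes 5 + (32 - 8) = 29, the same bit position within the
   wider SImode view.  */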
410 
411 void
412 spu_expand_extv (rtx ops[], int unsignedp)
413 {
414   rtx dst = ops[0], src = ops[1];
415   HOST_WIDE_INT width = INTVAL (ops[2]);
416   HOST_WIDE_INT start = INTVAL (ops[3]);
417   HOST_WIDE_INT align_mask;
418   rtx s0, s1, mask, r0;
419 
420   gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
421 
422   if (MEM_P (src))
423     {
424       /* First, determine if we need 1 TImode load or 2.  We need only 1
425          if the bits being extracted do not cross the alignment boundary
426          as determined by the MEM and its address. */
427 
428       align_mask = -MEM_ALIGN (src);
429       if ((start & align_mask) == ((start + width - 1) & align_mask))
430 	{
431 	  /* Alignment is sufficient for 1 load. */
432 	  s0 = gen_reg_rtx (TImode);
433 	  r0 = spu_expand_load (s0, 0, src, start / 8);
434 	  start &= 7;
435 	  if (r0)
436 	    emit_insn (gen_rotqby_ti (s0, s0, r0));
437 	}
438       else
439 	{
440 	  /* Need 2 loads. */
441 	  s0 = gen_reg_rtx (TImode);
442 	  s1 = gen_reg_rtx (TImode);
443 	  r0 = spu_expand_load (s0, s1, src, start / 8);
444 	  start &= 7;
445 
446 	  gcc_assert (start + width <= 128);
447 	  if (r0)
448 	    {
449 	      rtx r1 = gen_reg_rtx (SImode);
450 	      mask = gen_reg_rtx (TImode);
451 	      emit_move_insn (mask, GEN_INT (-1));
452 	      emit_insn (gen_rotqby_ti (s0, s0, r0));
453 	      emit_insn (gen_rotqby_ti (s1, s1, r0));
454 	      if (GET_CODE (r0) == CONST_INT)
455 		r1 = GEN_INT (INTVAL (r0) & 15);
456 	      else
457 		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
458 	      emit_insn (gen_shlqby_ti (mask, mask, r1));
459 	      emit_insn (gen_selb (s0, s1, s0, mask));
460 	    }
461 	}
462 
463     }
464   else if (GET_CODE (src) == SUBREG)
465     {
466       rtx r = SUBREG_REG (src);
467       gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
468       s0 = gen_reg_rtx (TImode);
469       if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
470 	emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
471       else
472 	emit_move_insn (s0, src);
473     }
474   else
475     {
476       gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
477       s0 = gen_reg_rtx (TImode);
478       emit_move_insn (s0, src);
479     }
480 
481   /* Now s0 is TImode and contains the bits to extract at start. */
482 
483   if (start)
484     emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
485 
486   if (128 - width)
487     s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
488 
489   emit_move_insn (dst, s0);
490 }
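
/* Sketch of the sequence emitted above for a register source, taking
   WIDTH == 8 and START == 40 (both values made up for illustration):
   the quadword is first rotated left by 40 so the field sits at the
   most-significant end, then shifted right by 128 - 8 = 120, leaving
   the 8 extracted bits right-justified in DST, sign- or zero-filled
   according to UNSIGNEDP.  */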
491 
492 void
493 spu_expand_insv (rtx ops[])
494 {
495   HOST_WIDE_INT width = INTVAL (ops[1]);
496   HOST_WIDE_INT start = INTVAL (ops[2]);
497   HOST_WIDE_INT maskbits;
498   machine_mode dst_mode;
499   rtx dst = ops[0], src = ops[3];
500   int dst_size;
501   rtx mask;
502   rtx shift_reg;
503   int shift;
504 
505 
506   if (GET_CODE (ops[0]) == MEM)
507     dst = gen_reg_rtx (TImode);
508   else
509     dst = adjust_operand (dst, &start);
510   dst_mode = GET_MODE (dst);
511   dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
512 
513   if (CONSTANT_P (src))
514     {
515       machine_mode m =
516 	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
517       src = force_reg (m, convert_to_mode (m, src, 0));
518     }
519   src = adjust_operand (src, 0);
520 
521   mask = gen_reg_rtx (dst_mode);
522   shift_reg = gen_reg_rtx (dst_mode);
523   shift = dst_size - start - width;
524 
525   /* It's not safe to use subreg here because the compiler assumes
526      that the SUBREG_REG is right justified in the SUBREG. */
527   convert_move (shift_reg, src, 1);
528 
529   if (shift > 0)
530     {
531       switch (dst_mode)
532 	{
533 	case SImode:
534 	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
535 	  break;
536 	case DImode:
537 	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
538 	  break;
539 	case TImode:
540 	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
541 	  break;
542 	default:
543 	  abort ();
544 	}
545     }
546   else if (shift < 0)
547     abort ();
548 
549   switch (dst_size)
550     {
551     case 32:
552       maskbits = (-1ll << (32 - width - start));
553       if (start)
554 	maskbits += (1ll << (32 - start));
555       emit_move_insn (mask, GEN_INT (maskbits));
556       break;
557     case 64:
558       maskbits = (-1ll << (64 - width - start));
559       if (start)
560 	maskbits += (1ll << (64 - start));
561       emit_move_insn (mask, GEN_INT (maskbits));
562       break;
563     case 128:
564       {
565 	unsigned char arr[16];
566 	int i = start / 8;
567 	memset (arr, 0, sizeof (arr));
568 	arr[i] = 0xff >> (start & 7);
569 	for (i++; i <= (start + width - 1) / 8; i++)
570 	  arr[i] = 0xff;
571 	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
572 	emit_move_insn (mask, array_to_constant (TImode, arr));
573       }
574       break;
575     default:
576       abort ();
577     }
578   if (GET_CODE (ops[0]) == MEM)
579     {
580       rtx low = gen_reg_rtx (SImode);
581       rtx rotl = gen_reg_rtx (SImode);
582       rtx mask0 = gen_reg_rtx (TImode);
583       rtx addr;
584       rtx addr0;
585       rtx addr1;
586       rtx mem;
587 
588       addr = force_reg (Pmode, XEXP (ops[0], 0));
589       addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
590       emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
591       emit_insn (gen_negsi2 (rotl, low));
592       emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
593       emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
594       mem = change_address (ops[0], TImode, addr0);
595       set_mem_alias_set (mem, 0);
596       emit_move_insn (dst, mem);
597       emit_insn (gen_selb (dst, dst, shift_reg, mask0));
598       if (start + width > MEM_ALIGN (ops[0]))
599 	{
600 	  rtx shl = gen_reg_rtx (SImode);
601 	  rtx mask1 = gen_reg_rtx (TImode);
602 	  rtx dst1 = gen_reg_rtx (TImode);
603 	  rtx mem1;
604 	  addr1 = plus_constant (Pmode, addr, 16);
605 	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
606 	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
607 	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
608 	  mem1 = change_address (ops[0], TImode, addr1);
609 	  set_mem_alias_set (mem1, 0);
610 	  emit_move_insn (dst1, mem1);
611 	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
612 	  emit_move_insn (mem1, dst1);
613 	}
614       emit_move_insn (mem, dst);
615     }
616   else
617     emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
618 }
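
/* Worked example of the register path above: inserting WIDTH == 8 bits
   at START == 4 into an SImode destination gives shift = 32 - 4 - 8 = 20
   and maskbits = (-1 << 20) + (1 << 28) = 0x0ff00000, i.e. a mask
   covering exactly bit positions 4..11 counted from the most significant
   bit; selb then merges the shifted source into DST under that mask.  */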
619 
620 
621 int
622 spu_expand_block_move (rtx ops[])
623 {
624   HOST_WIDE_INT bytes, align, offset;
625   rtx src, dst, sreg, dreg, target;
626   int i;
627   if (GET_CODE (ops[2]) != CONST_INT
628       || GET_CODE (ops[3]) != CONST_INT
629       || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
630     return 0;
631 
632   bytes = INTVAL (ops[2]);
633   align = INTVAL (ops[3]);
634 
635   if (bytes <= 0)
636     return 1;
637 
638   dst = ops[0];
639   src = ops[1];
640 
641   if (align == 16)
642     {
643       for (offset = 0; offset + 16 <= bytes; offset += 16)
644 	{
645 	  dst = adjust_address (ops[0], V16QImode, offset);
646 	  src = adjust_address (ops[1], V16QImode, offset);
647 	  emit_move_insn (dst, src);
648 	}
649       if (offset < bytes)
650 	{
651 	  rtx mask;
652 	  unsigned char arr[16] = { 0 };
653 	  for (i = 0; i < bytes - offset; i++)
654 	    arr[i] = 0xff;
655 	  dst = adjust_address (ops[0], V16QImode, offset);
656 	  src = adjust_address (ops[1], V16QImode, offset);
657 	  mask = gen_reg_rtx (V16QImode);
658 	  sreg = gen_reg_rtx (V16QImode);
659 	  dreg = gen_reg_rtx (V16QImode);
660 	  target = gen_reg_rtx (V16QImode);
661 	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
662 	  emit_move_insn (dreg, dst);
663 	  emit_move_insn (sreg, src);
664 	  emit_insn (gen_selb (target, dreg, sreg, mask));
665 	  emit_move_insn (dst, target);
666 	}
667       return 1;
668     }
669   return 0;
670 }
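
/* Example of the 16-byte-aligned path above: a 20-byte copy is expanded
   as one full V16QImode move for bytes 0..15, then for the 4-byte tail a
   mask of { 0xff, 0xff, 0xff, 0xff, 0, ... } is built and selb merges
   only the first 4 source bytes into the destination quadword, so bytes
   past the requested length are left untouched.  */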
671 
672 enum spu_comp_code
673 { SPU_EQ, SPU_GT, SPU_GTU };
674 
675 int spu_comp_icode[12][3] = {
676  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
677  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
678  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
679  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
680  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
681  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
682  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
683  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
684  {CODE_FOR_ceq_v8hi,  CODE_FOR_cgt_v8hi,  CODE_FOR_clgt_v8hi},
685  {CODE_FOR_ceq_v4si,  CODE_FOR_cgt_v4si,  CODE_FOR_clgt_v4si},
686  {CODE_FOR_ceq_v4sf,  CODE_FOR_cgt_v4sf, 0},
687  {CODE_FOR_ceq_v2df,  CODE_FOR_cgt_v2df, 0},
688 };
689 
690 /* Generate a compare for CODE.  The comparison result is computed into
691    a fresh register (or used directly when branching against zero).
692    GCC could figure this out too if we didn't provide all variations of
693    compares, but since GCC always wants to use WORD_MODE, we can
694    generate better code in most cases if we do it ourselves.  */
695 void
696 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
697 {
698   int reverse_compare = 0;
699   int reverse_test = 0;
700   rtx compare_result, eq_result;
701   rtx comp_rtx, eq_rtx;
702   machine_mode comp_mode;
703   machine_mode op_mode;
704   enum spu_comp_code scode, eq_code;
705   enum insn_code ior_code;
706   enum rtx_code code = GET_CODE (cmp);
707   rtx op0 = XEXP (cmp, 0);
708   rtx op1 = XEXP (cmp, 1);
709   int index;
710   int eq_test = 0;
711 
712   /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
713      and so on, to keep the constant in operand 1. */
714   if (GET_CODE (op1) == CONST_INT)
715     {
716       HOST_WIDE_INT val = INTVAL (op1) - 1;
717       if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
718 	switch (code)
719 	  {
720 	  case GE:
721 	    op1 = GEN_INT (val);
722 	    code = GT;
723 	    break;
724 	  case LT:
725 	    op1 = GEN_INT (val);
726 	    code = LE;
727 	    break;
728 	  case GEU:
729 	    op1 = GEN_INT (val);
730 	    code = GTU;
731 	    break;
732 	  case LTU:
733 	    op1 = GEN_INT (val);
734 	    code = LEU;
735 	    break;
736 	  default:
737 	    break;
738 	  }
739     }
740 
741   /* However, if we generate an integer result, performing a reverse test
742      would require an extra negation, so avoid that where possible.  */
743   if (GET_CODE (op1) == CONST_INT && is_set == 1)
744     {
745       HOST_WIDE_INT val = INTVAL (op1) + 1;
746       if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
747 	switch (code)
748 	  {
749 	  case LE:
750 	    op1 = GEN_INT (val);
751 	    code = LT;
752 	    break;
753 	  case LEU:
754 	    op1 = GEN_INT (val);
755 	    code = LTU;
756 	    break;
757 	  default:
758 	    break;
759 	  }
760     }
761 
762   comp_mode = SImode;
763   op_mode = GET_MODE (op0);
764 
765   switch (code)
766     {
767     case GE:
768       scode = SPU_GT;
769       if (HONOR_NANS (op_mode))
770 	{
771 	  reverse_compare = 0;
772 	  reverse_test = 0;
773 	  eq_test = 1;
774 	  eq_code = SPU_EQ;
775 	}
776       else
777 	{
778 	  reverse_compare = 1;
779 	  reverse_test = 1;
780 	}
781       break;
782     case LE:
783       scode = SPU_GT;
784       if (HONOR_NANS (op_mode))
785 	{
786 	  reverse_compare = 1;
787 	  reverse_test = 0;
788 	  eq_test = 1;
789 	  eq_code = SPU_EQ;
790 	}
791       else
792 	{
793 	  reverse_compare = 0;
794 	  reverse_test = 1;
795 	}
796       break;
797     case LT:
798       reverse_compare = 1;
799       reverse_test = 0;
800       scode = SPU_GT;
801       break;
802     case GEU:
803       reverse_compare = 1;
804       reverse_test = 1;
805       scode = SPU_GTU;
806       break;
807     case LEU:
808       reverse_compare = 0;
809       reverse_test = 1;
810       scode = SPU_GTU;
811       break;
812     case LTU:
813       reverse_compare = 1;
814       reverse_test = 0;
815       scode = SPU_GTU;
816       break;
817     case NE:
818       reverse_compare = 0;
819       reverse_test = 1;
820       scode = SPU_EQ;
821       break;
822 
823     case EQ:
824       scode = SPU_EQ;
825       break;
826     case GT:
827       scode = SPU_GT;
828       break;
829     case GTU:
830       scode = SPU_GTU;
831       break;
832     default:
833       scode = SPU_EQ;
834       break;
835     }
836 
837   switch (op_mode)
838     {
839     case QImode:
840       index = 0;
841       comp_mode = QImode;
842       break;
843     case HImode:
844       index = 1;
845       comp_mode = HImode;
846       break;
847     case SImode:
848       index = 2;
849       break;
850     case DImode:
851       index = 3;
852       break;
853     case TImode:
854       index = 4;
855       break;
856     case SFmode:
857       index = 5;
858       break;
859     case DFmode:
860       index = 6;
861       break;
862     case V16QImode:
863       index = 7;
864       comp_mode = op_mode;
865       break;
866     case V8HImode:
867       index = 8;
868       comp_mode = op_mode;
869       break;
870     case V4SImode:
871       index = 9;
872       comp_mode = op_mode;
873       break;
874     case V4SFmode:
875       index = 10;
876       comp_mode = V4SImode;
877       break;
878     case V2DFmode:
879       index = 11;
880       comp_mode = V2DImode;
881       break;
882     case V2DImode:
883     default:
884       abort ();
885     }
886 
887   if (GET_MODE (op1) == DFmode
888       && (scode != SPU_GT && scode != SPU_EQ))
889     abort ();
890 
891   if (is_set == 0 && op1 == const0_rtx
892       && (GET_MODE (op0) == SImode
893 	  || GET_MODE (op0) == HImode
894 	  || GET_MODE (op0) == QImode) && scode == SPU_EQ)
895     {
896       /* Don't need to set a register with the result when we are
897          comparing against zero and branching. */
898       reverse_test = !reverse_test;
899       compare_result = op0;
900     }
901   else
902     {
903       compare_result = gen_reg_rtx (comp_mode);
904 
905       if (reverse_compare)
906 	{
907 	  rtx t = op1;
908 	  op1 = op0;
909 	  op0 = t;
910 	}
911 
912       if (spu_comp_icode[index][scode] == 0)
913 	abort ();
914 
915       if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
916 	  (op0, op_mode))
917 	op0 = force_reg (op_mode, op0);
918       if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
919 	  (op1, op_mode))
920 	op1 = force_reg (op_mode, op1);
921       comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
922 							 op0, op1);
923       if (comp_rtx == 0)
924 	abort ();
925       emit_insn (comp_rtx);
926 
927       if (eq_test)
928         {
929           eq_result = gen_reg_rtx (comp_mode);
930           eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
931 							     op0, op1);
932           if (eq_rtx == 0)
933 	    abort ();
934           emit_insn (eq_rtx);
935           ior_code = optab_handler (ior_optab, comp_mode);
936           gcc_assert (ior_code != CODE_FOR_nothing);
937           emit_insn (GEN_FCN (ior_code)
938 		     (compare_result, compare_result, eq_result));
939         }
940     }
941 
942   if (is_set == 0)
943     {
944       rtx bcomp;
945       rtx loc_ref;
946 
947       /* We don't have branch on QI compare insns, so we convert the
948          QI compare result to a HI result. */
949       if (comp_mode == QImode)
950 	{
951 	  rtx old_res = compare_result;
952 	  compare_result = gen_reg_rtx (HImode);
953 	  comp_mode = HImode;
954 	  emit_insn (gen_extendqihi2 (compare_result, old_res));
955 	}
956 
957       if (reverse_test)
958 	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
959       else
960 	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
961 
962       loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
963       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
964 				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
965 							 loc_ref, pc_rtx)));
966     }
967   else if (is_set == 2)
968     {
969       rtx target = operands[0];
970       int compare_size = GET_MODE_BITSIZE (comp_mode);
971       int target_size = GET_MODE_BITSIZE (GET_MODE (target));
972       machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
973       rtx select_mask;
974       rtx op_t = operands[2];
975       rtx op_f = operands[3];
976 
977       /* The result of the comparison can be SI, HI or QI mode.  Create a
978          mask based on that result. */
979       if (target_size > compare_size)
980 	{
981 	  select_mask = gen_reg_rtx (mode);
982 	  emit_insn (gen_extend_compare (select_mask, compare_result));
983 	}
984       else if (target_size < compare_size)
985 	select_mask =
986 	  gen_rtx_SUBREG (mode, compare_result,
987 			  (compare_size - target_size) / BITS_PER_UNIT);
988       else if (comp_mode != mode)
989 	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
990       else
991 	select_mask = compare_result;
992 
993       if (GET_MODE (target) != GET_MODE (op_t)
994 	  || GET_MODE (target) != GET_MODE (op_f))
995 	abort ();
996 
997       if (reverse_test)
998 	emit_insn (gen_selb (target, op_t, op_f, select_mask));
999       else
1000 	emit_insn (gen_selb (target, op_f, op_t, select_mask));
1001     }
1002   else
1003     {
1004       rtx target = operands[0];
1005       if (reverse_test)
1006 	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1007 				gen_rtx_NOT (comp_mode, compare_result)));
1008       if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1009 	emit_insn (gen_extendhisi2 (target, compare_result));
1010       else if (GET_MODE (target) == SImode
1011 	       && GET_MODE (compare_result) == QImode)
1012 	emit_insn (gen_extend_compare (target, compare_result));
1013       else
1014 	emit_move_insn (target, compare_result);
1015     }
1016 }
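
/* Example of the constant adjustment at the top of the function above:
   (x >= 10) is rewritten as (x > 9) so the constant stays in operand 1
   and the compare maps onto the available greater-than patterns;
   conversely, when producing an integer result, (x <= 9) becomes
   (x < 10) so the reverse test, and its extra negation, is avoided.  */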
1017 
1018 HOST_WIDE_INT
1019 const_double_to_hwint (rtx x)
1020 {
1021   HOST_WIDE_INT val;
1022   REAL_VALUE_TYPE rv;
1023   if (GET_MODE (x) == SFmode)
1024     {
1025       REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1026       REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1027     }
1028   else if (GET_MODE (x) == DFmode)
1029     {
1030       long l[2];
1031       REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1032       REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1033       val = l[0];
1034       val = (val << 32) | (l[1] & 0xffffffff);
1035     }
1036   else
1037     abort ();
1038   return val;
1039 }
1040 
1041 rtx
1042 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1043 {
1044   long tv[2];
1045   REAL_VALUE_TYPE rv;
1046   gcc_assert (mode == SFmode || mode == DFmode);
1047 
1048   if (mode == SFmode)
1049     tv[0] = (v << 32) >> 32;
1050   else if (mode == DFmode)
1051     {
1052       tv[1] = (v << 32) >> 32;
1053       tv[0] = v >> 32;
1054     }
1055   real_from_target (&rv, tv, mode);
1056   return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1057 }
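
/* Example of the packing convention used by the two helpers above: for
   SFmode the low 32 bits of the HOST_WIDE_INT carry the IEEE single bit
   pattern, so 0x3f800000 round-trips to 1.0f; for DFmode the value is
   the full 64-bit double image, matching the (l[0] << 32) | l[1]
   packing in const_double_to_hwint.  */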
1058 
1059 void
1060 print_operand_address (FILE * file, register rtx addr)
1061 {
1062   rtx reg;
1063   rtx offset;
1064 
1065   if (GET_CODE (addr) == AND
1066       && GET_CODE (XEXP (addr, 1)) == CONST_INT
1067       && INTVAL (XEXP (addr, 1)) == -16)
1068     addr = XEXP (addr, 0);
1069 
1070   switch (GET_CODE (addr))
1071     {
1072     case REG:
1073       fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1074       break;
1075 
1076     case PLUS:
1077       reg = XEXP (addr, 0);
1078       offset = XEXP (addr, 1);
1079       if (GET_CODE (offset) == REG)
1080 	{
1081 	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1082 		   reg_names[REGNO (offset)]);
1083 	}
1084       else if (GET_CODE (offset) == CONST_INT)
1085 	{
1086 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1087 		   INTVAL (offset), reg_names[REGNO (reg)]);
1088 	}
1089       else
1090 	abort ();
1091       break;
1092 
1093     case CONST:
1094     case LABEL_REF:
1095     case SYMBOL_REF:
1096     case CONST_INT:
1097       output_addr_const (file, addr);
1098       break;
1099 
1100     default:
1101       debug_rtx (addr);
1102       abort ();
1103     }
1104 }
1105 
1106 void
1107 print_operand (FILE * file, rtx x, int code)
1108 {
1109   machine_mode mode = GET_MODE (x);
1110   HOST_WIDE_INT val;
1111   unsigned char arr[16];
1112   int xcode = GET_CODE (x);
1113   int i, info;
1114   if (GET_MODE (x) == VOIDmode)
1115     switch (code)
1116       {
1117       case 'L':			/* 128 bits, signed */
1118       case 'm':			/* 128 bits, signed */
1119       case 'T':			/* 128 bits, signed */
1120       case 't':			/* 128 bits, signed */
1121 	mode = TImode;
1122 	break;
1123       case 'K':			/* 64 bits, signed */
1124       case 'k':			/* 64 bits, signed */
1125       case 'D':			/* 64 bits, signed */
1126       case 'd':			/* 64 bits, signed */
1127 	mode = DImode;
1128 	break;
1129       case 'J':			/* 32 bits, signed */
1130       case 'j':			/* 32 bits, signed */
1131       case 's':			/* 32 bits, signed */
1132       case 'S':			/* 32 bits, signed */
1133 	mode = SImode;
1134 	break;
1135       }
1136   switch (code)
1137     {
1138 
1139     case 'j':			/* 32 bits, signed */
1140     case 'k':			/* 64 bits, signed */
1141     case 'm':			/* 128 bits, signed */
1142       if (xcode == CONST_INT
1143 	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1144 	{
1145 	  gcc_assert (logical_immediate_p (x, mode));
1146 	  constant_to_array (mode, x, arr);
1147 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1148 	  val = trunc_int_for_mode (val, SImode);
1149 	  switch (which_logical_immediate (val))
1150 	  {
1151 	  case SPU_ORI:
1152 	    break;
1153 	  case SPU_ORHI:
1154 	    fprintf (file, "h");
1155 	    break;
1156 	  case SPU_ORBI:
1157 	    fprintf (file, "b");
1158 	    break;
1159 	  default:
1160 	    gcc_unreachable();
1161 	  }
1162 	}
1163       else
1164 	gcc_unreachable();
1165       return;
1166 
1167     case 'J':			/* 32 bits, signed */
1168     case 'K':			/* 64 bits, signed */
1169     case 'L':			/* 128 bits, signed */
1170       if (xcode == CONST_INT
1171 	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1172 	{
1173 	  gcc_assert (logical_immediate_p (x, mode)
1174 		      || iohl_immediate_p (x, mode));
1175 	  constant_to_array (mode, x, arr);
1176 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1177 	  val = trunc_int_for_mode (val, SImode);
1178 	  switch (which_logical_immediate (val))
1179 	  {
1180 	  case SPU_ORI:
1181 	  case SPU_IOHL:
1182 	    break;
1183 	  case SPU_ORHI:
1184 	    val = trunc_int_for_mode (val, HImode);
1185 	    break;
1186 	  case SPU_ORBI:
1187 	    val = trunc_int_for_mode (val, QImode);
1188 	    break;
1189 	  default:
1190 	    gcc_unreachable();
1191 	  }
1192 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1193 	}
1194       else
1195 	gcc_unreachable();
1196       return;
1197 
1198     case 't':			/* 128 bits, signed */
1199     case 'd':			/* 64 bits, signed */
1200     case 's':			/* 32 bits, signed */
1201       if (CONSTANT_P (x))
1202 	{
1203 	  enum immediate_class c = classify_immediate (x, mode);
1204 	  switch (c)
1205 	    {
1206 	    case IC_IL1:
1207 	      constant_to_array (mode, x, arr);
1208 	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1209 	      val = trunc_int_for_mode (val, SImode);
1210 	      switch (which_immediate_load (val))
1211 		{
1212 		case SPU_IL:
1213 		  break;
1214 		case SPU_ILA:
1215 		  fprintf (file, "a");
1216 		  break;
1217 		case SPU_ILH:
1218 		  fprintf (file, "h");
1219 		  break;
1220 		case SPU_ILHU:
1221 		  fprintf (file, "hu");
1222 		  break;
1223 		default:
1224 		  gcc_unreachable ();
1225 		}
1226 	      break;
1227 	    case IC_CPAT:
1228 	      constant_to_array (mode, x, arr);
1229 	      cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1230 	      if (info == 1)
1231 		fprintf (file, "b");
1232 	      else if (info == 2)
1233 		fprintf (file, "h");
1234 	      else if (info == 4)
1235 		fprintf (file, "w");
1236 	      else if (info == 8)
1237 		fprintf (file, "d");
1238 	      break;
1239 	    case IC_IL1s:
1240 	      if (xcode == CONST_VECTOR)
1241 		{
1242 		  x = CONST_VECTOR_ELT (x, 0);
1243 		  xcode = GET_CODE (x);
1244 		}
1245 	      if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1246 		fprintf (file, "a");
1247 	      else if (xcode == HIGH)
1248 		fprintf (file, "hu");
1249 	      break;
1250 	    case IC_FSMBI:
1251 	    case IC_FSMBI2:
1252 	    case IC_IL2:
1253 	    case IC_IL2s:
1254 	    case IC_POOL:
1255 	      abort ();
1256 	    }
1257 	}
1258       else
1259 	gcc_unreachable ();
1260       return;
1261 
1262     case 'T':			/* 128 bits, signed */
1263     case 'D':			/* 64 bits, signed */
1264     case 'S':			/* 32 bits, signed */
1265       if (CONSTANT_P (x))
1266 	{
1267 	  enum immediate_class c = classify_immediate (x, mode);
1268 	  switch (c)
1269 	    {
1270 	    case IC_IL1:
1271 	      constant_to_array (mode, x, arr);
1272 	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1273 	      val = trunc_int_for_mode (val, SImode);
1274 	      switch (which_immediate_load (val))
1275 		{
1276 		case SPU_IL:
1277 		case SPU_ILA:
1278 		  break;
1279 		case SPU_ILH:
1280 		case SPU_ILHU:
1281 		  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1282 		  break;
1283 		default:
1284 		  gcc_unreachable ();
1285 		}
1286 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1287 	      break;
1288 	    case IC_FSMBI:
1289 	      constant_to_array (mode, x, arr);
1290 	      val = 0;
1291 	      for (i = 0; i < 16; i++)
1292 		{
1293 		  val <<= 1;
1294 		  val |= arr[i] & 1;
1295 		}
1296 	      print_operand (file, GEN_INT (val), 0);
1297 	      break;
1298 	    case IC_CPAT:
1299 	      constant_to_array (mode, x, arr);
1300 	      cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1301 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1302 	      break;
1303 	    case IC_IL1s:
1304 	      if (xcode == HIGH)
1305 		x = XEXP (x, 0);
1306 	      if (GET_CODE (x) == CONST_VECTOR)
1307 		x = CONST_VECTOR_ELT (x, 0);
1308 	      output_addr_const (file, x);
1309 	      if (xcode == HIGH)
1310 		fprintf (file, "@h");
1311 	      break;
1312 	    case IC_IL2:
1313 	    case IC_IL2s:
1314 	    case IC_FSMBI2:
1315 	    case IC_POOL:
1316 	      abort ();
1317 	    }
1318 	}
1319       else
1320 	gcc_unreachable ();
1321       return;
1322 
1323     case 'C':
1324       if (xcode == CONST_INT)
1325 	{
1326 	  /* Only the 4 least significant bits are relevant for the
1327 	     generate-control-word instructions. */
1328 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1329 	  return;
1330 	}
1331       break;
1332 
1333     case 'M':			/* print code for c*d */
1334       if (GET_CODE (x) == CONST_INT)
1335 	switch (INTVAL (x))
1336 	  {
1337 	  case 1:
1338 	    fprintf (file, "b");
1339 	    break;
1340 	  case 2:
1341 	    fprintf (file, "h");
1342 	    break;
1343 	  case 4:
1344 	    fprintf (file, "w");
1345 	    break;
1346 	  case 8:
1347 	    fprintf (file, "d");
1348 	    break;
1349 	  default:
1350 	    gcc_unreachable();
1351 	  }
1352       else
1353 	gcc_unreachable();
1354       return;
1355 
1356     case 'N':			/* Negate the operand */
1357       if (xcode == CONST_INT)
1358 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1359       else if (xcode == CONST_VECTOR)
1360 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1361 		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1362       return;
1363 
1364     case 'I':			/* enable/disable interrupts */
1365       if (xcode == CONST_INT)
1366 	fprintf (file, "%s",  INTVAL (x) == 0 ? "d" : "e");
1367       return;
1368 
1369     case 'b':			/* branch modifiers */
1370       if (xcode == REG)
1371 	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1372       else if (COMPARISON_P (x))
1373 	fprintf (file, "%s", xcode == NE ? "n" : "");
1374       return;
1375 
1376     case 'i':			/* indirect call */
1377       if (xcode == MEM)
1378 	{
1379 	  if (GET_CODE (XEXP (x, 0)) == REG)
1380 	    /* Used in indirect function calls. */
1381 	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1382 	  else
1383 	    output_address (XEXP (x, 0));
1384 	}
1385       return;
1386 
1387     case 'p':			/* load/store */
1388       if (xcode == MEM)
1389 	{
1390 	  x = XEXP (x, 0);
1391 	  xcode = GET_CODE (x);
1392 	}
1393       if (xcode == AND)
1394 	{
1395 	  x = XEXP (x, 0);
1396 	  xcode = GET_CODE (x);
1397 	}
1398       if (xcode == REG)
1399 	fprintf (file, "d");
1400       else if (xcode == CONST_INT)
1401 	fprintf (file, "a");
1402       else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1403 	fprintf (file, "r");
1404       else if (xcode == PLUS || xcode == LO_SUM)
1405 	{
1406 	  if (GET_CODE (XEXP (x, 1)) == REG)
1407 	    fprintf (file, "x");
1408 	  else
1409 	    fprintf (file, "d");
1410 	}
1411       return;
1412 
1413     case 'e':
1414       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1415       val &= 0x7;
1416       output_addr_const (file, GEN_INT (val));
1417       return;
1418 
1419     case 'f':
1420       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1421       val &= 0x1f;
1422       output_addr_const (file, GEN_INT (val));
1423       return;
1424 
1425     case 'g':
1426       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1427       val &= 0x3f;
1428       output_addr_const (file, GEN_INT (val));
1429       return;
1430 
1431     case 'h':
1432       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1433       val = (val >> 3) & 0x1f;
1434       output_addr_const (file, GEN_INT (val));
1435       return;
1436 
1437     case 'E':
1438       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1439       val = -val;
1440       val &= 0x7;
1441       output_addr_const (file, GEN_INT (val));
1442       return;
1443 
1444     case 'F':
1445       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1446       val = -val;
1447       val &= 0x1f;
1448       output_addr_const (file, GEN_INT (val));
1449       return;
1450 
1451     case 'G':
1452       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1453       val = -val;
1454       val &= 0x3f;
1455       output_addr_const (file, GEN_INT (val));
1456       return;
1457 
1458     case 'H':
1459       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1460       val = -(val & -8ll);
1461       val = (val >> 3) & 0x1f;
1462       output_addr_const (file, GEN_INT (val));
1463       return;
1464 
1465     case 'v':
1466     case 'w':
1467       constant_to_array (mode, x, arr);
1468       val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1469       output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1470       return;
1471 
1472     case 0:
1473       if (xcode == REG)
1474 	fprintf (file, "%s", reg_names[REGNO (x)]);
1475       else if (xcode == MEM)
1476 	output_address (XEXP (x, 0));
1477       else if (xcode == CONST_VECTOR)
1478 	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1479       else
1480 	output_addr_const (file, x);
1481       return;
1482 
1483       /* unused letters
1484 	              o qr  u   yz
1485 	AB            OPQR  UVWXYZ */
1486     default:
1487       output_operand_lossage ("invalid %%xn code");
1488     }
1489   gcc_unreachable ();
1490 }
1491 
1492 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1493    caller saved register.  For leaf functions it is more efficient to
1494    use a volatile register because we won't need to save and restore the
1495    pic register.  This routine is only valid after register allocation
1496    is completed, so we can pick an unused register.  */
1497 static rtx
1498 get_pic_reg (void)
1499 {
1500   if (!reload_completed && !reload_in_progress)
1501     abort ();
1502 
1503   /* If we've already made the decision, we need to keep with it.  Once we've
1504      decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1505      return true since the register is now live; this should not cause us to
1506      "switch back" to using pic_offset_table_rtx.  */
1507   if (!cfun->machine->pic_reg)
1508     {
1509       if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1510 	cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1511       else
1512 	cfun->machine->pic_reg = pic_offset_table_rtx;
1513     }
1514 
1515   return cfun->machine->pic_reg;
1516 }
1517 
1518 /* Split constant addresses to handle cases that are too large.
1519    Add in the pic register when in PIC mode.
1520    Split immediates that require more than 1 instruction. */
1521 int
1522 spu_split_immediate (rtx * ops)
1523 {
1524   machine_mode mode = GET_MODE (ops[0]);
1525   enum immediate_class c = classify_immediate (ops[1], mode);
1526 
1527   switch (c)
1528     {
1529     case IC_IL2:
1530       {
1531 	unsigned char arrhi[16];
1532 	unsigned char arrlo[16];
1533 	rtx to, temp, hi, lo;
1534 	int i;
1535 	machine_mode imode = mode;
1536 	/* We need to do reals as ints because the constant used in the
1537 	   IOR might not be a legitimate real constant. */
1538 	imode = int_mode_for_mode (mode);
1539 	constant_to_array (mode, ops[1], arrhi);
1540 	if (imode != mode)
1541 	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
1542 	else
1543 	  to = ops[0];
1544 	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1545 	for (i = 0; i < 16; i += 4)
1546 	  {
1547 	    arrlo[i + 2] = arrhi[i + 2];
1548 	    arrlo[i + 3] = arrhi[i + 3];
1549 	    arrlo[i + 0] = arrlo[i + 1] = 0;
1550 	    arrhi[i + 2] = arrhi[i + 3] = 0;
1551 	  }
1552 	hi = array_to_constant (imode, arrhi);
1553 	lo = array_to_constant (imode, arrlo);
1554 	emit_move_insn (temp, hi);
1555 	emit_insn (gen_rtx_SET
1556 		   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1557 	return 1;
1558       }
1559     case IC_FSMBI2:
1560       {
1561 	unsigned char arr_fsmbi[16];
1562 	unsigned char arr_andbi[16];
1563 	rtx to, reg_fsmbi, reg_and;
1564 	int i;
1565 	machine_mode imode = mode;
1566 	/* We need to do reals as ints because the constant used in the
1567 	 * AND might not be a legitimate real constant. */
1568 	imode = int_mode_for_mode (mode);
1569 	constant_to_array (mode, ops[1], arr_fsmbi);
1570 	if (imode != mode)
1571 	  to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1572 	else
1573 	  to = ops[0];
1574 	for (i = 0; i < 16; i++)
1575 	  if (arr_fsmbi[i] != 0)
1576 	    {
1577 	      arr_andbi[0] = arr_fsmbi[i];
1578 	      arr_fsmbi[i] = 0xff;
1579 	    }
1580 	for (i = 1; i < 16; i++)
1581 	  arr_andbi[i] = arr_andbi[0];
1582 	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1583 	reg_and = array_to_constant (imode, arr_andbi);
1584 	emit_move_insn (to, reg_fsmbi);
1585 	emit_insn (gen_rtx_SET
1586 		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1587 	return 1;
1588       }
1589     case IC_POOL:
1590       if (reload_in_progress || reload_completed)
1591 	{
1592 	  rtx mem = force_const_mem (mode, ops[1]);
1593 	  if (TARGET_LARGE_MEM)
1594 	    {
1595 	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1596 	      emit_move_insn (addr, XEXP (mem, 0));
1597 	      mem = replace_equiv_address (mem, addr);
1598 	    }
1599 	  emit_move_insn (ops[0], mem);
1600 	  return 1;
1601 	}
1602       break;
1603     case IC_IL1s:
1604     case IC_IL2s:
1605       if (reload_completed && GET_CODE (ops[1]) != HIGH)
1606 	{
1607 	  if (c == IC_IL2s)
1608 	    {
1609 	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1610 	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1611 	    }
1612 	  else if (flag_pic)
1613 	    emit_insn (gen_pic (ops[0], ops[1]));
1614 	  if (flag_pic)
1615 	    {
1616 	      rtx pic_reg = get_pic_reg ();
1617 	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1618 	    }
1619 	  return flag_pic || c == IC_IL2s;
1620 	}
1621       break;
1622     case IC_IL1:
1623     case IC_FSMBI:
1624     case IC_CPAT:
1625       break;
1626     }
1627   return 0;
1628 }
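
/* Worked example of the IC_IL2 case above, assuming a V4SImode constant
   whose every element is 0x12345678: ARRHI keeps the upper two bytes of
   each word and ARRLO the lower two, so the split emits a move of
   0x12340000 into each element (an ilhu-class immediate) followed by an
   IOR with 0x5678 (an iohl-class immediate), reconstructing the full
   constant in two instructions.  */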
1629 
1630 /* SAVING is TRUE when we are generating the actual load and store
1631    instructions for REGNO.  When determining the size of the stack
1632    needed for saving registers we must allocate enough space for the
1633    worst case, because we don't always have the information early enough
1634    to avoid allocating it.  But we can at least eliminate the actual loads
1635    and stores during the prologue/epilogue.  */
1636 static int
1637 need_to_save_reg (int regno, int saving)
1638 {
1639   if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1640     return 1;
1641   if (flag_pic
1642       && regno == PIC_OFFSET_TABLE_REGNUM
1643       && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1644     return 1;
1645   return 0;
1646 }
1647 
1648 /* This function is only correct starting with local register
1649    allocation.  */
1650 int
1651 spu_saved_regs_size (void)
1652 {
1653   int reg_save_size = 0;
1654   int regno;
1655 
1656   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1657     if (need_to_save_reg (regno, 0))
1658       reg_save_size += 0x10;
1659   return reg_save_size;
1660 }
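
/* Each saved register occupies a full 16-byte (quadword) slot, so a
   function that must preserve, say, three call-saved registers reports
   3 * 0x10 = 48 bytes here.  */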
1661 
1662 static rtx_insn *
1663 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1664 {
1665   rtx reg = gen_rtx_REG (V4SImode, regno);
1666   rtx mem =
1667     gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1668   return emit_insn (gen_movv4si (mem, reg));
1669 }
1670 
1671 static rtx_insn *
1672 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1673 {
1674   rtx reg = gen_rtx_REG (V4SImode, regno);
1675   rtx mem =
1676     gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1677   return emit_insn (gen_movv4si (reg, mem));
1678 }
1679 
1680 /* This happens after reload, so we need to expand it.  */
1681 static rtx_insn *
1682 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1683 {
1684   rtx_insn *insn;
1685   if (satisfies_constraint_K (GEN_INT (imm)))
1686     {
1687       insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1688     }
1689   else
1690     {
1691       emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1692       insn = emit_insn (gen_addsi3 (dst, src, scratch));
1693       if (REGNO (src) == REGNO (scratch))
1694 	abort ();
1695     }
1696   return insn;
1697 }
1698 
1699 /* Return nonzero if this function is known to have a null epilogue.  */
1700 
1701 int
1702 direct_return (void)
1703 {
1704   if (reload_completed)
1705     {
1706       if (cfun->static_chain_decl == 0
1707 	  && (spu_saved_regs_size ()
1708 	      + get_frame_size ()
1709 	      + crtl->outgoing_args_size
1710 	      + crtl->args.pretend_args_size == 0)
1711 	  && crtl->is_leaf)
1712 	return 1;
1713     }
1714   return 0;
1715 }
1716 
1717 /*
1718    The stack frame looks like this:
1719          +-------------+
1720          |  incoming   |
1721          |    args     |
1722    AP -> +-------------+
1723          | $lr save    |
1724          +-------------+
1725  prev SP | back chain  |
1726          +-------------+
1727          |  var args   |
1728          |  reg save   | crtl->args.pretend_args_size bytes
1729          +-------------+
1730          |    ...      |
1731          | saved regs  | spu_saved_regs_size() bytes
1732    FP -> +-------------+
1733          |    ...      |
1734          |   vars      | get_frame_size()  bytes
1735   HFP -> +-------------+
1736          |    ...      |
1737          |  outgoing   |
1738          |    args     | crtl->outgoing_args_size bytes
1739          +-------------+
1740          | $lr of next |
1741          |   frame     |
1742          +-------------+
1743          | back chain  |
1744    SP -> +-------------+
1745 
1746 */
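
/* Putting the pieces of the diagram together, spu_expand_prologue below
   computes (roughly):

     total_size = get_frame_size ()            -- local variables
                + spu_saved_regs_size ()       -- 16 bytes per saved reg
                + crtl->outgoing_args_size     -- outgoing args
                + crtl->args.pretend_args_size -- varargs reg save
                + STACK_POINTER_OFFSET         -- only if non-leaf, alloca,
                                                  or nonzero size

   and then decrements $sp by that amount, saving the old $sp as the
   back chain.  */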
1747 void
1748 spu_expand_prologue (void)
1749 {
1750   HOST_WIDE_INT size = get_frame_size (), offset, regno;
1751   HOST_WIDE_INT total_size;
1752   HOST_WIDE_INT saved_regs_size;
1753   rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1754   rtx scratch_reg_0, scratch_reg_1;
1755   rtx_insn *insn;
1756   rtx real;
1757 
1758   if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1759     cfun->machine->pic_reg = pic_offset_table_rtx;
1760 
1761   if (spu_naked_function_p (current_function_decl))
1762     return;
1763 
1764   scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1765   scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1766 
1767   saved_regs_size = spu_saved_regs_size ();
1768   total_size = size + saved_regs_size
1769     + crtl->outgoing_args_size
1770     + crtl->args.pretend_args_size;
1771 
1772   if (!crtl->is_leaf
1773       || cfun->calls_alloca || total_size > 0)
1774     total_size += STACK_POINTER_OFFSET;
1775 
1776   /* Save this first because code after this might use the link
1777      register as a scratch register. */
1778   if (!crtl->is_leaf)
1779     {
1780       insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1781       RTX_FRAME_RELATED_P (insn) = 1;
1782     }
1783 
1784   if (total_size > 0)
1785     {
1786       offset = -crtl->args.pretend_args_size;
1787       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1788 	if (need_to_save_reg (regno, 1))
1789 	  {
1790 	    offset -= 16;
1791 	    insn = frame_emit_store (regno, sp_reg, offset);
1792 	    RTX_FRAME_RELATED_P (insn) = 1;
1793 	  }
1794     }
1795 
1796   if (flag_pic && cfun->machine->pic_reg)
1797     {
1798       rtx pic_reg = cfun->machine->pic_reg;
1799       insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1800       insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1801     }
1802 
1803   if (total_size > 0)
1804     {
1805       if (flag_stack_check)
1806 	{
1807 	  /* We compare against total_size-1 because
1808 	     ($sp >= total_size) <=> ($sp > total_size-1) */
1809 	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1810 	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1811 	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
1812 	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1813 	    {
1814 	      emit_move_insn (scratch_v4si, size_v4si);
1815 	      size_v4si = scratch_v4si;
1816 	    }
1817 	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1818 	  emit_insn (gen_vec_extractv4si
1819 		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1820 	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1821 	}
1822 
1823       /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1824          the value of the previous $sp because we save it as the back
1825          chain. */
1826       if (total_size <= 2000)
1827 	{
1828 	  /* In this case we save the back chain first. */
1829 	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1830 	  insn =
1831 	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1832 	}
1833       else
1834 	{
1835 	  insn = emit_move_insn (scratch_reg_0, sp_reg);
1836 	  insn =
1837 	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1838 	}
1839       RTX_FRAME_RELATED_P (insn) = 1;
1840       real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1841       add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1842 
1843       if (total_size > 2000)
1844 	{
1845 	  /* Save the back chain ptr */
1846 	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1847 	}
1848 
1849       if (frame_pointer_needed)
1850 	{
1851 	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1852 	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1853 	    + crtl->outgoing_args_size;
1854 	  /* Set the new frame_pointer */
1855 	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1856 	  RTX_FRAME_RELATED_P (insn) = 1;
1857 	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1858 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1859           REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1860 	}
1861     }
1862 
1863   if (flag_stack_usage_info)
1864     current_function_static_stack_size = total_size;
1865 }
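
/* As an illustration only (a sketch derived from the offsets used above,
   not an ABI reference): for a non-leaf function the prologue lays out
   the frame roughly as

	old $sp + 16 : link register save slot
	old $sp - P  : callee-saved registers, one 16-byte slot each,
		       growing downward below the P pretend-arg bytes
	new $sp + 0  : back chain, i.e. the old $sp value

   where new $sp = old $sp - total_size, and the back chain is stored
   either directly (total_size <= 2000) or via scratch_reg_0.  */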
1866 
1867 void
1868 spu_expand_epilogue (bool sibcall_p)
1869 {
1870   int size = get_frame_size (), offset, regno;
1871   HOST_WIDE_INT saved_regs_size, total_size;
1872   rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1873   rtx scratch_reg_0;
1874 
1875   if (spu_naked_function_p (current_function_decl))
1876     return;
1877 
1878   scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1879 
1880   saved_regs_size = spu_saved_regs_size ();
1881   total_size = size + saved_regs_size
1882     + crtl->outgoing_args_size
1883     + crtl->args.pretend_args_size;
1884 
1885   if (!crtl->is_leaf
1886       || cfun->calls_alloca || total_size > 0)
1887     total_size += STACK_POINTER_OFFSET;
1888 
1889   if (total_size > 0)
1890     {
1891       if (cfun->calls_alloca)
1892 	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1893       else
1894 	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1895 
1896 
1897       if (saved_regs_size > 0)
1898 	{
1899 	  offset = -crtl->args.pretend_args_size;
1900 	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1901 	    if (need_to_save_reg (regno, 1))
1902 	      {
1903 		offset -= 0x10;
1904 		frame_emit_load (regno, sp_reg, offset);
1905 	      }
1906 	}
1907     }
1908 
1909   if (!crtl->is_leaf)
1910     frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1911 
1912   if (!sibcall_p)
1913     {
1914       emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1915       emit_jump_insn (gen__return ());
1916     }
1917 }
1918 
1919 rtx
1920 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1921 {
1922   if (count != 0)
1923     return 0;
1924   /* This is inefficient because it ends up copying to a save-register
1925      which then gets saved even though $lr has already been saved.  But
1926      it does generate better code for leaf functions and we don't need
1927      to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
1928      used for __builtin_return_address anyway, so maybe we don't care if
1929      it's inefficient. */
1930   return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1931 }
1932 
1933 
1934 /* Given VAL, generate a constant appropriate for MODE.
1935    If MODE is a vector mode, every element will be VAL.
1936    For TImode, VAL will be zero extended to 128 bits. */
1937 rtx
1938 spu_const (machine_mode mode, HOST_WIDE_INT val)
1939 {
1940   rtx inner;
1941   rtvec v;
1942   int units, i;
1943 
1944   gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1945 	      || GET_MODE_CLASS (mode) == MODE_FLOAT
1946 	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1947 	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1948 
1949   if (GET_MODE_CLASS (mode) == MODE_INT)
1950     return immed_double_const (val, 0, mode);
1951 
1952   /* val is the bit representation of the float */
1953   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1954     return hwint_to_const_double (mode, val);
1955 
1956   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1957     inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1958   else
1959     inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1960 
1961   units = GET_MODE_NUNITS (mode);
1962 
1963   v = rtvec_alloc (units);
1964 
1965   for (i = 0; i < units; ++i)
1966     RTVEC_ELT (v, i) = inner;
1967 
1968   return gen_rtx_CONST_VECTOR (mode, v);
1969 }
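
/* For example (illustrative only), spu_const (V4SImode, 1) returns a
   CONST_VECTOR whose four SImode elements are all (const_int 1), while
   spu_const (SImode, 1) simply returns (const_int 1).  */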
1970 
1971 /* Create a MODE vector constant from 4 ints. */
1972 rtx
1973 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1974 {
1975   unsigned char arr[16];
1976   arr[0] = (a >> 24) & 0xff;
1977   arr[1] = (a >> 16) & 0xff;
1978   arr[2] = (a >> 8) & 0xff;
1979   arr[3] = (a >> 0) & 0xff;
1980   arr[4] = (b >> 24) & 0xff;
1981   arr[5] = (b >> 16) & 0xff;
1982   arr[6] = (b >> 8) & 0xff;
1983   arr[7] = (b >> 0) & 0xff;
1984   arr[8] = (c >> 24) & 0xff;
1985   arr[9] = (c >> 16) & 0xff;
1986   arr[10] = (c >> 8) & 0xff;
1987   arr[11] = (c >> 0) & 0xff;
1988   arr[12] = (d >> 24) & 0xff;
1989   arr[13] = (d >> 16) & 0xff;
1990   arr[14] = (d >> 8) & 0xff;
1991   arr[15] = (d >> 0) & 0xff;
1992   return array_to_constant(mode, arr);
1993 }
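
/* For example (illustrative only),
     spu_const_from_ints (V16QImode, 0x00010203, 0x04050607,
			  0x08090a0b, 0x0c0d0e0f)
   fills arr[] with the bytes 0, 1, 2, ..., 15, since each word is
   unpacked most-significant byte first.  */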
1994 
1995 /* Branch hint support.  */
1996 
1997 /* An array of these is used to propagate hints to predecessor blocks. */
1998 struct spu_bb_info
1999 {
2000   rtx_insn *prop_jump; /* propagated from another block */
2001   int bb_index;  /* the original block. */
2002 };
2003 static struct spu_bb_info *spu_bb_info;
2004 
2005 #define STOP_HINT_P(INSN) \
2006 		(CALL_P(INSN) \
2007 		 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2008 		 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2009 
2010 /* 1 when RTX is a hinted branch or its target.  We keep track of
2011    what has been hinted so the safe-hint code can test it easily.  */
2012 #define HINTED_P(RTX)						\
2013   (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2014 
2015 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2016 #define SCHED_ON_EVEN_P(RTX)						\
2017   (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2018 
2019 /* Emit a nop for INSN such that the two will dual issue.  This assumes
2020    INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
2021    We check for TImode to handle a MULTI1 insn which has dual issued its
2022    first instruction.  get_pipe returns -1 for MULTI0 or inline asm.  */
2023 static void
2024 emit_nop_for_insn (rtx_insn *insn)
2025 {
2026   int p;
2027   rtx_insn *new_insn;
2028 
2029   /* We need to handle JUMP_TABLE_DATA separately.  */
2030   if (JUMP_TABLE_DATA_P (insn))
2031     {
2032       new_insn = emit_insn_after (gen_lnop(), insn);
2033       recog_memoized (new_insn);
2034       INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2035       return;
2036     }
2037 
2038   p = get_pipe (insn);
2039   if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2040     new_insn = emit_insn_after (gen_lnop (), insn);
2041   else if (p == 1 && GET_MODE (insn) == TImode)
2042     {
2043       new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2044       PUT_MODE (new_insn, TImode);
2045       PUT_MODE (insn, VOIDmode);
2046     }
2047   else
2048     new_insn = emit_insn_after (gen_lnop (), insn);
2049   recog_memoized (new_insn);
2050   INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2051 }
2052 
2053 /* Insert nops in basic blocks to meet dual issue alignment
2054    requirements.  Also make sure hbrp and hint instructions are at least
2055    one cycle apart, possibly inserting a nop.  */
2056 static void
2057 pad_bb(void)
2058 {
2059   rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2060   int length;
2061   int addr;
2062 
2063   /* This sets up INSN_ADDRESSES. */
2064   shorten_branches (get_insns ());
2065 
2066   /* Keep track of length added by nops. */
2067   length = 0;
2068 
2069   prev_insn = 0;
2070   insn = get_insns ();
2071   if (!active_insn_p (insn))
2072     insn = next_active_insn (insn);
2073   for (; insn; insn = next_insn)
2074     {
2075       next_insn = next_active_insn (insn);
2076       if (INSN_CODE (insn) == CODE_FOR_iprefetch
2077 	  || INSN_CODE (insn) == CODE_FOR_hbr)
2078 	{
2079 	  if (hbr_insn)
2080 	    {
2081 	      int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2082 	      int a1 = INSN_ADDRESSES (INSN_UID (insn));
2083 	      if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2084 		  || (a1 - a0 == 4))
2085 		{
2086 		  prev_insn = emit_insn_before (gen_lnop (), insn);
2087 		  PUT_MODE (prev_insn, GET_MODE (insn));
2088 		  PUT_MODE (insn, TImode);
2089 		  INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2090 		  length += 4;
2091 		}
2092 	    }
2093 	  hbr_insn = insn;
2094 	}
2095       if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2096 	{
2097 	  if (GET_MODE (insn) == TImode)
2098 	    PUT_MODE (next_insn, TImode);
2099 	  insn = next_insn;
2100 	  next_insn = next_active_insn (insn);
2101 	}
2102       addr = INSN_ADDRESSES (INSN_UID (insn));
2103       if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2104 	{
2105 	  if (((addr + length) & 7) != 0)
2106 	    {
2107 	      emit_nop_for_insn (prev_insn);
2108 	      length += 4;
2109 	    }
2110 	}
2111       else if (GET_MODE (insn) == TImode
2112 	       && ((next_insn && GET_MODE (next_insn) != TImode)
2113 		   || get_attr_type (insn) == TYPE_MULTI0)
2114 	       && ((addr + length) & 7) != 0)
2115 	{
2116 	  /* prev_insn will always be set because the first insn is
2117 	     always 8-byte aligned. */
2118 	  emit_nop_for_insn (prev_insn);
2119 	  length += 4;
2120 	}
2121       prev_insn = insn;
2122     }
2123 }
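
/* For example (illustrative only): if a branch flagged SCHED_ON_EVEN_P
   would land at address 0x24 once earlier padding is counted, then
   (addr + length) & 7 is nonzero, so a 4-byte nop is emitted after the
   previous insn and the branch moves to the 8-byte boundary at 0x28.  */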
2124 
2125 
2126 /* Routines for branch hints. */
2127 
2128 static void
2129 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2130 		      int distance, sbitmap blocks)
2131 {
2132   rtx branch_label = 0;
2133   rtx_insn *hint;
2134   rtx_insn *insn;
2135   rtx_jump_table_data *table;
2136 
2137   if (before == 0 || branch == 0 || target == 0)
2138     return;
2139 
2140   /* While scheduling we require hints to be no further than 600 bytes
2141      from the branch, so we need to enforce that here too.  */
2142   if (distance > 600)
2143     return;
2144 
2145   /* If BEFORE is a basic block note, emit the hint after the note instead.  */
2146   if (NOTE_INSN_BASIC_BLOCK_P (before))
2147     before = NEXT_INSN (before);
2148 
2149   branch_label = gen_label_rtx ();
2150   LABEL_NUSES (branch_label)++;
2151   LABEL_PRESERVE_P (branch_label) = 1;
2152   insn = emit_label_before (branch_label, branch);
2153   branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2154   bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2155 
2156   hint = emit_insn_before (gen_hbr (branch_label, target), before);
2157   recog_memoized (hint);
2158   INSN_LOCATION (hint) = INSN_LOCATION (branch);
2159   HINTED_P (branch) = 1;
2160 
2161   if (GET_CODE (target) == LABEL_REF)
2162     HINTED_P (XEXP (target, 0)) = 1;
2163   else if (tablejump_p (branch, 0, &table))
2164     {
2165       rtvec vec;
2166       int j;
2167       if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2168 	vec = XVEC (PATTERN (table), 0);
2169       else
2170 	vec = XVEC (PATTERN (table), 1);
2171       for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2172 	HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2173     }
2174 
2175   if (distance >= 588)
2176     {
2177       /* Make sure the hint isn't scheduled any earlier than this point,
2178 	     which could make it too far for the branch offset to fit.  */
2179       insn = emit_insn_before (gen_blockage (), hint);
2180       recog_memoized (insn);
2181       INSN_LOCATION (insn) = INSN_LOCATION (hint);
2182     }
2183   else if (distance <= 8 * 4)
2184     {
2185       /* To guarantee at least 8 insns between the hint and branch we
2186          insert nops. */
2187       int d;
2188       for (d = distance; d < 8 * 4; d += 4)
2189 	{
2190 	  insn =
2191 	    emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2192 	  recog_memoized (insn);
2193 	  INSN_LOCATION (insn) = INSN_LOCATION (hint);
2194 	}
2195 
2196       /* Make sure any nops inserted aren't scheduled before the hint. */
2197       insn = emit_insn_after (gen_blockage (), hint);
2198       recog_memoized (insn);
2199       INSN_LOCATION (insn) = INSN_LOCATION (hint);
2200 
2201       /* Make sure any nops inserted aren't scheduled after the call. */
2202       if (CALL_P (branch) && distance < 8 * 4)
2203 	{
2204 	  insn = emit_insn_before (gen_blockage (), branch);
2205 	  recog_memoized (insn);
2206 	  INSN_LOCATION (insn) = INSN_LOCATION (branch);
2207 	}
2208     }
2209 }
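
/* For example (illustrative only): with distance == 16 bytes (4 insns),
   the loop above emits nops for d = 16, 20, 24 and 28, i.e. four nopn
   insns, so at least 8 insns (32 bytes) separate the hint from the
   branch; the blockage insns keep the scheduler from moving them.  */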
2210 
2211 /* Returns 0 if we don't want a hint for this branch.  Otherwise return
2212    the rtx for the branch target. */
2213 static rtx
2214 get_branch_target (rtx_insn *branch)
2215 {
2216   if (JUMP_P (branch))
2217     {
2218       rtx set, src;
2219 
2220       /* Return statements */
2221       if (GET_CODE (PATTERN (branch)) == RETURN)
2222 	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2223 
2224       /* ASM GOTOs. */
2225       if (extract_asm_operands (PATTERN (branch)) != NULL)
2226 	return NULL;
2227 
2228       set = single_set (branch);
2229       src = SET_SRC (set);
2230       if (GET_CODE (SET_DEST (set)) != PC)
2231 	abort ();
2232 
2233       if (GET_CODE (src) == IF_THEN_ELSE)
2234 	{
2235 	  rtx lab = 0;
2236 	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2237 	  if (note)
2238 	    {
2239 	      /* If the more probable case is not a fall through, then
2240 	         try a branch hint.  */
2241 	      int prob = XINT (note, 0);
2242 	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
2243 		  && GET_CODE (XEXP (src, 1)) != PC)
2244 		lab = XEXP (src, 1);
2245 	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2246 		       && GET_CODE (XEXP (src, 2)) != PC)
2247 		lab = XEXP (src, 2);
2248 	    }
2249 	  if (lab)
2250 	    {
2251 	      if (GET_CODE (lab) == RETURN)
2252 		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2253 	      return lab;
2254 	    }
2255 	  return 0;
2256 	}
2257 
2258       return src;
2259     }
2260   else if (CALL_P (branch))
2261     {
2262       rtx call;
2263       /* All of our call patterns are in a PARALLEL and the CALL is
2264          the first pattern in the PARALLEL. */
2265       if (GET_CODE (PATTERN (branch)) != PARALLEL)
2266 	abort ();
2267       call = XVECEXP (PATTERN (branch), 0, 0);
2268       if (GET_CODE (call) == SET)
2269 	call = SET_SRC (call);
2270       if (GET_CODE (call) != CALL)
2271 	abort ();
2272       return XEXP (XEXP (call, 0), 0);
2273     }
2274   return 0;
2275 }
2276 
2277 /* The special $hbr register is used to prevent the insn scheduler from
2278    moving hbr insns across instructions which invalidate them.  It
2279    should only be used in a clobber, and this function searches for
2280    insns which clobber it.  */
2281 static bool
2282 insn_clobbers_hbr (rtx_insn *insn)
2283 {
2284   if (INSN_P (insn)
2285       && GET_CODE (PATTERN (insn)) == PARALLEL)
2286     {
2287       rtx parallel = PATTERN (insn);
2288       rtx clobber;
2289       int j;
2290       for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2291 	{
2292 	  clobber = XVECEXP (parallel, 0, j);
2293 	  if (GET_CODE (clobber) == CLOBBER
2294 	      && GET_CODE (XEXP (clobber, 0)) == REG
2295 	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2296 	    return 1;
2297 	}
2298     }
2299   return 0;
2300 }
2301 
2302 /* Search up to 32 insns starting at FIRST:
2303    - at any kind of hinted branch, just return
2304    - at any unconditional branch in the first 15 insns, just return
2305    - at a call or indirect branch, after the first 15 insns, force it to
2306      an even address and return
2307    - at any unconditional branch, after the first 15 insns, force it to
2308      an even address.
2309    At the end of the search, insert an hbrp within 4 insns of FIRST,
2310    and an hbrp within 16 instructions of FIRST.
2311  */
2312 static void
2313 insert_hbrp_for_ilb_runout (rtx_insn *first)
2314 {
2315   rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2316   int addr = 0, length, first_addr = -1;
2317   int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2318   int insert_lnop_after = 0;
2319   for (insn = first; insn; insn = NEXT_INSN (insn))
2320     if (INSN_P (insn))
2321       {
2322 	if (first_addr == -1)
2323 	  first_addr = INSN_ADDRESSES (INSN_UID (insn));
2324 	addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2325 	length = get_attr_length (insn);
2326 
2327 	if (before_4 == 0 && addr + length >= 4 * 4)
2328 	  before_4 = insn;
2329 	/* We test for 14 instructions because the first hbrp will add
2330 	   up to 2 instructions. */
2331 	if (before_16 == 0 && addr + length >= 14 * 4)
2332 	  before_16 = insn;
2333 
2334 	if (INSN_CODE (insn) == CODE_FOR_hbr)
2335 	  {
2336 	    /* Make sure an hbrp is at least 2 cycles away from a hint.
2337 	       Insert an lnop after the hbrp when necessary. */
2338 	    if (before_4 == 0 && addr > 0)
2339 	      {
2340 		before_4 = insn;
2341 		insert_lnop_after |= 1;
2342 	      }
2343 	    else if (before_4 && addr <= 4 * 4)
2344 	      insert_lnop_after |= 1;
2345 	    if (before_16 == 0 && addr > 10 * 4)
2346 	      {
2347 		before_16 = insn;
2348 		insert_lnop_after |= 2;
2349 	      }
2350 	    else if (before_16 && addr <= 14 * 4)
2351 	      insert_lnop_after |= 2;
2352 	  }
2353 
2354 	if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2355 	  {
2356 	    if (addr < hbrp_addr0)
2357 	      hbrp_addr0 = addr;
2358 	    else if (addr < hbrp_addr1)
2359 	      hbrp_addr1 = addr;
2360 	  }
2361 
2362 	if (CALL_P (insn) || JUMP_P (insn))
2363 	  {
2364 	    if (HINTED_P (insn))
2365 	      return;
2366 
2367 	    /* Any branch after the first 15 insns should be on an even
2368 	       address to avoid a special case branch.  There might be
2369 	       some nops and/or hbrps inserted, so we test after 10
2370 	       insns. */
2371 	    if (addr > 10 * 4)
2372 	      SCHED_ON_EVEN_P (insn) = 1;
2373 	  }
2374 
2375 	if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2376 	  return;
2377 
2378 
2379 	if (addr + length >= 32 * 4)
2380 	  {
2381 	    gcc_assert (before_4 && before_16);
2382 	    if (hbrp_addr0 > 4 * 4)
2383 	      {
2384 		insn =
2385 		  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2386 		recog_memoized (insn);
2387 		INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2388 		INSN_ADDRESSES_NEW (insn,
2389 				    INSN_ADDRESSES (INSN_UID (before_4)));
2390 		PUT_MODE (insn, GET_MODE (before_4));
2391 		PUT_MODE (before_4, TImode);
2392 		if (insert_lnop_after & 1)
2393 		  {
2394 		    insn = emit_insn_before (gen_lnop (), before_4);
2395 		    recog_memoized (insn);
2396 		    INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2397 		    INSN_ADDRESSES_NEW (insn,
2398 					INSN_ADDRESSES (INSN_UID (before_4)));
2399 		    PUT_MODE (insn, TImode);
2400 		  }
2401 	      }
2402 	    if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2403 		&& hbrp_addr1 > 16 * 4)
2404 	      {
2405 		insn =
2406 		  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2407 		recog_memoized (insn);
2408 		INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2409 		INSN_ADDRESSES_NEW (insn,
2410 				    INSN_ADDRESSES (INSN_UID (before_16)));
2411 		PUT_MODE (insn, GET_MODE (before_16));
2412 		PUT_MODE (before_16, TImode);
2413 		if (insert_lnop_after & 2)
2414 		  {
2415 		    insn = emit_insn_before (gen_lnop (), before_16);
2416 		    recog_memoized (insn);
2417 		    INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2418 		    INSN_ADDRESSES_NEW (insn,
2419 					INSN_ADDRESSES (INSN_UID
2420 							(before_16)));
2421 		    PUT_MODE (insn, TImode);
2422 		  }
2423 	      }
2424 	    return;
2425 	  }
2426       }
2427     else if (BARRIER_P (insn))
2428       return;
2429 
2430 }
2431 
2432 /* The SPU might hang when it executes 48 inline instructions after a
2433    hinted branch jumps to its hinted target.  The beginning of a
2434    function and the return from a call might have been hinted, and
2435    must be handled as well.  To prevent a hang we insert 2 hbrps.  The
2436    first should be within 6 insns of the branch target.  The second
2437    should be within 22 insns of the branch target.  When determining
2438    if hbrps are necessary, we look for only 32 inline instructions,
2439    because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2440    when inserting new hbrps, we insert them within 4 and 16 insns of
2441    the target.  */
2442 static void
2443 insert_hbrp (void)
2444 {
2445   rtx_insn *insn;
2446   if (TARGET_SAFE_HINTS)
2447     {
2448       shorten_branches (get_insns ());
2449       /* Insert hbrp at beginning of function */
2450       insn = next_active_insn (get_insns ());
2451       if (insn)
2452 	insert_hbrp_for_ilb_runout (insn);
2453       /* Insert hbrp after hinted targets. */
2454       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2455 	if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2456 	  insert_hbrp_for_ilb_runout (next_active_insn (insn));
2457     }
2458 }
2459 
2460 static int in_spu_reorg;
2461 
2462 static void
2463 spu_var_tracking (void)
2464 {
2465   if (flag_var_tracking)
2466     {
2467       df_analyze ();
2468       timevar_push (TV_VAR_TRACKING);
2469       variable_tracking_main ();
2470       timevar_pop (TV_VAR_TRACKING);
2471       df_finish_pass (false);
2472     }
2473 }
2474 
2475 /* Insert branch hints.  There are no branch optimizations after this
2476    pass, so it's safe to set our branch hints now. */
2477 static void
2478 spu_machine_dependent_reorg (void)
2479 {
2480   sbitmap blocks;
2481   basic_block bb;
2482   rtx_insn *branch, *insn;
2483   rtx branch_target = 0;
2484   int branch_addr = 0, insn_addr, required_dist = 0;
2485   int i;
2486   unsigned int j;
2487 
2488   if (!TARGET_BRANCH_HINTS || optimize == 0)
2489     {
2490       /* We still do it for unoptimized code because an external
2491          function might have hinted a call or return. */
2492       compute_bb_for_insn ();
2493       insert_hbrp ();
2494       pad_bb ();
2495       spu_var_tracking ();
2496       free_bb_for_insn ();
2497       return;
2498     }
2499 
2500   blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2501   bitmap_clear (blocks);
2502 
2503   in_spu_reorg = 1;
2504   compute_bb_for_insn ();
2505 
2506   /* (Re-)discover loops so that bb->loop_father can be used
2507      in the analysis below.  */
2508   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2509 
2510   compact_blocks ();
2511 
2512   spu_bb_info =
2513     (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2514 				    sizeof (struct spu_bb_info));
2515 
2516   /* We need exact insn addresses and lengths.  */
2517   shorten_branches (get_insns ());
2518 
2519   for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2520     {
2521       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2522       branch = 0;
2523       if (spu_bb_info[i].prop_jump)
2524 	{
2525 	  branch = spu_bb_info[i].prop_jump;
2526 	  branch_target = get_branch_target (branch);
2527 	  branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2528 	  required_dist = spu_hint_dist;
2529 	}
2530       /* Search from the end of the block to the beginning.  In this
2531          loop, find jumps which need a hint and emit one only when:
2532          - it's an indirect branch and we're at the insn which sets
2533          the register
2534          - we're at an insn that will invalidate the hint, e.g., a
2535          call, another hint insn, inline asm that clobbers $hbr, and
2536          some inlined operations (divmodsi4).  Don't consider jumps
2537          because they are only at the end of a block and are
2538          considered when we are deciding whether to propagate
2539          - we're getting too far away from the branch.  The hbr insns
2540          only have a signed 10 bit offset.
2541          We go back as far as possible so the branch will be considered
2542          for propagation when we get to the beginning of the block.  */
2543       for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2544 	{
2545 	  if (INSN_P (insn))
2546 	    {
2547 	      insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2548 	      if (branch
2549 		  && ((GET_CODE (branch_target) == REG
2550 		       && set_of (branch_target, insn) != NULL_RTX)
2551 		      || insn_clobbers_hbr (insn)
2552 		      || branch_addr - insn_addr > 600))
2553 		{
2554 		  rtx_insn *next = NEXT_INSN (insn);
2555 		  int next_addr = INSN_ADDRESSES (INSN_UID (next));
2556 		  if (insn != BB_END (bb)
2557 		      && branch_addr - next_addr >= required_dist)
2558 		    {
2559 		      if (dump_file)
2560 			fprintf (dump_file,
2561 				 "hint for %i in block %i before %i\n",
2562 				 INSN_UID (branch), bb->index,
2563 				 INSN_UID (next));
2564 		      spu_emit_branch_hint (next, branch, branch_target,
2565 					    branch_addr - next_addr, blocks);
2566 		    }
2567 		  branch = 0;
2568 		}
2569 
2570 	      /* JUMP_P will only be true at the end of a block.  When
2571 	         branch is already set it means we've previously decided
2572 	         to propagate a hint for that branch into this block. */
2573 	      if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2574 		{
2575 		  branch = 0;
2576 		  if ((branch_target = get_branch_target (insn)))
2577 		    {
2578 		      branch = insn;
2579 		      branch_addr = insn_addr;
2580 		      required_dist = spu_hint_dist;
2581 		    }
2582 		}
2583 	    }
2584 	  if (insn == BB_HEAD (bb))
2585 	    break;
2586 	}
2587 
2588       if (branch)
2589 	{
2590 	  /* If we haven't emitted a hint for this branch yet, it might
2591 	     be profitable to emit it in one of the predecessor blocks,
2592 	     especially for loops.  */
2593 	  rtx_insn *bbend;
2594 	  basic_block prev = 0, prop = 0, prev2 = 0;
2595 	  int loop_exit = 0, simple_loop = 0;
2596 	  int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2597 
2598 	  for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2599 	    if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2600 	      prev = EDGE_PRED (bb, j)->src;
2601 	    else
2602 	      prev2 = EDGE_PRED (bb, j)->src;
2603 
2604 	  for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2605 	    if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2606 	      loop_exit = 1;
2607 	    else if (EDGE_SUCC (bb, j)->dest == bb)
2608 	      simple_loop = 1;
2609 
2610 	  /* If this branch is a loop exit then propagate to previous
2611 	     fallthru block. This catches the cases when it is a simple
2612 	     loop or when there is an initial branch into the loop. */
2613 	  if (prev && (loop_exit || simple_loop)
2614 	      && bb_loop_depth (prev) <= bb_loop_depth (bb))
2615 	    prop = prev;
2616 
2617 	  /* If there is only one adjacent predecessor, propagate to it,
2618 	     but don't propagate outside this loop.  */
2619 	  else if (prev && single_pred_p (bb)
2620 		   && prev->loop_father == bb->loop_father)
2621 	    prop = prev;
2622 
2623 	  /* If this is the JOIN block of a simple IF-THEN then
2624 	     propagate the hint to the HEADER block. */
2625 	  else if (prev && prev2
2626 		   && EDGE_COUNT (bb->preds) == 2
2627 		   && EDGE_COUNT (prev->preds) == 1
2628 		   && EDGE_PRED (prev, 0)->src == prev2
2629 		   && prev2->loop_father == bb->loop_father
2630 		   && GET_CODE (branch_target) != REG)
2631 	    prop = prev;
2632 
2633 	  /* Don't propagate when:
2634 	     - this is a simple loop and the hint would be too far
2635 	     - this is not a simple loop and there are 16 insns in
2636 	     this block already
2637 	     - the predecessor block ends in a branch that will be
2638 	     hinted
2639 	     - the predecessor block ends in an insn that invalidates
2640 	     the hint */
2641 	  if (prop
2642 	      && prop->index >= 0
2643 	      && (bbend = BB_END (prop))
2644 	      && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2645 	      (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2646 	      && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2647 	    {
2648 	      if (dump_file)
2649 		fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2650 			 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2651 			 bb->index, prop->index, bb_loop_depth (bb),
2652 			 INSN_UID (branch), loop_exit, simple_loop,
2653 			 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2654 
2655 	      spu_bb_info[prop->index].prop_jump = branch;
2656 	      spu_bb_info[prop->index].bb_index = i;
2657 	    }
2658 	  else if (branch_addr - next_addr >= required_dist)
2659 	    {
2660 	      if (dump_file)
2661 		fprintf (dump_file, "hint for %i in block %i before %i\n",
2662 			 INSN_UID (branch), bb->index,
2663 			 INSN_UID (NEXT_INSN (insn)));
2664 	      spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2665 				    branch_addr - next_addr, blocks);
2666 	    }
2667 	  branch = 0;
2668 	}
2669     }
2670   free (spu_bb_info);
2671 
2672   if (!bitmap_empty_p (blocks))
2673     find_many_sub_basic_blocks (blocks);
2674 
2675   /* We have to schedule to make sure alignment is ok. */
2676   FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2677 
2678   /* The hints need to be scheduled, so run the scheduler again. */
2679   schedule_insns ();
2680   df_finish_pass (true);
2681 
2682   insert_hbrp ();
2683 
2684   pad_bb ();
2685 
2686   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2687     if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2688       {
2689 	/* Adjust the LABEL_REF in a hint when we have inserted a nop
2690 	   between its branch label and the branch.  We don't move the
2691 	   label because GCC expects it at the beginning of the block. */
2692 	rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2693 	rtx label_ref = XVECEXP (unspec, 0, 0);
2694 	rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2695 	rtx_insn *branch;
2696 	int offset = 0;
2697 	for (branch = NEXT_INSN (label);
2698 	     !JUMP_P (branch) && !CALL_P (branch);
2699 	     branch = NEXT_INSN (branch))
2700 	  if (NONJUMP_INSN_P (branch))
2701 	    offset += get_attr_length (branch);
2702 	if (offset > 0)
2703 	  XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2704       }
2705 
2706   spu_var_tracking ();
2707 
2708   loop_optimizer_finalize ();
2709 
2710   free_bb_for_insn ();
2711 
2712   in_spu_reorg = 0;
2713 }
2714 
2715 
2716 /* Insn scheduling routines, primarily for dual issue. */
2717 static int
2718 spu_sched_issue_rate (void)
2719 {
2720   return 2;
2721 }
2722 
2723 static int
2724 uses_ls_unit(rtx_insn *insn)
2725 {
2726   rtx set = single_set (insn);
2727   if (set != 0
2728       && (GET_CODE (SET_DEST (set)) == MEM
2729 	  || GET_CODE (SET_SRC (set)) == MEM))
2730     return 1;
2731   return 0;
2732 }
2733 
2734 static int
2735 get_pipe (rtx_insn *insn)
2736 {
2737   enum attr_type t;
2738   /* Handle inline asm */
2739   if (INSN_CODE (insn) == -1)
2740     return -1;
2741   t = get_attr_type (insn);
2742   switch (t)
2743     {
2744     case TYPE_CONVERT:
2745       return -2;
2746     case TYPE_MULTI0:
2747       return -1;
2748 
2749     case TYPE_FX2:
2750     case TYPE_FX3:
2751     case TYPE_SPR:
2752     case TYPE_NOP:
2753     case TYPE_FXB:
2754     case TYPE_FPD:
2755     case TYPE_FP6:
2756     case TYPE_FP7:
2757       return 0;
2758 
2759     case TYPE_LNOP:
2760     case TYPE_SHUF:
2761     case TYPE_LOAD:
2762     case TYPE_STORE:
2763     case TYPE_BR:
2764     case TYPE_MULTI1:
2765     case TYPE_HBR:
2766     case TYPE_IPREFETCH:
2767       return 1;
2768     default:
2769       abort ();
2770     }
2771 }
2772 
2773 
2774 /* haifa-sched.c has a static variable that keeps track of the current
2775    cycle.  It is passed to spu_sched_reorder, and we record it here for
2776    use by spu_sched_variable_issue.  It won't be accurate if the
2777    scheduler updates its clock_var between the two calls. */
2778 static int clock_var;
2779 
2780 /* This is used to keep track of insn alignment.  Set to 0 at the
2781    beginning of each block and increased by the "length" attr of each
2782    insn scheduled. */
2783 static int spu_sched_length;
2784 
2785 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2786    ready list appropriately in spu_sched_reorder(). */
2787 static int pipe0_clock;
2788 static int pipe1_clock;
2789 
2790 static int prev_clock_var;
2791 
2792 static int prev_priority;
2793 
2794 /* The SPU needs to load the next ilb sometime during the execution of
2795    the previous ilb.  There is a potential conflict if every cycle has a
2796    load or store.  To avoid the conflict we make sure the load/store
2797    unit is free for at least one cycle during the execution of insns in
2798    the previous ilb. */
2799 static int spu_ls_first;
2800 static int prev_ls_clock;
2801 
2802 static void
2803 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2804 		       int max_ready ATTRIBUTE_UNUSED)
2805 {
2806   spu_sched_length = 0;
2807 }
2808 
2809 static void
2810 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2811 		int max_ready ATTRIBUTE_UNUSED)
2812 {
2813   if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2814     {
2815       /* When any block might be at least 8-byte aligned, assume they
2816          will all be at least 8-byte aligned to make sure dual issue
2817          works out correctly. */
2818       spu_sched_length = 0;
2819     }
2820   spu_ls_first = INT_MAX;
2821   clock_var = -1;
2822   prev_ls_clock = -1;
2823   pipe0_clock = -1;
2824   pipe1_clock = -1;
2825   prev_clock_var = -1;
2826   prev_priority = -1;
2827 }
2828 
2829 static int
2830 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2831 			  int verbose ATTRIBUTE_UNUSED,
2832 			  rtx_insn *insn, int more)
2833 {
2834   int len;
2835   int p;
2836   if (GET_CODE (PATTERN (insn)) == USE
2837       || GET_CODE (PATTERN (insn)) == CLOBBER
2838       || (len = get_attr_length (insn)) == 0)
2839     return more;
2840 
2841   spu_sched_length += len;
2842 
2843   /* Reset on inline asm */
2844   if (INSN_CODE (insn) == -1)
2845     {
2846       spu_ls_first = INT_MAX;
2847       pipe0_clock = -1;
2848       pipe1_clock = -1;
2849       return 0;
2850     }
2851   p = get_pipe (insn);
2852   if (p == 0)
2853     pipe0_clock = clock_var;
2854   else
2855     pipe1_clock = clock_var;
2856 
2857   if (in_spu_reorg)
2858     {
2859       if (clock_var - prev_ls_clock > 1
2860 	  || INSN_CODE (insn) == CODE_FOR_iprefetch)
2861 	spu_ls_first = INT_MAX;
2862       if (uses_ls_unit (insn))
2863 	{
2864 	  if (spu_ls_first == INT_MAX)
2865 	    spu_ls_first = spu_sched_length;
2866 	  prev_ls_clock = clock_var;
2867 	}
2868 
2869       /* The scheduler hasn't inserted the nop, but we will later on.
2870          Include those nops in spu_sched_length. */
2871       if (prev_clock_var == clock_var && (spu_sched_length & 7))
2872 	spu_sched_length += 4;
2873       prev_clock_var = clock_var;
2874 
2875       /* more is -1 when called from spu_sched_reorder for new insns
2876          that don't have INSN_PRIORITY */
2877       if (more >= 0)
2878 	prev_priority = INSN_PRIORITY (insn);
2879     }
2880 
2881   /* Always try issuing more insns.  spu_sched_reorder will decide
2882      when the cycle should be advanced. */
2883   return 1;
2884 }
2885 
2886 /* This function is called for both TARGET_SCHED_REORDER and
2887    TARGET_SCHED_REORDER2.  */
2888 static int
2889 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2890 		   rtx_insn **ready, int *nreadyp, int clock)
2891 {
2892   int i, nready = *nreadyp;
2893   int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2894   rtx_insn *insn;
2895 
2896   clock_var = clock;
2897 
2898   if (nready <= 0 || pipe1_clock >= clock)
2899     return 0;
2900 
2901   /* Find any rtl insns that don't generate assembly insns and schedule
2902      them first. */
2903   for (i = nready - 1; i >= 0; i--)
2904     {
2905       insn = ready[i];
2906       if (INSN_CODE (insn) == -1
2907 	  || INSN_CODE (insn) == CODE_FOR_blockage
2908 	  || (INSN_P (insn) && get_attr_length (insn) == 0))
2909 	{
2910 	  ready[i] = ready[nready - 1];
2911 	  ready[nready - 1] = insn;
2912 	  return 1;
2913 	}
2914     }
2915 
2916   pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2917   for (i = 0; i < nready; i++)
2918     if (INSN_CODE (ready[i]) != -1)
2919       {
2920 	insn = ready[i];
2921 	switch (get_attr_type (insn))
2922 	  {
2923 	  default:
2924 	  case TYPE_MULTI0:
2925 	  case TYPE_CONVERT:
2926 	  case TYPE_FX2:
2927 	  case TYPE_FX3:
2928 	  case TYPE_SPR:
2929 	  case TYPE_NOP:
2930 	  case TYPE_FXB:
2931 	  case TYPE_FPD:
2932 	  case TYPE_FP6:
2933 	  case TYPE_FP7:
2934 	    pipe_0 = i;
2935 	    break;
2936 	  case TYPE_LOAD:
2937 	  case TYPE_STORE:
2938 	    pipe_ls = i;
2939 	  case TYPE_LNOP:
2940 	  case TYPE_SHUF:
2941 	  case TYPE_BR:
2942 	  case TYPE_MULTI1:
2943 	  case TYPE_HBR:
2944 	    pipe_1 = i;
2945 	    break;
2946 	  case TYPE_IPREFETCH:
2947 	    pipe_hbrp = i;
2948 	    break;
2949 	  }
2950       }
2951 
2952   /* In the first scheduling phase, schedule loads and stores together
2953      to increase the chance they will get merged during postreload CSE. */
2954   if (!reload_completed && pipe_ls >= 0)
2955     {
2956       insn = ready[pipe_ls];
2957       ready[pipe_ls] = ready[nready - 1];
2958       ready[nready - 1] = insn;
2959       return 1;
2960     }
2961 
2962   /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2963   if (pipe_hbrp >= 0)
2964     pipe_1 = pipe_hbrp;
2965 
2966   /* When we have loads/stores in every cycle of the last 15 insns and
2967      we are about to schedule another load/store, emit an hbrp insn
2968      instead. */
2969   if (in_spu_reorg
2970       && spu_sched_length - spu_ls_first >= 4 * 15
2971       && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2972     {
2973       insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2974       recog_memoized (insn);
2975       if (pipe0_clock < clock)
2976 	PUT_MODE (insn, TImode);
2977       spu_sched_variable_issue (file, verbose, insn, -1);
2978       return 0;
2979     }
2980 
2981   /* In general, we want to emit nops to increase dual issue, but dual
2982      issue isn't faster when one of the insns could be scheduled later
2983      without affecting the critical path.  We look at INSN_PRIORITY to
2984      make a good guess, but it isn't perfect, so -mdual-nops=n can be
2985      used to adjust it. */
2986   if (in_spu_reorg && spu_dual_nops < 10)
2987     {
2988       /* When we are at an even address and we are not issuing nops to
2989          improve scheduling then we need to advance the cycle.  */
2990       if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2991 	  && (spu_dual_nops == 0
2992 	      || (pipe_1 != -1
2993 		  && prev_priority >
2994 		  INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2995 	return 0;
2996 
2997       /* When at an odd address, schedule the highest priority insn
2998          without considering pipeline. */
2999       if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3000 	  && (spu_dual_nops == 0
3001 	      || (prev_priority >
3002 		  INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3003 	return 1;
3004     }
3005 
3006 
3007   /* We haven't issued a pipe0 insn yet this cycle, if there is a
3008      pipe0 insn in the ready list, schedule it. */
3009   if (pipe0_clock < clock && pipe_0 >= 0)
3010     schedule_i = pipe_0;
3011 
3012   /* Either we've scheduled a pipe0 insn already or there is no pipe0
3013      insn to schedule.  Put a pipe1 insn at the front of the ready list. */
3014   else
3015     schedule_i = pipe_1;
3016 
3017   if (schedule_i > -1)
3018     {
3019       insn = ready[schedule_i];
3020       ready[schedule_i] = ready[nready - 1];
3021       ready[nready - 1] = insn;
3022       return 1;
3023     }
3024   return 0;
3025 }
3026 
3027 /* INSN is dependent on DEP_INSN. */
3028 static int
3029 spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
3030 {
3031   rtx set;
3032 
3033   /* The blockage pattern is used to prevent instructions from being
3034      moved across it and has no cost. */
3035   if (INSN_CODE (insn) == CODE_FOR_blockage
3036       || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3037     return 0;
3038 
3039   if ((INSN_P (insn) && get_attr_length (insn) == 0)
3040       || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3041     return 0;
3042 
3043   /* Make sure hbrps are spread out. */
3044   if (INSN_CODE (insn) == CODE_FOR_iprefetch
3045       && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3046     return 8;
3047 
3048   /* Make sure hints and hbrps are 2 cycles apart. */
3049   if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3050        || INSN_CODE (insn) == CODE_FOR_hbr)
3051        && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3052 	   || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3053     return 2;
3054 
3055   /* An hbrp has no real dependency on other insns. */
3056   if (INSN_CODE (insn) == CODE_FOR_iprefetch
3057       || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3058     return 0;
3059 
3060   /* Assuming that it is unlikely an argument register will be used in
3061      the first cycle of the called function, we reduce the cost for
3062      slightly better scheduling of dep_insn.  When not hinted, the
3063      mispredicted branch would hide the cost as well.  */
3064   if (CALL_P (insn))
3065   {
3066     rtx target = get_branch_target (insn);
3067     if (GET_CODE (target) != REG || !set_of (target, insn))
3068       return cost - 2;
3069     return cost;
3070   }
3071 
3072   /* And when returning from a function, let's assume the return values
3073      are completed sooner too. */
3074   if (CALL_P (dep_insn))
3075     return cost - 2;
3076 
3077   /* Make sure an instruction that loads from the back chain is scheduled
3078      away from the return instruction so a hint is more likely to get
3079      issued. */
3080   if (INSN_CODE (insn) == CODE_FOR__return
3081       && (set = single_set (dep_insn))
3082       && GET_CODE (SET_DEST (set)) == REG
3083       && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3084     return 20;
3085 
3086   /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3087      scheduler makes every insn in a block anti-dependent on the final
3088      jump_insn.  We adjust here so higher cost insns will get scheduled
3089      earlier. */
3090   if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3091     return insn_cost (dep_insn) - 3;
3092 
3093   return cost;
3094 }
3095 
3096 /* Create a CONST_DOUBLE from a string.  */
3097 rtx
3098 spu_float_const (const char *string, machine_mode mode)
3099 {
3100   REAL_VALUE_TYPE value;
3101   value = REAL_VALUE_ATOF (string, mode);
3102   return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3103 }
3104 
3105 int
3106 spu_constant_address_p (rtx x)
3107 {
3108   return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3109 	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3110 	  || GET_CODE (x) == HIGH);
3111 }
3112 
3113 static enum spu_immediate
3114 which_immediate_load (HOST_WIDE_INT val)
3115 {
3116   gcc_assert (val == trunc_int_for_mode (val, SImode));
3117 
3118   if (val >= -0x8000 && val <= 0x7fff)
3119     return SPU_IL;
3120   if (val >= 0 && val <= 0x3ffff)
3121     return SPU_ILA;
3122   if ((val & 0xffff) == ((val >> 16) & 0xffff))
3123     return SPU_ILH;
3124   if ((val & 0xffff) == 0)
3125     return SPU_ILHU;
3126 
3127   return SPU_NONE;
3128 }
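
/* Some illustrative values for the checks above:
     0x1234	 -> SPU_IL	(fits in a signed 16-bit immediate)
     0x23456	 -> SPU_ILA	(fits in an unsigned 18-bit immediate)
     0x12341234	 -> SPU_ILH	(both halfwords identical)
     0x12340000	 -> SPU_ILHU	(low halfword zero)
     0x12345678	 -> SPU_NONE	(needs ilhu followed by iohl).  */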
3129 
3130 /* Return true when OP can be loaded by one of the il instructions, or
3131    when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3132 int
3133 immediate_load_p (rtx op, machine_mode mode)
3134 {
3135   if (CONSTANT_P (op))
3136     {
3137       enum immediate_class c = classify_immediate (op, mode);
3138       return c == IC_IL1 || c == IC_IL1s
3139 	     || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3140     }
3141   return 0;
3142 }
3143 
3144 /* Return true if the first SIZE bytes of ARR form a constant that can be
3145    generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
3146    are set to the element size and byte offset to use for the instruction. */
3147 static int
3148 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3149 {
3150   int cpat, run, i, start;
3151   cpat = 1;
3152   run = 0;
3153   start = -1;
3154   for (i = 0; i < size && cpat; i++)
3155     if (arr[i] != i+16)
3156       {
3157 	if (!run)
3158 	  {
3159 	    start = i;
3160 	    if (arr[i] == 3)
3161 	      run = 1;
3162 	    else if (arr[i] == 2 && arr[i+1] == 3)
3163 	      run = 2;
3164 	    else if (arr[i] == 0)
3165 	      {
3166 		while (arr[i+run] == run && i+run < 16)
3167 		  run++;
3168 		if (run != 4 && run != 8)
3169 		  cpat = 0;
3170 	      }
3171 	    else
3172 	      cpat = 0;
3173 	    if ((i & (run-1)) != 0)
3174 	      cpat = 0;
3175 	    i += run;
3176 	  }
3177 	else
3178 	  cpat = 0;
3179       }
3180   if (cpat && (run || size < 16))
3181     {
3182       if (run == 0)
3183 	run = 1;
3184       if (prun)
3185 	*prun = run;
3186       if (pstart)
3187 	*pstart = start == -1 ? 16-run : start;
3188       return 1;
3189     }
3190   return 0;
3191 }
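
/* An illustrative pattern accepted above: the 16 bytes
     10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f
   differ from i+16 only in the run 0, 1, 2, 3 starting at byte 4, so
   cpat_info returns 1 with *prun == 4 and *pstart == 4 (the kind of
   control constant cwd generates).  */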
3192 
3193 /* OP is a CONSTANT_P.  Determine what instructions can be used to load
3194    it into a register.  MODE is only valid when OP is a CONST_INT. */
3195 static enum immediate_class
3196 classify_immediate (rtx op, machine_mode mode)
3197 {
3198   HOST_WIDE_INT val;
3199   unsigned char arr[16];
3200   int i, j, repeated, fsmbi, repeat;
3201 
3202   gcc_assert (CONSTANT_P (op));
3203 
3204   if (GET_MODE (op) != VOIDmode)
3205     mode = GET_MODE (op);
3206 
3207   /* A V4SI const_vector with all identical symbols is ok. */
3208   if (!flag_pic
3209       && mode == V4SImode
3210       && GET_CODE (op) == CONST_VECTOR
3211       && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3212       && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3213       && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3214       && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3215       && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3216     op = CONST_VECTOR_ELT (op, 0);
3217 
3218   switch (GET_CODE (op))
3219     {
3220     case SYMBOL_REF:
3221     case LABEL_REF:
3222       return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3223 
3224     case CONST:
3225       /* We can never know if the resulting address fits in 18 bits and can be
3226 	 loaded with ila.  For now, assume the address will not overflow if
3227 	 the displacement is "small" (fits 'K' constraint).  */
3228       if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3229 	{
3230 	  rtx sym = XEXP (XEXP (op, 0), 0);
3231 	  rtx cst = XEXP (XEXP (op, 0), 1);
3232 
3233 	  if (GET_CODE (sym) == SYMBOL_REF
3234 	      && GET_CODE (cst) == CONST_INT
3235 	      && satisfies_constraint_K (cst))
3236 	    return IC_IL1s;
3237 	}
3238       return IC_IL2s;
3239 
3240     case HIGH:
3241       return IC_IL1s;
3242 
3243     case CONST_VECTOR:
3244       for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3245 	if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3246 	    && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3247 	  return IC_POOL;
3248       /* Fall through. */
3249 
3250     case CONST_INT:
3251     case CONST_DOUBLE:
3252       constant_to_array (mode, op, arr);
3253 
3254       /* Check that each 4-byte slot is identical. */
3255       repeated = 1;
3256       for (i = 4; i < 16; i += 4)
3257 	for (j = 0; j < 4; j++)
3258 	  if (arr[j] != arr[i + j])
3259 	    repeated = 0;
3260 
3261       if (repeated)
3262 	{
3263 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3264 	  val = trunc_int_for_mode (val, SImode);
3265 
3266 	  if (which_immediate_load (val) != SPU_NONE)
3267 	    return IC_IL1;
3268 	}
3269 
3270       /* Any mode of 2 bytes or smaller can be loaded with an il
3271          instruction. */
3272       gcc_assert (GET_MODE_SIZE (mode) > 2);
3273 
3274       fsmbi = 1;
3275       repeat = 0;
3276       for (i = 0; i < 16 && fsmbi; i++)
3277 	if (arr[i] != 0 && repeat == 0)
3278 	  repeat = arr[i];
3279 	else if (arr[i] != 0 && arr[i] != repeat)
3280 	  fsmbi = 0;
3281       if (fsmbi)
3282 	return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3283 
3284       if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3285 	return IC_CPAT;
3286 
3287       if (repeated)
3288 	return IC_IL2;
3289 
3290       return IC_POOL;
3291     default:
3292       break;
3293     }
3294   gcc_unreachable ();
3295 }
3296 
3297 static enum spu_immediate
3298 which_logical_immediate (HOST_WIDE_INT val)
3299 {
3300   gcc_assert (val == trunc_int_for_mode (val, SImode));
3301 
3302   if (val >= -0x200 && val <= 0x1ff)
3303     return SPU_ORI;
3304   if (val >= 0 && val <= 0xffff)
3305     return SPU_IOHL;
3306   if ((val & 0xffff) == ((val >> 16) & 0xffff))
3307     {
3308       val = trunc_int_for_mode (val, HImode);
3309       if (val >= -0x200 && val <= 0x1ff)
3310 	return SPU_ORHI;
3311       if ((val & 0xff) == ((val >> 8) & 0xff))
3312 	{
3313 	  val = trunc_int_for_mode (val, QImode);
3314 	  if (val >= -0x200 && val <= 0x1ff)
3315 	    return SPU_ORBI;
3316 	}
3317     }
3318   return SPU_NONE;
3319 }
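
/* Some illustrative values for the checks above:
     0x000001ff -> SPU_ORI	(fits the signed 10-bit ori field)
     0x00001234 -> SPU_IOHL	(only reachable with iohl)
     0x00050005 -> SPU_ORHI	(repeated halfword that fits orhi)
     0x80808080 -> SPU_ORBI	(repeated byte that fits orbi)
     0x12341234 -> SPU_NONE.  */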
3320 
3321 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3322    CONST_DOUBLEs. */
3323 static int
3324 const_vector_immediate_p (rtx x)
3325 {
3326   int i;
3327   gcc_assert (GET_CODE (x) == CONST_VECTOR);
3328   for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3329     if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3330 	&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3331       return 0;
3332   return 1;
3333 }
3334 
3335 int
3336 logical_immediate_p (rtx op, machine_mode mode)
3337 {
3338   HOST_WIDE_INT val;
3339   unsigned char arr[16];
3340   int i, j;
3341 
3342   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3343 	      || GET_CODE (op) == CONST_VECTOR);
3344 
3345   if (GET_CODE (op) == CONST_VECTOR
3346       && !const_vector_immediate_p (op))
3347     return 0;
3348 
3349   if (GET_MODE (op) != VOIDmode)
3350     mode = GET_MODE (op);
3351 
3352   constant_to_array (mode, op, arr);
3353 
3354   /* Check that bytes are repeated. */
3355   for (i = 4; i < 16; i += 4)
3356     for (j = 0; j < 4; j++)
3357       if (arr[j] != arr[i + j])
3358 	return 0;
3359 
3360   val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3361   val = trunc_int_for_mode (val, SImode);
3362 
3363   i = which_logical_immediate (val);
3364   return i != SPU_NONE && i != SPU_IOHL;
3365 }
3366 
3367 int
3368 iohl_immediate_p (rtx op, machine_mode mode)
3369 {
3370   HOST_WIDE_INT val;
3371   unsigned char arr[16];
3372   int i, j;
3373 
3374   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3375 	      || GET_CODE (op) == CONST_VECTOR);
3376 
3377   if (GET_CODE (op) == CONST_VECTOR
3378       && !const_vector_immediate_p (op))
3379     return 0;
3380 
3381   if (GET_MODE (op) != VOIDmode)
3382     mode = GET_MODE (op);
3383 
3384   constant_to_array (mode, op, arr);
3385 
3386   /* Check that bytes are repeated. */
3387   for (i = 4; i < 16; i += 4)
3388     for (j = 0; j < 4; j++)
3389       if (arr[j] != arr[i + j])
3390 	return 0;
3391 
3392   val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3393   val = trunc_int_for_mode (val, SImode);
3394 
3395   return val >= 0 && val <= 0xffff;
3396 }
3397 
3398 int
3399 arith_immediate_p (rtx op, machine_mode mode,
3400 		   HOST_WIDE_INT low, HOST_WIDE_INT high)
3401 {
3402   HOST_WIDE_INT val;
3403   unsigned char arr[16];
3404   int bytes, i, j;
3405 
3406   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3407 	      || GET_CODE (op) == CONST_VECTOR);
3408 
3409   if (GET_CODE (op) == CONST_VECTOR
3410       && !const_vector_immediate_p (op))
3411     return 0;
3412 
3413   if (GET_MODE (op) != VOIDmode)
3414     mode = GET_MODE (op);
3415 
3416   constant_to_array (mode, op, arr);
3417 
3418   if (VECTOR_MODE_P (mode))
3419     mode = GET_MODE_INNER (mode);
3420 
3421   bytes = GET_MODE_SIZE (mode);
3422   mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3423 
3424   /* Check that bytes are repeated. */
3425   for (i = bytes; i < 16; i += bytes)
3426     for (j = 0; j < bytes; j++)
3427       if (arr[j] != arr[i + j])
3428 	return 0;
3429 
3430   val = arr[0];
3431   for (j = 1; j < bytes; j++)
3432     val = (val << 8) | arr[j];
3433 
3434   val = trunc_int_for_mode (val, mode);
3435 
3436   return val >= low && val <= high;
3437 }
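
/* For example (illustrative only), a V4SImode vector whose elements are
   all (const_int 12) reduces above to val == 12, so the result is just
   12 >= LOW && 12 <= HIGH; a vector whose elements differ fails the
   repeated-bytes check and returns 0.  */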
3438 
3439 /* Return TRUE when OP is an immediate that is an exact power of 2, i.e.
3440    OP == 2^scale with LOW <= scale <= HIGH.  When OP is a vector,
3441    all entries must be the same. */
3442 bool
3443 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3444 {
3445   machine_mode int_mode;
3446   HOST_WIDE_INT val;
3447   unsigned char arr[16];
3448   int bytes, i, j;
3449 
3450   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3451 	      || GET_CODE (op) == CONST_VECTOR);
3452 
3453   if (GET_CODE (op) == CONST_VECTOR
3454       && !const_vector_immediate_p (op))
3455     return 0;
3456 
3457   if (GET_MODE (op) != VOIDmode)
3458     mode = GET_MODE (op);
3459 
3460   constant_to_array (mode, op, arr);
3461 
3462   if (VECTOR_MODE_P (mode))
3463     mode = GET_MODE_INNER (mode);
3464 
3465   bytes = GET_MODE_SIZE (mode);
3466   int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3467 
3468   /* Check that bytes are repeated. */
3469   for (i = bytes; i < 16; i += bytes)
3470     for (j = 0; j < bytes; j++)
3471       if (arr[j] != arr[i + j])
3472 	return 0;
3473 
3474   val = arr[0];
3475   for (j = 1; j < bytes; j++)
3476     val = (val << 8) | arr[j];
3477 
3478   val = trunc_int_for_mode (val, int_mode);
3479 
3480   /* Currently, we only handle SFmode */
3481   gcc_assert (mode == SFmode);
3482   if (mode == SFmode)
3483     {
3484       int exp = (val >> 23) - 127;
3485       return val > 0 && (val & 0x007fffff) == 0
3486 	     &&  exp >= low && exp <= high;
3487     }
3488   return FALSE;
3489 }
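
/* For SFmode the test above just inspects the IEEE-754 encoding; for
   example (illustrative values) 2.0f is 0x40000000, giving exp == 1,
   and 8.0f is 0x41000000, giving exp == 3, both with a zero mantissa,
   while 3.0f (0x40400000) is rejected because mantissa bits are set.  */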
3490 
3491 /* Return true if X is a SYMBOL_REF to an __ea qualified variable.  */
3492 
3493 static bool
3494 ea_symbol_ref_p (const_rtx x)
3495 {
3496   tree decl;
3497 
3498   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3499     {
3500       rtx plus = XEXP (x, 0);
3501       rtx op0 = XEXP (plus, 0);
3502       rtx op1 = XEXP (plus, 1);
3503       if (GET_CODE (op1) == CONST_INT)
3504 	x = op0;
3505     }
3506 
3507   return (GET_CODE (x) == SYMBOL_REF
3508  	  && (decl = SYMBOL_REF_DECL (x)) != 0
3509  	  && TREE_CODE (decl) == VAR_DECL
3510  	  && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3511 }
3512 
3513 /* We accept:
3514    - any 32-bit constant (SImode, SFmode)
3515    - any constant that can be generated with fsmbi (any mode)
3516    - a 64-bit constant where the high and low bits are identical
3517      (DImode, DFmode)
3518    - a 128-bit constant where the four 32-bit words match.  */
3519 bool
3520 spu_legitimate_constant_p (machine_mode mode, rtx x)
3521 {
3522   subrtx_iterator::array_type array;
3523   if (GET_CODE (x) == HIGH)
3524     x = XEXP (x, 0);
3525 
3526   /* Reject any __ea qualified reference.  These can't appear in
3527      instructions but must be forced to the constant pool.  */
3528   FOR_EACH_SUBRTX (iter, array, x, ALL)
3529     if (ea_symbol_ref_p (*iter))
3530       return 0;
3531 
3532   /* V4SI with all identical symbols is valid. */
3533   if (!flag_pic
3534       && mode == V4SImode
3535       && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3536 	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3537 	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3538     return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3539 	   && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3540 	   && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3541 
3542   if (GET_CODE (x) == CONST_VECTOR
3543       && !const_vector_immediate_p (x))
3544     return 0;
3545   return 1;
3546 }
3547 
3548 /* Valid addresses are:
3549    - symbol_ref, label_ref, const
3550    - reg
3551    - reg + const_int, where const_int is 16 byte aligned
3552    - reg + reg, alignment doesn't matter
3553   The alignment matters in the reg+const case because lqd and stqd
3554   ignore the 4 least significant bits of the const.  We only care about
3555   16 byte modes because the expand phase will change all smaller MEM
3556   references to TImode.  */
3557 static bool
3558 spu_legitimate_address_p (machine_mode mode,
3559 			  rtx x, bool reg_ok_strict)
3560 {
3561   int aligned = GET_MODE_SIZE (mode) >= 16;
3562   if (aligned
3563       && GET_CODE (x) == AND
3564       && GET_CODE (XEXP (x, 1)) == CONST_INT
3565       && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3566     x = XEXP (x, 0);
3567   switch (GET_CODE (x))
3568     {
3569     case LABEL_REF:
3570       return !TARGET_LARGE_MEM;
3571 
3572     case SYMBOL_REF:
3573     case CONST:
3574       /* Keep __ea references until reload so that spu_expand_mov can see them
3575 	 in MEMs.  */
3576       if (ea_symbol_ref_p (x))
3577 	return !reload_in_progress && !reload_completed;
3578       return !TARGET_LARGE_MEM;
3579 
3580     case CONST_INT:
3581       return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3582 
3583     case SUBREG:
3584       x = XEXP (x, 0);
3585       if (REG_P (x))
3586 	return 0;
3587 
3588     case REG:
3589       return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3590 
3591     case PLUS:
3592     case LO_SUM:
3593       {
3594 	rtx op0 = XEXP (x, 0);
3595 	rtx op1 = XEXP (x, 1);
3596 	if (GET_CODE (op0) == SUBREG)
3597 	  op0 = XEXP (op0, 0);
3598 	if (GET_CODE (op1) == SUBREG)
3599 	  op1 = XEXP (op1, 0);
3600 	if (GET_CODE (op0) == REG
3601 	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3602 	    && GET_CODE (op1) == CONST_INT
3603 	    && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3604 		/* If virtual registers are involved, the displacement will
3605 		   change later on anyway, so checking would be premature.
3606 		   Reload will make sure the final displacement after
3607 		   register elimination is OK.  */
3608 		|| op0 == arg_pointer_rtx
3609 		|| op0 == frame_pointer_rtx
3610 		|| op0 == virtual_stack_vars_rtx)
3611 	    && (!aligned || (INTVAL (op1) & 15) == 0))
3612 	  return TRUE;
3613 	if (GET_CODE (op0) == REG
3614 	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3615 	    && GET_CODE (op1) == REG
3616 	    && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3617 	  return TRUE;
3618       }
3619       break;
3620 
3621     default:
3622       break;
3623     }
3624   return FALSE;
3625 }
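
/* Illustrative examples of the rules above (register numbers and offsets
   chosen arbitrarily):
     (plus (reg 3) (const_int 48))      ok even for 16-byte modes: in range
                                        and the low 4 bits of the offset are 0
     (plus (reg 3) (const_int 50))      ok only for modes smaller than 16
                                        bytes (offset not 16-byte aligned)
     (plus (reg 3) (const_int 0x3000))  rejected, outside [-0x2000, 0x1fff]
     (plus (reg 3) (reg 4))             ok regardless of alignment  */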
3626 
3627 /* Like spu_legitimate_address_p, except with named addresses.  */
3628 static bool
3629 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3630 				     bool reg_ok_strict, addr_space_t as)
3631 {
3632   if (as == ADDR_SPACE_EA)
3633     return (REG_P (x) && (GET_MODE (x) == EAmode));
3634 
3635   else if (as != ADDR_SPACE_GENERIC)
3636     gcc_unreachable ();
3637 
3638   return spu_legitimate_address_p (mode, x, reg_ok_strict);
3639 }
3640 
3641 /* When the address is reg + const_int, force the const_int into a
3642    register.  */
3643 static rtx
3644 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3645 			machine_mode mode ATTRIBUTE_UNUSED)
3646 {
3647   rtx op0, op1;
3648   /* Make sure both operands are registers.  */
3649   if (GET_CODE (x) == PLUS)
3650     {
3651       op0 = XEXP (x, 0);
3652       op1 = XEXP (x, 1);
3653       if (ALIGNED_SYMBOL_REF_P (op0))
3654 	{
3655 	  op0 = force_reg (Pmode, op0);
3656 	  mark_reg_pointer (op0, 128);
3657 	}
3658       else if (GET_CODE (op0) != REG)
3659 	op0 = force_reg (Pmode, op0);
3660       if (ALIGNED_SYMBOL_REF_P (op1))
3661 	{
3662 	  op1 = force_reg (Pmode, op1);
3663 	  mark_reg_pointer (op1, 128);
3664 	}
3665       else if (GET_CODE (op1) != REG)
3666 	op1 = force_reg (Pmode, op1);
3667       x = gen_rtx_PLUS (Pmode, op0, op1);
3668     }
3669   return x;
3670 }
3671 
3672 /* Like spu_legitimize_address, except with named address support.  */
3673 static rtx
3674 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3675 				   addr_space_t as)
3676 {
3677   if (as != ADDR_SPACE_GENERIC)
3678     return x;
3679 
3680   return spu_legitimize_address (x, oldx, mode);
3681 }
3682 
3683 /* Reload reg + const_int for out-of-range displacements.  */
3684 rtx
3685 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3686 			       int opnum, int type)
3687 {
3688   bool removed_and = false;
3689 
3690   if (GET_CODE (ad) == AND
3691       && CONST_INT_P (XEXP (ad, 1))
3692       && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3693     {
3694       ad = XEXP (ad, 0);
3695       removed_and = true;
3696     }
3697 
3698   if (GET_CODE (ad) == PLUS
3699       && REG_P (XEXP (ad, 0))
3700       && CONST_INT_P (XEXP (ad, 1))
3701       && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3702 	   && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3703     {
3704       /* Unshare the sum.  */
3705       ad = copy_rtx (ad);
3706 
3707       /* Reload the displacement.  */
3708       push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3709 		   BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3710 		   opnum, (enum reload_type) type);
3711 
3712       /* Add back AND for alignment if we stripped it.  */
3713       if (removed_and)
3714 	ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3715 
3716       return ad;
3717     }
3718 
3719   return NULL_RTX;
3720 }
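
/* For example (a sketch; the register number is arbitrary): given the
   address (plus (reg 5) (const_int 0x4000)), the displacement is outside
   [-0x2000, 0x1fff], so the routine above asks reload to put 0x4000 into
   a base register, turning the access into a reg+reg form that
   spu_legitimate_address_p accepts for any alignment.  */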
3721 
3722 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3723    struct attribute_spec.handler.  */
3724 static tree
3725 spu_handle_fndecl_attribute (tree * node,
3726 			     tree name,
3727 			     tree args ATTRIBUTE_UNUSED,
3728 			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3729 {
3730   if (TREE_CODE (*node) != FUNCTION_DECL)
3731     {
3732       warning (0, "%qE attribute only applies to functions",
3733 	       name);
3734       *no_add_attrs = true;
3735     }
3736 
3737   return NULL_TREE;
3738 }
3739 
3740 /* Handle the "vector" attribute.  */
3741 static tree
3742 spu_handle_vector_attribute (tree * node, tree name,
3743 			     tree args ATTRIBUTE_UNUSED,
3744 			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3745 {
3746   tree type = *node, result = NULL_TREE;
3747   machine_mode mode;
3748   int unsigned_p;
3749 
3750   while (POINTER_TYPE_P (type)
3751 	 || TREE_CODE (type) == FUNCTION_TYPE
3752 	 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3753     type = TREE_TYPE (type);
3754 
3755   mode = TYPE_MODE (type);
3756 
3757   unsigned_p = TYPE_UNSIGNED (type);
3758   switch (mode)
3759     {
3760     case DImode:
3761       result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3762       break;
3763     case SImode:
3764       result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3765       break;
3766     case HImode:
3767       result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3768       break;
3769     case QImode:
3770       result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3771       break;
3772     case SFmode:
3773       result = V4SF_type_node;
3774       break;
3775     case DFmode:
3776       result = V2DF_type_node;
3777       break;
3778     default:
3779       break;
3780     }
3781 
3782   /* Propagate qualifiers attached to the element type
3783      onto the vector type.  */
3784   if (result && result != type && TYPE_QUALS (type))
3785     result = build_qualified_type (result, TYPE_QUALS (type));
3786 
3787   *no_add_attrs = true;		/* No need to hang on to the attribute.  */
3788 
3789   if (!result)
3790     warning (0, "%qE attribute ignored", name);
3791   else
3792     *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3793 
3794   return NULL_TREE;
3795 }
3796 
3797 /* Return nonzero if FUNC is a naked function.  */
3798 static int
3799 spu_naked_function_p (tree func)
3800 {
3801   tree a;
3802 
3803   if (TREE_CODE (func) != FUNCTION_DECL)
3804     abort ();
3805 
3806   a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3807   return a != NULL_TREE;
3808 }
3809 
3810 int
3811 spu_initial_elimination_offset (int from, int to)
3812 {
3813   int saved_regs_size = spu_saved_regs_size ();
3814   int sp_offset = 0;
3815   if (!crtl->is_leaf || crtl->outgoing_args_size
3816       || get_frame_size () || saved_regs_size)
3817     sp_offset = STACK_POINTER_OFFSET;
3818   if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3819     return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3820   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3821     return get_frame_size ();
3822   else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3823     return sp_offset + crtl->outgoing_args_size
3824       + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3825   else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3826     return get_frame_size () + saved_regs_size + sp_offset;
3827   else
3828     gcc_unreachable ();
3829 }
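
/* Worked example of the offsets above (sizes are hypothetical): in a
   non-leaf function with get_frame_size () == 64, outgoing_args_size == 32
   and saved_regs_size == 16, sp_offset is STACK_POINTER_OFFSET, so the
   ARG_POINTER -> STACK_POINTER offset is
   STACK_POINTER_OFFSET + 32 + 64 + 16 + STACK_POINTER_OFFSET, while
   FRAME_POINTER -> HARD_FRAME_POINTER is simply the frame size, 64.  */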
3830 
3831 rtx
3832 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3833 {
3834   machine_mode mode = TYPE_MODE (type);
3835   int byte_size = ((mode == BLKmode)
3836 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3837 
3838   /* Make sure small structs are left justified in a register. */
3839   if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3840       && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3841     {
3842       machine_mode smode;
3843       rtvec v;
3844       int i;
3845       int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3846       int n = byte_size / UNITS_PER_WORD;
3847       v = rtvec_alloc (nregs);
3848       for (i = 0; i < n; i++)
3849 	{
3850 	  RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3851 						gen_rtx_REG (TImode,
3852 							     FIRST_RETURN_REGNUM
3853 							     + i),
3854 						GEN_INT (UNITS_PER_WORD * i));
3855 	  byte_size -= UNITS_PER_WORD;
3856 	}
3857 
3858       if (n < nregs)
3859 	{
3860 	  if (byte_size < 4)
3861 	    byte_size = 4;
3862 	  smode =
3863 	    smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3864 	  RTVEC_ELT (v, n) =
3865 	    gen_rtx_EXPR_LIST (VOIDmode,
3866 			       gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3867 			       GEN_INT (UNITS_PER_WORD * n));
3868 	}
3869       return gen_rtx_PARALLEL (mode, v);
3870     }
3871   return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3872 }
3873 
3874 static rtx
3875 spu_function_arg (cumulative_args_t cum_v,
3876 		  machine_mode mode,
3877 		  const_tree type, bool named ATTRIBUTE_UNUSED)
3878 {
3879   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3880   int byte_size;
3881 
3882   if (*cum >= MAX_REGISTER_ARGS)
3883     return 0;
3884 
3885   byte_size = ((mode == BLKmode)
3886 	       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3887 
3888   /* The ABI does not allow parameters to be passed partially in
3889      a register and partially on the stack. */
3890   if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3891     return 0;
3892 
3893   /* Make sure small structs are left justified in a register. */
3894   if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3895       && byte_size < UNITS_PER_WORD && byte_size > 0)
3896     {
3897       machine_mode smode;
3898       rtx gr_reg;
3899       if (byte_size < 4)
3900 	byte_size = 4;
3901       smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3902       gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3903 				  gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3904 				  const0_rtx);
3905       return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3906     }
3907   else
3908     return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3909 }
3910 
3911 static void
3912 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3913 			  const_tree type, bool named ATTRIBUTE_UNUSED)
3914 {
3915   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3916 
3917   *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3918 	   ? 1
3919 	   : mode == BLKmode
3920 	   ? ((int_size_in_bytes (type) + 15) / 16)
3921 	   : mode == VOIDmode
3922 	   ? 1
3923 	   : HARD_REGNO_NREGS (cum, mode));
3924 }
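
/* For example (sizes are hypothetical): a 40-byte BLKmode aggregate
   advances *cum by (40 + 15) / 16 == 3 register slots, while a scalar
   argument advances it by HARD_REGNO_NREGS for its mode.  */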
3925 
3926 /* Variable sized types are passed by reference.  */
3927 static bool
3928 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3929 		       machine_mode mode ATTRIBUTE_UNUSED,
3930 		       const_tree type, bool named ATTRIBUTE_UNUSED)
3931 {
3932   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3933 }
3934 
3935 
3936 /* Var args. */
3937 
3938 /* Create and return the va_list datatype.
3939 
3940    On SPU, va_list is an array type equivalent to
3941 
3942       typedef struct __va_list_tag
3943         {
3944             void *__args __attribute__((__aligned(16)));
3945             void *__skip __attribute__((__aligned(16)));
3946 
3947         } va_list[1];
3948 
3949    where __args points to the arg that will be returned by the next
3950    va_arg(), and __skip points to the previous stack frame such that
3951    when __args == __skip we should advance __args by 32 bytes. */
3952 static tree
3953 spu_build_builtin_va_list (void)
3954 {
3955   tree f_args, f_skip, record, type_decl;
3956   bool owp;
3957 
3958   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3959 
3960   type_decl =
3961     build_decl (BUILTINS_LOCATION,
3962 		TYPE_DECL, get_identifier ("__va_list_tag"), record);
3963 
3964   f_args = build_decl (BUILTINS_LOCATION,
3965 		       FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3966   f_skip = build_decl (BUILTINS_LOCATION,
3967 		       FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3968 
3969   DECL_FIELD_CONTEXT (f_args) = record;
3970   DECL_ALIGN (f_args) = 128;
3971   DECL_USER_ALIGN (f_args) = 1;
3972 
3973   DECL_FIELD_CONTEXT (f_skip) = record;
3974   DECL_ALIGN (f_skip) = 128;
3975   DECL_USER_ALIGN (f_skip) = 1;
3976 
3977   TYPE_STUB_DECL (record) = type_decl;
3978   TYPE_NAME (record) = type_decl;
3979   TYPE_FIELDS (record) = f_args;
3980   DECL_CHAIN (f_args) = f_skip;
3981 
3982   /* We know this is being padded and we want it that way.  It is an internal
3983      type so hide the warnings from the user. */
3984   owp = warn_padded;
3985   warn_padded = false;
3986 
3987   layout_type (record);
3988 
3989   warn_padded = owp;
3990 
3991   /* The correct type is an array type of one element.  */
3992   return build_array_type (record, build_index_type (size_zero_node));
3993 }
3994 
3995 /* Implement va_start by filling the va_list structure VALIST.
3996    NEXTARG points to the first anonymous stack argument.
3997 
3998    The following global variables are used to initialize
3999    the va_list structure:
4000 
4001      crtl->args.info;
4002        the CUMULATIVE_ARGS for this function
4003 
4004      crtl->args.arg_offset_rtx:
4005        holds the offset of the first anonymous stack argument
4006        (relative to the virtual arg pointer).  */
4007 
4008 static void
4009 spu_va_start (tree valist, rtx nextarg)
4010 {
4011   tree f_args, f_skip;
4012   tree args, skip, t;
4013 
4014   f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4015   f_skip = DECL_CHAIN (f_args);
4016 
4017   valist = build_simple_mem_ref (valist);
4018   args =
4019     build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4020   skip =
4021     build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4022 
4023   /* Find the __args area.  */
4024   t = make_tree (TREE_TYPE (args), nextarg);
4025   if (crtl->args.pretend_args_size > 0)
4026     t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
4027   t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4028   TREE_SIDE_EFFECTS (t) = 1;
4029   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4030 
4031   /* Find the __skip area.  */
4032   t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4033   t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4034 				       - STACK_POINTER_OFFSET));
4035   t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4036   TREE_SIDE_EFFECTS (t) = 1;
4037   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4038 }
4039 
4040 /* Gimplify va_arg by updating the va_list structure
4041    VALIST as required to retrieve an argument of type
4042    TYPE, and returning that argument.
4043 
4044    ret = va_arg(VALIST, TYPE);
4045 
4046    generates code equivalent to:
4047 
4048     paddedsize = (sizeof(TYPE) + 15) & -16;
4049     if (VALIST.__args + paddedsize > VALIST.__skip
4050 	&& VALIST.__args <= VALIST.__skip)
4051       addr = VALIST.__skip + 32;
4052     else
4053       addr = VALIST.__args;
4054     VALIST.__args = addr + paddedsize;
4055     ret = *(TYPE *)addr;
4056  */
4057 static tree
4058 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4059 			  gimple_seq * post_p ATTRIBUTE_UNUSED)
4060 {
4061   tree f_args, f_skip;
4062   tree args, skip;
4063   HOST_WIDE_INT size, rsize;
4064   tree addr, tmp;
4065   bool pass_by_reference_p;
4066 
4067   f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4068   f_skip = DECL_CHAIN (f_args);
4069 
4070   valist = build_simple_mem_ref (valist);
4071   args =
4072     build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4073   skip =
4074     build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4075 
4076   addr = create_tmp_var (ptr_type_node, "va_arg");
4077 
4078   /* if an object is dynamically sized, a pointer to it is passed
4079      instead of the object itself. */
4080   pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4081 					   false);
4082   if (pass_by_reference_p)
4083     type = build_pointer_type (type);
4084   size = int_size_in_bytes (type);
4085   rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4086 
4087   /* build conditional expression to calculate addr. The expression
4088      will be gimplified later. */
4089   tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4090   tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4091 		build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4092 		build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4093 		unshare_expr (skip)));
4094 
4095   tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4096 		fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4097 		unshare_expr (args));
4098 
4099   gimplify_assign (addr, tmp, pre_p);
4100 
4101   /* update VALIST.__args */
4102   tmp = fold_build_pointer_plus_hwi (addr, rsize);
4103   gimplify_assign (unshare_expr (args), tmp, pre_p);
4104 
4105   addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4106 		       addr);
4107 
4108   if (pass_by_reference_p)
4109     addr = build_va_arg_indirect_ref (addr);
4110 
4111   return build_va_arg_indirect_ref (addr);
4112 }
4113 
4114 /* Save parameter registers starting with the register that corresponds
4115    to the first unnamed parameter.  If the first unnamed parameter is
4116    on the stack then save no registers.  Set pretend_args_size to the
4117    amount of space needed to save the registers. */
4118 static void
4119 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4120 			    tree type, int *pretend_size, int no_rtl)
4121 {
4122   if (!no_rtl)
4123     {
4124       rtx tmp;
4125       int regno;
4126       int offset;
4127       int ncum = *get_cumulative_args (cum);
4128 
4129       /* cum currently points to the last named argument; we want to
4130          start at the next argument. */
4131       spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4132 
4133       offset = -STACK_POINTER_OFFSET;
4134       for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4135 	{
4136 	  tmp = gen_frame_mem (V4SImode,
4137 			       plus_constant (Pmode, virtual_incoming_args_rtx,
4138 					      offset));
4139 	  emit_move_insn (tmp,
4140 			  gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4141 	  offset += 16;
4142 	}
4143       *pretend_size = offset + STACK_POINTER_OFFSET;
4144     }
4145 }
4146 
4147 static void
4148 spu_conditional_register_usage (void)
4149 {
4150   if (flag_pic)
4151     {
4152       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4153       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4154     }
4155 }
4156 
4157 /* This is called any time we inspect the alignment of a register for
4158    addresses.  */
4159 static int
4160 reg_aligned_for_addr (rtx x)
4161 {
4162   int regno =
4163     REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4164   return REGNO_POINTER_ALIGN (regno) >= 128;
4165 }
4166 
4167 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4168    into its SYMBOL_REF_FLAGS.  */
4169 static void
4170 spu_encode_section_info (tree decl, rtx rtl, int first)
4171 {
4172   default_encode_section_info (decl, rtl, first);
4173 
4174   /* If a variable has a forced alignment to < 16 bytes, mark it with
4175      SYMBOL_FLAG_ALIGN1.  */
4176   if (TREE_CODE (decl) == VAR_DECL
4177       && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4178     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4179 }
4180 
4181 /* Return TRUE if we are certain the mem refers to a complete object
4182    which is both 16-byte aligned and padded to a 16-byte boundary.  This
4183    would make it safe to store with a single instruction.
4184    We guarantee the alignment and padding for static objects by aligning
4185    all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4186    FIXME: We currently cannot guarantee this for objects on the stack
4187    because assign_parm_setup_stack calls assign_stack_local with the
4188    alignment of the parameter mode and in that case the alignment never
4189    gets adjusted by LOCAL_ALIGNMENT. */
4190 static int
4191 store_with_one_insn_p (rtx mem)
4192 {
4193   machine_mode mode = GET_MODE (mem);
4194   rtx addr = XEXP (mem, 0);
4195   if (mode == BLKmode)
4196     return 0;
4197   if (GET_MODE_SIZE (mode) >= 16)
4198     return 1;
4199   /* Only static objects. */
4200   if (GET_CODE (addr) == SYMBOL_REF)
4201     {
4202       /* We use the associated declaration to make sure the access is
4203          referring to the whole object.
4204          We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
4205          if it is necessary.  Will there be cases where one exists, and
4206          the other does not?  Will there be cases where both exist, but
4207          have different types?  */
4208       tree decl = MEM_EXPR (mem);
4209       if (decl
4210 	  && TREE_CODE (decl) == VAR_DECL
4211 	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4212 	return 1;
4213       decl = SYMBOL_REF_DECL (addr);
4214       if (decl
4215 	  && TREE_CODE (decl) == VAR_DECL
4216 	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4217 	return 1;
4218     }
4219   return 0;
4220 }
4221 
4222 /* Return 1 when the address is not valid for a simple load and store as
4223    required by the '_mov*' patterns.   We could make this less strict
4224    for loads, but we prefer mem's to look the same so they are more
4225    likely to be merged.  */
4226 static int
4227 address_needs_split (rtx mem)
4228 {
4229   if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4230       && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4231 	  || !(store_with_one_insn_p (mem)
4232 	       || mem_is_padded_component_ref (mem))))
4233     return 1;
4234 
4235   return 0;
4236 }
4237 
4238 static GTY(()) rtx cache_fetch;		  /* __cache_fetch function */
4239 static GTY(()) rtx cache_fetch_dirty;	  /* __cache_fetch_dirty function */
4240 static alias_set_type ea_alias_set = -1;  /* alias set for __ea memory */
4241 
4242 /* MEM is known to be an __ea qualified memory access.  Emit a call to
4243    fetch the PPU memory to local store, and return its address in local
4244    store.  */
4245 
4246 static void
4247 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4248 {
4249   if (is_store)
4250     {
4251       rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4252       if (!cache_fetch_dirty)
4253 	cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4254       emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4255 			       2, ea_addr, EAmode, ndirty, SImode);
4256     }
4257   else
4258     {
4259       if (!cache_fetch)
4260 	cache_fetch = init_one_libfunc ("__cache_fetch");
4261       emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4262 			       1, ea_addr, EAmode);
4263     }
4264 }
4265 
4266 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4267    dirty bit marking, inline.
4268 
4269    The cache control data structure is an array of
4270 
4271    struct __cache_tag_array
4272      {
4273         unsigned int tag_lo[4];
4274         unsigned int tag_hi[4];
4275         void *data_pointer[4];
4276         int reserved[4];
4277         vector unsigned short dirty_bits[4];
4278      }  */
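
/* Worked example of a hit (a sketch; the values are made up): if the
   splatted address matches the tag in entry 2, tag_equal is
   { 0, 0, -1, 0 }, gbb packs that to 0x00f0, clz gives 24, and rotating
   cache_ptrs left by 24 (mod 16 = 8) bytes brings data_pointer[2] into
   the preferred slot, where block_off is added to form the final
   local-store address.  */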
4279 
4280 static void
4281 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4282 {
4283   rtx ea_addr_si;
4284   HOST_WIDE_INT v;
4285   rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4286   rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4287   rtx index_mask = gen_reg_rtx (SImode);
4288   rtx tag_arr = gen_reg_rtx (Pmode);
4289   rtx splat_mask = gen_reg_rtx (TImode);
4290   rtx splat = gen_reg_rtx (V4SImode);
4291   rtx splat_hi = NULL_RTX;
4292   rtx tag_index = gen_reg_rtx (Pmode);
4293   rtx block_off = gen_reg_rtx (SImode);
4294   rtx tag_addr = gen_reg_rtx (Pmode);
4295   rtx tag = gen_reg_rtx (V4SImode);
4296   rtx cache_tag = gen_reg_rtx (V4SImode);
4297   rtx cache_tag_hi = NULL_RTX;
4298   rtx cache_ptrs = gen_reg_rtx (TImode);
4299   rtx cache_ptrs_si = gen_reg_rtx (SImode);
4300   rtx tag_equal = gen_reg_rtx (V4SImode);
4301   rtx tag_equal_hi = NULL_RTX;
4302   rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4303   rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4304   rtx eq_index = gen_reg_rtx (SImode);
4305   rtx bcomp, hit_label, hit_ref, cont_label;
4306   rtx_insn *insn;
4307 
4308   if (spu_ea_model != 32)
4309     {
4310       splat_hi = gen_reg_rtx (V4SImode);
4311       cache_tag_hi = gen_reg_rtx (V4SImode);
4312       tag_equal_hi = gen_reg_rtx (V4SImode);
4313     }
4314 
4315   emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4316   emit_move_insn (tag_arr, tag_arr_sym);
4317   v = 0x0001020300010203LL;
4318   emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4319   ea_addr_si = ea_addr;
4320   if (spu_ea_model != 32)
4321     ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4322 
4323   /* tag_index = ea_addr & (tag_array_size - 128)  */
4324   emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4325 
4326   /* splat ea_addr to all 4 slots.  */
4327   emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4328   /* Similarly for high 32 bits of ea_addr.  */
4329   if (spu_ea_model != 32)
4330     emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4331 
4332   /* block_off = ea_addr & 127  */
4333   emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4334 
4335   /* tag_addr = tag_arr + tag_index  */
4336   emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4337 
4338   /* Read cache tags.  */
4339   emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4340   if (spu_ea_model != 32)
4341     emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4342 					       plus_constant (Pmode,
4343 							      tag_addr, 16)));
4344 
4345   /* tag = ea_addr & -128  */
4346   emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4347 
4348   /* Read all four cache data pointers.  */
4349   emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4350 					   plus_constant (Pmode,
4351 							  tag_addr, 32)));
4352 
4353   /* Compare tags.  */
4354   emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4355   if (spu_ea_model != 32)
4356     {
4357       emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4358       emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4359     }
4360 
4361   /* At most one of the tags compare equal, so tag_equal has one
4362      32-bit slot set to all 1's, with the other slots all zero.
4363      gbb picks off low bit from each byte in the 128-bit registers,
4364      so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4365      we have a hit.  */
4366   emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4367   emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4368 
4369   /* So counting leading zeros will set eq_index to 16, 20, 24 or 28.  */
4370   emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4371 
4372   /* This allows us to rotate the corresponding cache data pointer into
4373      slot 0, rotating by eq_index mod 16 bytes.  */
4374   emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4375   emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4376 
4377   /* Add block offset to form final data address.  */
4378   emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4379 
4380   /* Check that we did hit.  */
4381   hit_label = gen_label_rtx ();
4382   hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4383   bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4384   insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4385 				      gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4386 							    hit_ref, pc_rtx)));
4387   /* Say that this branch is very likely to happen.  */
4388   v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4389   add_int_reg_note (insn, REG_BR_PROB, v);
4390 
4391   ea_load_store (mem, is_store, ea_addr, data_addr);
4392   cont_label = gen_label_rtx ();
4393   emit_jump_insn (gen_jump (cont_label));
4394   emit_barrier ();
4395 
4396   emit_label (hit_label);
4397 
4398   if (is_store)
4399     {
4400       HOST_WIDE_INT v_hi;
4401       rtx dirty_bits = gen_reg_rtx (TImode);
4402       rtx dirty_off = gen_reg_rtx (SImode);
4403       rtx dirty_128 = gen_reg_rtx (TImode);
4404       rtx neg_block_off = gen_reg_rtx (SImode);
4405 
4406       /* Set up mask with one dirty bit per byte of the mem we are
4407 	 writing, starting from top bit.  */
4408       v_hi = v = -1;
4409       v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4410       if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4411 	{
4412 	  v_hi = v;
4413 	  v = 0;
4414 	}
4415       emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4416 
4417       /* Form index into cache dirty_bits.  eq_index is one of
4418 	 0x10, 0x14, 0x18 or 0x1c.  Multiplying by 4 gives us
4419 	 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4420 	 offset to each of the four dirty_bits elements.  */
4421       emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4422 
4423       emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4424 
4425       /* Rotate bit mask to proper bit.  */
4426       emit_insn (gen_negsi2 (neg_block_off, block_off));
4427       emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4428       emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4429 
4430       /* Or in the new dirty bits.  */
4431       emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4432 
4433       /* Store.  */
4434       emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4435     }
4436 
4437   emit_label (cont_label);
4438 }
4439 
4440 static rtx
4441 expand_ea_mem (rtx mem, bool is_store)
4442 {
4443   rtx ea_addr;
4444   rtx data_addr = gen_reg_rtx (Pmode);
4445   rtx new_mem;
4446 
4447   ea_addr = force_reg (EAmode, XEXP (mem, 0));
4448   if (optimize_size || optimize == 0)
4449     ea_load_store (mem, is_store, ea_addr, data_addr);
4450   else
4451     ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4452 
4453   if (ea_alias_set == -1)
4454     ea_alias_set = new_alias_set ();
4455 
4456   /* We generate a new MEM RTX to refer to the copy of the data
4457      in the cache.  We do not copy memory attributes (except the
4458      alignment) from the original MEM, as they may no longer apply
4459      to the cache copy.  */
4460   new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4461   set_mem_alias_set (new_mem, ea_alias_set);
4462   set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4463 
4464   return new_mem;
4465 }
4466 
4467 int
4468 spu_expand_mov (rtx * ops, machine_mode mode)
4469 {
4470   if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4471     {
4472       /* Perform the move in the destination SUBREG's inner mode.  */
4473       ops[0] = SUBREG_REG (ops[0]);
4474       mode = GET_MODE (ops[0]);
4475       ops[1] = gen_lowpart_common (mode, ops[1]);
4476       gcc_assert (ops[1]);
4477     }
4478 
4479   if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4480     {
4481       rtx from = SUBREG_REG (ops[1]);
4482       machine_mode imode = int_mode_for_mode (GET_MODE (from));
4483 
4484       gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4485 		  && GET_MODE_CLASS (imode) == MODE_INT
4486 		  && subreg_lowpart_p (ops[1]));
4487 
4488       if (GET_MODE_SIZE (imode) < 4)
4489 	imode = SImode;
4490       if (imode != GET_MODE (from))
4491 	from = gen_rtx_SUBREG (imode, from, 0);
4492 
4493       if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4494 	{
4495 	  enum insn_code icode = convert_optab_handler (trunc_optab,
4496 							mode, imode);
4497 	  emit_insn (GEN_FCN (icode) (ops[0], from));
4498 	}
4499       else
4500 	emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4501       return 1;
4502     }
4503 
4504   /* At least one of the operands needs to be a register. */
4505   if ((reload_in_progress | reload_completed) == 0
4506       && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4507     {
4508       rtx temp = force_reg (mode, ops[1]);
4509       emit_move_insn (ops[0], temp);
4510       return 1;
4511     }
4512   if (reload_in_progress || reload_completed)
4513     {
4514       if (CONSTANT_P (ops[1]))
4515 	return spu_split_immediate (ops);
4516       return 0;
4517     }
4518 
4519   /* Catch the SImode immediates greater than 0x7fffffff, and sign
4520      extend them. */
4521   if (GET_CODE (ops[1]) == CONST_INT)
4522     {
4523       HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4524       if (val != INTVAL (ops[1]))
4525 	{
4526 	  emit_move_insn (ops[0], GEN_INT (val));
4527 	  return 1;
4528 	}
4529     }
4530   if (MEM_P (ops[0]))
4531     {
4532       if (MEM_ADDR_SPACE (ops[0]))
4533 	ops[0] = expand_ea_mem (ops[0], true);
4534       return spu_split_store (ops);
4535     }
4536   if (MEM_P (ops[1]))
4537     {
4538       if (MEM_ADDR_SPACE (ops[1]))
4539 	ops[1] = expand_ea_mem (ops[1], false);
4540       return spu_split_load (ops);
4541     }
4542 
4543   return 0;
4544 }
4545 
4546 static void
4547 spu_convert_move (rtx dst, rtx src)
4548 {
4549   machine_mode mode = GET_MODE (dst);
4550   machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4551   rtx reg;
4552   gcc_assert (GET_MODE (src) == TImode);
4553   reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4554   emit_insn (gen_rtx_SET (VOIDmode, reg,
4555 	       gen_rtx_TRUNCATE (int_mode,
4556 		 gen_rtx_LSHIFTRT (TImode, src,
4557 		   GEN_INT (int_mode == DImode ? 64 : 96)))));
4558   if (int_mode != mode)
4559     {
4560       reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4561       emit_move_insn (dst, reg);
4562     }
4563 }
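
/* For example: an SImode DST lives in the preferred slot, bytes 0..3 of
   the quadword, so the TImode source is shifted right by 96 bits and
   truncated; a DImode DST uses a 64-bit shift, keeping bytes 0..7.  */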
4564 
4565 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4566    the address from SRC and SRC+16.  Return a REG or CONST_INT that
4567    specifies how many bytes to rotate the loaded registers, plus any
4568    extra from EXTRA_ROTQBY.  The address and rotate amounts are
4569    normalized to improve merging of loads and rotate computations. */
4570 static rtx
4571 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4572 {
4573   rtx addr = XEXP (src, 0);
4574   rtx p0, p1, rot, addr0, addr1;
4575   int rot_amt;
4576 
4577   rot = 0;
4578   rot_amt = 0;
4579 
4580   if (MEM_ALIGN (src) >= 128)
4581     /* Address is already aligned; simply perform a TImode load.  */ ;
4582   else if (GET_CODE (addr) == PLUS)
4583     {
4584       /* 8 cases:
4585          aligned reg   + aligned reg     => lqx
4586          aligned reg   + unaligned reg   => lqx, rotqby
4587          aligned reg   + aligned const   => lqd
4588          aligned reg   + unaligned const => lqd, rotqbyi
4589          unaligned reg + aligned reg     => lqx, rotqby
4590          unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
4591          unaligned reg + aligned const   => lqd, rotqby
4592          unaligned reg + unaligned const => not allowed by legitimate address
4593        */
4594       p0 = XEXP (addr, 0);
4595       p1 = XEXP (addr, 1);
4596       if (!reg_aligned_for_addr (p0))
4597 	{
4598 	  if (REG_P (p1) && !reg_aligned_for_addr (p1))
4599 	    {
4600 	      rot = gen_reg_rtx (SImode);
4601 	      emit_insn (gen_addsi3 (rot, p0, p1));
4602 	    }
4603 	  else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4604 	    {
4605 	      if (INTVAL (p1) > 0
4606 		  && REG_POINTER (p0)
4607 		  && INTVAL (p1) * BITS_PER_UNIT
4608 		     < REGNO_POINTER_ALIGN (REGNO (p0)))
4609 		{
4610 		  rot = gen_reg_rtx (SImode);
4611 		  emit_insn (gen_addsi3 (rot, p0, p1));
4612 		  addr = p0;
4613 		}
4614 	      else
4615 		{
4616 		  rtx x = gen_reg_rtx (SImode);
4617 		  emit_move_insn (x, p1);
4618 		  if (!spu_arith_operand (p1, SImode))
4619 		    p1 = x;
4620 		  rot = gen_reg_rtx (SImode);
4621 		  emit_insn (gen_addsi3 (rot, p0, p1));
4622 		  addr = gen_rtx_PLUS (Pmode, p0, x);
4623 		}
4624 	    }
4625 	  else
4626 	    rot = p0;
4627 	}
4628       else
4629 	{
4630 	  if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4631 	    {
4632 	      rot_amt = INTVAL (p1) & 15;
4633 	      if (INTVAL (p1) & -16)
4634 		{
4635 		  p1 = GEN_INT (INTVAL (p1) & -16);
4636 		  addr = gen_rtx_PLUS (SImode, p0, p1);
4637 		}
4638 	      else
4639 		addr = p0;
4640 	    }
4641 	  else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4642 	    rot = p1;
4643 	}
4644     }
4645   else if (REG_P (addr))
4646     {
4647       if (!reg_aligned_for_addr (addr))
4648 	rot = addr;
4649     }
4650   else if (GET_CODE (addr) == CONST)
4651     {
4652       if (GET_CODE (XEXP (addr, 0)) == PLUS
4653 	  && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4654 	  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4655 	{
4656 	  rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4657 	  if (rot_amt & -16)
4658 	    addr = gen_rtx_CONST (Pmode,
4659 				  gen_rtx_PLUS (Pmode,
4660 						XEXP (XEXP (addr, 0), 0),
4661 						GEN_INT (rot_amt & -16)));
4662 	  else
4663 	    addr = XEXP (XEXP (addr, 0), 0);
4664 	}
4665       else
4666 	{
4667 	  rot = gen_reg_rtx (Pmode);
4668 	  emit_move_insn (rot, addr);
4669 	}
4670     }
4671   else if (GET_CODE (addr) == CONST_INT)
4672     {
4673       rot_amt = INTVAL (addr);
4674       addr = GEN_INT (rot_amt & -16);
4675     }
4676   else if (!ALIGNED_SYMBOL_REF_P (addr))
4677     {
4678       rot = gen_reg_rtx (Pmode);
4679       emit_move_insn (rot, addr);
4680     }
4681 
4682   rot_amt += extra_rotby;
4683 
4684   rot_amt &= 15;
4685 
4686   if (rot && rot_amt)
4687     {
4688       rtx x = gen_reg_rtx (SImode);
4689       emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4690       rot = x;
4691       rot_amt = 0;
4692     }
4693   if (!rot && rot_amt)
4694     rot = GEN_INT (rot_amt);
4695 
4696   addr0 = copy_rtx (addr);
4697   addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4698   emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4699 
4700   if (dst1)
4701     {
4702       addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4703       addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4704       emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4705     }
4706 
4707   return rot;
4708 }
4709 
4710 int
4711 spu_split_load (rtx * ops)
4712 {
4713   machine_mode mode = GET_MODE (ops[0]);
4714   rtx addr, load, rot;
4715   int rot_amt;
4716 
4717   if (GET_MODE_SIZE (mode) >= 16)
4718     return 0;
4719 
4720   addr = XEXP (ops[1], 0);
4721   gcc_assert (GET_CODE (addr) != AND);
4722 
4723   if (!address_needs_split (ops[1]))
4724     {
4725       ops[1] = change_address (ops[1], TImode, addr);
4726       load = gen_reg_rtx (TImode);
4727       emit_insn (gen__movti (load, ops[1]));
4728       spu_convert_move (ops[0], load);
4729       return 1;
4730     }
4731 
4732   rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4733 
4734   load = gen_reg_rtx (TImode);
4735   rot = spu_expand_load (load, 0, ops[1], rot_amt);
4736 
4737   if (rot)
4738     emit_insn (gen_rotqby_ti (load, load, rot));
4739 
4740   spu_convert_move (ops[0], load);
4741   return 1;
4742 }
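
/* For example (the offset is arbitrary): an SImode load from byte offset
   8 of a quadword loads the containing TImode word and rotates it left
   by 8 bytes so the requested word lands in the preferred slot.  For
   QImode and HImode the extra rotate of GET_MODE_SIZE (mode) - 4 places
   the value where spu_convert_move expects to extract it.  */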
4743 
4744 int
4745 spu_split_store (rtx * ops)
4746 {
4747   machine_mode mode = GET_MODE (ops[0]);
4748   rtx reg;
4749   rtx addr, p0, p1, p1_lo, smem;
4750   int aform;
4751   int scalar;
4752 
4753   if (GET_MODE_SIZE (mode) >= 16)
4754     return 0;
4755 
4756   addr = XEXP (ops[0], 0);
4757   gcc_assert (GET_CODE (addr) != AND);
4758 
4759   if (!address_needs_split (ops[0]))
4760     {
4761       reg = gen_reg_rtx (TImode);
4762       emit_insn (gen_spu_convert (reg, ops[1]));
4763       ops[0] = change_address (ops[0], TImode, addr);
4764       emit_move_insn (ops[0], reg);
4765       return 1;
4766     }
4767 
4768   if (GET_CODE (addr) == PLUS)
4769     {
4770       /* 8 cases:
4771          aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
4772          aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
4773          aligned reg   + aligned const   => lqd, c?d, shuf, stqx
4774          aligned reg   + unaligned const => lqd, c?d, shuf, stqx
4775          unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
4776          unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
4777          unaligned reg + aligned const   => lqd, c?d, shuf, stqx
4778          unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4779        */
4780       aform = 0;
4781       p0 = XEXP (addr, 0);
4782       p1 = p1_lo = XEXP (addr, 1);
4783       if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4784 	{
4785 	  p1_lo = GEN_INT (INTVAL (p1) & 15);
4786 	  if (reg_aligned_for_addr (p0))
4787 	    {
4788 	      p1 = GEN_INT (INTVAL (p1) & -16);
4789 	      if (p1 == const0_rtx)
4790 		addr = p0;
4791 	      else
4792 		addr = gen_rtx_PLUS (SImode, p0, p1);
4793 	    }
4794 	  else
4795 	    {
4796 	      rtx x = gen_reg_rtx (SImode);
4797 	      emit_move_insn (x, p1);
4798 	      addr = gen_rtx_PLUS (SImode, p0, x);
4799 	    }
4800 	}
4801     }
4802   else if (REG_P (addr))
4803     {
4804       aform = 0;
4805       p0 = addr;
4806       p1 = p1_lo = const0_rtx;
4807     }
4808   else
4809     {
4810       aform = 1;
4811       p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4812       p1 = 0;			/* aform doesn't use p1 */
4813       p1_lo = addr;
4814       if (ALIGNED_SYMBOL_REF_P (addr))
4815 	p1_lo = const0_rtx;
4816       else if (GET_CODE (addr) == CONST
4817 	       && GET_CODE (XEXP (addr, 0)) == PLUS
4818 	       && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4819 	       && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4820 	{
4821 	  HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4822 	  if ((v & -16) != 0)
4823 	    addr = gen_rtx_CONST (Pmode,
4824 				  gen_rtx_PLUS (Pmode,
4825 						XEXP (XEXP (addr, 0), 0),
4826 						GEN_INT (v & -16)));
4827 	  else
4828 	    addr = XEXP (XEXP (addr, 0), 0);
4829 	  p1_lo = GEN_INT (v & 15);
4830 	}
4831       else if (GET_CODE (addr) == CONST_INT)
4832 	{
4833 	  p1_lo = GEN_INT (INTVAL (addr) & 15);
4834 	  addr = GEN_INT (INTVAL (addr) & -16);
4835 	}
4836       else
4837 	{
4838 	  p1_lo = gen_reg_rtx (SImode);
4839 	  emit_move_insn (p1_lo, addr);
4840 	}
4841     }
4842 
4843   gcc_assert (aform == 0 || aform == 1);
4844   reg = gen_reg_rtx (TImode);
4845 
4846   scalar = store_with_one_insn_p (ops[0]);
4847   if (!scalar)
4848     {
4849       /* We could copy the flags from the ops[0] MEM to lmem here, but
4850          we don't because we want this load to be optimized away if
4851          possible, and copying the flags would prevent that in certain
4852          cases, e.g. consider the volatile flag. */
4853 
4854       rtx pat = gen_reg_rtx (TImode);
4855       rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4856       set_mem_alias_set (lmem, 0);
4857       emit_insn (gen_movti (reg, lmem));
4858 
4859       if (!p0 || reg_aligned_for_addr (p0))
4860 	p0 = stack_pointer_rtx;
4861       if (!p1_lo)
4862 	p1_lo = const0_rtx;
4863 
4864       emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4865       emit_insn (gen_shufb (reg, ops[1], reg, pat));
4866     }
4867   else
4868     {
4869       if (GET_CODE (ops[1]) == REG)
4870 	emit_insn (gen_spu_convert (reg, ops[1]));
4871       else if (GET_CODE (ops[1]) == SUBREG)
4872 	emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4873       else
4874 	abort ();
4875     }
4876 
4877   if (GET_MODE_SIZE (mode) < 4 && scalar)
4878     emit_insn (gen_ashlti3
4879 	       (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4880 
4881   smem = change_address (ops[0], TImode, copy_rtx (addr));
4882   /* We can't use the previous alias set because the memory has changed
4883      size and can potentially overlap objects of other types.  */
4884   set_mem_alias_set (smem, 0);
4885 
4886   emit_insn (gen_movti (smem, reg));
4887   return 1;
4888 }
4889 
4890 /* Return TRUE if X is MEM which is a struct member reference
4891    and the member can safely be loaded and stored with a single
4892    instruction because it is padded. */
4893 static int
4894 mem_is_padded_component_ref (rtx x)
4895 {
4896   tree t = MEM_EXPR (x);
4897   tree r;
4898   if (!t || TREE_CODE (t) != COMPONENT_REF)
4899     return 0;
4900   t = TREE_OPERAND (t, 1);
4901   if (!t || TREE_CODE (t) != FIELD_DECL
4902       || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4903     return 0;
4904   /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4905   r = DECL_FIELD_CONTEXT (t);
4906   if (!r || TREE_CODE (r) != RECORD_TYPE)
4907     return 0;
4908   /* Make sure they are the same mode */
4909   if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4910     return 0;
4911   /* If there are no following fields then the field alignment assures
4912      the structure is padded to the alignment which means this field is
4913      padded too.  */
4914   if (TREE_CHAIN (t) == 0)
4915     return 1;
4916   /* If the following field is also aligned then this field will be
4917      padded. */
4918   t = TREE_CHAIN (t);
4919   if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4920     return 1;
4921   return 0;
4922 }
4923 
4924 /* Parse the -mfixed-range= option string.  */
4925 static void
4926 fix_range (const char *const_str)
4927 {
4928   int i, first, last;
4929   char *str, *dash, *comma;
4930 
4931   /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4932      REG2 are either register names or register numbers.  The effect
4933      of this option is to mark the registers in the range from REG1 to
4934      REG2 as ``fixed'' so they won't be used by the compiler.  */
4935 
4936   i = strlen (const_str);
4937   str = (char *) alloca (i + 1);
4938   memcpy (str, const_str, i + 1);
4939 
4940   while (1)
4941     {
4942       dash = strchr (str, '-');
4943       if (!dash)
4944 	{
4945 	  warning (0, "value of -mfixed-range must have form REG1-REG2");
4946 	  return;
4947 	}
4948       *dash = '\0';
4949       comma = strchr (dash + 1, ',');
4950       if (comma)
4951 	*comma = '\0';
4952 
4953       first = decode_reg_name (str);
4954       if (first < 0)
4955 	{
4956 	  warning (0, "unknown register name: %s", str);
4957 	  return;
4958 	}
4959 
4960       last = decode_reg_name (dash + 1);
4961       if (last < 0)
4962 	{
4963 	  warning (0, "unknown register name: %s", dash + 1);
4964 	  return;
4965 	}
4966 
4967       *dash = '-';
4968 
4969       if (first > last)
4970 	{
4971 	  warning (0, "%s-%s is an empty range", str, dash + 1);
4972 	  return;
4973 	}
4974 
4975       for (i = first; i <= last; ++i)
4976 	fixed_regs[i] = call_used_regs[i] = 1;
4977 
4978       if (!comma)
4979 	break;
4980 
4981       *comma = ',';
4982       str = comma + 1;
4983     }
4984 }
4985 
4986 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4987    can be generated using the fsmbi instruction. */
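/* fsmbi expands a 16-bit immediate into a 16-byte mask, one bit per byte,
   so the constants it can build directly are those whose 16-byte pattern
   has every byte equal to 0x00 or 0xff, e.g. the (illustrative) V4SI
   constant { 0xffffff00, 0, -1, 0x00ff00ff }.  */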
4988 int
4989 fsmbi_const_p (rtx x)
4990 {
4991   if (CONSTANT_P (x))
4992     {
4993       /* We can always choose TImode for CONST_INT because the high bits
4994          of an SImode will always be all 1s, i.e., valid for fsmbi. */
4995       enum immediate_class c = classify_immediate (x, TImode);
4996       return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4997     }
4998   return 0;
4999 }
5000 
5001 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5002    can be generated using the cbd, chd, cwd or cdd instruction. */
5003 int
5004 cpat_const_p (rtx x, machine_mode mode)
5005 {
5006   if (CONSTANT_P (x))
5007     {
5008       enum immediate_class c = classify_immediate (x, mode);
5009       return c == IC_CPAT;
5010     }
5011   return 0;
5012 }
5013 
5014 rtx
5015 gen_cpat_const (rtx * ops)
5016 {
5017   unsigned char dst[16];
5018   int i, offset, shift, isize;
5019   if (GET_CODE (ops[3]) != CONST_INT
5020       || GET_CODE (ops[2]) != CONST_INT
5021       || (GET_CODE (ops[1]) != CONST_INT
5022 	  && GET_CODE (ops[1]) != REG))
5023     return 0;
5024   if (GET_CODE (ops[1]) == REG
5025       && (!REG_POINTER (ops[1])
5026 	  || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5027     return 0;
5028 
5029   for (i = 0; i < 16; i++)
5030     dst[i] = i + 16;
5031   isize = INTVAL (ops[3]);
5032   if (isize == 1)
5033     shift = 3;
5034   else if (isize == 2)
5035     shift = 2;
5036   else
5037     shift = 0;
5038   offset = (INTVAL (ops[2]) +
5039 	    (GET_CODE (ops[1]) ==
5040 	     CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5041   for (i = 0; i < isize; i++)
5042     dst[offset + i] = i + shift;
5043   return array_to_constant (TImode, dst);
5044 }
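
/* Worked example (offsets chosen arbitrarily): for a 4-byte insert at
   offset 4 (ops[3] == 4, (ops[2] + ops[1]) & 15 == 4), the loop above
   yields the shuffle pattern
     { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18,0x19, ... ,0x1f }
   so bytes 4..7 of the result come from the new value and every other
   byte comes from the original quadword, the same control word the cwd
   instruction generates.  */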
5045 
5046 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5047    array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
5048    than 16 bytes, the value is repeated across the rest of the array. */
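/* For example (an illustrative value): an SImode CONST_INT 0x12345678
   fills arr[0..3] with 0x12 0x34 0x56 0x78 and the splat loop repeats
   that pattern through arr[4..15]; a CONST_VECTOR instead fills each
   element-sized group of bytes from its corresponding element.  */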
5049 void
5050 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5051 {
5052   HOST_WIDE_INT val;
5053   int i, j, first;
5054 
5055   memset (arr, 0, 16);
5056   mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5057   if (GET_CODE (x) == CONST_INT
5058       || (GET_CODE (x) == CONST_DOUBLE
5059 	  && (mode == SFmode || mode == DFmode)))
5060     {
5061       gcc_assert (mode != VOIDmode && mode != BLKmode);
5062 
5063       if (GET_CODE (x) == CONST_DOUBLE)
5064 	val = const_double_to_hwint (x);
5065       else
5066 	val = INTVAL (x);
5067       first = GET_MODE_SIZE (mode) - 1;
5068       for (i = first; i >= 0; i--)
5069 	{
5070 	  arr[i] = val & 0xff;
5071 	  val >>= 8;
5072 	}
5073       /* Splat the constant across the whole array. */
5074       for (j = 0, i = first + 1; i < 16; i++)
5075 	{
5076 	  arr[i] = arr[j];
5077 	  j = (j == first) ? 0 : j + 1;
5078 	}
5079     }
5080   else if (GET_CODE (x) == CONST_DOUBLE)
5081     {
5082       val = CONST_DOUBLE_LOW (x);
5083       for (i = 15; i >= 8; i--)
5084 	{
5085 	  arr[i] = val & 0xff;
5086 	  val >>= 8;
5087 	}
5088       val = CONST_DOUBLE_HIGH (x);
5089       for (i = 7; i >= 0; i--)
5090 	{
5091 	  arr[i] = val & 0xff;
5092 	  val >>= 8;
5093 	}
5094     }
5095   else if (GET_CODE (x) == CONST_VECTOR)
5096     {
5097       int units;
5098       rtx elt;
5099       mode = GET_MODE_INNER (mode);
5100       units = CONST_VECTOR_NUNITS (x);
5101       for (i = 0; i < units; i++)
5102 	{
5103 	  elt = CONST_VECTOR_ELT (x, i);
5104 	  if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5105 	    {
5106 	      if (GET_CODE (elt) == CONST_DOUBLE)
5107 		val = const_double_to_hwint (elt);
5108 	      else
5109 		val = INTVAL (elt);
5110 	      first = GET_MODE_SIZE (mode) - 1;
5111 	      if (first + i * GET_MODE_SIZE (mode) > 16)
5112 		abort ();
5113 	      for (j = first; j >= 0; j--)
5114 		{
5115 		  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5116 		  val >>= 8;
5117 		}
5118 	    }
5119 	}
5120     }
5121   else
5122     gcc_unreachable();
5123 }
5124 
5125 /* Convert a 16 byte array to a constant of mode MODE.  When MODE is
5126    smaller than 16 bytes, use the bytes that would represent that value
5127    in a register, e.g., for QImode return the value of arr[3].  */
5128 rtx
5129 array_to_constant (machine_mode mode, const unsigned char arr[16])
5130 {
5131   machine_mode inner_mode;
5132   rtvec v;
5133   int units, size, i, j, k;
5134   HOST_WIDE_INT val;
5135 
5136   if (GET_MODE_CLASS (mode) == MODE_INT
5137       && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5138     {
5139       j = GET_MODE_SIZE (mode);
5140       i = j < 4 ? 4 - j : 0;
5141       for (val = 0; i < j; i++)
5142 	val = (val << 8) | arr[i];
5143       val = trunc_int_for_mode (val, mode);
5144       return GEN_INT (val);
5145     }
5146 
5147   if (mode == TImode)
5148     {
5149       HOST_WIDE_INT high;
5150       for (i = high = 0; i < 8; i++)
5151 	high = (high << 8) | arr[i];
5152       for (i = 8, val = 0; i < 16; i++)
5153 	val = (val << 8) | arr[i];
5154       return immed_double_const (val, high, TImode);
5155     }
5156   if (mode == SFmode)
5157     {
5158       val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5159       val = trunc_int_for_mode (val, SImode);
5160       return hwint_to_const_double (SFmode, val);
5161     }
5162   if (mode == DFmode)
5163     {
5164       for (i = 0, val = 0; i < 8; i++)
5165 	val = (val << 8) | arr[i];
5166       return hwint_to_const_double (DFmode, val);
5167     }
5168 
5169   if (!VECTOR_MODE_P (mode))
5170     abort ();
5171 
5172   units = GET_MODE_NUNITS (mode);
5173   size = GET_MODE_UNIT_SIZE (mode);
5174   inner_mode = GET_MODE_INNER (mode);
5175   v = rtvec_alloc (units);
5176 
5177   for (k = i = 0; i < units; ++i)
5178     {
5179       val = 0;
5180       for (j = 0; j < size; j++, k++)
5181 	val = (val << 8) | arr[k];
5182 
5183       if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5184 	RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5185       else
5186 	RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5187     }
5188   if (k > 16)
5189     abort ();
5190 
5191   return gen_rtx_CONST_VECTOR (mode, v);
5192 }
5193 
5194 static void
5195 reloc_diagnostic (rtx x)
5196 {
5197   tree decl = 0;
5198   if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5199     return;
5200 
5201   if (GET_CODE (x) == SYMBOL_REF)
5202     decl = SYMBOL_REF_DECL (x);
5203   else if (GET_CODE (x) == CONST
5204 	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5205     decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5206 
5207   /* SYMBOL_REF_DECL is not necessarily a DECL. */
5208   if (decl && !DECL_P (decl))
5209     decl = 0;
5210 
5211   /* The decl could be a string constant.  */
5212   if (decl && DECL_P (decl))
5213     {
5214       location_t loc;
5215       /* We use last_assemble_variable_decl to get line information.  It's
5216 	 not always going to be right and might not even be close, but will
5217 	 be right for the more common cases. */
5218       if (!last_assemble_variable_decl || in_section == ctors_section)
5219 	loc = DECL_SOURCE_LOCATION (decl);
5220       else
5221 	loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5222 
5223       if (TARGET_WARN_RELOC)
5224 	warning_at (loc, 0,
5225 		    "creating run-time relocation for %qD", decl);
5226       else
5227 	error_at (loc,
5228 		  "creating run-time relocation for %qD", decl);
5229     }
5230   else
5231     {
5232       if (TARGET_WARN_RELOC)
5233 	warning_at (input_location, 0, "creating run-time relocation");
5234       else
5235 	error_at (input_location, "creating run-time relocation");
5236     }
5237 }
5238 
5239 /* Hook into assemble_integer so we can generate an error for run-time
5240    relocations.  The SPU ABI disallows them. */
5241 static bool
5242 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5243 {
5244   /* By default run-time relocations aren't supported, but we allow them
5245      in case users support it in their own run-time loader.  And we provide
5246      a warning for those users that don't.  */
5247   if ((GET_CODE (x) == SYMBOL_REF)
5248       || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5249     reloc_diagnostic (x);
5250 
5251   return default_assemble_integer (x, size, aligned_p);
5252 }
5253 
5254 static void
5255 spu_asm_globalize_label (FILE * file, const char *name)
5256 {
5257   fputs ("\t.global\t", file);
5258   assemble_name (file, name);
5259   fputs ("\n", file);
5260 }
5261 
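     /* Compute a cost estimate for rtx X and store it in *TOTAL.  Returning
        true tells the caller not to recurse into the sub-expressions of X.  */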
5262 static bool
5263 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5264 	       int opno ATTRIBUTE_UNUSED, int *total,
5265 	       bool speed ATTRIBUTE_UNUSED)
5266 {
5267   machine_mode mode = GET_MODE (x);
5268   int cost = COSTS_N_INSNS (2);
5269 
5270   /* Folding to a CONST_VECTOR will use extra space but there might
5271      be only a small savings in cycles.  We'd like to use a CONST_VECTOR
5272      only if it allows us to fold away multiple insns.  Changing the cost
5273      of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5274      because this cost will only be compared against a single insn.
5275      if (code == CONST_VECTOR)
5276        return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5277    */
5278 
5279   /* Use defaults for float operations.  Not accurate but good enough. */
5280   if (mode == DFmode)
5281     {
5282       *total = COSTS_N_INSNS (13);
5283       return true;
5284     }
5285   if (mode == SFmode)
5286     {
5287       *total = COSTS_N_INSNS (6);
5288       return true;
5289     }
5290   switch (code)
5291     {
5292     case CONST_INT:
5293       if (satisfies_constraint_K (x))
5294 	*total = 0;
5295       else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5296 	*total = COSTS_N_INSNS (1);
5297       else
5298 	*total = COSTS_N_INSNS (3);
5299       return true;
5300 
5301     case CONST:
5302       *total = COSTS_N_INSNS (3);
5303       return true;
5304 
5305     case LABEL_REF:
5306     case SYMBOL_REF:
5307       *total = COSTS_N_INSNS (0);
5308       return true;
5309 
5310     case CONST_DOUBLE:
5311       *total = COSTS_N_INSNS (5);
5312       return true;
5313 
5314     case FLOAT_EXTEND:
5315     case FLOAT_TRUNCATE:
5316     case FLOAT:
5317     case UNSIGNED_FLOAT:
5318     case FIX:
5319     case UNSIGNED_FIX:
5320       *total = COSTS_N_INSNS (7);
5321       return true;
5322 
5323     case PLUS:
5324       if (mode == TImode)
5325 	{
5326 	  *total = COSTS_N_INSNS (9);
5327 	  return true;
5328 	}
5329       break;
5330 
5331     case MULT:
5332       cost = (GET_CODE (XEXP (x, 0)) == REG
5333 	      ? COSTS_N_INSNS (12)
5334 	      : COSTS_N_INSNS (7));
5335       if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5336 	{
5337 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5338 	    {
5339 	      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5340 	      cost = COSTS_N_INSNS (14);
5341 	      if ((val & 0xffff) == 0)
5342 		cost = COSTS_N_INSNS (9);
5343 	      else if (val > 0 && val < 0x10000)
5344 		cost = COSTS_N_INSNS (11);
5345 	    }
5346 	}
5347       *total = cost;
5348       return true;
5349     case DIV:
5350     case UDIV:
5351     case MOD:
5352     case UMOD:
5353       *total = COSTS_N_INSNS (20);
5354       return true;
5355     case ROTATE:
5356     case ROTATERT:
5357     case ASHIFT:
5358     case ASHIFTRT:
5359     case LSHIFTRT:
5360       *total = COSTS_N_INSNS (4);
5361       return true;
5362     case UNSPEC:
5363       if (XINT (x, 1) == UNSPEC_CONVERT)
5364 	*total = COSTS_N_INSNS (0);
5365       else
5366 	*total = COSTS_N_INSNS (4);
5367       return true;
5368     }
5369   /* Scale cost by mode size.  Except when initializing (cfun->decl == 0). */
5370   if (GET_MODE_CLASS (mode) == MODE_INT
5371       && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5372     cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5373       * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5374   *total = cost;
5375   return true;
5376 }
5377 
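     /* Return the mode used for unwind words; the SPU uses SImode rather
        than its TImode word mode.  */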
5378 static machine_mode
5379 spu_unwind_word_mode (void)
5380 {
5381   return SImode;
5382 }
5383 
5384 /* Decide whether we can make a sibling call to a function.  DECL is the
5385    declaration of the function being targeted by the call and EXP is the
5386    CALL_EXPR representing the call.  */
5387 static bool
5388 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5389 {
5390   return decl && !TARGET_LARGE_MEM;
5391 }
5392 
5393 /* We need to correctly update the back chain pointer and the Available
5394    Stack Size (which is kept in the second slot of the sp register).  */
5395 void
5396 spu_allocate_stack (rtx op0, rtx op1)
5397 {
5398   HOST_WIDE_INT v;
5399   rtx chain = gen_reg_rtx (V4SImode);
5400   rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5401   rtx sp = gen_reg_rtx (V4SImode);
5402   rtx splatted = gen_reg_rtx (V4SImode);
5403   rtx pat = gen_reg_rtx (TImode);
5404 
5405   /* Copy the back chain so we can store it back after adjusting the
         stack pointer.  */
5406   emit_move_insn (chain, stack_bot);
5407 
5408   op1 = force_reg (SImode, op1);
5409 
5410   v = 0x1020300010203ll;
5411   emit_move_insn (pat, immed_double_const (v, v, TImode));
5412   emit_insn (gen_shufb (splatted, op1, op1, pat));
5413 
5414   emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5415   emit_insn (gen_subv4si3 (sp, sp, splatted));
5416 
5417   if (flag_stack_check)
5418     {
5419       rtx avail = gen_reg_rtx (SImode);
5420       rtx result = gen_reg_rtx (SImode);
5421       emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5422       emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
5423       emit_insn (gen_spu_heq (result, GEN_INT (0)));
5424     }
5425 
5426   emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5427 
5428   emit_move_insn (stack_bot, chain);
5429 
5430   emit_move_insn (op0, virtual_stack_dynamic_rtx);
5431 }
5432 
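     /* Restore the stack pointer and back chain from the save area OP1
        (used for nonlocal stack restores), recomputing the Available Stack
        Size slot of each from the current $sp.  */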
5433 void
5434 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5435 {
5436   static unsigned char arr[16] =
5437     { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5438   rtx temp = gen_reg_rtx (SImode);
5439   rtx temp2 = gen_reg_rtx (SImode);
5440   rtx temp3 = gen_reg_rtx (V4SImode);
5441   rtx temp4 = gen_reg_rtx (V4SImode);
5442   rtx pat = gen_reg_rtx (TImode);
5443   rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5444 
5445   /* Restore the backchain from the first word, sp from the second.  */
5446   emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5447   emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5448 
5449   emit_move_insn (pat, array_to_constant (TImode, arr));
5450 
5451   /* Compute Available Stack Size for sp */
5452   emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5453   emit_insn (gen_shufb (temp3, temp, temp, pat));
5454 
5455   /* Compute Available Stack Size for back chain */
5456   emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5457   emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5458   emit_insn (gen_addv4si3 (temp4, sp, temp4));
5459 
5460   emit_insn (gen_addv4si3 (sp, sp, temp3));
5461   emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5462 }
5463 
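     /* Register the library routines used for the DImode and TImode
        operations, the overflow-checking arithmetic, and the
        unsigned-to-float conversions that are not open-coded on the SPU.  */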
5464 static void
5465 spu_init_libfuncs (void)
5466 {
5467   set_optab_libfunc (smul_optab, DImode, "__muldi3");
5468   set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5469   set_optab_libfunc (smod_optab, DImode, "__moddi3");
5470   set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5471   set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5472   set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5473   set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5474   set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5475   set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5476   set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5477   set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5478   set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5479 
5480   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5481   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5482 
5483   set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5484   set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5485   set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5486   set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5487   set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5488   set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5489   set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5490   set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5491   set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5492   set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5493   set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5494   set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5495 
5496   set_optab_libfunc (smul_optab, TImode, "__multi3");
5497   set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5498   set_optab_libfunc (smod_optab, TImode, "__modti3");
5499   set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5500   set_optab_libfunc (umod_optab, TImode, "__umodti3");
5501   set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5502 }
5503 
5504 /* Make a subreg, stripping any existing subreg.  We could possibly just
5505    call simplify_subreg, but in this case we know what we want. */
5506 rtx
5507 spu_gen_subreg (machine_mode mode, rtx x)
5508 {
5509   if (GET_CODE (x) == SUBREG)
5510     x = SUBREG_REG (x);
5511   if (GET_MODE (x) == mode)
5512     return x;
5513   return gen_rtx_SUBREG (mode, x, 0);
5514 }
5515 
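     /* Return true if a value of TYPE must be returned in memory: BLKmode
        values whose size is not a compile-time constant or is larger than
        MAX_REGISTER_RETURN words.  */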
5516 static bool
5517 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5518 {
5519   return (TYPE_MODE (type) == BLKmode
5520 	  && ((type) == 0
5521 	      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5522 	      || int_size_in_bytes (type) >
5523 	      (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5524 }
5525 
5526 /* Create the built-in types and functions */
5527 
5528 enum spu_function_code
5529 {
5530 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5531 #include "spu-builtins.def"
5532 #undef DEF_BUILTIN
5533    NUM_SPU_BUILTINS
5534 };
5535 
5536 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5537 
5538 struct spu_builtin_description spu_builtins[] = {
5539 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5540   {fcode, icode, name, type, params},
5541 #include "spu-builtins.def"
5542 #undef DEF_BUILTIN
5543 };
5544 
5545 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5546 
5547 /* Returns the spu builtin decl for CODE.  */
5548 
5549 static tree
5550 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5551 {
5552   if (code >= NUM_SPU_BUILTINS)
5553     return error_mark_node;
5554 
5555   return spu_builtin_decls[code];
5556 }
5557 
5558 
5559 static void
5560 spu_init_builtins (void)
5561 {
5562   struct spu_builtin_description *d;
5563   unsigned int i;
5564 
5565   V16QI_type_node = build_vector_type (intQI_type_node, 16);
5566   V8HI_type_node = build_vector_type (intHI_type_node, 8);
5567   V4SI_type_node = build_vector_type (intSI_type_node, 4);
5568   V2DI_type_node = build_vector_type (intDI_type_node, 2);
5569   V4SF_type_node = build_vector_type (float_type_node, 4);
5570   V2DF_type_node = build_vector_type (double_type_node, 2);
5571 
5572   unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5573   unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5574   unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5575   unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5576 
5577   spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5578 
5579   spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5580   spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5581   spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5582   spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5583   spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5584   spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5585   spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5586   spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5587   spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5588   spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5589   spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5590   spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5591 
5592   spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5593   spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5594   spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5595   spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5596   spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5597   spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5598   spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5599   spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5600 
5601   spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5602   spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5603 
5604   spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5605 
5606   spu_builtin_types[SPU_BTI_PTR] =
5607     build_pointer_type (build_qualified_type
5608 			(void_type_node,
5609 			 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5610 
5611   /* For each builtin we build a new prototype.  The tree code will make
5612      sure nodes are shared. */
5613   for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5614     {
5615       tree p;
5616       char name[64];		/* add_builtin_function will make a copy.  */
5617       int parm;
5618 
5619       if (d->name == 0)
5620 	continue;
5621 
5622       /* Find last parm.  */
5623       for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5624 	;
5625 
5626       p = void_list_node;
5627       while (parm > 1)
5628 	p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5629 
5630       p = build_function_type (spu_builtin_types[d->parm[0]], p);
5631 
5632       sprintf (name, "__builtin_%s", d->name);
5633       spu_builtin_decls[i] =
5634 	add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5635       if (d->fcode == SPU_MASK_FOR_LOAD)
5636 	TREE_READONLY (spu_builtin_decls[i]) = 1;
5637 
5638       /* These builtins don't throw.  */
5639       TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5640     }
5641 }
5642 
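     /* Restore the stack pointer to the value saved in OP1.  The delta from
        the current $sp is splatted into every slot before the add, so the
        Available Stack Size in slot 1 is adjusted by the same amount; the
        back chain quadword is then stored back at the new stack bottom.  */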
5643 void
5644 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5645 {
5646   static unsigned char arr[16] =
5647     { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5648 
5649   rtx temp = gen_reg_rtx (Pmode);
5650   rtx temp2 = gen_reg_rtx (V4SImode);
5651   rtx temp3 = gen_reg_rtx (V4SImode);
5652   rtx pat = gen_reg_rtx (TImode);
5653   rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5654 
5655   emit_move_insn (pat, array_to_constant (TImode, arr));
5656 
5657   /* Restore the sp.  */
5658   emit_move_insn (temp, op1);
5659   emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5660 
5661   /* Compute available stack size for sp.  */
5662   emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5663   emit_insn (gen_shufb (temp3, temp, temp, pat));
5664 
5665   emit_insn (gen_addv4si3 (sp, sp, temp3));
5666   emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5667 }
5668 
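     /* Return nonzero when TARGET_SAFE_DMA is set and CHANNEL is one of the
        channels 21 through 27, which are treated as DMA related.  */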
5669 int
5670 spu_safe_dma (HOST_WIDE_INT channel)
5671 {
5672   return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5673 }
5674 
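     /* Splat ops[1] into every element of vector ops[0].  Constants are
        expanded at compile time; otherwise a shufb with a per-mode
        replication pattern is emitted.  */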
5675 void
5676 spu_builtin_splats (rtx ops[])
5677 {
5678   machine_mode mode = GET_MODE (ops[0]);
5679   if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5680     {
5681       unsigned char arr[16];
5682       constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5683       emit_move_insn (ops[0], array_to_constant (mode, arr));
5684     }
5685   else
5686     {
5687       rtx reg = gen_reg_rtx (TImode);
5688       rtx shuf;
5689       if (GET_CODE (ops[1]) != REG
5690 	  && GET_CODE (ops[1]) != SUBREG)
5691 	ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
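           /* Each constant below is a shufb control word that replicates the
              scalar in the preferred slot of ops[1] across every element of
              the result; e.g. the V4SI pattern repeats bytes 0-3, the V8HI
              pattern bytes 2-3 and the V16QI pattern byte 3.  */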
5692       switch (mode)
5693 	{
5694 	case V2DImode:
5695 	case V2DFmode:
5696 	  shuf =
5697 	    immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5698 				TImode);
5699 	  break;
5700 	case V4SImode:
5701 	case V4SFmode:
5702 	  shuf =
5703 	    immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5704 				TImode);
5705 	  break;
5706 	case V8HImode:
5707 	  shuf =
5708 	    immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5709 				TImode);
5710 	  break;
5711 	case V16QImode:
5712 	  shuf =
5713 	    immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5714 				TImode);
5715 	  break;
5716 	default:
5717 	  abort ();
5718 	}
5719       emit_move_insn (reg, shuf);
5720       emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5721     }
5722 }
5723 
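     /* Extract element ops[2] of vector ops[1] into ops[0].  A constant
        index uses the vec_extract patterns directly; a variable index
        rotates the selected element into the preferred slot first.  */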
5724 void
5725 spu_builtin_extract (rtx ops[])
5726 {
5727   machine_mode mode;
5728   rtx rot, from, tmp;
5729 
5730   mode = GET_MODE (ops[1]);
5731 
5732   if (GET_CODE (ops[2]) == CONST_INT)
5733     {
5734       switch (mode)
5735 	{
5736 	case V16QImode:
5737 	  emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5738 	  break;
5739 	case V8HImode:
5740 	  emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5741 	  break;
5742 	case V4SFmode:
5743 	  emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5744 	  break;
5745 	case V4SImode:
5746 	  emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5747 	  break;
5748 	case V2DImode:
5749 	  emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5750 	  break;
5751 	case V2DFmode:
5752 	  emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5753 	  break;
5754 	default:
5755 	  abort ();
5756 	}
5757       return;
5758     }
5759 
5760   from = spu_gen_subreg (TImode, ops[1]);
5761   rot = gen_reg_rtx (TImode);
5762   tmp = gen_reg_rtx (SImode);
5763 
5764   switch (mode)
5765     {
5766     case V16QImode:
5767       emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5768       break;
5769     case V8HImode:
5770       emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5771       emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5772       break;
5773     case V4SFmode:
5774     case V4SImode:
5775       emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5776       break;
5777     case V2DImode:
5778     case V2DFmode:
5779       emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5780       break;
5781     default:
5782       abort ();
5783     }
5784   emit_insn (gen_rotqby_ti (rot, from, tmp));
5785 
5786   emit_insn (gen_spu_convert (ops[0], rot));
5787 }
5788 
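     /* Insert scalar ops[1] as element ops[3] of vector ops[2], putting the
        result in ops[0].  A cpat mask selects which bytes of the destination
        get replaced.  */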
5789 void
5790 spu_builtin_insert (rtx ops[])
5791 {
5792   machine_mode mode = GET_MODE (ops[0]);
5793   machine_mode imode = GET_MODE_INNER (mode);
5794   rtx mask = gen_reg_rtx (TImode);
5795   rtx offset;
5796 
5797   if (GET_CODE (ops[3]) == CONST_INT)
5798     offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5799   else
5800     {
5801       offset = gen_reg_rtx (SImode);
5802       emit_insn (gen_mulsi3
5803 		 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5804     }
5805   emit_insn (gen_cpat
5806 	     (mask, stack_pointer_rtx, offset,
5807 	      GEN_INT (GET_MODE_SIZE (imode))));
5808   emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5809 }
5810 
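     /* Promote scalar ops[1] into vector ops[0] by rotating it into element
        ops[2]; the remaining elements are not meaningful.  */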
5811 void
5812 spu_builtin_promote (rtx ops[])
5813 {
5814   machine_mode mode, imode;
5815   rtx rot, from, offset;
5816   HOST_WIDE_INT pos;
5817 
5818   mode = GET_MODE (ops[0]);
5819   imode = GET_MODE_INNER (mode);
5820 
5821   from = gen_reg_rtx (TImode);
5822   rot = spu_gen_subreg (TImode, ops[0]);
5823 
5824   emit_insn (gen_spu_convert (from, ops[1]));
5825 
5826   if (GET_CODE (ops[2]) == CONST_INT)
5827     {
5828       pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5829       if (GET_MODE_SIZE (imode) < 4)
5830 	pos += 4 - GET_MODE_SIZE (imode);
5831       offset = GEN_INT (pos & 15);
5832     }
5833   else
5834     {
5835       offset = gen_reg_rtx (SImode);
5836       switch (mode)
5837 	{
5838 	case V16QImode:
5839 	  emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5840 	  break;
5841 	case V8HImode:
5842 	  emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5843 	  emit_insn (gen_addsi3 (offset, offset, offset));
5844 	  break;
5845 	case V4SFmode:
5846 	case V4SImode:
5847 	  emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5848 	  emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5849 	  break;
5850 	case V2DImode:
5851 	case V2DFmode:
5852 	  emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5853 	  break;
5854 	default:
5855 	  abort ();
5856 	}
5857     }
5858   emit_insn (gen_rotqby_ti (rot, from, offset));
5859 }
5860 
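     /* Initialize the trampoline at M_TRAMP: emit code that stores into it
        the instructions which load the static chain register with CXT and
        branch to FNDECL.  */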
5861 static void
5862 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5863 {
5864   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5865   rtx shuf = gen_reg_rtx (V4SImode);
5866   rtx insn = gen_reg_rtx (V4SImode);
5867   rtx shufc;
5868   rtx insnc;
5869   rtx mem;
5870 
5871   fnaddr = force_reg (SImode, fnaddr);
5872   cxt = force_reg (SImode, cxt);
5873 
5874   if (TARGET_LARGE_MEM)
5875     {
5876       rtx rotl = gen_reg_rtx (V4SImode);
5877       rtx mask = gen_reg_rtx (V4SImode);
5878       rtx bi = gen_reg_rtx (SImode);
5879       static unsigned char const shufa[16] = {
5880 	2, 3, 0, 1, 18, 19, 16, 17,
5881 	0, 1, 2, 3, 16, 17, 18, 19
5882       };
5883       static unsigned char const insna[16] = {
5884 	0x41, 0, 0, 79,
5885 	0x41, 0, 0, STATIC_CHAIN_REGNUM,
5886 	0x60, 0x80, 0, 79,
5887 	0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5888       };
5889 
5890       shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5891       insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5892 
5893       emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5894       emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5895       emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5896       emit_insn (gen_selb (insn, insnc, rotl, mask));
5897 
5898       mem = adjust_address (m_tramp, V4SImode, 0);
5899       emit_move_insn (mem, insn);
5900 
5901       emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5902       mem = adjust_address (m_tramp, Pmode, 16);
5903       emit_move_insn (mem, bi);
5904     }
5905   else
5906     {
5907       rtx scxt = gen_reg_rtx (SImode);
5908       rtx sfnaddr = gen_reg_rtx (SImode);
5909       static unsigned char const insna[16] = {
5910 	0x42, 0, 0, STATIC_CHAIN_REGNUM,
5911 	0x30, 0, 0, 0,
5912 	0, 0, 0, 0,
5913 	0, 0, 0, 0
5914       };
5915 
5916       shufc = gen_reg_rtx (TImode);
5917       insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5918 
5919       /* By or'ing all of cxt with the ila opcode we are assuming cxt
5920 	 fits in 18 bits and the last 4 are zeros.  This will be true if
5921 	 the stack pointer is initialized to 0x3fff0 at program start;
5922 	 otherwise the ila instruction will be garbage.  */
5923 
5924       emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5925       emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5926       emit_insn (gen_cpat
5927 		 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5928       emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5929       emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5930 
5931       mem = adjust_address (m_tramp, V4SImode, 0);
5932       emit_move_insn (mem, insn);
5933     }
5934   emit_insn (gen_sync ());
5935 }
5936 
5937 static bool
5938 spu_warn_func_return (tree decl)
5939 {
5940   /* Naked functions are implemented entirely in assembly, including the
5941      return sequence, so suppress warnings about this.  */
5942   return !spu_naked_function_p (decl);
5943 }
5944 
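     /* Sign extend ops[1] into the wider mode of ops[0] by building a shufb
        pattern that keeps the value in the low-order bytes and fills the
        high-order bytes from the computed sign.  */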
5945 void
5946 spu_expand_sign_extend (rtx ops[])
5947 {
5948   unsigned char arr[16];
5949   rtx pat = gen_reg_rtx (TImode);
5950   rtx sign, c;
5951   int i, last;
5952   last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5953   if (GET_MODE (ops[1]) == QImode)
5954     {
5955       sign = gen_reg_rtx (HImode);
5956       emit_insn (gen_extendqihi2 (sign, ops[1]));
5957       for (i = 0; i < 16; i++)
5958 	arr[i] = 0x12;
5959       arr[last] = 0x13;
5960     }
5961   else
5962     {
5963       for (i = 0; i < 16; i++)
5964 	arr[i] = 0x10;
5965       switch (GET_MODE (ops[1]))
5966 	{
5967 	case HImode:
5968 	  sign = gen_reg_rtx (SImode);
5969 	  emit_insn (gen_extendhisi2 (sign, ops[1]));
5970 	  arr[last] = 0x03;
5971 	  arr[last - 1] = 0x02;
5972 	  break;
5973 	case SImode:
5974 	  sign = gen_reg_rtx (SImode);
5975 	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5976 	  for (i = 0; i < 4; i++)
5977 	    arr[last - i] = 3 - i;
5978 	  break;
5979 	case DImode:
5980 	  sign = gen_reg_rtx (SImode);
5981 	  c = gen_reg_rtx (SImode);
5982 	  emit_insn (gen_spu_convert (c, ops[1]));
5983 	  emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5984 	  for (i = 0; i < 8; i++)
5985 	    arr[last - i] = 7 - i;
5986 	  break;
5987 	default:
5988 	  abort ();
5989 	}
5990     }
5991   emit_move_insn (pat, array_to_constant (TImode, arr));
5992   emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5993 }
5994 
5995 /* Expand vector initialization.  If there are any constant parts, load
5996    the constant parts first.  Then load any non-constant parts.  */
5997 void
5998 spu_expand_vector_init (rtx target, rtx vals)
5999 {
6000   machine_mode mode = GET_MODE (target);
6001   int n_elts = GET_MODE_NUNITS (mode);
6002   int n_var = 0;
6003   bool all_same = true;
6004   rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6005   int i;
6006 
6007   first = XVECEXP (vals, 0, 0);
6008   for (i = 0; i < n_elts; ++i)
6009     {
6010       x = XVECEXP (vals, 0, i);
6011       if (!(CONST_INT_P (x)
6012 	    || GET_CODE (x) == CONST_DOUBLE
6013 	    || GET_CODE (x) == CONST_FIXED))
6014 	++n_var;
6015       else
6016 	{
6017 	  if (first_constant == NULL_RTX)
6018 	    first_constant = x;
6019 	}
6020       if (i > 0 && !rtx_equal_p (x, first))
6021 	all_same = false;
6022     }
6023 
6024   /* If all elements are the same, use splats to replicate the element.  */
6025   if (all_same)
6026     {
6027       if (!CONSTANT_P (first)
6028 	  && !register_operand (first, GET_MODE (x)))
6029 	first = force_reg (GET_MODE (first), first);
6030       emit_insn (gen_spu_splats (target, first));
6031       return;
6032     }
6033 
6034   /* load constant parts */
6035   if (n_var != n_elts)
6036     {
6037       if (n_var == 0)
6038 	{
6039 	  emit_move_insn (target,
6040 			  gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6041 	}
6042       else
6043 	{
6044 	  rtx constant_parts_rtx = copy_rtx (vals);
6045 
6046 	  gcc_assert (first_constant != NULL_RTX);
6047 	  /* Fill empty slots with the first constant; this increases
6048 	     our chance of using splats in the recursive call below.  */
6049 	  for (i = 0; i < n_elts; ++i)
6050 	    {
6051 	      x = XVECEXP (constant_parts_rtx, 0, i);
6052 	      if (!(CONST_INT_P (x)
6053 		    || GET_CODE (x) == CONST_DOUBLE
6054 		    || GET_CODE (x) == CONST_FIXED))
6055 		XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6056 	    }
6057 
6058 	  spu_expand_vector_init (target, constant_parts_rtx);
6059 	}
6060     }
6061 
6062   /* load variable parts */
6063   if (n_var != 0)
6064     {
6065       rtx insert_operands[4];
6066 
6067       insert_operands[0] = target;
6068       insert_operands[2] = target;
6069       for (i = 0; i < n_elts; ++i)
6070 	{
6071 	  x = XVECEXP (vals, 0, i);
6072 	  if (!(CONST_INT_P (x)
6073 		|| GET_CODE (x) == CONST_DOUBLE
6074 		|| GET_CODE (x) == CONST_FIXED))
6075 	    {
6076 	      if (!register_operand (x, GET_MODE (x)))
6077 		x = force_reg (GET_MODE (x), x);
6078 	      insert_operands[1] = x;
6079 	      insert_operands[3] = GEN_INT (i);
6080 	      spu_builtin_insert (insert_operands);
6081 	    }
6082 	}
6083     }
6084 }
6085 
6086 /* Return the insn index of the vector compare instruction for the given
6087    CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6088 
6089 static int
6090 get_vec_cmp_insn (enum rtx_code code,
6091                   machine_mode dest_mode,
6092                   machine_mode op_mode)
6093 
6094 {
6095   switch (code)
6096     {
6097     case EQ:
6098       if (dest_mode == V16QImode && op_mode == V16QImode)
6099         return CODE_FOR_ceq_v16qi;
6100       if (dest_mode == V8HImode && op_mode == V8HImode)
6101         return CODE_FOR_ceq_v8hi;
6102       if (dest_mode == V4SImode && op_mode == V4SImode)
6103         return CODE_FOR_ceq_v4si;
6104       if (dest_mode == V4SImode && op_mode == V4SFmode)
6105         return CODE_FOR_ceq_v4sf;
6106       if (dest_mode == V2DImode && op_mode == V2DFmode)
6107         return CODE_FOR_ceq_v2df;
6108       break;
6109     case GT:
6110       if (dest_mode == V16QImode && op_mode == V16QImode)
6111         return CODE_FOR_cgt_v16qi;
6112       if (dest_mode == V8HImode && op_mode == V8HImode)
6113         return CODE_FOR_cgt_v8hi;
6114       if (dest_mode == V4SImode && op_mode == V4SImode)
6115         return CODE_FOR_cgt_v4si;
6116       if (dest_mode == V4SImode && op_mode == V4SFmode)
6117         return CODE_FOR_cgt_v4sf;
6118       if (dest_mode == V2DImode && op_mode == V2DFmode)
6119         return CODE_FOR_cgt_v2df;
6120       break;
6121     case GTU:
6122       if (dest_mode == V16QImode && op_mode == V16QImode)
6123         return CODE_FOR_clgt_v16qi;
6124       if (dest_mode == V8HImode && op_mode == V8HImode)
6125         return CODE_FOR_clgt_v8hi;
6126       if (dest_mode == V4SImode && op_mode == V4SImode)
6127         return CODE_FOR_clgt_v4si;
6128       break;
6129     default:
6130       break;
6131     }
6132   return -1;
6133 }
6134 
6135 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6136    DMODE is the expected destination mode.  This is a recursive function.  */
6137 
6138 static rtx
6139 spu_emit_vector_compare (enum rtx_code rcode,
6140                          rtx op0, rtx op1,
6141                          machine_mode dmode)
6142 {
6143   int vec_cmp_insn;
6144   rtx mask;
6145   machine_mode dest_mode;
6146   machine_mode op_mode = GET_MODE (op1);
6147 
6148   gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6149 
6150   /* Floating point vector compare instructions use a V4SImode destination.
6151      Double precision vector compare instructions use a V2DImode destination.
6152      Move the result to the appropriate mode later.  */
6153   if (dmode == V4SFmode)
6154     dest_mode = V4SImode;
6155   else if (dmode == V2DFmode)
6156     dest_mode = V2DImode;
6157   else
6158     dest_mode = dmode;
6159 
6160   mask = gen_reg_rtx (dest_mode);
6161   vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6162 
6163   if (vec_cmp_insn == -1)
6164     {
6165       bool swap_operands = false;
6166       bool try_again = false;
6167       switch (rcode)
6168         {
6169         case LT:
6170           rcode = GT;
6171           swap_operands = true;
6172           try_again = true;
6173           break;
6174         case LTU:
6175           rcode = GTU;
6176           swap_operands = true;
6177           try_again = true;
6178           break;
6179         case NE:
6180 	case UNEQ:
6181 	case UNLE:
6182 	case UNLT:
6183 	case UNGE:
6184 	case UNGT:
6185 	case UNORDERED:
6186           /* Treat A != B as ~(A==B).  */
6187           {
6188 	    enum rtx_code rev_code;
6189             enum insn_code nor_code;
6190 	    rtx rev_mask;
6191 
6192 	    rev_code = reverse_condition_maybe_unordered (rcode);
6193             rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6194 
6195             nor_code = optab_handler (one_cmpl_optab, dest_mode);
6196             gcc_assert (nor_code != CODE_FOR_nothing);
6197             emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6198             if (dmode != dest_mode)
6199               {
6200                 rtx temp = gen_reg_rtx (dest_mode);
6201                 convert_move (temp, mask, 0);
6202                 return temp;
6203               }
6204             return mask;
6205           }
6206           break;
6207         case GE:
6208         case GEU:
6209         case LE:
6210         case LEU:
6211           /* Try GT/GTU/LT/LTU OR EQ */
6212           {
6213             rtx c_rtx, eq_rtx;
6214             enum insn_code ior_code;
6215             enum rtx_code new_code;
6216 
6217             switch (rcode)
6218               {
6219               case GE:  new_code = GT;  break;
6220               case GEU: new_code = GTU; break;
6221               case LE:  new_code = LT;  break;
6222               case LEU: new_code = LTU; break;
6223               default:
6224                 gcc_unreachable ();
6225               }
6226 
6227             c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6228             eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6229 
6230             ior_code = optab_handler (ior_optab, dest_mode);
6231             gcc_assert (ior_code != CODE_FOR_nothing);
6232             emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6233             if (dmode != dest_mode)
6234               {
6235                 rtx temp = gen_reg_rtx (dest_mode);
6236                 convert_move (temp, mask, 0);
6237                 return temp;
6238               }
6239             return mask;
6240           }
6241           break;
6242         case LTGT:
6243           /* Try LT OR GT */
6244           {
6245             rtx lt_rtx, gt_rtx;
6246             enum insn_code ior_code;
6247 
6248             lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6249             gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6250 
6251             ior_code = optab_handler (ior_optab, dest_mode);
6252             gcc_assert (ior_code != CODE_FOR_nothing);
6253             emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6254             if (dmode != dest_mode)
6255               {
6256                 rtx temp = gen_reg_rtx (dest_mode);
6257                 convert_move (temp, mask, 0);
6258                 return temp;
6259               }
6260             return mask;
6261           }
6262           break;
6263         case ORDERED:
6264           /* Implement as (A==A) & (B==B) */
6265           {
6266             rtx a_rtx, b_rtx;
6267             enum insn_code and_code;
6268 
6269             a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6270             b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6271 
6272             and_code = optab_handler (and_optab, dest_mode);
6273             gcc_assert (and_code != CODE_FOR_nothing);
6274             emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6275             if (dmode != dest_mode)
6276               {
6277                 rtx temp = gen_reg_rtx (dest_mode);
6278                 convert_move (temp, mask, 0);
6279                 return temp;
6280               }
6281             return mask;
6282           }
6283           break;
6284         default:
6285           gcc_unreachable ();
6286         }
6287 
6288       /* You only get two chances.  */
6289       if (try_again)
6290           vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6291 
6292       gcc_assert (vec_cmp_insn != -1);
6293 
6294       if (swap_operands)
6295         {
6296           rtx tmp;
6297           tmp = op0;
6298           op0 = op1;
6299           op1 = tmp;
6300         }
6301     }
6302 
6303   emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6304   if (dmode != dest_mode)
6305     {
6306       rtx temp = gen_reg_rtx (dest_mode);
6307       convert_move (temp, mask, 0);
6308       return temp;
6309     }
6310   return mask;
6311 }
6312 
6313 
6314 /* Emit vector conditional expression.
6315    DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6316    CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
6317 
6318 int
6319 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6320                            rtx cond, rtx cc_op0, rtx cc_op1)
6321 {
6322   machine_mode dest_mode = GET_MODE (dest);
6323   enum rtx_code rcode = GET_CODE (cond);
6324   rtx mask;
6325 
6326   /* Get the vector mask for the given relational operations.  */
6327   mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6328 
6329   emit_insn (gen_selb (dest, op2, op1, mask));
6330 
6331   return 1;
6332 }
6333 
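     /* Force OP into a register of mode MODE.  Integer constants are
        converted directly; a register of the same size is accessed through a
        subreg, and otherwise a spu_convert is emitted.  */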
6334 static rtx
6335 spu_force_reg (machine_mode mode, rtx op)
6336 {
6337   rtx x, r;
6338   if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6339     {
6340       if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6341 	  || GET_MODE (op) == BLKmode)
6342 	return force_reg (mode, convert_to_mode (mode, op, 0));
6343       abort ();
6344     }
6345 
6346   r = force_reg (GET_MODE (op), op);
6347   if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6348     {
6349       x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6350       if (x)
6351 	return x;
6352     }
6353 
6354   x = gen_reg_rtx (mode);
6355   emit_insn (gen_spu_convert (x, r));
6356   return x;
6357 }
6358 
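     /* Diagnose invalid immediate arguments for builtin D.  OP is the rtl
        for the argument and P is its SPU_BTI_* parameter code; out-of-range
        values get an error and misaligned values get a warning.  */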
6359 static void
6360 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6361 {
6362   HOST_WIDE_INT v = 0;
6363   int lsbits;
6364   /* Check the range of immediate operands. */
6365   if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6366     {
6367       int range = p - SPU_BTI_7;
6368 
6369       if (!CONSTANT_P (op))
6370 	error ("%s expects an integer literal in the range [%d, %d]",
6371 	       d->name,
6372 	       spu_builtin_range[range].low, spu_builtin_range[range].high);
6373 
6374       if (GET_CODE (op) == CONST
6375 	  && (GET_CODE (XEXP (op, 0)) == PLUS
6376 	      || GET_CODE (XEXP (op, 0)) == MINUS))
6377 	{
6378 	  v = INTVAL (XEXP (XEXP (op, 0), 1));
6379 	  op = XEXP (XEXP (op, 0), 0);
6380 	}
6381       else if (GET_CODE (op) == CONST_INT)
6382 	v = INTVAL (op);
6383       else if (GET_CODE (op) == CONST_VECTOR
6384 	       && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6385 	v = INTVAL (CONST_VECTOR_ELT (op, 0));
6386 
6387       /* The default for v is 0 which is valid in every range. */
6388       if (v < spu_builtin_range[range].low
6389 	  || v > spu_builtin_range[range].high)
6390 	error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6391 	       d->name,
6392 	       spu_builtin_range[range].low, spu_builtin_range[range].high,
6393 	       v);
6394 
6395       switch (p)
6396 	{
6397 	case SPU_BTI_S10_4:
6398 	  lsbits = 4;
6399 	  break;
6400 	case SPU_BTI_U16_2:
6401 	  /* This is only used in lqa and stqa.  Even though the insns
6402 	     encode 16 bits of the address (all but the 2 least
6403 	     significant), only 14 bits are used because the address is
6404 	     masked to be 16-byte aligned.  */
6405 	  lsbits = 4;
6406 	  break;
6407 	case SPU_BTI_S16_2:
6408 	  /* This is used for lqr and stqr. */
6409 	  lsbits = 2;
6410 	  break;
6411 	default:
6412 	  lsbits = 0;
6413 	}
6414 
6415       if (GET_CODE (op) == LABEL_REF
6416 	  || (GET_CODE (op) == SYMBOL_REF
6417 	      && SYMBOL_REF_FUNCTION_P (op))
6418 	  || (v & ((1 << lsbits) - 1)) != 0)
6419 	warning (0, "%d least significant bits of %s are ignored", lsbits,
6420 		 d->name);
6421     }
6422 }
6423 
6424 
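     /* Expand the arguments of the builtin call EXP into OPS[], starting
        with TARGET when D returns a value.  Return the number of operands
        that were filled in.  */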
6425 static int
6426 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6427 		     rtx target, rtx ops[])
6428 {
6429   enum insn_code icode = (enum insn_code) d->icode;
6430   int i = 0, a;
6431 
6432   /* Expand the arguments into rtl. */
6433 
6434   if (d->parm[0] != SPU_BTI_VOID)
6435     ops[i++] = target;
6436 
6437   for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6438     {
6439       tree arg = CALL_EXPR_ARG (exp, a);
6440       if (arg == 0)
6441 	abort ();
6442       ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6443     }
6444 
6445   gcc_assert (i == insn_data[icode].n_generator_args);
6446   return i;
6447 }
6448 
6449 static rtx
6450 spu_expand_builtin_1 (struct spu_builtin_description *d,
6451 		      tree exp, rtx target)
6452 {
6453   rtx pat;
6454   rtx ops[8];
6455   enum insn_code icode = (enum insn_code) d->icode;
6456   machine_mode mode, tmode;
6457   int i, p;
6458   int n_operands;
6459   tree return_type;
6460 
6461   /* Set up ops[] with values from arglist. */
6462   n_operands = expand_builtin_args (d, exp, target, ops);
6463 
6464   /* Handle the target operand which must be operand 0. */
6465   i = 0;
6466   if (d->parm[0] != SPU_BTI_VOID)
6467     {
6468 
6469       /* We prefer the mode specified for the match_operand; otherwise
6470          use the mode from the builtin function prototype.  */
6471       tmode = insn_data[d->icode].operand[0].mode;
6472       if (tmode == VOIDmode)
6473 	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6474 
6475       /* Try to use target because not using it can lead to extra copies,
6476          and when all of the registers are in use those extra copies lead
6477          to extra spills.  */
6478       if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6479 	ops[0] = target;
6480       else
6481 	target = ops[0] = gen_reg_rtx (tmode);
6482 
6483       if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6484 	abort ();
6485 
6486       i++;
6487     }
6488 
6489   if (d->fcode == SPU_MASK_FOR_LOAD)
6490     {
6491       machine_mode mode = insn_data[icode].operand[1].mode;
6492       tree arg;
6493       rtx addr, op, pat;
6494 
6495       /* get addr */
6496       arg = CALL_EXPR_ARG (exp, 0);
6497       gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6498       op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6499       addr = memory_address (mode, op);
6500 
6501       /* negate addr */
6502       op = gen_reg_rtx (GET_MODE (addr));
6503       emit_insn (gen_rtx_SET (VOIDmode, op,
6504                  gen_rtx_NEG (GET_MODE (addr), addr)));
6505       op = gen_rtx_MEM (mode, op);
6506 
6507       pat = GEN_FCN (icode) (target, op);
6508       if (!pat)
6509         return 0;
6510       emit_insn (pat);
6511       return target;
6512     }
6513 
6514   /* Ignore align_hint, but still expand its args in case they have
6515      side effects.  */
6516   if (icode == CODE_FOR_spu_align_hint)
6517     return 0;
6518 
6519   /* Handle the rest of the operands. */
6520   for (p = 1; i < n_operands; i++, p++)
6521     {
6522       if (insn_data[d->icode].operand[i].mode != VOIDmode)
6523 	mode = insn_data[d->icode].operand[i].mode;
6524       else
6525 	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6526 
6527       /* mode can be VOIDmode here for labels */
6528 
6529       /* For specific intrinsics with an immediate operand, e.g.,
6530          si_ai(), we sometimes need to convert the scalar argument to a
6531          vector argument by splatting the scalar. */
6532       if (VECTOR_MODE_P (mode)
6533 	  && (GET_CODE (ops[i]) == CONST_INT
6534 	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6535 	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6536 	{
6537 	  if (GET_CODE (ops[i]) == CONST_INT)
6538 	    ops[i] = spu_const (mode, INTVAL (ops[i]));
6539 	  else
6540 	    {
6541 	      rtx reg = gen_reg_rtx (mode);
6542 	      machine_mode imode = GET_MODE_INNER (mode);
6543 	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6544 		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6545 	      if (imode != GET_MODE (ops[i]))
6546 		ops[i] = convert_to_mode (imode, ops[i],
6547 					  TYPE_UNSIGNED (spu_builtin_types
6548 							 [d->parm[i]]));
6549 	      emit_insn (gen_spu_splats (reg, ops[i]));
6550 	      ops[i] = reg;
6551 	    }
6552 	}
6553 
6554       spu_check_builtin_parm (d, ops[i], d->parm[p]);
6555 
6556       if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6557 	ops[i] = spu_force_reg (mode, ops[i]);
6558     }
6559 
6560   switch (n_operands)
6561     {
6562     case 0:
6563       pat = GEN_FCN (icode) (0);
6564       break;
6565     case 1:
6566       pat = GEN_FCN (icode) (ops[0]);
6567       break;
6568     case 2:
6569       pat = GEN_FCN (icode) (ops[0], ops[1]);
6570       break;
6571     case 3:
6572       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6573       break;
6574     case 4:
6575       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6576       break;
6577     case 5:
6578       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6579       break;
6580     case 6:
6581       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6582       break;
6583     default:
6584       abort ();
6585     }
6586 
6587   if (!pat)
6588     abort ();
6589 
6590   if (d->type == B_CALL || d->type == B_BISLED)
6591     emit_call_insn (pat);
6592   else if (d->type == B_JUMP)
6593     {
6594       emit_jump_insn (pat);
6595       emit_barrier ();
6596     }
6597   else
6598     emit_insn (pat);
6599 
6600   return_type = spu_builtin_types[d->parm[0]];
6601   if (d->parm[0] != SPU_BTI_VOID
6602       && GET_MODE (target) != TYPE_MODE (return_type))
6603     {
6604       /* target is the return value.  It should always have the mode of
6605          the builtin function prototype.  */
6606       target = spu_force_reg (TYPE_MODE (return_type), target);
6607     }
6608 
6609   return target;
6610 }
6611 
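     /* Expand a call to an SPU builtin.  Look the builtin up by its
        function code and hand it off to spu_expand_builtin_1.  */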
6612 rtx
6613 spu_expand_builtin (tree exp,
6614 		    rtx target,
6615 		    rtx subtarget ATTRIBUTE_UNUSED,
6616 		    machine_mode mode ATTRIBUTE_UNUSED,
6617 		    int ignore ATTRIBUTE_UNUSED)
6618 {
6619   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6620   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6621   struct spu_builtin_description *d;
6622 
6623   if (fcode < NUM_SPU_BUILTINS)
6624     {
6625       d = &spu_builtins[fcode];
6626 
6627       return spu_expand_builtin_1 (d, exp, target);
6628     }
6629   abort ();
6630 }
6631 
6632 /* Implement targetm.vectorize.builtin_mask_for_load.  */
6633 static tree
6634 spu_builtin_mask_for_load (void)
6635 {
6636   return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6637 }
6638 
6639 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
6640 static int
6641 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6642                                 tree vectype,
6643                                 int misalign ATTRIBUTE_UNUSED)
6644 {
6645   unsigned elements;
6646 
6647   switch (type_of_cost)
6648     {
6649       case scalar_stmt:
6650       case vector_stmt:
6651       case vector_load:
6652       case vector_store:
6653       case vec_to_scalar:
6654       case scalar_to_vec:
6655       case cond_branch_not_taken:
6656       case vec_perm:
6657       case vec_promote_demote:
6658         return 1;
6659 
6660       case scalar_store:
6661         return 10;
6662 
6663       case scalar_load:
6664         /* Load + rotate.  */
6665         return 2;
6666 
6667       case unaligned_load:
6668         return 2;
6669 
6670       case cond_branch_taken:
6671         return 6;
6672 
6673       case vec_construct:
6674 	elements = TYPE_VECTOR_SUBPARTS (vectype);
6675 	return elements / 2 + 1;
6676 
6677       default:
6678         gcc_unreachable ();
6679     }
6680 }
6681 
6682 /* Implement targetm.vectorize.init_cost.  */
6683 
6684 static void *
6685 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6686 {
6687   unsigned *cost = XNEWVEC (unsigned, 3);
6688   cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6689   return cost;
6690 }
6691 
6692 /* Implement targetm.vectorize.add_stmt_cost.  */
6693 
6694 static unsigned
6695 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6696 		   struct _stmt_vec_info *stmt_info, int misalign,
6697 		   enum vect_cost_model_location where)
6698 {
6699   unsigned *cost = (unsigned *) data;
6700   unsigned retval = 0;
6701 
6702   if (flag_vect_cost_model)
6703     {
6704       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6705       int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6706 
6707       /* Statements in an inner loop relative to the loop being
6708 	 vectorized are weighted more heavily.  The value here is
6709 	 arbitrary and could potentially be improved with analysis.  */
6710       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6711 	count *= 50;  /* FIXME.  */
6712 
6713       retval = (unsigned) (count * stmt_cost);
6714       cost[where] += retval;
6715     }
6716 
6717   return retval;
6718 }
6719 
6720 /* Implement targetm.vectorize.finish_cost.  */
6721 
6722 static void
6723 spu_finish_cost (void *data, unsigned *prologue_cost,
6724 		 unsigned *body_cost, unsigned *epilogue_cost)
6725 {
6726   unsigned *cost = (unsigned *) data;
6727   *prologue_cost = cost[vect_prologue];
6728   *body_cost     = cost[vect_body];
6729   *epilogue_cost = cost[vect_epilogue];
6730 }
6731 
6732 /* Implement targetm.vectorize.destroy_cost_data.  */
6733 
6734 static void
6735 spu_destroy_cost_data (void *data)
6736 {
6737   free (data);
6738 }
6739 
6740 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6741    after some number of peeling iterations.  This routine does not
6742    determine how many iterations are required to reach that alignment.  */
6743 
6744 static bool
6745 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6746 {
6747   if (is_packed)
6748     return false;
6749 
6750   /* All other types are naturally aligned.  */
6751   return true;
6752 }
6753 
6754 /* Return the appropriate mode for a named address pointer.  */
6755 static machine_mode
6756 spu_addr_space_pointer_mode (addr_space_t addrspace)
6757 {
6758   switch (addrspace)
6759     {
6760     case ADDR_SPACE_GENERIC:
6761       return ptr_mode;
6762     case ADDR_SPACE_EA:
6763       return EAmode;
6764     default:
6765       gcc_unreachable ();
6766     }
6767 }
6768 
6769 /* Return the appropriate mode for a named address space address.  */
6770 static machine_mode
6771 spu_addr_space_address_mode (addr_space_t addrspace)
6772 {
6773   switch (addrspace)
6774     {
6775     case ADDR_SPACE_GENERIC:
6776       return Pmode;
6777     case ADDR_SPACE_EA:
6778       return EAmode;
6779     default:
6780       gcc_unreachable ();
6781     }
6782 }
6783 
6784 /* Determine if one named address space is a subset of another.  */
6785 
6786 static bool
6787 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6788 {
6789   gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6790   gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6791 
6792   if (subset == superset)
6793     return true;
6794 
6795   /* If we have -mno-address-space-conversion, treat __ea and generic as not
6796      being subsets but instead as disjoint address spaces.  */
6797   else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6798     return false;
6799 
6800   else
6801     return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6802 }
6803 
6804 /* Convert from one address space to another.  */
6805 static rtx
6806 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6807 {
6808   addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6809   addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6810 
6811   gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6812   gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6813 
6814   if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6815     {
6816       rtx result, ls;
6817 
6818       ls = gen_const_mem (DImode,
6819 			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6820       set_mem_align (ls, 128);
6821 
6822       result = gen_reg_rtx (Pmode);
6823       ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6824       op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6825       ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6826 					  ls, const0_rtx, Pmode, 1);
6827 
6828       emit_insn (gen_subsi3 (result, op, ls));
6829 
6830       return result;
6831     }
6832 
6833   else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6834     {
6835       rtx result, ls;
6836 
6837       ls = gen_const_mem (DImode,
6838 			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6839       set_mem_align (ls, 128);
6840 
6841       result = gen_reg_rtx (EAmode);
6842       ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6843       op = force_reg (Pmode, op);
6844       ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6845 					  ls, const0_rtx, EAmode, 1);
6846       op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6847 
6848       if (EAmode == SImode)
6849 	emit_insn (gen_addsi3 (result, op, ls));
6850       else
6851 	emit_insn (gen_adddi3 (result, op, ls));
6852 
6853       return result;
6854     }
6855 
6856   else
6857     gcc_unreachable ();
6858 }
6859 
6860 
6861 /* Count the total number of instructions in each pipe and return the
6862    maximum, which is used as the Minimum Iteration Interval (MII)
6863    in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
6864    -2 means the instruction can go in either pipe0 or pipe1.  */
6865 static int
6866 spu_sms_res_mii (struct ddg *g)
6867 {
6868   int i;
6869   unsigned t[4] = {0, 0, 0, 0};
6870 
6871   for (i = 0; i < g->num_nodes; i++)
6872     {
6873       rtx_insn *insn = g->nodes[i].insn;
6874       int p = get_pipe (insn) + 2;
6875 
6876       gcc_assert (p >= 0);
6877       gcc_assert (p < 4);
6878 
6879       t[p]++;
6880       if (dump_file && INSN_P (insn))
6881             fprintf (dump_file, "i%d %s %d %d\n",
6882                      INSN_UID (insn),
6883                      insn_data[INSN_CODE(insn)].name,
6884                      p, t[p]);
6885     }
6886   if (dump_file)
6887     fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6888 
6889   return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6890 }
6891 
6892 
6893 void
6894 spu_init_expanders (void)
6895 {
6896   if (cfun)
6897     {
6898       rtx r0, r1;
6899       /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
6900          frame_pointer_needed is true.  We don't know that until we're
6901          expanding the prologue.  */
6902       REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6903 
6904       /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6905 	 LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
6906 	 to be treated as aligned, so generate them here. */
6907       r0 = gen_reg_rtx (SImode);
6908       r1 = gen_reg_rtx (SImode);
6909       mark_reg_pointer (r0, 128);
6910       mark_reg_pointer (r1, 128);
6911       gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6912 		  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6913     }
6914 }
6915 
6916 static machine_mode
6917 spu_libgcc_cmp_return_mode (void)
6918 {
6919 
6920 /* For the SPU, word mode is TImode, so it is better to use SImode
6921    for compare returns.  */
6922   return SImode;
6923 }
6924 
6925 static machine_mode
6926 spu_libgcc_shift_count_mode (void)
6927 {
6928 /* For the SPU, word mode is TImode, so it is better to use SImode
6929    for shift counts.  */
6930   return SImode;
6931 }
6932 
6933 /* Implement targetm.section_type_flags.  */
6934 static unsigned int
6935 spu_section_type_flags (tree decl, const char *name, int reloc)
6936 {
6937   /* .toe needs to have type @nobits.  */
6938   if (strcmp (name, ".toe") == 0)
6939     return SECTION_BSS;
6940   /* Don't load _ea into the current address space.  */
6941   if (strcmp (name, "._ea") == 0)
6942     return SECTION_WRITE | SECTION_DEBUG;
6943   return default_section_type_flags (decl, name, reloc);
6944 }
6945 
6946 /* Implement targetm.select_section.  */
6947 static section *
6948 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6949 {
6950   /* Variables and constants defined in the __ea address space
6951      go into a special section named "._ea".  */
6952   if (TREE_TYPE (decl) != error_mark_node
6953       && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6954     {
6955       /* We might get called with string constants, but get_named_section
6956 	 doesn't like them as they are not DECLs.  Also, we need to set
6957 	 flags in that case.  */
6958       if (!DECL_P (decl))
6959 	return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6960 
6961       return get_named_section (decl, "._ea", reloc);
6962     }
6963 
6964   return default_elf_select_section (decl, reloc, align);
6965 }
6966 
6967 /* Implement targetm.unique_section.  */
6968 static void
6969 spu_unique_section (tree decl, int reloc)
6970 {
6971   /* We don't support unique section names in the __ea address
6972      space for now.  */
6973   if (TREE_TYPE (decl) != error_mark_node
6974       && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6975     return;
6976 
6977   default_unique_section (decl, reloc);
6978 }
6979 
6980 /* Generate a constant or register which contains 2^SCALE.  We assume
6981    the result is valid for MODE.  Currently, MODE must be V4SFmode and
6982    SCALE must be SImode. */
6983 rtx
6984 spu_gen_exp2 (machine_mode mode, rtx scale)
6985 {
6986   gcc_assert (mode == V4SFmode);
6987   gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6988   if (GET_CODE (scale) != CONST_INT)
6989     {
6990       /* unsigned int exp = (127 + scale) << 23;
6991 	__vector float m = (__vector float) spu_splats (exp); */
6992       rtx reg = force_reg (SImode, scale);
6993       rtx exp = gen_reg_rtx (SImode);
6994       rtx mul = gen_reg_rtx (mode);
6995       emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6996       emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6997       emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6998       return mul;
6999     }
7000   else
7001     {
7002       HOST_WIDE_INT exp = 127 + INTVAL (scale);
7003       unsigned char arr[16];
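      /* Build the big-endian IEEE single-precision bit pattern
	 (EXP << 23), i.e. sign 0, biased exponent EXP and a zero
	 mantissa, byte by byte, replicated into each of the four
	 word slots.  */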
7004       arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7005       arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7006       arr[2] = arr[6] = arr[10] = arr[14] = 0;
7007       arr[3] = arr[7] = arr[11] = arr[15] = 0;
7008       return array_to_constant (mode, arr);
7009     }
7010 }
7011 
7012 /* After reload, just change the convert into a move instruction
7013    or a dead instruction. */
7014 void
7015 spu_split_convert (rtx ops[])
7016 {
7017   if (REGNO (ops[0]) == REGNO (ops[1]))
7018     emit_note (NOTE_INSN_DELETED);
7019   else
7020     {
7021       /* Use TImode always as this might help hard reg copyprop.  */
7022       rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7023       rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7024       emit_insn (gen_move_insn (op0, op1));
7025     }
7026 }
7027 
7028 void
7029 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7030 {
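  /* brsl (branch relative and set link) calls _mcount with the
     return address left in register $75.  */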
7031   fprintf (file, "# profile\n");
7032   fprintf (file, "brsl $75,  _mcount\n");
7033 }
7034 
7035 /* Implement targetm.ref_may_alias_errno.  */
7036 static bool
7037 spu_ref_may_alias_errno (ao_ref *ref)
7038 {
7039   tree base = ao_ref_base (ref);
7040 
7041   /* With SPU newlib, errno is defined as something like
7042          _impure_data._errno
7043      The default implementation of this target hook does not
7044      recognize such expressions, so we special-case them here.  */
7045 
7046   if (TREE_CODE (base) == VAR_DECL
7047       && !TREE_STATIC (base)
7048       && DECL_EXTERNAL (base)
7049       && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7050       && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7051 		 "_impure_data") == 0
7052       /* _errno is the first member of _impure_data.  */
7053       && ref->offset == 0)
7054     return true;
7055 
7056   return default_ref_may_alias_errno (ref);
7057 }
7058 
7059 /* Output thunk to FILE that implements a C++ virtual function call (with
7060    multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
7061    by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7062    stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7063    relative to the resulting this pointer.  */
7064 
7065 static void
7066 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7067 		     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7068 		     tree function)
7069 {
7070   rtx op[8];
7071 
7072   /* Make sure unwind info is emitted for the thunk if needed.  */
7073   final_start_function (emit_barrier (), file, 1);
7074 
7075   /* Operand 0 is the target function.  */
7076   op[0] = XEXP (DECL_RTL (function), 0);
7077 
7078   /* Operand 1 is the 'this' pointer.  */
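  /* When FUNCTION returns an aggregate in memory, the hidden return
     value pointer occupies the first argument register and 'this'
     is passed in the second one.  */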
7079   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7080     op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7081   else
7082     op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7083 
7084   /* Operands 2/3 are the low/high halfwords of delta.  */
7085   op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7086   op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7087 
7088   /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
7089   op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7090   op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7091 
7092   /* Operands 6/7 are temporary registers.  */
7093   op[6] = gen_rtx_REG (Pmode, 79);
7094   op[7] = gen_rtx_REG (Pmode, 78);
7095 
7096   /* Add DELTA to this pointer.  */
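  /* ai accepts a 10-bit signed immediate and il a 16-bit signed
     immediate; larger deltas are built in two halves with ilhu/iohl
     before the add.  */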
7097   if (delta)
7098     {
7099       if (delta >= -0x200 && delta < 0x200)
7100 	output_asm_insn ("ai\t%1,%1,%2", op);
7101       else if (delta >= -0x8000 && delta < 0x8000)
7102 	{
7103 	  output_asm_insn ("il\t%6,%2", op);
7104 	  output_asm_insn ("a\t%1,%1,%6", op);
7105 	}
7106       else
7107 	{
7108 	  output_asm_insn ("ilhu\t%6,%3", op);
7109 	  output_asm_insn ("iohl\t%6,%2", op);
7110 	  output_asm_insn ("a\t%1,%1,%6", op);
7111 	}
7112     }
7113 
7114   /* Perform vcall adjustment.  */
7115   if (vcall_offset)
7116     {
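      /* Load the vtable pointer stored at the this pointer: lqd
	 fetches the enclosing aligned quadword and rotqby rotates
	 the addressed word into the preferred slot.  */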
7117       output_asm_insn ("lqd\t%7,0(%1)", op);
7118       output_asm_insn ("rotqby\t%7,%7,%1", op);
7119 
7120       if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7121 	output_asm_insn ("ai\t%7,%7,%4", op);
7122       else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7123 	{
7124 	  output_asm_insn ("il\t%6,%4", op);
7125 	  output_asm_insn ("a\t%7,%7,%6", op);
7126 	}
7127       else
7128 	{
7129 	  output_asm_insn ("ilhu\t%6,%5", op);
7130 	  output_asm_insn ("iohl\t%6,%4", op);
7131 	  output_asm_insn ("a\t%7,%7,%6", op);
7132 	}
7133 
7134       output_asm_insn ("lqd\t%6,0(%7)", op);
7135       output_asm_insn ("rotqby\t%6,%6,%7", op);
7136       output_asm_insn ("a\t%1,%1,%6", op);
7137     }
7138 
7139   /* Jump to target.  */
7140   output_asm_insn ("br\t%0", op);
7141 
7142   final_end_function ();
7143 }
7144 
7145 /* Canonicalize a comparison from one we don't have to one we do have.  */
7146 static void
7147 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7148 			     bool op0_preserve_value)
7149 {
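  /* SPU provides only equal and greater-than style compares, so when
     the operands may be swapped, rewrite LT/LE/LTU/LEU into the
     swapped GT/GE/GTU/GEU form.  */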
7150   if (!op0_preserve_value
7151       && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7152     {
7153       rtx tem = *op0;
7154       *op0 = *op1;
7155       *op1 = tem;
7156       *code = (int)swap_condition ((enum rtx_code)*code);
7157     }
7158 }
7159 
7160 /*  Table of machine attributes.  */
7161 static const struct attribute_spec spu_attribute_table[] =
7162 {
7163   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7164        affects_type_identity } */
7165   { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute,
7166     false },
7167   { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute,
7168     false },
7169   { NULL,             0, 0, false, false, false, NULL, false }
7170 };
7171 
7172 /*  TARGET overrides.  */
7173 
7174 #undef TARGET_ADDR_SPACE_POINTER_MODE
7175 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7176 
7177 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7178 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7179 
7180 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7181 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7182   spu_addr_space_legitimate_address_p
7183 
7184 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7185 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7186 
7187 #undef TARGET_ADDR_SPACE_SUBSET_P
7188 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7189 
7190 #undef TARGET_ADDR_SPACE_CONVERT
7191 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7192 
7193 #undef TARGET_INIT_BUILTINS
7194 #define TARGET_INIT_BUILTINS spu_init_builtins
7195 #undef TARGET_BUILTIN_DECL
7196 #define TARGET_BUILTIN_DECL spu_builtin_decl
7197 
7198 #undef TARGET_EXPAND_BUILTIN
7199 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7200 
7201 #undef TARGET_UNWIND_WORD_MODE
7202 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7203 
7204 #undef TARGET_LEGITIMIZE_ADDRESS
7205 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7206 
7207 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7208    and .quad for the debugger.  Once the assembler is known to be fixed,
7209    these can be removed.  */
7210 #undef TARGET_ASM_UNALIGNED_SI_OP
7211 #define TARGET_ASM_UNALIGNED_SI_OP	"\t.long\t"
7212 
7213 #undef TARGET_ASM_ALIGNED_DI_OP
7214 #define TARGET_ASM_ALIGNED_DI_OP	"\t.quad\t"
7215 
7216 /* The .8byte directive doesn't seem to work well for a 32-bit
7217    architecture.  */
7218 #undef TARGET_ASM_UNALIGNED_DI_OP
7219 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7220 
7221 #undef TARGET_RTX_COSTS
7222 #define TARGET_RTX_COSTS spu_rtx_costs
7223 
7224 #undef TARGET_ADDRESS_COST
7225 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7226 
7227 #undef TARGET_SCHED_ISSUE_RATE
7228 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7229 
7230 #undef TARGET_SCHED_INIT_GLOBAL
7231 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7232 
7233 #undef TARGET_SCHED_INIT
7234 #define TARGET_SCHED_INIT spu_sched_init
7235 
7236 #undef TARGET_SCHED_VARIABLE_ISSUE
7237 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7238 
7239 #undef TARGET_SCHED_REORDER
7240 #define TARGET_SCHED_REORDER spu_sched_reorder
7241 
7242 #undef TARGET_SCHED_REORDER2
7243 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7244 
7245 #undef TARGET_SCHED_ADJUST_COST
7246 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7247 
7248 #undef  TARGET_ATTRIBUTE_TABLE
7249 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7250 
7251 #undef TARGET_ASM_INTEGER
7252 #define TARGET_ASM_INTEGER spu_assemble_integer
7253 
7254 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7255 #define TARGET_SCALAR_MODE_SUPPORTED_P	spu_scalar_mode_supported_p
7256 
7257 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7258 #define TARGET_VECTOR_MODE_SUPPORTED_P	spu_vector_mode_supported_p
7259 
7260 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7261 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7262 
7263 #undef TARGET_ASM_GLOBALIZE_LABEL
7264 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7265 
7266 #undef TARGET_PASS_BY_REFERENCE
7267 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7268 
7269 #undef TARGET_FUNCTION_ARG
7270 #define TARGET_FUNCTION_ARG spu_function_arg
7271 
7272 #undef TARGET_FUNCTION_ARG_ADVANCE
7273 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7274 
7275 #undef TARGET_MUST_PASS_IN_STACK
7276 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7277 
7278 #undef TARGET_BUILD_BUILTIN_VA_LIST
7279 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7280 
7281 #undef TARGET_EXPAND_BUILTIN_VA_START
7282 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7283 
7284 #undef TARGET_SETUP_INCOMING_VARARGS
7285 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7286 
7287 #undef TARGET_MACHINE_DEPENDENT_REORG
7288 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7289 
7290 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7291 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7292 
7293 #undef TARGET_INIT_LIBFUNCS
7294 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7295 
7296 #undef TARGET_RETURN_IN_MEMORY
7297 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7298 
7299 #undef  TARGET_ENCODE_SECTION_INFO
7300 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7301 
7302 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7303 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7304 
7305 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7306 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7307 
7308 #undef TARGET_VECTORIZE_INIT_COST
7309 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7310 
7311 #undef TARGET_VECTORIZE_ADD_STMT_COST
7312 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7313 
7314 #undef TARGET_VECTORIZE_FINISH_COST
7315 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7316 
7317 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7318 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7319 
7320 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7321 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7322 
7323 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7324 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7325 
7326 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7327 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7328 
7329 #undef TARGET_SCHED_SMS_RES_MII
7330 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7331 
7332 #undef TARGET_SECTION_TYPE_FLAGS
7333 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7334 
7335 #undef TARGET_ASM_SELECT_SECTION
7336 #define TARGET_ASM_SELECT_SECTION  spu_select_section
7337 
7338 #undef TARGET_ASM_UNIQUE_SECTION
7339 #define TARGET_ASM_UNIQUE_SECTION  spu_unique_section
7340 
7341 #undef TARGET_LEGITIMATE_ADDRESS_P
7342 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7343 
7344 #undef TARGET_LEGITIMATE_CONSTANT_P
7345 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7346 
7347 #undef TARGET_TRAMPOLINE_INIT
7348 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7349 
7350 #undef TARGET_WARN_FUNC_RETURN
7351 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7352 
7353 #undef TARGET_OPTION_OVERRIDE
7354 #define TARGET_OPTION_OVERRIDE spu_option_override
7355 
7356 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7357 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7358 
7359 #undef TARGET_REF_MAY_ALIAS_ERRNO
7360 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7361 
7362 #undef TARGET_ASM_OUTPUT_MI_THUNK
7363 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7364 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7365 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7366 
7367 /* Variable tracking should be run after all optimizations which
7368    change order of insns.  It also needs a valid CFG.  */
7369 #undef TARGET_DELAY_VARTRACK
7370 #define TARGET_DELAY_VARTRACK true
7371 
7372 #undef TARGET_CANONICALIZE_COMPARISON
7373 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7374 
7375 #undef TARGET_CAN_USE_DOLOOP_P
7376 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7377 
7378 struct gcc_target targetm = TARGET_INITIALIZER;
7379 
7380 #include "gt-spu.h"
7381