/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "cfglayout.h"
#include "gimple.h"
#include "langhooks.h"
#include "params.h"
#include "df.h"
#include "dwarf2out.h"

/* Processor costs */
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (31), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

const struct processor_costs *sparc_costs = &cypress_costs;
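
/* A minimal usage sketch (editor's addition, kept disabled): cost hooks
   read the active tuning table through the sparc_costs pointer above.
   The field name int_mul and the COSTS_N_INSNS scaling (N * 4, per
   rtl.h) are assumptions inferred from the table comments; see the
   struct processor_costs declaration in sparc.h for the authoritative
   layout.  */
#if 0
static int
example_imul_cost (void)
{
  /* With the default (cypress) table this is COSTS_N_INSNS (1),
     i.e. the cost of one average instruction.  */
  return sparc_costs->int_mul;
}
#endif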

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch-always, always
   use "or %o7,%g0,X; call Y; or X,%g0,%o7" so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   somebody branches to a point between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Global variables for machine-dependent things.  */

/* Size of frame.  Need to know this to emit return insns from leaf procedures.
   ACTUAL_FSIZE is set by sparc_compute_frame_size() which is called during the
   reload pass.  This is important as the value is later used for scheduling
   (to see what can go in a delay slot).
   APPARENT_FSIZE is the size of the stack less the register save area and less
   the outgoing argument area.  It is used when saving call preserved regs.  */
static HOST_WIDE_INT apparent_fsize;
static HOST_WIDE_INT actual_fsize;

/* Number of live general or floating point registers needed to be
   saved (as 4-byte quantities).  */
static int num_gfregs;

/* The alias set for prologue/epilogue register save/restore.  */
static GTY(()) alias_set_type sparc_sr_alias_set;

/* The alias set for the structure return value.  */
static GTY(()) alias_set_type struct_value_alias_set;

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this table to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100};
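
/* Reading the table above (editor's note): in a leaf function the
   incoming registers are renumbered onto the caller-window %o
   registers.  For example, entry 24 is 8, so %i0 becomes %o0, and
   entry 31 is 15, so %i7 (the return address register) becomes %o7.
   Registers whose entry is -1 (the %o and %l registers, %fp, ...)
   have no remapping and must not appear in a leaf candidate.  */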

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of current_function_uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_leaf_function_p  cfun->machine->leaf_function_p
#define sparc_prologue_data_valid_p  cfun->machine->prologue_data_valid_p

/* Register we pretend to think the frame pointer is allocated to.
   Normally, this is %fp, but if we are in a leaf procedure, this
   is %sp+"something".  We record "something" separately as it may
   be too big for reg+constant addressing.  */
static rtx frame_base_reg;
static HOST_WIDE_INT frame_base_offset;

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static bool sparc_handle_option (size_t, const char *, int);
static void sparc_init_modes (void);
static void scan_record_type (tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
				tree, int, int, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void load_got_register (void);
static int save_or_restore_regs (int, int, rtx, int, int);
static void emit_save_or_restore_regs (int);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_vis_init_builtins (void);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, tree, bool);
static int sparc_vis_mul8x16 (int, int);
static tree sparc_handle_vis_mul8x16 (int, tree, tree, tree);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static bool sparc_rtx_costs (rtx, int, int, int *, bool);
static bool sparc_promote_prototypes (const_tree);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (CUMULATIVE_ARGS *);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx legitimize_tls_address (rtx);
static rtx legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static bool sparc_pass_by_reference (CUMULATIVE_ARGS *,
				     enum machine_mode, const_tree, bool);
static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *,
				    enum machine_mode, tree, bool);
static void sparc_dwarf_handle_frame_unspec (const char *, rtx, int);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL,        0, 0, false, false, false, NULL }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

struct sparc_cpu_select sparc_select[] =
{
  /* switch	name,		tune	arch */
  { (char *)0,	"default",	1,	1 },
  { (char *)0,	"-mcpu=",	1,	1 },
  { (char *)0,	"-mtune=",	1,	0 },
  { 0, 0, 0, 0 }
};

/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
enum processor_type sparc_cpu;

/* Whether an FPU option was specified.  */
static bool fpu_option_set = false;

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sparc_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC sparc_dwarf_handle_frame_unspec

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sparc_handle_option

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_HANDLE_OPTION.  */

static bool
sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mfpu:
    case OPT_mhard_float:
    case OPT_msoft_float:
      fpu_option_set = true;
      break;

    case OPT_mcpu_:
      sparc_select[1].string = arg;
      break;

    case OPT_mtune_:
      sparc_select[2].string = arg;
      break;
    }

  return true;
}

/* Specify default optimizations.  */

void
sparc_optimization_options (int l ATTRIBUTE_UNUSED, int s ATTRIBUTE_UNUSED)
{
  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  flag_ira_share_save_slots = 0;
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

void
sparc_override_options (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const char *const name;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, "cypress" },
    { TARGET_CPU_sparclet, "tsc701" },
    { TARGET_CPU_sparclite, "f930" },
    { TARGET_CPU_v8, "v8" },
    { TARGET_CPU_hypersparc, "hypersparc" },
    { TARGET_CPU_sparclite86x, "sparclite86x" },
    { TARGET_CPU_supersparc, "supersparc" },
    { TARGET_CPU_v9, "v9" },
    { TARGET_CPU_ultrasparc, "ultrasparc" },
    { TARGET_CPU_ultrasparc3, "ultrasparc3" },
    { TARGET_CPU_niagara, "niagara" },
    { TARGET_CPU_niagara2, "niagara2" },
    { 0, 0 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  */
  static struct cpu_table {
    const char *const name;
    const enum processor_type processor;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7",         PROCESSOR_V7, MASK_ISA, 0 },
    { "cypress",    PROCESSOR_CYPRESS, MASK_ISA, 0 },
    { "v8",         PROCESSOR_V8, MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
    { "sparclite",  PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
       The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
    { "f930",       PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "f934",       PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
    { "sparclite86x",  PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
      MASK_SPARCLITE },
    { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
    { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
    /* TI ultrasparc I, II, IIi */
    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
    /* Although insns using %y are deprecated, it is a clear win on current
       ultrasparcs.  */
    						    |MASK_DEPRECATED_V8_INSNS},
    /* TI ultrasparc III */
    /* ??? Check if %y issue still holds true in ultra3.  */
    { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
    /* UltraSPARC T1 */
    { "niagara", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
    /* UltraSPARC T2 */
    { "niagara2", PROCESSOR_NIAGARA2, MASK_ISA, MASK_V9},
    { 0, (enum processor_type) 0, 0, 0 }
  };
  const struct cpu_table *cpu;
  const struct sparc_cpu_select *sel;
  int fpu;

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  for (def = &cpu_default[0]; def->name; ++def)
    if (def->cpu == TARGET_CPU_DEFAULT)
      break;
  gcc_assert (def->name);
  sparc_select[0].string = def->name;

  for (sel = &sparc_select[0]; sel->name; ++sel)
    {
      if (sel->string)
	{
	  for (cpu = &cpu_table[0]; cpu->name; ++cpu)
	    if (! strcmp (sel->string, cpu->name))
	      {
		if (sel->set_tune_p)
		  sparc_cpu = cpu->processor;

		if (sel->set_arch_p)
		  {
		    target_flags &= ~cpu->disable;
		    target_flags |= cpu->enable;
		  }
		break;
	      }

	  if (! cpu->name)
	    error ("bad value (%s) for %s switch", sel->string, sel->name);
	}
    }

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (fpu_option_set)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* Don't allow -mvis if FPU is disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~MASK_VIS;

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Acquire unique alias sets for our private stuff.  */
  sparc_sr_alias_set = new_alias_set ();
  struct_value_alias_set = new_alias_set ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    };

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
    set_param_value ("simultaneous-prefetches",
		     ((sparc_cpu == PROCESSOR_ULTRASPARC
		       || sparc_cpu == PROCESSOR_NIAGARA
		       || sparc_cpu == PROCESSOR_NIAGARA2)
		      ? 2
		      : (sparc_cpu == PROCESSOR_ULTRASPARC3
			 ? 8 : 3)));
  if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
    set_param_value ("l1-cache-line-size",
		     ((sparc_cpu == PROCESSOR_ULTRASPARC
		       || sparc_cpu == PROCESSOR_ULTRASPARC3
		       || sparc_cpu == PROCESSOR_NIAGARA
		       || sparc_cpu == PROCESSOR_NIAGARA2)
		      ? 64 : 32));
}
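
/* Worked example (editor's note): with -mcpu=ultrasparc and no -mtune,
   the selection loop above clears MASK_ISA, sets
   MASK_V9|MASK_DEPRECATED_V8_INSNS and sets sparc_cpu to
   PROCESSOR_ULTRASPARC; the switch then selects ultrasparc_costs, and,
   unless given explicitly, the params default to
   simultaneous-prefetches=2 and l1-cache-line-size=64.  */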

/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}
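
/* For reference (editor's note): these are exactly the conditions the
   V9 branch-on-register-contents instructions can test against zero:
   EQ -> brz, NE -> brnz, GE -> brgez, LT -> brlz, LE -> brlez,
   GT -> brgz (and likewise movrz, movrnz, etc. for the conditional
   moves).  */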

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
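
/* Worked example (editor's note): the single-precision image of 1.5f
   is 0x3fc00000.  That value is far outside the simm13 range but has
   its low 10 bits clear, so fp_sethi_p accepts it (one sethi).  The
   image of 0.1f is 0x3dcccccd, which fails both tests, so only
   fp_high_losum_p accepts it and a sethi/or pair is needed.  */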

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one, and mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partitioning into
     hot and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}

/* Expand a move instruction.  Return true if all work is done.  */

bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      operands[1] = legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if (GET_CODE (operands[1]) == LABEL_REF
	  && can_use_mov_pic_label_ref (operands[1]))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      gcc_assert (TARGET_ARCH64);
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  operands[1] = legitimize_pic_address (operands[1],
						reload_in_progress
						? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
         not storing directly into memory.  So fix this up to avoid
         crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear FP registers if TARGET_VIS, and always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && const_zero_operand (operands[1], mode))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers.  */
	      || (mode == DFmode
		  && (reload_completed || reload_in_progress))))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Load OP1, a 32-bit constant, into OP0, a register.
   We know it can't be done in one insn when we get
   here, the move expander guarantees this.  */

void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp;

  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      gcc_assert (!small_int_operand (op1, mode)
		  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      GEN_INT (INTVAL (op1)
			        & ~(HOST_WIDE_INT)0x3ff)));

      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}
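
/* Worked example (editor's note): for op1 == 0x12345678 the code
   above emits, roughly,

	sethi	%hi(0x12345678), %temp	! %temp = 0x12345400
	or	%temp, 0x278, %reg	! 0x278 is the low 10 bits

   as two separate sets of plain constants, so CSE can see and reuse
   the intermediate value.  */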

/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
   If TEMP is nonzero, we are forbidden to use any other scratch
   registers.  Otherwise, we are allowed to generate them as needed.

   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */

void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code-model support.  */
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp1
	 or	%temp1, %lo(symbol), %reg  */
      if (temp)
	temp1 = temp;  /* op0 is allowed.  */
      else
	temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      if (temp)
	{
	  temp1 = op0;
	  temp2 = op0;
	  temp3 = temp;  /* op0 is allowed.  */
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp3,
			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 sllx	%temp3, 32, %temp4
	 or	%temp4, %temp2, %temp5
	 or	%temp5, %lo(symbol), %reg  */
      if (temp)
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp, op0))
	    {
	      gcc_assert (ti_temp);
	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
	    }
	  temp1 = op0;
	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	  temp3 = op0;
	  temp4 = op0;
	  temp5 = op0;
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	  temp4 = gen_reg_rtx (DImode);
	  temp5 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, temp5,
			      gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			add	%temp1, EMBMEDANY_BASE_REG, %temp2
			or	%temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  if (temp)
	    {
	      temp1 = temp;  /* op0 is allowed.  */
	      temp2 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (temp2, temp1));
	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
	}

      /* Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			sllx	%temp3, 32, %temp4
			or	%temp4, %temp2, %temp5
			or	%temp5, %lo(symbol), %reg  */
      else
	{
	  if (temp)
	    {
	      /* It is possible that one of the registers we got for operands[2]
		 might coincide with that of operands[0] (which is why we made
		 it TImode).  Pick the other one to use as our scratch.  */
	      if (rtx_equal_p (temp, op0))
		{
		  gcc_assert (ti_temp);
		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
		}
	      temp1 = op0;
	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	      temp3 = op0;
	      temp4 = op0;
	      temp5 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	      temp3 = gen_reg_rtx (DImode);
	      temp4 = gen_reg_rtx (DImode);
	      temp5 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_textuhi (temp1, op1));
	  emit_insn (gen_embmedany_texthi  (temp2, op1));
	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
	  emit_insn (gen_rtx_SET (VOIDmode, temp4,
				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (VOIDmode, temp5,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
	}
      break;

    default:
      gcc_unreachable ();
    }
}

#if HOST_BITS_PER_WIDE_INT == 32
void
sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
#else
/* These avoid problems when cross compiling.  If we do not
   go through all this hair then the optimizer will see
   invalid REG_EQUAL notes or in some cases none at all.  */
static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);

/* The optimizer is not to assume anything about exactly
   which bits are set for a HIGH; they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits, so the result
   matches a plain movdi, to alleviate this problem.  */
static rtx
gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
}

static rtx
gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
}

static rtx
gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_IOR (DImode, src, GEN_INT (val));
}

static rtx
gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_XOR (DImode, src, GEN_INT (val));
}

/* Worker routines for 64-bit constant formation on arch64.
   One of the key things to be doing in these emissions is
   to create as many temp REGs as possible.  This makes it
   possible for half-built constants to be used later when
   such values are similar to something required later on.
   Without doing this, the optimizer cannot see such
   opportunities.  */

static void sparc_emit_set_const64_quick1 (rtx, rtx,
					   unsigned HOST_WIDE_INT, int);

static void
sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT low_bits, int is_neg)
{
  unsigned HOST_WIDE_INT high_bits;

  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  emit_insn (gen_safe_HIGH64 (temp, high_bits));
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_safe_XOR64 (temp,
						  (-(HOST_WIDE_INT)0x400
						   | (low_bits & 0x3ff)))));
	}
    }
}

static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
					   unsigned HOST_WIDE_INT, int);

static void
sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT high_bits,
			       unsigned HOST_WIDE_INT low_immediate,
			       int shift_count)
{
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
			  gen_rtx_ASHIFT (DImode, temp2,
					  GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
			    gen_safe_OR64 (op0, low_immediate)));
}

static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
					    unsigned HOST_WIDE_INT);

/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.  */
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
				unsigned HOST_WIDE_INT high_bits,
				unsigned HOST_WIDE_INT low_bits)
{
  rtx sub_temp;

  if (reload_in_progress || reload_completed)
    sub_temp = op0;
  else
    sub_temp = gen_reg_rtx (DImode);

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (!reload_in_progress && !reload_completed)
    {
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (32))));

      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
1613 
/* Analyze a 64-bit constant for certain properties: return in *HBSP and
   *LBSP the positions of the highest and lowest set bits (bit 0 is the
   LSB of LOW_BITS, bit 63 the MSB of HIGH_BITS) and in *ABBASP whether
   every bit in between is also set.  */
1615 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
1616 				    unsigned HOST_WIDE_INT,
1617 				    int *, int *, int *);
1618 
1619 static void
1620 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
1621 			unsigned HOST_WIDE_INT low_bits,
1622 			int *hbsp, int *lbsp, int *abbasp)
1623 {
1624   int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1625   int i;
1626 
1627   lowest_bit_set = highest_bit_set = -1;
1628   i = 0;
1629   do
1630     {
1631       if ((lowest_bit_set == -1)
1632 	  && ((low_bits >> i) & 1))
1633 	lowest_bit_set = i;
1634       if ((highest_bit_set == -1)
1635 	  && ((high_bits >> (32 - i - 1)) & 1))
1636 	highest_bit_set = (64 - i - 1);
1637     }
1638   while (++i < 32
1639 	 && ((highest_bit_set == -1)
1640 	     || (lowest_bit_set == -1)));
1641   if (i == 32)
1642     {
1643       i = 0;
1644       do
1645 	{
1646 	  if ((lowest_bit_set == -1)
1647 	      && ((high_bits >> i) & 1))
1648 	    lowest_bit_set = i + 32;
1649 	  if ((highest_bit_set == -1)
1650 	      && ((low_bits >> (32 - i - 1)) & 1))
1651 	    highest_bit_set = 32 - i - 1;
1652 	}
1653       while (++i < 32
1654 	     && ((highest_bit_set == -1)
1655 		 || (lowest_bit_set == -1)));
1656     }
1657   /* If there are no bits set this should have gone out
1658      as one instruction!  */
1659   gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
1660   all_bits_between_are_set = 1;
1661   for (i = lowest_bit_set; i <= highest_bit_set; i++)
1662     {
1663       if (i < 32)
1664 	{
1665 	  if ((low_bits & (1 << i)) != 0)
1666 	    continue;
1667 	}
1668       else
1669 	{
1670 	  if ((high_bits & (1 << (i - 32))) != 0)
1671 	    continue;
1672 	}
1673       all_bits_between_are_set = 0;
1674       break;
1675     }
1676   *hbsp = highest_bit_set;
1677   *lbsp = lowest_bit_set;
1678   *abbasp = all_bits_between_are_set;
1679 }
1680 
1681 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
1682 
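/* Return nonzero if HIGH_BITS:LOW_BITS can be loaded in two
   instructions: sethi+or when the high half is all zeros or all ones,
   mov -1 plus one shift when the set bits run to either end of the
   word, or sethi plus one shift when the set bits span 21 or fewer
   positions.  */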
1683 static int
1684 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
1685 		   unsigned HOST_WIDE_INT low_bits)
1686 {
1687   int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1688 
1689   if (high_bits == 0
1690       || high_bits == 0xffffffff)
1691     return 1;
1692 
1693   analyze_64bit_constant (high_bits, low_bits,
1694 			  &highest_bit_set, &lowest_bit_set,
1695 			  &all_bits_between_are_set);
1696 
1697   if ((highest_bit_set == 63
1698        || lowest_bit_set == 0)
1699       && all_bits_between_are_set != 0)
1700     return 1;
1701 
1702   if ((highest_bit_set - lowest_bit_set) < 21)
1703     return 1;
1704 
1705   return 0;
1706 }
1707 
1708 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
1709 							unsigned HOST_WIDE_INT,
1710 							int, int);
1711 
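/* Right-justify the span of set bits starting at LOWEST_BIT_SET and
   then move it up to bit position SHIFT; e.g. SHIFT == 10 lines the
   bits up for a sethi.  */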
1712 static unsigned HOST_WIDE_INT
1713 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
1714 			  unsigned HOST_WIDE_INT low_bits,
1715 			  int lowest_bit_set, int shift)
1716 {
1717   HOST_WIDE_INT hi, lo;
1718 
1719   if (lowest_bit_set < 32)
1720     {
1721       lo = (low_bits >> lowest_bit_set) << shift;
1722       hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1723     }
1724   else
1725     {
1726       lo = 0;
1727       hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1728     }
1729   gcc_assert (! (hi & lo));
1730   return (hi | lo);
1731 }
1732 
1733 /* Here we are sure to be arch64 and this is an integer constant
1734    being loaded into a register.  Emit the most efficient
1735    insn sequence possible.  Detection of all the 1-insn cases
1736    has been done already.  */
1737 void
1738 sparc_emit_set_const64 (rtx op0, rtx op1)
1739 {
1740   unsigned HOST_WIDE_INT high_bits, low_bits;
1741   int lowest_bit_set, highest_bit_set;
1742   int all_bits_between_are_set;
1743   rtx temp = 0;
1744 
1745   /* Sanity check that we know what we are working with.  */
1746   gcc_assert (TARGET_ARCH64
1747 	      && (GET_CODE (op0) == SUBREG
1748 		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
1749 
1750   if (reload_in_progress || reload_completed)
1751     temp = op0;
1752 
1753   if (GET_CODE (op1) != CONST_INT)
1754     {
1755       sparc_emit_set_symbolic_const64 (op0, op1, temp);
1756       return;
1757     }
1758 
1759   if (! temp)
1760     temp = gen_reg_rtx (DImode);
1761 
1762   high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1763   low_bits = (INTVAL (op1) & 0xffffffff);
1764 
1765   /* low_bits	bits 0  --> 31
1766      high_bits	bits 32 --> 63  */
1767 
1768   analyze_64bit_constant (high_bits, low_bits,
1769 			  &highest_bit_set, &lowest_bit_set,
1770 			  &all_bits_between_are_set);
1771 
1772   /* First try for a 2-insn sequence.  */
1773 
1774   /* These situations are preferred because the optimizer can
1775    * do more things with them:
1776    * 1) mov	-1, %reg
1777    *    sllx	%reg, shift, %reg
1778    * 2) mov	-1, %reg
1779    *    srlx	%reg, shift, %reg
1780    * 3) mov	some_small_const, %reg
1781    *    sllx	%reg, shift, %reg
1782    */
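  /* E.g. 0xfff0000000000000 (bits 52..63 set) becomes

	mov	-1, %reg
	sllx	%reg, 52, %reg  */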
1783   if (((highest_bit_set == 63
1784 	|| lowest_bit_set == 0)
1785        && all_bits_between_are_set != 0)
1786       || ((highest_bit_set - lowest_bit_set) < 12))
1787     {
1788       HOST_WIDE_INT the_const = -1;
1789       int shift = lowest_bit_set;
1790 
1791       if ((highest_bit_set != 63
1792 	   && lowest_bit_set != 0)
1793 	  || all_bits_between_are_set == 0)
1794 	{
1795 	  the_const =
1796 	    create_simple_focus_bits (high_bits, low_bits,
1797 				      lowest_bit_set, 0);
1798 	}
1799       else if (lowest_bit_set == 0)
1800 	shift = -(63 - highest_bit_set);
1801 
1802       gcc_assert (SPARC_SIMM13_P (the_const));
1803       gcc_assert (shift != 0);
1804 
1805       emit_insn (gen_safe_SET64 (temp, the_const));
1806       if (shift > 0)
1807 	emit_insn (gen_rtx_SET (VOIDmode,
1808 				op0,
1809 				gen_rtx_ASHIFT (DImode,
1810 						temp,
1811 						GEN_INT (shift))));
1812       else if (shift < 0)
1813 	emit_insn (gen_rtx_SET (VOIDmode,
1814 				op0,
1815 				gen_rtx_LSHIFTRT (DImode,
1816 						  temp,
1817 						  GEN_INT (-shift))));
1818       return;
1819     }
1820 
  /* Now the set bits span 21 or fewer positions somewhere in the word.
1822    * 1) sethi	%hi(focus_bits), %reg
1823    *    sllx	%reg, shift, %reg
1824    * 2) sethi	%hi(focus_bits), %reg
1825    *    srlx	%reg, shift, %reg
1826    */
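  /* E.g. for bits 21..40 set (0x1ffffe00000), focus_bits is
     0x3ffffc00 and we emit

	sethi	%hi(0x3ffffc00), %reg
	sllx	%reg, 11, %reg  */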
1827   if ((highest_bit_set - lowest_bit_set) < 21)
1828     {
1829       unsigned HOST_WIDE_INT focus_bits =
1830 	create_simple_focus_bits (high_bits, low_bits,
1831 				  lowest_bit_set, 10);
1832 
1833       gcc_assert (SPARC_SETHI_P (focus_bits));
1834       gcc_assert (lowest_bit_set != 10);
1835 
1836       emit_insn (gen_safe_HIGH64 (temp, focus_bits));
1837 
1838       /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
1839       if (lowest_bit_set < 10)
1840 	emit_insn (gen_rtx_SET (VOIDmode,
1841 				op0,
1842 				gen_rtx_LSHIFTRT (DImode, temp,
1843 						  GEN_INT (10 - lowest_bit_set))));
1844       else if (lowest_bit_set > 10)
1845 	emit_insn (gen_rtx_SET (VOIDmode,
1846 				op0,
1847 				gen_rtx_ASHIFT (DImode, temp,
1848 						GEN_INT (lowest_bit_set - 10))));
1849       return;
1850     }
1851 
1852   /* 1) sethi	%hi(low_bits), %reg
1853    *    or	%reg, %lo(low_bits), %reg
1854    * 2) sethi	%hi(~low_bits), %reg
1855    *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1856    */
1857   if (high_bits == 0
1858       || high_bits == 0xffffffff)
1859     {
1860       sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1861 				     (high_bits == 0xffffffff));
1862       return;
1863     }
1864 
1865   /* Now, try 3-insn sequences.  */
1866 
1867   /* 1) sethi	%hi(high_bits), %reg
1868    *    or	%reg, %lo(high_bits), %reg
1869    *    sllx	%reg, 32, %reg
1870    */
1871   if (low_bits == 0)
1872     {
1873       sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1874       return;
1875     }
1876 
1877   /* We may be able to do something quick
1878      when the constant is negated, so try that.  */
1879   if (const64_is_2insns ((~high_bits) & 0xffffffff,
1880 			 (~low_bits) & 0xfffffc00))
1881     {
1882       /* NOTE: The trailing bits get XOR'd so we need the
1883 	 non-negated bits, not the negated ones.  */
1884       unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1885 
1886       if ((((~high_bits) & 0xffffffff) == 0
1887 	   && ((~low_bits) & 0x80000000) == 0)
1888 	  || (((~high_bits) & 0xffffffff) == 0xffffffff
1889 	      && ((~low_bits) & 0x80000000) != 0))
1890 	{
1891 	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
1892 
1893 	  if ((SPARC_SETHI_P (fast_int)
1894 	       && (~high_bits & 0xffffffff) == 0)
1895 	      || SPARC_SIMM13_P (fast_int))
1896 	    emit_insn (gen_safe_SET64 (temp, fast_int));
1897 	  else
1898 	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
1899 	}
1900       else
1901 	{
1902 	  rtx negated_const;
1903 	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1904 				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
1905 	  sparc_emit_set_const64 (temp, negated_const);
1906 	}
1907 
1908       /* If we are XOR'ing with -1, then we should emit a one's complement
1909 	 instead.  This way the combiner will notice logical operations
1910 	 such as ANDN later on and substitute.  */
1911       if (trailing_bits == 0x3ff)
1912 	{
1913 	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1914 				  gen_rtx_NOT (DImode, temp)));
1915 	}
1916       else
1917 	{
1918 	  emit_insn (gen_rtx_SET (VOIDmode,
1919 				  op0,
1920 				  gen_safe_XOR64 (temp,
1921 						  (-0x400 | trailing_bits))));
1922 	}
1923       return;
1924     }
1925 
1926   /* 1) sethi	%hi(xxx), %reg
1927    *    or	%reg, %lo(xxx), %reg
1928    *	sllx	%reg, yyy, %reg
1929    *
1930    * ??? This is just a generalized version of the low_bits==0
1931    * thing above, FIXME...
1932    */
1933   if ((highest_bit_set - lowest_bit_set) < 32)
1934     {
1935       unsigned HOST_WIDE_INT focus_bits =
1936 	create_simple_focus_bits (high_bits, low_bits,
1937 				  lowest_bit_set, 0);
1938 
      /* We cannot get here with all set bits in one half of the word;
	 the high_bits == 0 and low_bits == 0 cases were handled above.  */
1940       gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
1941 
1942       /* So what we know is that the set bits straddle the
1943 	 middle of the 64-bit word.  */
1944       sparc_emit_set_const64_quick2 (op0, temp,
1945 				     focus_bits, 0,
1946 				     lowest_bit_set);
1947       return;
1948     }
1949 
1950   /* 1) sethi	%hi(high_bits), %reg
1951    *    or	%reg, %lo(high_bits), %reg
1952    *    sllx	%reg, 32, %reg
1953    *	or	%reg, low_bits, %reg
1954    */
  if (SPARC_SIMM13_P (low_bits)
      && ((int) low_bits > 0))
1957     {
1958       sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
1959       return;
1960     }
1961 
1962   /* The easiest way when all else fails, is full decomposition.  */
1963 #if 0
1964   printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
1965 	  high_bits, low_bits, ~high_bits, ~low_bits);
1966 #endif
1967   sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
1968 }
1969 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
1970 
1971 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1972    return the mode to be used for the comparison.  For floating-point,
1973    CCFP[E]mode is used.  CC_NOOVmode should be used when the first operand
1974    is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
1975    processing is needed.  */
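/* Note: the LT/LE/GT/GE cases below are the comparisons that must
   signal an exception on unordered (NaN) operands, hence CCFPEmode
   (the fcmpe forms); the rest are quiet and use CCFPmode (fcmp).  */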
1976 
1977 enum machine_mode
1978 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
1979 {
1980   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1981     {
1982       switch (op)
1983 	{
1984 	case EQ:
1985 	case NE:
1986 	case UNORDERED:
1987 	case ORDERED:
1988 	case UNLT:
1989 	case UNLE:
1990 	case UNGT:
1991 	case UNGE:
1992 	case UNEQ:
1993 	case LTGT:
1994 	  return CCFPmode;
1995 
1996 	case LT:
1997 	case LE:
1998 	case GT:
1999 	case GE:
2000 	  return CCFPEmode;
2001 
2002 	default:
2003 	  gcc_unreachable ();
2004 	}
2005     }
2006   else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2007 	   || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2008     {
2009       if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2010 	return CCX_NOOVmode;
2011       else
2012 	return CC_NOOVmode;
2013     }
2014   else
2015     {
2016       if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2017 	return CCXmode;
2018       else
2019 	return CCmode;
2020     }
2021 }
2022 
2023 /* Emit the compare insn and return the CC reg for a CODE comparison
2024    with operands X and Y.  */
2025 
2026 static rtx
2027 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2028 {
2029   enum machine_mode mode;
2030   rtx cc_reg;
2031 
2032   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2033     return x;
2034 
2035   mode = SELECT_CC_MODE (code, x, y);
2036 
2037   /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2038      fcc regs (cse can't tell they're really call clobbered regs and will
2039      remove a duplicate comparison even if there is an intervening function
2040      call - it will then try to reload the cc reg via an int reg which is why
2041      we need the movcc patterns).  It is possible to provide the movcc
2042      patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
2043      registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
2044      to tell cse that CCFPE mode registers (even pseudos) are call
2045      clobbered.  */
2046 
2047   /* ??? This is an experiment.  Rather than making changes to cse which may
2048      or may not be easy/clean, we do our own cse.  This is possible because
2049      we will generate hard registers.  Cse knows they're call clobbered (it
2050      doesn't know the same thing about pseudos). If we guess wrong, no big
2051      deal, but if we win, great!  */
2052 
2053   if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2054 #if 1 /* experiment */
2055     {
2056       int reg;
2057       /* We cycle through the registers to ensure they're all exercised.  */
2058       static int next_fcc_reg = 0;
2059       /* Previous x,y for each fcc reg.  */
2060       static rtx prev_args[4][2];
2061 
2062       /* Scan prev_args for x,y.  */
2063       for (reg = 0; reg < 4; reg++)
2064 	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2065 	  break;
2066       if (reg == 4)
2067 	{
2068 	  reg = next_fcc_reg;
2069 	  prev_args[reg][0] = x;
2070 	  prev_args[reg][1] = y;
2071 	  next_fcc_reg = (next_fcc_reg + 1) & 3;
2072 	}
2073       cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2074     }
2075 #else
2076     cc_reg = gen_reg_rtx (mode);
2077 #endif /* ! experiment */
2078   else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2079     cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2080   else
2081     cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2082 
  /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD.  If we do,
     this will only result in an unrecognizable insn, so there is no
     point in asserting.  */
2085   emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2086 
2087   return cc_reg;
2088 }
2089 
2090 
2091 /* Emit the compare insn and return the CC reg for the comparison in CMP.  */
2092 
2093 rtx
2094 gen_compare_reg (rtx cmp)
2095 {
2096   return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2097 }
2098 
2099 /* This function is used for v9 only.
2100    DEST is the target of the Scc insn.
2101    CODE is the code for an Scc's comparison.
2102    X and Y are the values we compare.
2103 
2104    This function is needed to turn
2105 
2106 	   (set (reg:SI 110)
2107 	       (gt (reg:CCX 100 %icc)
2108 	           (const_int 0)))
2109    into
2110 	   (set (reg:SI 110)
2111 	       (gt:DI (reg:CCX 100 %icc)
2112 	           (const_int 0)))
2113 
   I.e. the instruction recognizer needs to see the mode of the comparison to
2115    find the right instruction. We could use "gt:DI" right in the
2116    define_expand, but leaving it out allows us to handle DI, SI, etc.  */
2117 
2118 static int
2119 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2120 {
2121   if (! TARGET_ARCH64
2122       && (GET_MODE (x) == DImode
2123 	  || GET_MODE (dest) == DImode))
2124     return 0;
2125 
2126   /* Try to use the movrCC insns.  */
2127   if (TARGET_ARCH64
2128       && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2129       && y == const0_rtx
2130       && v9_regcmp_p (compare_code))
2131     {
2132       rtx op0 = x;
2133       rtx temp;
2134 
2135       /* Special case for op0 != 0.  This can be done with one instruction if
2136 	 dest == x.  */
2137 
2138       if (compare_code == NE
2139 	  && GET_MODE (dest) == DImode
2140 	  && rtx_equal_p (op0, dest))
2141 	{
2142 	  emit_insn (gen_rtx_SET (VOIDmode, dest,
2143 			      gen_rtx_IF_THEN_ELSE (DImode,
2144 				       gen_rtx_fmt_ee (compare_code, DImode,
2145 						       op0, const0_rtx),
2146 				       const1_rtx,
2147 				       dest)));
2148 	  return 1;
2149 	}
2150 
2151       if (reg_overlap_mentioned_p (dest, op0))
2152 	{
2153 	  /* Handle the case where dest == x.
2154 	     We "early clobber" the result.  */
2155 	  op0 = gen_reg_rtx (GET_MODE (x));
2156 	  emit_move_insn (op0, x);
2157 	}
2158 
2159       emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2160       if (GET_MODE (op0) != DImode)
2161 	{
2162 	  temp = gen_reg_rtx (DImode);
2163 	  convert_move (temp, op0, 0);
2164 	}
2165       else
2166 	temp = op0;
2167       emit_insn (gen_rtx_SET (VOIDmode, dest,
2168 			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2169 				   gen_rtx_fmt_ee (compare_code, DImode,
2170 						   temp, const0_rtx),
2171 				   const1_rtx,
2172 				   dest)));
2173       return 1;
2174     }
2175   else
2176     {
2177       x = gen_compare_reg_1 (compare_code, x, y);
2178       y = const0_rtx;
2179 
2180       gcc_assert (GET_MODE (x) != CC_NOOVmode
2181 		  && GET_MODE (x) != CCX_NOOVmode);
2182 
2183       emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2184       emit_insn (gen_rtx_SET (VOIDmode, dest,
2185 			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2186 				   gen_rtx_fmt_ee (compare_code,
2187 						   GET_MODE (x), x, y),
2188 				    const1_rtx, dest)));
2189       return 1;
2190     }
2191 }
2192 
2193 
2194 /* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
2195    without jumps using the addx/subx instructions.  */
2196 
2197 bool
2198 emit_scc_insn (rtx operands[])
2199 {
2200   rtx tem;
2201   rtx x;
2202   rtx y;
2203   enum rtx_code code;
2204 
2205   /* The quad-word fp compare library routines all return nonzero to indicate
2206      true, which is different from the equivalent libgcc routines, so we must
2207      handle them specially here.  */
2208   if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2209     {
2210       operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2211 					      GET_CODE (operands[1]));
2212       operands[2] = XEXP (operands[1], 0);
2213       operands[3] = XEXP (operands[1], 1);
2214     }
2215 
2216   code = GET_CODE (operands[1]);
2217   x = operands[2];
2218   y = operands[3];
2219 
2220   /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2221      more applications).  The exception to this is "reg != 0" which can
2222      be done in one instruction on v9 (so we do it).  */
2223   if (code == EQ)
2224     {
2225       if (GET_MODE (x) == SImode)
2226         {
2227           rtx pat = gen_seqsi_special (operands[0], x, y);
2228           emit_insn (pat);
2229           return true;
2230         }
2231       else if (GET_MODE (x) == DImode)
2232         {
2233           rtx pat = gen_seqdi_special (operands[0], x, y);
2234           emit_insn (pat);
2235           return true;
2236         }
2237     }
2238 
2239   if (code == NE)
2240     {
2241       if (GET_MODE (x) == SImode)
2242         {
2243           rtx pat = gen_snesi_special (operands[0], x, y);
2244           emit_insn (pat);
2245           return true;
2246         }
2247       else if (GET_MODE (x) == DImode)
2248         {
2249           rtx pat = gen_snedi_special (operands[0], x, y);
2250           emit_insn (pat);
2251           return true;
2252         }
2253     }
2254 
2255   /* For the rest, on v9 we can use conditional moves.  */
2256 
2257   if (TARGET_V9)
2258     {
2259       if (gen_v9_scc (operands[0], code, x, y))
2260         return true;
2261     }
2262 
  /* We can do LTU and GEU using the addx/subx instructions too.  And
     for GTU/LEU, if both operands are registers, swap them and fall
     back to the easy case.  */
2266   if (code == GTU || code == LEU)
2267     {
2268       if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2269           && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2270         {
2271           tem = x;
2272           x = y;
2273           y = tem;
2274           code = swap_condition (code);
2275         }
2276     }
2277 
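  /* The result of an unsigned X < Y comparison is just the carry out of
     X - Y, so e.g. LTU can be read back with something like

	subcc	%x, %y, %g0
	addx	%g0, 0, %dest

     and GEU with the complementary subx form.  */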
2278   if (code == LTU || code == GEU)
2279     {
2280       emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2281 			      gen_rtx_fmt_ee (code, SImode,
2282 					      gen_compare_reg_1 (code, x, y),
2283 					      const0_rtx)));
2284       return true;
2285     }
2286 
2287   /* Nope, do branches.  */
2288   return false;
2289 }
2290 
2291 /* Emit a conditional jump insn for the v9 architecture using comparison code
2292    CODE and jump target LABEL.
2293    This function exists to take advantage of the v9 brxx insns.  */
2294 
2295 static void
2296 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2297 {
2298   emit_jump_insn (gen_rtx_SET (VOIDmode,
2299 			   pc_rtx,
2300 			   gen_rtx_IF_THEN_ELSE (VOIDmode,
2301 				    gen_rtx_fmt_ee (code, GET_MODE (op0),
2302 						    op0, const0_rtx),
2303 				    gen_rtx_LABEL_REF (VOIDmode, label),
2304 				    pc_rtx)));
2305 }
2306 
2307 void
2308 emit_conditional_branch_insn (rtx operands[])
2309 {
2310   /* The quad-word fp compare library routines all return nonzero to indicate
2311      true, which is different from the equivalent libgcc routines, so we must
2312      handle them specially here.  */
2313   if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2314     {
2315       operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2316 					      GET_CODE (operands[0]));
2317       operands[1] = XEXP (operands[0], 0);
2318       operands[2] = XEXP (operands[0], 1);
2319     }
2320 
2321   if (TARGET_ARCH64 && operands[2] == const0_rtx
2322       && GET_CODE (operands[1]) == REG
2323       && GET_MODE (operands[1]) == DImode)
2324     {
2325       emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2326       return;
2327     }
2328 
2329   operands[1] = gen_compare_reg (operands[0]);
2330   operands[2] = const0_rtx;
2331   operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2332 				operands[1], operands[2]);
2333   emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2334 				  operands[3]));
2335 }
2336 
2337 
2338 /* Generate a DFmode part of a hard TFmode register.
2339    REG is the TFmode hard register, LOW is 1 for the
   low 64 bits of the register and 0 otherwise.
2341  */
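/* E.g. on 32-bit (where FP words are big-endian), a TFmode value in
   %f0..%f3 has its high DFmode half in %f0 and its low half in %f2.  */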
2342 rtx
2343 gen_df_reg (rtx reg, int low)
2344 {
2345   int regno = REGNO (reg);
2346 
2347   if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2348     regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
2349   return gen_rtx_REG (DFmode, regno);
2350 }
2351 
2352 /* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
2353    Unlike normal calls, TFmode operands are passed by reference.  It is
2354    assumed that no more than 3 operands are required.  */
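/* E.g. a TFmode addition OP0 = OP1 + OP2 becomes, in ABI terms,
   _Qp_add (&op0, &op1, &op2), every TFmode value being passed by
   reference.  */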
2355 
2356 static void
2357 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2358 {
2359   rtx ret_slot = NULL, arg[3], func_sym;
2360   int i;
2361 
2362   /* We only expect to be called for conversions, unary, and binary ops.  */
2363   gcc_assert (nargs == 2 || nargs == 3);
2364 
2365   for (i = 0; i < nargs; ++i)
2366     {
2367       rtx this_arg = operands[i];
2368       rtx this_slot;
2369 
2370       /* TFmode arguments and return values are passed by reference.  */
2371       if (GET_MODE (this_arg) == TFmode)
2372 	{
2373 	  int force_stack_temp;
2374 
2375 	  force_stack_temp = 0;
2376 	  if (TARGET_BUGGY_QP_LIB && i == 0)
2377 	    force_stack_temp = 1;
2378 
2379 	  if (GET_CODE (this_arg) == MEM
2380 	      && ! force_stack_temp)
2381 	    this_arg = XEXP (this_arg, 0);
2382 	  else if (CONSTANT_P (this_arg)
2383 		   && ! force_stack_temp)
2384 	    {
2385 	      this_slot = force_const_mem (TFmode, this_arg);
2386 	      this_arg = XEXP (this_slot, 0);
2387 	    }
2388 	  else
2389 	    {
2390 	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
2391 
2392 	      /* Operand 0 is the return value.  We'll copy it out later.  */
2393 	      if (i > 0)
2394 		emit_move_insn (this_slot, this_arg);
2395 	      else
2396 		ret_slot = this_slot;
2397 
2398 	      this_arg = XEXP (this_slot, 0);
2399 	    }
2400 	}
2401 
2402       arg[i] = this_arg;
2403     }
2404 
2405   func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2406 
2407   if (GET_MODE (operands[0]) == TFmode)
2408     {
2409       if (nargs == 2)
2410 	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2411 			   arg[0], GET_MODE (arg[0]),
2412 			   arg[1], GET_MODE (arg[1]));
2413       else
2414 	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2415 			   arg[0], GET_MODE (arg[0]),
2416 			   arg[1], GET_MODE (arg[1]),
2417 			   arg[2], GET_MODE (arg[2]));
2418 
2419       if (ret_slot)
2420 	emit_move_insn (operands[0], ret_slot);
2421     }
2422   else
2423     {
2424       rtx ret;
2425 
2426       gcc_assert (nargs == 2);
2427 
2428       ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2429 				     GET_MODE (operands[0]), 1,
2430 				     arg[1], GET_MODE (arg[1]));
2431 
2432       if (ret != operands[0])
2433 	emit_move_insn (operands[0], ret);
2434     }
2435 }
2436 
/* Expand soft-float TFmode operations into calls to the SPARC ABI
   _Qp_* library routines.  */
2438 
2439 static void
2440 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
2441 {
2442   const char *func;
2443 
2444   switch (code)
2445     {
2446     case PLUS:
2447       func = "_Qp_add";
2448       break;
2449     case MINUS:
2450       func = "_Qp_sub";
2451       break;
2452     case MULT:
2453       func = "_Qp_mul";
2454       break;
2455     case DIV:
2456       func = "_Qp_div";
2457       break;
2458     default:
2459       gcc_unreachable ();
2460     }
2461 
2462   emit_soft_tfmode_libcall (func, 3, operands);
2463 }
2464 
2465 static void
2466 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
2467 {
2468   const char *func;
2469 
2470   gcc_assert (code == SQRT);
2471   func = "_Qp_sqrt";
2472 
2473   emit_soft_tfmode_libcall (func, 2, operands);
2474 }
2475 
2476 static void
2477 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
2478 {
2479   const char *func;
2480 
2481   switch (code)
2482     {
2483     case FLOAT_EXTEND:
2484       switch (GET_MODE (operands[1]))
2485 	{
2486 	case SFmode:
2487 	  func = "_Qp_stoq";
2488 	  break;
2489 	case DFmode:
2490 	  func = "_Qp_dtoq";
2491 	  break;
2492 	default:
2493 	  gcc_unreachable ();
2494 	}
2495       break;
2496 
2497     case FLOAT_TRUNCATE:
2498       switch (GET_MODE (operands[0]))
2499 	{
2500 	case SFmode:
2501 	  func = "_Qp_qtos";
2502 	  break;
2503 	case DFmode:
2504 	  func = "_Qp_qtod";
2505 	  break;
2506 	default:
2507 	  gcc_unreachable ();
2508 	}
2509       break;
2510 
2511     case FLOAT:
2512       switch (GET_MODE (operands[1]))
2513 	{
2514 	case SImode:
2515 	  func = "_Qp_itoq";
2516 	  if (TARGET_ARCH64)
2517 	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
2518 	  break;
2519 	case DImode:
2520 	  func = "_Qp_xtoq";
2521 	  break;
2522 	default:
2523 	  gcc_unreachable ();
2524 	}
2525       break;
2526 
2527     case UNSIGNED_FLOAT:
2528       switch (GET_MODE (operands[1]))
2529 	{
2530 	case SImode:
2531 	  func = "_Qp_uitoq";
2532 	  if (TARGET_ARCH64)
2533 	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
2534 	  break;
2535 	case DImode:
2536 	  func = "_Qp_uxtoq";
2537 	  break;
2538 	default:
2539 	  gcc_unreachable ();
2540 	}
2541       break;
2542 
2543     case FIX:
2544       switch (GET_MODE (operands[0]))
2545 	{
2546 	case SImode:
2547 	  func = "_Qp_qtoi";
2548 	  break;
2549 	case DImode:
2550 	  func = "_Qp_qtox";
2551 	  break;
2552 	default:
2553 	  gcc_unreachable ();
2554 	}
2555       break;
2556 
2557     case UNSIGNED_FIX:
2558       switch (GET_MODE (operands[0]))
2559 	{
2560 	case SImode:
2561 	  func = "_Qp_qtoui";
2562 	  break;
2563 	case DImode:
2564 	  func = "_Qp_qtoux";
2565 	  break;
2566 	default:
2567 	  gcc_unreachable ();
2568 	}
2569       break;
2570 
2571     default:
2572       gcc_unreachable ();
2573     }
2574 
2575   emit_soft_tfmode_libcall (func, 2, operands);
2576 }
2577 
/* Expand a hard-float TFmode operation.  All arguments must be in
   registers.  */
2580 
2581 static void
2582 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
2583 {
2584   rtx op, dest;
2585 
2586   if (GET_RTX_CLASS (code) == RTX_UNARY)
2587     {
2588       operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2589       op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2590     }
2591   else
2592     {
2593       operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2594       operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2595       op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2596 			   operands[1], operands[2]);
2597     }
2598 
2599   if (register_operand (operands[0], VOIDmode))
2600     dest = operands[0];
2601   else
2602     dest = gen_reg_rtx (GET_MODE (operands[0]));
2603 
2604   emit_insn (gen_rtx_SET (VOIDmode, dest, op));
2605 
2606   if (dest != operands[0])
2607     emit_move_insn (operands[0], dest);
2608 }
2609 
2610 void
2611 emit_tfmode_binop (enum rtx_code code, rtx *operands)
2612 {
2613   if (TARGET_HARD_QUAD)
2614     emit_hard_tfmode_operation (code, operands);
2615   else
2616     emit_soft_tfmode_binop (code, operands);
2617 }
2618 
2619 void
2620 emit_tfmode_unop (enum rtx_code code, rtx *operands)
2621 {
2622   if (TARGET_HARD_QUAD)
2623     emit_hard_tfmode_operation (code, operands);
2624   else
2625     emit_soft_tfmode_unop (code, operands);
2626 }
2627 
2628 void
2629 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
2630 {
2631   if (TARGET_HARD_QUAD)
2632     emit_hard_tfmode_operation (code, operands);
2633   else
2634     emit_soft_tfmode_cvt (code, operands);
2635 }
2636 
/* Return nonzero if a branch/jump/call instruction will be emitting
   a nop into its delay slot.  */
2639 
2640 int
2641 empty_delay_slot (rtx insn)
2642 {
2643   rtx seq;
2644 
  /* If there is no previous instruction (this should not happen),
     return true.  */
2646   if (PREV_INSN (insn) == NULL)
2647     return 1;
2648 
2649   seq = NEXT_INSN (PREV_INSN (insn));
2650   if (GET_CODE (PATTERN (seq)) == SEQUENCE)
2651     return 0;
2652 
2653   return 1;
2654 }
2655 
2656 /* Return nonzero if TRIAL can go into the call delay slot.  */
2657 
2658 int
2659 tls_call_delay (rtx trial)
2660 {
2661   rtx pat;
2662 
2663   /* Binutils allows
2664        call __tls_get_addr, %tgd_call (foo)
2665         add %l7, %o0, %o0, %tgd_add (foo)
2666      while Sun as/ld does not.  */
2667   if (TARGET_GNU_TLS || !TARGET_TLS)
2668     return 1;
2669 
2670   pat = PATTERN (trial);
2671 
2672   /* We must reject tgd_add{32|64}, i.e.
2673        (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
2674      and tldm_add{32|64}, i.e.
2675        (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
2676      for Sun as/ld.  */
2677   if (GET_CODE (pat) == SET
2678       && GET_CODE (SET_SRC (pat)) == PLUS)
2679     {
2680       rtx unspec = XEXP (SET_SRC (pat), 1);
2681 
2682       if (GET_CODE (unspec) == UNSPEC
2683 	  && (XINT (unspec, 1) == UNSPEC_TLSGD
2684 	      || XINT (unspec, 1) == UNSPEC_TLSLDM))
2685 	return 0;
2686     }
2687 
2688   return 1;
2689 }
2690 
2691 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
2692    instruction.  RETURN_P is true if the v9 variant 'return' is to be
2693    considered in the test too.
2694 
2695    TRIAL must be a SET whose destination is a REG appropriate for the
2696    'restore' instruction or, if RETURN_P is true, for the 'return'
2697    instruction.  */
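/* E.g. an insn computing %i0 = %i0 + %i1 can be folded into the
   epilogue as `restore %i0, %i1, %o0', the destination being named
   in the caller's register window.  */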
2698 
2699 static int
2700 eligible_for_restore_insn (rtx trial, bool return_p)
2701 {
2702   rtx pat = PATTERN (trial);
2703   rtx src = SET_SRC (pat);
2704 
2705   /* The 'restore src,%g0,dest' pattern for word mode and below.  */
2706   if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2707       && arith_operand (src, GET_MODE (src)))
2708     {
2709       if (TARGET_ARCH64)
2710         return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2711       else
2712         return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
2713     }
2714 
2715   /* The 'restore src,%g0,dest' pattern for double-word mode.  */
2716   else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2717 	   && arith_double_operand (src, GET_MODE (src)))
2718     return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2719 
2720   /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
2721   else if (! TARGET_FPU && register_operand (src, SFmode))
2722     return 1;
2723 
2724   /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
2725   else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
2726     return 1;
2727 
2728   /* If we have the 'return' instruction, anything that does not use
2729      local or output registers and can go into a delay slot wins.  */
2730   else if (return_p && TARGET_V9 && ! epilogue_renumber (&pat, 1)
2731 	   && (get_attr_in_uncond_branch_delay (trial)
2732 	       == IN_UNCOND_BRANCH_DELAY_TRUE))
2733     return 1;
2734 
2735   /* The 'restore src1,src2,dest' pattern for SImode.  */
2736   else if (GET_CODE (src) == PLUS
2737 	   && register_operand (XEXP (src, 0), SImode)
2738 	   && arith_operand (XEXP (src, 1), SImode))
2739     return 1;
2740 
2741   /* The 'restore src1,src2,dest' pattern for DImode.  */
2742   else if (GET_CODE (src) == PLUS
2743 	   && register_operand (XEXP (src, 0), DImode)
2744 	   && arith_double_operand (XEXP (src, 1), DImode))
2745     return 1;
2746 
2747   /* The 'restore src1,%lo(src2),dest' pattern.  */
2748   else if (GET_CODE (src) == LO_SUM
2749 	   && ! TARGET_CM_MEDMID
2750 	   && ((register_operand (XEXP (src, 0), SImode)
2751 	        && immediate_operand (XEXP (src, 1), SImode))
2752 	       || (TARGET_ARCH64
2753 		   && register_operand (XEXP (src, 0), DImode)
2754 		   && immediate_operand (XEXP (src, 1), DImode))))
2755     return 1;
2756 
  /* The 'restore src,src,dest' pattern: a left shift by 1
     is equivalent to SRC + SRC.  */
2758   else if (GET_CODE (src) == ASHIFT
2759 	   && (register_operand (XEXP (src, 0), SImode)
2760 	       || register_operand (XEXP (src, 0), DImode))
2761 	   && XEXP (src, 1) == const1_rtx)
2762     return 1;
2763 
2764   return 0;
2765 }
2766 
2767 /* Return nonzero if TRIAL can go into the function return's
2768    delay slot.  */
2769 
2770 int
2771 eligible_for_return_delay (rtx trial)
2772 {
2773   rtx pat;
2774 
2775   if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2776     return 0;
2777 
2778   if (get_attr_length (trial) != 1)
2779     return 0;
2780 
2781   /* If the function uses __builtin_eh_return, the eh_return machinery
2782      occupies the delay slot.  */
2783   if (crtl->calls_eh_return)
2784     return 0;
2785 
2786   /* In the case of a true leaf function, anything can go into the slot.  */
2787   if (sparc_leaf_function_p)
2788     return get_attr_in_uncond_branch_delay (trial)
2789 	   == IN_UNCOND_BRANCH_DELAY_TRUE;
2790 
2791   pat = PATTERN (trial);
2792 
2793   /* Otherwise, only operations which can be done in tandem with
2794      a `restore' or `return' insn can go into the delay slot.  */
2795   if (GET_CODE (SET_DEST (pat)) != REG
2796       || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24))
2797     return 0;
2798 
  /* If this instruction sets up a floating-point register and we have a
     return instruction, it can probably go in.  But restore will not
     work with FP_REGS.  */
2802   if (REGNO (SET_DEST (pat)) >= 32)
2803     return (TARGET_V9
2804 	    && ! epilogue_renumber (&pat, 1)
2805 	    && (get_attr_in_uncond_branch_delay (trial)
2806 		== IN_UNCOND_BRANCH_DELAY_TRUE));
2807 
2808   return eligible_for_restore_insn (trial, true);
2809 }
2810 
2811 /* Return nonzero if TRIAL can go into the sibling call's
2812    delay slot.  */
2813 
2814 int
2815 eligible_for_sibcall_delay (rtx trial)
2816 {
2817   rtx pat;
2818 
2819   if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2820     return 0;
2821 
2822   if (get_attr_length (trial) != 1)
2823     return 0;
2824 
2825   pat = PATTERN (trial);
2826 
2827   if (sparc_leaf_function_p)
2828     {
2829       /* If the tail call is done using the call instruction,
2830 	 we have to restore %o7 in the delay slot.  */
2831       if (LEAF_SIBCALL_SLOT_RESERVED_P)
2832 	return 0;
2833 
      /* %g1 is used to build the function address.  */
2835       if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
2836 	return 0;
2837 
2838       return 1;
2839     }
2840 
2841   /* Otherwise, only operations which can be done in tandem with
2842      a `restore' insn can go into the delay slot.  */
2843   if (GET_CODE (SET_DEST (pat)) != REG
2844       || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
2845       || REGNO (SET_DEST (pat)) >= 32)
2846     return 0;
2847 
2848   /* If it mentions %o7, it can't go in, because sibcall will clobber it
2849      in most cases.  */
2850   if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
2851     return 0;
2852 
2853   return eligible_for_restore_insn (trial, false);
2854 }
2855 
2856 int
2857 short_branch (int uid1, int uid2)
2858 {
2859   int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);
2860 
2861   /* Leave a few words of "slop".  */
2862   if (delta >= -1023 && delta <= 1022)
2863     return 1;
2864 
2865   return 0;
2866 }
2867 
2868 /* Return nonzero if REG is not used after INSN.
2869    We assume REG is a reload reg, and therefore does
2870    not live past labels or calls or jumps.  */
2871 int
2872 reg_unused_after (rtx reg, rtx insn)
2873 {
2874   enum rtx_code code, prev_code = UNKNOWN;
2875 
2876   while ((insn = NEXT_INSN (insn)))
2877     {
2878       if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2879 	return 1;
2880 
      code = GET_CODE (insn);
      if (code == CODE_LABEL)
	return 1;
2884 
2885       if (INSN_P (insn))
2886 	{
2887 	  rtx set = single_set (insn);
2888 	  int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2889 	  if (set && in_src)
2890 	    return 0;
2891 	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2892 	    return 1;
2893 	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2894 	    return 0;
2895 	}
2896       prev_code = code;
2897     }
2898   return 1;
2899 }
2900 
2901 /* Determine if it's legal to put X into the constant pool.  This
2902    is not possible if X contains the address of a symbol that is
2903    not constant (TLS) or not known at final link time (PIC).  */
2904 
2905 static bool
2906 sparc_cannot_force_const_mem (rtx x)
2907 {
2908   switch (GET_CODE (x))
2909     {
2910     case CONST_INT:
2911     case CONST_DOUBLE:
2912     case CONST_VECTOR:
2913       /* Accept all non-symbolic constants.  */
2914       return false;
2915 
2916     case LABEL_REF:
2917       /* Labels are OK iff we are non-PIC.  */
2918       return flag_pic != 0;
2919 
2920     case SYMBOL_REF:
      /* 'Naked' TLS symbol references are never OK;
	 non-TLS symbols are OK iff we are non-PIC.  */
2923       if (SYMBOL_REF_TLS_MODEL (x))
2924 	return true;
2925       else
2926 	return flag_pic != 0;
2927 
2928     case CONST:
2929       return sparc_cannot_force_const_mem (XEXP (x, 0));
2930     case PLUS:
2931     case MINUS:
2932       return sparc_cannot_force_const_mem (XEXP (x, 0))
2933          || sparc_cannot_force_const_mem (XEXP (x, 1));
2934     case UNSPEC:
2935       return true;
2936     default:
2937       gcc_unreachable ();
2938     }
2939 }
2940 
2941 /* Global Offset Table support.  */
2942 static GTY(()) rtx got_helper_rtx = NULL_RTX;
2943 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
2944 
2945 /* Return the SYMBOL_REF for the Global Offset Table.  */
2946 
2947 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
2948 
2949 static rtx
2950 sparc_got (void)
2951 {
2952   if (!sparc_got_symbol)
2953     sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2954 
2955   return sparc_got_symbol;
2956 }
2957 
2958 /* Ensure that we are not using patterns that are not OK with PIC.  */
2959 
2960 int
2961 check_pic (int i)
2962 {
2963   rtx op;
2964 
2965   switch (flag_pic)
2966     {
2967     case 1:
2968       op = recog_data.operand[i];
2969       gcc_assert (GET_CODE (op) != SYMBOL_REF
		  && (GET_CODE (op) != CONST
2971 		      || (GET_CODE (XEXP (op, 0)) == MINUS
2972 			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
2973 			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
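      /* Fall through.  */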
2974     case 2:
2975     default:
2976       return 1;
2977     }
2978 }
2979 
2980 /* Return true if X is an address which needs a temporary register when
2981    reloaded while generating PIC code.  */
2982 
2983 int
2984 pic_address_needs_scratch (rtx x)
2985 {
  /* An address which is a symbol plus a non-SMALL_INT constant needs
     a temp reg.  */
2987   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2988       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2989       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2990       && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2991     return 1;
2992 
2993   return 0;
2994 }
2995 
2996 /* Determine if a given RTX is a valid constant.  We already know this
2997    satisfies CONSTANT_P.  */
2998 
2999 bool
3000 legitimate_constant_p (rtx x)
3001 {
3002   switch (GET_CODE (x))
3003     {
3004     case CONST:
3005     case SYMBOL_REF:
3006       if (sparc_tls_referenced_p (x))
3007 	return false;
3008       break;
3009 
3010     case CONST_DOUBLE:
3011       if (GET_MODE (x) == VOIDmode)
3012         return true;
3013 
      /* Floating-point constants are generally not OK.
	 The only exception is 0.0 in VIS.  */
3016       if (TARGET_VIS
3017 	  && SCALAR_FLOAT_MODE_P (GET_MODE (x))
3018 	  && const_zero_operand (x, GET_MODE (x)))
3019 	return true;
3020 
3021       return false;
3022 
3023     case CONST_VECTOR:
      /* Vector constants are generally not OK.
	 The only exception is 0 in VIS.  */
3026       if (TARGET_VIS
3027 	  && const_zero_operand (x, GET_MODE (x)))
3028 	return true;
3029 
3030       return false;
3031 
3032     default:
3033       break;
3034     }
3035 
3036   return true;
3037 }
3038 
3039 /* Determine if a given RTX is a valid constant address.  */
3040 
3041 bool
3042 constant_address_p (rtx x)
3043 {
3044   switch (GET_CODE (x))
3045     {
3046     case LABEL_REF:
3047     case CONST_INT:
3048     case HIGH:
3049       return true;
3050 
3051     case CONST:
3052       if (flag_pic && pic_address_needs_scratch (x))
3053 	return false;
3054       return legitimate_constant_p (x);
3055 
3056     case SYMBOL_REF:
3057       return !flag_pic && legitimate_constant_p (x);
3058 
3059     default:
3060       return false;
3061     }
3062 }
3063 
3064 /* Nonzero if the constant value X is a legitimate general operand
3065    when generating PIC code.  It is given that flag_pic is on and
3066    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
3067 
3068 bool
3069 legitimate_pic_operand_p (rtx x)
3070 {
3071   if (pic_address_needs_scratch (x))
3072     return false;
3073   if (sparc_tls_referenced_p (x))
3074     return false;
3075   return true;
3076 }
3077 
3078 /* Return nonzero if ADDR is a valid memory address.
3079    STRICT specifies whether strict register checking applies.  */
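/* The forms accepted below are, broadly: REG, REG + REG, REG + SIMM13,
   LO_SUM (REG, imm) and a bare SMALL_INT constant, subject to the
   mode-specific restrictions spelled out in the body.  */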
3080 
3081 static bool
3082 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3083 {
3084   rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3085 
3086   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3087     rs1 = addr;
3088   else if (GET_CODE (addr) == PLUS)
3089     {
3090       rs1 = XEXP (addr, 0);
3091       rs2 = XEXP (addr, 1);
3092 
      /* Canonicalize: REG comes first; if there are no regs,
	 LO_SUM comes first.  */
3095       if (!REG_P (rs1)
3096 	  && GET_CODE (rs1) != SUBREG
3097 	  && (REG_P (rs2)
3098 	      || GET_CODE (rs2) == SUBREG
3099 	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3100 	{
3101 	  rs1 = XEXP (addr, 1);
3102 	  rs2 = XEXP (addr, 0);
3103 	}
3104 
3105       if ((flag_pic == 1
3106 	   && rs1 == pic_offset_table_rtx
3107 	   && !REG_P (rs2)
3108 	   && GET_CODE (rs2) != SUBREG
3109 	   && GET_CODE (rs2) != LO_SUM
3110 	   && GET_CODE (rs2) != MEM
3111 	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3112 	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3113 	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3114 	  || ((REG_P (rs1)
3115 	       || GET_CODE (rs1) == SUBREG)
3116 	      && RTX_OK_FOR_OFFSET_P (rs2)))
3117 	{
3118 	  imm1 = rs2;
3119 	  rs2 = NULL;
3120 	}
3121       else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3122 	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3123 	{
3124 	  /* We prohibit REG + REG for TFmode when there are no quad move insns
3125 	     and we consequently need to split.  We do this because REG+REG
3126 	     is not an offsettable address.  If we get the situation in reload
3127 	     where source and destination of a movtf pattern are both MEMs with
3128 	     REG+REG address, then only one of them gets converted to an
3129 	     offsettable address.  */
3130 	  if (mode == TFmode
3131 	      && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
3132 	    return 0;
3133 
	  /* We prohibit REG + REG for DFmode/DImode on ARCH32 when not
	     optimizing, because mem_min_alignment is then likely to be zero
	     after reload and the forced split would lack a matching splitter
	     pattern.  */
3138 	  if (TARGET_ARCH32 && !optimize
3139 	      && (mode == DFmode || mode == DImode))
3140 	    return 0;
3141 	}
3142       else if (USE_AS_OFFSETABLE_LO10
3143 	       && GET_CODE (rs1) == LO_SUM
3144 	       && TARGET_ARCH64
3145 	       && ! TARGET_CM_MEDMID
3146 	       && RTX_OK_FOR_OLO10_P (rs2))
3147 	{
3148 	  rs2 = NULL;
3149 	  imm1 = XEXP (rs1, 1);
3150 	  rs1 = XEXP (rs1, 0);
3151 	  if (!CONSTANT_P (imm1)
3152 	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3153 	    return 0;
3154 	}
3155     }
3156   else if (GET_CODE (addr) == LO_SUM)
3157     {
3158       rs1 = XEXP (addr, 0);
3159       imm1 = XEXP (addr, 1);
3160 
3161       if (!CONSTANT_P (imm1)
3162 	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3163 	return 0;
3164 
3165       /* We can't allow TFmode in 32-bit mode, because an offset greater
3166 	 than the alignment (8) may cause the LO_SUM to overflow.  */
3167       if (mode == TFmode && TARGET_ARCH32)
3168 	return 0;
3169     }
3170   else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3171     return 1;
3172   else
3173     return 0;
3174 
3175   if (GET_CODE (rs1) == SUBREG)
3176     rs1 = SUBREG_REG (rs1);
3177   if (!REG_P (rs1))
3178     return 0;
3179 
3180   if (rs2)
3181     {
3182       if (GET_CODE (rs2) == SUBREG)
3183 	rs2 = SUBREG_REG (rs2);
3184       if (!REG_P (rs2))
3185 	return 0;
3186     }
3187 
3188   if (strict)
3189     {
3190       if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3191 	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3192 	return 0;
3193     }
3194   else
3195     {
3196       if ((REGNO (rs1) >= 32
3197 	   && REGNO (rs1) != FRAME_POINTER_REGNUM
3198 	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3199 	  || (rs2
3200 	      && (REGNO (rs2) >= 32
3201 		  && REGNO (rs2) != FRAME_POINTER_REGNUM
3202 		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3203 	return 0;
3204     }
3205   return 1;
3206 }
3207 
3208 /* Return the SYMBOL_REF for the tls_get_addr function.  */
3209 
3210 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3211 
3212 static rtx
3213 sparc_tls_get_addr (void)
3214 {
3215   if (!sparc_tls_symbol)
3216     sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3217 
3218   return sparc_tls_symbol;
3219 }
3220 
3221 /* Return the Global Offset Table to be used in TLS mode.  */
3222 
3223 static rtx
3224 sparc_tls_got (void)
3225 {
3226   /* In PIC mode, this is just the PIC offset table.  */
3227   if (flag_pic)
3228     {
3229       crtl->uses_pic_offset_table = 1;
3230       return pic_offset_table_rtx;
3231     }
3232 
3233   /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
3234      the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
3235   if (TARGET_SUN_TLS && TARGET_ARCH32)
3236     {
3237       load_got_register ();
3238       return global_offset_table_rtx;
3239     }
3240 
3241   /* In all other cases, we load a new pseudo with the GOT symbol.  */
3242   return copy_to_reg (sparc_got ());
3243 }
3244 
3245 /* Return true if X contains a thread-local symbol.  */
3246 
3247 static bool
3248 sparc_tls_referenced_p (rtx x)
3249 {
3250   if (!TARGET_HAVE_TLS)
3251     return false;
3252 
3253   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3254     x = XEXP (XEXP (x, 0), 0);
3255 
3256   if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3257     return true;
3258 
3259   /* That's all we handle in legitimize_tls_address for now.  */
3260   return false;
3261 }
3262 
3263 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
3264    this (thread-local) address.  */
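/* For the global dynamic model, e.g., the emitted 32-bit sequence is
   roughly

	sethi	%tgd_hi22(sym), %t1
	add	%t1, %tgd_lo10(sym), %t2
	add	%l7, %t2, %o0, %tgd_add(sym)
	call	__tls_get_addr, %tgd_call(sym)

   with the result returned in %o0.  */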
3265 
3266 static rtx
3267 legitimize_tls_address (rtx addr)
3268 {
3269   rtx temp1, temp2, temp3, ret, o0, got, insn;
3270 
3271   gcc_assert (can_create_pseudo_p ());
3272 
3273   if (GET_CODE (addr) == SYMBOL_REF)
3274     switch (SYMBOL_REF_TLS_MODEL (addr))
3275       {
3276       case TLS_MODEL_GLOBAL_DYNAMIC:
3277 	start_sequence ();
3278 	temp1 = gen_reg_rtx (SImode);
3279 	temp2 = gen_reg_rtx (SImode);
3280 	ret = gen_reg_rtx (Pmode);
3281 	o0 = gen_rtx_REG (Pmode, 8);
3282 	got = sparc_tls_got ();
3283 	emit_insn (gen_tgd_hi22 (temp1, addr));
3284 	emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3285 	if (TARGET_ARCH32)
3286 	  {
3287 	    emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3288 	    insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3289 						   addr, const1_rtx));
3290 	  }
3291 	else
3292 	  {
3293 	    emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3294 	    insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3295 						   addr, const1_rtx));
3296 	  }
3297         CALL_INSN_FUNCTION_USAGE (insn)
3298 	  = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3299 			       CALL_INSN_FUNCTION_USAGE (insn));
3300 	insn = get_insns ();
3301 	end_sequence ();
3302 	emit_libcall_block (insn, ret, o0, addr);
3303 	break;
3304 
3305       case TLS_MODEL_LOCAL_DYNAMIC:
3306 	start_sequence ();
3307 	temp1 = gen_reg_rtx (SImode);
3308 	temp2 = gen_reg_rtx (SImode);
3309 	temp3 = gen_reg_rtx (Pmode);
3310 	ret = gen_reg_rtx (Pmode);
3311 	o0 = gen_rtx_REG (Pmode, 8);
3312 	got = sparc_tls_got ();
3313 	emit_insn (gen_tldm_hi22 (temp1));
3314 	emit_insn (gen_tldm_lo10 (temp2, temp1));
3315 	if (TARGET_ARCH32)
3316 	  {
3317 	    emit_insn (gen_tldm_add32 (o0, got, temp2));
3318 	    insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
3319 						    const1_rtx));
3320 	  }
3321 	else
3322 	  {
3323 	    emit_insn (gen_tldm_add64 (o0, got, temp2));
3324 	    insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
3325 						    const1_rtx));
3326 	  }
3327         CALL_INSN_FUNCTION_USAGE (insn)
3328 	  = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3329 			       CALL_INSN_FUNCTION_USAGE (insn));
3330 	insn = get_insns ();
3331 	end_sequence ();
3332 	emit_libcall_block (insn, temp3, o0,
3333 			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
3334 					    UNSPEC_TLSLD_BASE));
3335 	temp1 = gen_reg_rtx (SImode);
3336 	temp2 = gen_reg_rtx (SImode);
3337 	emit_insn (gen_tldo_hix22 (temp1, addr));
3338 	emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
3339 	if (TARGET_ARCH32)
3340 	  emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
3341 	else
3342 	  emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
3343 	break;
3344 
3345       case TLS_MODEL_INITIAL_EXEC:
3346 	temp1 = gen_reg_rtx (SImode);
3347 	temp2 = gen_reg_rtx (SImode);
3348 	temp3 = gen_reg_rtx (Pmode);
3349 	got = sparc_tls_got ();
3350 	emit_insn (gen_tie_hi22 (temp1, addr));
3351 	emit_insn (gen_tie_lo10 (temp2, temp1, addr));
3352 	if (TARGET_ARCH32)
3353 	  emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
3354 	else
3355 	  emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
3356         if (TARGET_SUN_TLS)
3357 	  {
3358 	    ret = gen_reg_rtx (Pmode);
3359 	    if (TARGET_ARCH32)
3360 	      emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
3361 					temp3, addr));
3362 	    else
3363 	      emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
3364 					temp3, addr));
3365 	  }
3366 	else
3367 	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
3368 	break;
3369 
3370       case TLS_MODEL_LOCAL_EXEC:
3371 	temp1 = gen_reg_rtx (Pmode);
3372 	temp2 = gen_reg_rtx (Pmode);
3373 	if (TARGET_ARCH32)
3374 	  {
3375 	    emit_insn (gen_tle_hix22_sp32 (temp1, addr));
3376 	    emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
3377 	  }
3378 	else
3379 	  {
3380 	    emit_insn (gen_tle_hix22_sp64 (temp1, addr));
3381 	    emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
3382 	  }
3383 	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
3384 	break;
3385 
3386       default:
3387 	gcc_unreachable ();
3388       }
3389 
3390   else if (GET_CODE (addr) == CONST)
3391     {
3392       rtx base, offset;
3393 
3394       gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
3395 
3396       base = legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
3397       offset = XEXP (XEXP (addr, 0), 1);
3398 
3399       base = force_operand (base, NULL_RTX);
3400       if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
3401 	offset = force_reg (Pmode, offset);
3402       ret = gen_rtx_PLUS (Pmode, base, offset);
3403     }
3404 
3405   else
3406     gcc_unreachable ();  /* for now ... */
3407 
3408   return ret;
3409 }
3410 
3411 /* Legitimize PIC addresses.  If the address is already position-independent,
3412    we return ORIG.  Newly generated position-independent addresses go into a
3413    reg.  This is REG if nonzero, otherwise we allocate register(s) as
3414    necessary.  */
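/* With flag_pic == 1 the GOT slot is within reach of a 13-bit offset,
   so a single load from [%l7 + sym] suffices; with flag_pic == 2 the
   GOT offset is first built with a sethi/lo_sum pair, as done below.  */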
3415 
3416 static rtx
3417 legitimize_pic_address (rtx orig, rtx reg)
3418 {
3419   bool gotdata_op = false;
3420 
3421   if (GET_CODE (orig) == SYMBOL_REF
3422       /* See the comment in sparc_expand_move.  */
3423       || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
3424     {
3425       rtx pic_ref, address;
3426       rtx insn;
3427 
3428       if (reg == 0)
3429 	{
3430 	  gcc_assert (! reload_in_progress && ! reload_completed);
3431 	  reg = gen_reg_rtx (Pmode);
3432 	}
3433 
3434       if (flag_pic == 2)
3435 	{
3436 	  /* If not during reload, allocate another temp reg here for loading
3437 	     in the address, so that these instructions can be optimized
3438 	     properly.  */
3439 	  rtx temp_reg = ((reload_in_progress || reload_completed)
3440 			  ? reg : gen_reg_rtx (Pmode));
3441 
3442 	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
3443 	     won't get confused into thinking that these two instructions
3444 	     are loading in the true address of the symbol.  If in the
3445 	     future a PIC rtx exists, that should be used instead.  */
3446 	  if (TARGET_ARCH64)
3447 	    {
3448 	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
3449 	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
3450 	    }
3451 	  else
3452 	    {
3453 	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
3454 	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
3455 	    }
3456 	  address = temp_reg;
3457 	  gotdata_op = true;
3458 	}
3459       else
3460 	address = orig;
3461 
3462       crtl->uses_pic_offset_table = 1;
3463       if (gotdata_op)
3464 	{
3465 	  if (TARGET_ARCH64)
3466 	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg, pic_offset_table_rtx,
3467 							address, orig));
3468 	  else
3469 	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg, pic_offset_table_rtx,
3470 							address, orig));
3471 	}
3472       else
3473 	{
3474 	  pic_ref
3475 	    = gen_const_mem (Pmode,
3476 			     gen_rtx_PLUS (Pmode,
3477 					   pic_offset_table_rtx, address));
3478 	  insn = emit_move_insn (reg, pic_ref);
3479 	}
3480 
3481       /* Put a REG_EQUAL note on this insn, so that it can be optimized
3482 	 by the loop pass.  */
3483       set_unique_reg_note (insn, REG_EQUAL, orig);
3484       return reg;
3485     }
3486   else if (GET_CODE (orig) == CONST)
3487     {
3488       rtx base, offset;
3489 
3490       if (GET_CODE (XEXP (orig, 0)) == PLUS
3491 	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3492 	return orig;
3493 
3494       if (reg == 0)
3495 	{
3496 	  gcc_assert (! reload_in_progress && ! reload_completed);
3497 	  reg = gen_reg_rtx (Pmode);
3498 	}
3499 
3500       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3501       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
3502       offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
3503 			 	       base == reg ? NULL_RTX : reg);
3504 
3505       if (GET_CODE (offset) == CONST_INT)
3506 	{
3507 	  if (SMALL_INT (offset))
3508 	    return plus_constant (base, INTVAL (offset));
3509 	  else if (! reload_in_progress && ! reload_completed)
3510 	    offset = force_reg (Pmode, offset);
3511 	  else
3512 	    /* If we reach here, then something is seriously wrong.  */
3513 	    gcc_unreachable ();
3514 	}
3515       return gen_rtx_PLUS (Pmode, base, offset);
3516     }
3517   else if (GET_CODE (orig) == LABEL_REF)
3518     /* ??? We ought to be checking that the register is live instead, in case
3519        it is eliminated.  */
3520     crtl->uses_pic_offset_table = 1;
3521 
3522   return orig;
3523 }
3524 
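/* For reference (a sketch): with -fPIC (flag_pic == 2), the SYMBOL_REF
   case above emits a high/lo_sum pair plus a load through the PIC
   register, along the lines of

     sethi %hi(sym), %tmp          ! movsi_high_pic
     or    %tmp, %lo(sym), %tmp    ! movsi_lo_sum_pic
     ld    [%l7 + %tmp], %reg      ! movsi_pic_gotdata_op

   where the relocations are really GOT-relative (with a suitable
   assembler, %gdop_* forms that let the linker bypass the GOT load).
   With -fpic (flag_pic == 1), the GOT displacement fits in 13 bits
   and the slot is loaded directly: ld [%l7 + sym], %reg.  */
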
3525 /* Try machine-dependent ways of modifying an illegitimate address X
3526    to be legitimate.  If we find one, return the new, valid address.
3527 
3528    OLDX is the address as it was before break_out_memory_refs was called.
3529    In some cases it is useful to look at this to decide what needs to be done.
3530 
3531    MODE is the mode of the operand pointed to by X.
3532 
3533    On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */
3534 
3535 static rtx
3536 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3537 			  enum machine_mode mode)
3538 {
3539   rtx orig_x = x;
3540 
3541   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
3542     x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3543 		      force_operand (XEXP (x, 0), NULL_RTX));
3544   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
3545     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3546 		      force_operand (XEXP (x, 1), NULL_RTX));
3547   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
3548     x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
3549 		      XEXP (x, 1));
3550   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
3551     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3552 		      force_operand (XEXP (x, 1), NULL_RTX));
3553 
3554   if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
3555     return x;
3556 
3557   if (sparc_tls_referenced_p (x))
3558     x = legitimize_tls_address (x);
3559   else if (flag_pic)
3560     x = legitimize_pic_address (x, NULL_RTX);
3561   else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
3562     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3563 		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
3564   else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
3565     x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3566 		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
3567   else if (GET_CODE (x) == SYMBOL_REF
3568 	   || GET_CODE (x) == CONST
3569 	   || GET_CODE (x) == LABEL_REF)
3570     x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
3571 
3572   return x;
3573 }
3574 
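/* An illustrative example: given a non-PIC address such as
   %o0 + 0x2345, whose displacement does not fit in the 13-bit signed
   immediate field, the CONSTANT_ADDRESS_P case above rewrites it as
   REG+REG, so the access becomes something like

     sethi %hi(0x2345), %g1
     or    %g1, %lo(0x2345), %g1
     ld    [%o0 + %g1], %o1

   and likewise REG+(X*Y) has the product computed into a register
   first.  */
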
3575 #ifdef HAVE_GAS_HIDDEN
3576 # define USE_HIDDEN_LINKONCE 1
3577 #else
3578 # define USE_HIDDEN_LINKONCE 0
3579 #endif
3580 
3581 static void
3582 get_pc_thunk_name (char name[32], unsigned int regno)
3583 {
3584   const char *reg_name = reg_names[regno];
3585 
3586   /* Skip the leading '%' as that cannot be used in a
3587      symbol name.  */
3588   reg_name += 1;
3589 
3590   if (USE_HIDDEN_LINKONCE)
3591     sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
3592   else
3593     ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
3594 }
3595 
3596 /* Wrapper around the load_pcrel_sym{si,di} patterns.  */
3597 
3598 static rtx
3599 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
3600 {
3601   int orig_flag_pic = flag_pic;
3602   rtx insn;
3603 
3604   /* The load_pcrel_sym{si,di} patterns require absolute addressing.  */
3605   flag_pic = 0;
3606   if (TARGET_ARCH64)
3607     insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
3608   else
3609     insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
3610   flag_pic = orig_flag_pic;
3611 
3612   return insn;
3613 }
3614 
3615 /* Emit code to load the GOT register.  */
3616 
3617 static void
3618 load_got_register (void)
3619 {
3620   /* In PIC mode, this will retrieve pic_offset_table_rtx.  */
3621   if (!global_offset_table_rtx)
3622     global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
3623 
3624   if (TARGET_VXWORKS_RTP)
3625     emit_insn (gen_vxworks_load_got ());
3626   else
3627     {
3628       /* The GOT symbol is subject to a PC-relative relocation so we need a
3629 	 helper function to add the PC value and thus get the final value.  */
3630       if (!got_helper_rtx)
3631 	{
3632 	  char name[32];
3633 	  get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
3634 	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3635 	}
3636 
3637       emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
3638 				     got_helper_rtx,
3639 				     GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
3640     }
3641 
3642   /* Need to emit this whether or not we obey regdecls, since
3643      setjmp/longjmp can corrupt the register liveness information.
3644      ??? In the case where we don't obey regdecls, this is not sufficient
3645      since we may not fall out the bottom.  */
3646   emit_use (global_offset_table_rtx);
3647 }
3648 
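/* For reference, the standard (non-VxWorks) sequence emitted here looks
   like this, with %l7 being the GOT register:

     sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
     call  __sparc_get_pc_thunk.l7
      add  %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   and the thunk itself (emitted elsewhere) just returns while folding
   the PC of the call into the register:

     __sparc_get_pc_thunk.l7:
       jmp %o7+8
        add %o7, %l7, %l7

   The -4/+4 adjustments compensate for the distance between the
   sethi/add instructions and the call whose PC the thunk adds.  */
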
3649 /* Emit a call instruction with the pattern given by PAT.  ADDR is the
3650    address of the call target.  */
3651 
3652 void
3653 sparc_emit_call_insn (rtx pat, rtx addr)
3654 {
3655   rtx insn;
3656 
3657   insn = emit_call_insn (pat);
3658 
3659   /* The PIC register is live on entry to VxWorks PIC PLT entries.  */
3660   if (TARGET_VXWORKS_RTP
3661       && flag_pic
3662       && GET_CODE (addr) == SYMBOL_REF
3663       && (SYMBOL_REF_DECL (addr)
3664 	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
3665 	  : !SYMBOL_REF_LOCAL_P (addr)))
3666     {
3667       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
3668       crtl->uses_pic_offset_table = 1;
3669     }
3670 }
3671 
3672 /* Return 1 if RTX is a MEM which is known to be aligned to at
3673    least a DESIRED byte boundary.  */
3674 
3675 int
3676 mem_min_alignment (rtx mem, int desired)
3677 {
3678   rtx addr, base, offset;
3679 
3680   /* If it's not a MEM we can't accept it.  */
3681   if (GET_CODE (mem) != MEM)
3682     return 0;
3683 
3684   /* Obviously...  */
3685   if (!TARGET_UNALIGNED_DOUBLES
3686       && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
3687     return 1;
3688 
3689   /* ??? The rest of the function predates MEM_ALIGN so
3690      there is probably a bit of redundancy.  */
3691   addr = XEXP (mem, 0);
3692   base = offset = NULL_RTX;
3693   if (GET_CODE (addr) == PLUS)
3694     {
3695       if (GET_CODE (XEXP (addr, 0)) == REG)
3696 	{
3697 	  base = XEXP (addr, 0);
3698 
3699 	  /* The assumption here is that, if the base REG is
3700 	     aligned properly, the compiler will ensure that any
3701 	     REG-based index off of it is aligned properly
3702 	     as well.  */
3703 	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
3704 	    offset = XEXP (addr, 1);
3705 	  else
3706 	    offset = const0_rtx;
3707 	}
3708     }
3709   else if (GET_CODE (addr) == REG)
3710     {
3711       base = addr;
3712       offset = const0_rtx;
3713     }
3714 
3715   if (base != NULL_RTX)
3716     {
3717       int regno = REGNO (base);
3718 
3719       if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
3720 	{
3721 	  /* Check if the compiler has recorded some information
3722 	     about the alignment of the base REG.  If reload has
3723 	     completed, we already matched with proper alignments.
3724 	     If global_alloc is not run, however, reload might give us
3725 	     an unaligned pointer into the local stack.  */
3726 	  if (((cfun != 0
3727 		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
3728 	       || (optimize && reload_completed))
3729 	      && (INTVAL (offset) & (desired - 1)) == 0)
3730 	    return 1;
3731 	}
3732       else
3733 	{
3734 	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
3735 	    return 1;
3736 	}
3737     }
3738   else if (! TARGET_UNALIGNED_DOUBLES
3739 	   || CONSTANT_P (addr)
3740 	   || GET_CODE (addr) == LO_SUM)
3741     {
3742       /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
3743 	 is true, in which case we can only assume that an access is aligned if
3744 	 it is to a constant address, or the address involves a LO_SUM.  */
3745       return 1;
3746     }
3747 
3748   /* An obviously unaligned address.  */
3749   return 0;
3750 }
3751 
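/* An illustrative example: applied to a DImode MEM whose address is
   (plus %fp -16) with DESIRED == 8, the frame pointer branch above
   checks ((-16 - SPARC_STACK_BIAS) & 7) == 0.  Without a stack bias
   (32-bit ABI) this holds and the function returns 1; with the 64-bit
   ABI's 2047-byte bias, only bias-adjusted offsets count as
   aligned.  */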
3752 
3753 /* Vectors to keep interesting information about registers where it can easily
3754    be got.  We used to use the actual mode value as the bit number, but there
3755    are more than 32 modes now.  Instead we use two tables: one indexed by
3756    hard register number, and one indexed by mode.  */
3757 
3758 /* The purpose of sparc_mode_class is to shrink the range of modes so that
3759    they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
3760    mapped into one sparc_mode_class mode.  */
3761 
3762 enum sparc_mode_class {
3763   S_MODE, D_MODE, T_MODE, O_MODE,
3764   SF_MODE, DF_MODE, TF_MODE, OF_MODE,
3765   CC_MODE, CCFP_MODE
3766 };
3767 
3768 /* Modes for single-word and smaller quantities.  */
3769 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
3770 
3771 /* Modes for double-word and smaller quantities.  */
3772 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
3773 
3774 /* Modes for quad-word and smaller quantities.  */
3775 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
3776 
3777 /* Modes for 8-word and smaller quantities.  */
3778 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
3779 
3780 /* Modes for single-float quantities.  We must allow any single word or
3781    smaller quantity.  This is because the fix/float conversion instructions
3782    take integer inputs/outputs from the float registers.  */
3783 #define SF_MODES (S_MODES)
3784 
3785 /* Modes for double-float and smaller quantities.  */
3786 #define DF_MODES (D_MODES)
3787 
3788 /* Modes for quad-float and smaller quantities.  */
3789 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
3790 
3791 /* Modes for quad-float pairs and smaller quantities.  */
3792 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
3793 
3794 /* Modes for double-float only quantities.  */
3795 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
3796 
3797 /* Modes for quad-float and double-float only quantities.  */
3798 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
3799 
3800 /* Modes for quad-float pairs and double-float only quantities.  */
3801 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
3802 
3803 /* Modes for condition codes.  */
3804 #define CC_MODES (1 << (int) CC_MODE)
3805 #define CCFP_MODES (1 << (int) CCFP_MODE)
3806 
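/* For illustration, with the enum above (S_MODE == 0 ... CCFP_MODE == 9)
   the masks work out to:

     S_MODES   0x011    D_MODES   0x033    T_MODES   0x077    O_MODES  0x0ff
     SF_MODES  0x011    DF_MODES  0x033    TF_MODES  0x073    OF_MODES 0x0f3
     DF_MODES_NO_S  0x022    TF_MODES_NO_S  0x062    OF_MODES_NO_S  0x0e2
     CC_MODES  0x100    CCFP_MODES  0x200

   so deciding whether a mode is valid in a given hard register is, in
   effect, a single AND of the register's mask below with the mode's
   sparc_mode_class bit.  */
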
3807 /* Value is 1 if register/mode pair is acceptable on sparc.
3808    The funny mixture of D and T modes is because integer operations
3809    do not specially operate on tetra quantities, so non-quad-aligned
3810    registers can hold quadword quantities (except %o4 and %i4 because
3811    they cross fixed registers).  */
3812 
3813 /* This points to either the 32 bit or the 64 bit version.  */
3814 const int *hard_regno_mode_classes;
3815 
3816 static const int hard_32bit_mode_classes[] = {
3817   S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3818   T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3819   T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3820   T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3821 
3822   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3823   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3824   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3825   OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3826 
3827   /* FP regs f32 to f63.  Only the even numbered registers actually exist,
3828      and none can hold SFmode/SImode values.  */
3829   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3830   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3831   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3832   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3833 
3834   /* %fcc[0123] */
3835   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3836 
3837   /* %icc */
3838   CC_MODES
3839 };
3840 
3841 static const int hard_64bit_mode_classes[] = {
3842   D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3843   O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3844   T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3845   O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3846 
3847   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3848   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3849   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3850   OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3851 
3852   /* FP regs f32 to f63.  Only the even numbered registers actually exist,
3853      and none can hold SFmode/SImode values.  */
3854   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3855   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3856   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3857   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3858 
3859   /* %fcc[0123] */
3860   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3861 
3862   /* %icc */
3863   CC_MODES
3864 };
3865 
3866 int sparc_mode_class [NUM_MACHINE_MODES];
3867 
3868 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
3869 
3870 static void
3871 sparc_init_modes (void)
3872 {
3873   int i;
3874 
3875   for (i = 0; i < NUM_MACHINE_MODES; i++)
3876     {
3877       switch (GET_MODE_CLASS (i))
3878 	{
3879 	case MODE_INT:
3880 	case MODE_PARTIAL_INT:
3881 	case MODE_COMPLEX_INT:
3882 	  if (GET_MODE_SIZE (i) <= 4)
3883 	    sparc_mode_class[i] = 1 << (int) S_MODE;
3884 	  else if (GET_MODE_SIZE (i) == 8)
3885 	    sparc_mode_class[i] = 1 << (int) D_MODE;
3886 	  else if (GET_MODE_SIZE (i) == 16)
3887 	    sparc_mode_class[i] = 1 << (int) T_MODE;
3888 	  else if (GET_MODE_SIZE (i) == 32)
3889 	    sparc_mode_class[i] = 1 << (int) O_MODE;
3890 	  else
3891 	    sparc_mode_class[i] = 0;
3892 	  break;
3893 	case MODE_VECTOR_INT:
3894 	  if (GET_MODE_SIZE (i) <= 4)
3895 	    sparc_mode_class[i] = 1 << (int)SF_MODE;
3896 	  else if (GET_MODE_SIZE (i) == 8)
3897 	    sparc_mode_class[i] = 1 << (int)DF_MODE;
3898 	  break;
3899 	case MODE_FLOAT:
3900 	case MODE_COMPLEX_FLOAT:
3901 	  if (GET_MODE_SIZE (i) <= 4)
3902 	    sparc_mode_class[i] = 1 << (int) SF_MODE;
3903 	  else if (GET_MODE_SIZE (i) == 8)
3904 	    sparc_mode_class[i] = 1 << (int) DF_MODE;
3905 	  else if (GET_MODE_SIZE (i) == 16)
3906 	    sparc_mode_class[i] = 1 << (int) TF_MODE;
3907 	  else if (GET_MODE_SIZE (i) == 32)
3908 	    sparc_mode_class[i] = 1 << (int) OF_MODE;
3909 	  else
3910 	    sparc_mode_class[i] = 0;
3911 	  break;
3912 	case MODE_CC:
3913 	  if (i == (int) CCFPmode || i == (int) CCFPEmode)
3914 	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
3915 	  else
3916 	    sparc_mode_class[i] = 1 << (int) CC_MODE;
3917 	  break;
3918 	default:
3919 	  sparc_mode_class[i] = 0;
3920 	  break;
3921 	}
3922     }
3923 
3924   if (TARGET_ARCH64)
3925     hard_regno_mode_classes = hard_64bit_mode_classes;
3926   else
3927     hard_regno_mode_classes = hard_32bit_mode_classes;
3928 
3929   /* Initialize the array used by REGNO_REG_CLASS.  */
3930   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3931     {
3932       if (i < 16 && TARGET_V8PLUS)
3933 	sparc_regno_reg_class[i] = I64_REGS;
3934       else if (i < 32 || i == FRAME_POINTER_REGNUM)
3935 	sparc_regno_reg_class[i] = GENERAL_REGS;
3936       else if (i < 64)
3937 	sparc_regno_reg_class[i] = FP_REGS;
3938       else if (i < 96)
3939 	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
3940       else if (i < 100)
3941 	sparc_regno_reg_class[i] = FPCC_REGS;
3942       else
3943 	sparc_regno_reg_class[i] = NO_REGS;
3944     }
3945 }
3946 
3947 /* Compute the frame size required by the function.  This function is called
3948    during the reload pass and also by sparc_expand_prologue.  */
3949 
3950 HOST_WIDE_INT
3951 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function_p)
3952 {
3953   int outgoing_args_size = (crtl->outgoing_args_size
3954 			    + REG_PARM_STACK_SPACE (current_function_decl));
3955   int n_regs = 0;  /* N_REGS is the number of 4-byte regs saved thus far.  */
3956   int i;
3957 
3958   if (TARGET_ARCH64)
3959     {
3960       for (i = 0; i < 8; i++)
3961 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3962 	  n_regs += 2;
3963     }
3964   else
3965     {
3966       for (i = 0; i < 8; i += 2)
3967 	if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
3968 	    || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
3969 	  n_regs += 2;
3970     }
3971 
3972   for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
3973     if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
3974 	|| (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
3975       n_regs += 2;
3976 
3977   /* Set up values for use in prologue and epilogue.  */
3978   num_gfregs = n_regs;
3979 
3980   if (leaf_function_p
3981       && n_regs == 0
3982       && size == 0
3983       && crtl->outgoing_args_size == 0)
3984     actual_fsize = apparent_fsize = 0;
3985   else
3986     {
3987       /* We subtract STARTING_FRAME_OFFSET, remember it's negative.  */
3988       apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8;
3989       apparent_fsize += n_regs * 4;
3990       actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
3991     }
3992 
3993   /* Make sure nothing can clobber our register windows.
3994      If a SAVE must be done, or there is a stack-local variable,
3995      the register window area must be allocated.  */
3996   if (! leaf_function_p || size > 0)
3997     actual_fsize += FIRST_PARM_OFFSET (current_function_decl);
3998 
3999   return SPARC_STACK_ALIGN (actual_fsize);
4000 }
4001 
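/* A worked example (figures illustrative, not ABI-exact): assume
   STARTING_FRAME_OFFSET == 0, SIZE == 40 bytes of locals, two saved
   register pairs (n_regs == 4), outgoing_args_size == 24, in a
   non-leaf function.  Then

     apparent_fsize = ((40 + 7) & -8) + 4 * 4 = 56
     actual_fsize   = 56 + ((24 + 7) & -8)    = 80

   plus FIRST_PARM_OFFSET for the register window save area (the
   function is not a leaf), all rounded up by SPARC_STACK_ALIGN.  */
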
4002 /* Output any necessary .register pseudo-ops.  */
4003 
4004 void
4005 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4006 {
4007 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4008   int i;
4009 
4010   if (TARGET_ARCH32)
4011     return;
4012 
4013   /* Check if %g[2367] were used without
4014      .register being printed for them already.  */
4015   for (i = 2; i < 8; i++)
4016     {
4017       if (df_regs_ever_live_p (i)
4018 	  && ! sparc_hard_reg_printed [i])
4019 	{
4020 	  sparc_hard_reg_printed [i] = 1;
4021 	  /* %g7 is used as TLS base register, use #ignore
4022 	     for it instead of #scratch.  */
4023 	  fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4024 		   i == 7 ? "ignore" : "scratch");
4025 	}
4026       if (i == 3) i = 5;
4027     }
4028 #endif
4029 }
4030 
4031 /* Save/restore call-saved registers from LOW to HIGH at BASE+OFFSET
4032    as needed.  LOW should be double-word aligned for 32-bit registers.
4033    Return the new OFFSET.  */
4034 
4035 #define SORR_SAVE    0
4036 #define SORR_RESTORE 1
4037 
4038 static int
4039 save_or_restore_regs (int low, int high, rtx base, int offset, int action)
4040 {
4041   rtx mem, insn;
4042   int i;
4043 
4044   if (TARGET_ARCH64 && high <= 32)
4045     {
4046       for (i = low; i < high; i++)
4047 	{
4048 	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4049 	    {
4050 	      mem = gen_rtx_MEM (DImode, plus_constant (base, offset));
4051 	      set_mem_alias_set (mem, sparc_sr_alias_set);
4052 	      if (action == SORR_SAVE)
4053 		{
4054 		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
4055 		  RTX_FRAME_RELATED_P (insn) = 1;
4056 		}
4057 	      else  /* action == SORR_RESTORE */
4058 		emit_move_insn (gen_rtx_REG (DImode, i), mem);
4059 	      offset += 8;
4060 	    }
4061 	}
4062     }
4063   else
4064     {
4065       for (i = low; i < high; i += 2)
4066 	{
4067 	  bool reg0 = df_regs_ever_live_p (i) && ! call_used_regs[i];
4068 	  bool reg1 = df_regs_ever_live_p (i+1) && ! call_used_regs[i+1];
4069 	  enum machine_mode mode;
4070 	  int regno;
4071 
4072 	  if (reg0 && reg1)
4073 	    {
4074 	      mode = i < 32 ? DImode : DFmode;
4075 	      regno = i;
4076 	    }
4077 	  else if (reg0)
4078 	    {
4079 	      mode = i < 32 ? SImode : SFmode;
4080 	      regno = i;
4081 	    }
4082 	  else if (reg1)
4083 	    {
4084 	      mode = i < 32 ? SImode : SFmode;
4085 	      regno = i + 1;
4086 	      offset += 4;
4087 	    }
4088 	  else
4089 	    continue;
4090 
4091 	  mem = gen_rtx_MEM (mode, plus_constant (base, offset));
4092 	  set_mem_alias_set (mem, sparc_sr_alias_set);
4093 	  if (action == SORR_SAVE)
4094 	    {
4095 	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
4096 	      RTX_FRAME_RELATED_P (insn) = 1;
4097 	    }
4098 	  else  /* action == SORR_RESTORE */
4099 	    emit_move_insn (gen_rtx_REG (mode, regno), mem);
4100 
4101 	  /* Always preserve double-word alignment.  */
4102 	  offset = (offset + 8) & -8;
4103 	}
4104     }
4105 
4106   return offset;
4107 }
4108 
4109 /* Emit code to save or restore call-saved registers.  */
4110 
4111 static void
4112 emit_save_or_restore_regs (int action)
4113 {
4114   HOST_WIDE_INT offset;
4115   rtx base;
4116 
4117   offset = frame_base_offset - apparent_fsize;
4118 
4119   if (offset < -4096 || offset + num_gfregs * 4 > 4095)
4120     {
4121       /* ??? This might be optimized a little as %g1 might already have a
4122 	 value close enough that a single add insn will do.  */
4123       /* ??? Although, all of this is probably only a temporary fix
4124 	 because if %g1 can hold a function result, then
4125 	 sparc_expand_epilogue will lose (the result will be
4126 	 clobbered).  */
4127       base = gen_rtx_REG (Pmode, 1);
4128       emit_move_insn (base, GEN_INT (offset));
4129       emit_insn (gen_rtx_SET (VOIDmode,
4130 			      base,
4131 			      gen_rtx_PLUS (Pmode, frame_base_reg, base)));
4132       offset = 0;
4133     }
4134   else
4135     base = frame_base_reg;
4136 
4137   offset = save_or_restore_regs (0, 8, base, offset, action);
4138   save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, action);
4139 }
4140 
4141 /* Generate a save_register_window insn.  */
4142 
4143 static rtx
4144 gen_save_register_window (rtx increment)
4145 {
4146   if (TARGET_ARCH64)
4147     return gen_save_register_windowdi (increment);
4148   else
4149     return gen_save_register_windowsi (increment);
4150 }
4151 
4152 /* Generate an increment for the stack pointer.  */
4153 
4154 static rtx
4155 gen_stack_pointer_inc (rtx increment)
4156 {
4157   return gen_rtx_SET (VOIDmode,
4158 		      stack_pointer_rtx,
4159 		      gen_rtx_PLUS (Pmode,
4160 				    stack_pointer_rtx,
4161 				    increment));
4162 }
4163 
4164 /* Generate a decrement for the stack pointer.  */
4165 
4166 static rtx
4167 gen_stack_pointer_dec (rtx decrement)
4168 {
4169   return gen_rtx_SET (VOIDmode,
4170 		      stack_pointer_rtx,
4171 		      gen_rtx_MINUS (Pmode,
4172 				     stack_pointer_rtx,
4173 				     decrement));
4174 }
4175 
4176 /* Expand the function prologue.  The prologue is responsible for reserving
4177    storage for the frame, saving the call-saved registers and loading the
4178    GOT register if needed.  */
4179 
4180 void
4181 sparc_expand_prologue (void)
4182 {
4183   rtx insn;
4184   int i;
4185 
4186   /* Compute a snapshot of current_function_uses_only_leaf_regs.  Relying
4187      on the final value of the flag means deferring the prologue/epilogue
4188      expansion until just before the second scheduling pass, which is too
4189      late to emit multiple epilogues or return insns.
4190 
4191      Of course we are making the assumption that the value of the flag
4192      will not change between now and its final value.  Of the three parts
4193      of the formula, only the last one can reasonably vary.  Let's take a
4194      closer look, after assuming that the first two are set to true
4195      (otherwise the last value is effectively silenced).
4196 
4197      If only_leaf_regs_used returns false, the global predicate will also
4198      be false so the actual frame size calculated below will be positive.
4199      As a consequence, the save_register_window insn will be emitted in
4200      the instruction stream; now this insn explicitly references %fp
4201      which is not a leaf register so only_leaf_regs_used will always
4202      return false subsequently.
4203 
4204      If only_leaf_regs_used returns true, we hope that the subsequent
4205      optimization passes won't cause non-leaf registers to pop up.  For
4206      example, the regrename pass has special provisions to not rename to
4207      non-leaf registers in a leaf function.  */
4208   sparc_leaf_function_p
4209     = optimize > 0 && current_function_is_leaf && only_leaf_regs_used ();
4210 
4211   /* Need to use actual_fsize, since we are also allocating
4212      space for our callee (and our own register save area).  */
4213   actual_fsize
4214     = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
4215 
4216   /* Advertise that the data calculated just above are now valid.  */
4217   sparc_prologue_data_valid_p = true;
4218 
4219   if (sparc_leaf_function_p)
4220     {
4221       frame_base_reg = stack_pointer_rtx;
4222       frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
4223     }
4224   else
4225     {
4226       frame_base_reg = hard_frame_pointer_rtx;
4227       frame_base_offset = SPARC_STACK_BIAS;
4228     }
4229 
4230   if (actual_fsize == 0)
4231     /* do nothing.  */ ;
4232   else if (sparc_leaf_function_p)
4233     {
4234       if (actual_fsize <= 4096)
4235 	insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4236       else if (actual_fsize <= 8192)
4237 	{
4238 	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
4239 	  RTX_FRAME_RELATED_P (insn) = 1;
4240 
4241 	  /* %sp is still the CFA register.  */
4242 	  insn
4243 	    = emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4244 	}
4245       else
4246 	{
4247 	  rtx reg = gen_rtx_REG (Pmode, 1);
4248 	  emit_move_insn (reg, GEN_INT (-actual_fsize));
4249 	  insn = emit_insn (gen_stack_pointer_inc (reg));
4250 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4251 			gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4252 	}
4253 
4254       RTX_FRAME_RELATED_P (insn) = 1;
4255     }
4256   else
4257     {
4258       if (actual_fsize <= 4096)
4259 	insn = emit_insn (gen_save_register_window (GEN_INT (-actual_fsize)));
4260       else if (actual_fsize <= 8192)
4261 	{
4262 	  insn = emit_insn (gen_save_register_window (GEN_INT (-4096)));
4263 
4264 	  /* %sp is not the CFA register anymore.  */
4265 	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4266 
4267 	  /* Make sure no %fp-based store is issued until after the frame is
4268 	     established.  The offset between the frame pointer and the stack
4269 	     pointer is calculated relative to the value of the stack pointer
4270 	     at the end of the function prologue, and moving instructions that
4271 	     access the stack via the frame pointer between the instructions
4272 	     that decrement the stack pointer could result in accessing the
4273 	     register window save area, which is volatile.  */
4274 	  emit_insn (gen_frame_blockage ());
4275 	}
4276       else
4277 	{
4278 	  rtx reg = gen_rtx_REG (Pmode, 1);
4279 	  emit_move_insn (reg, GEN_INT (-actual_fsize));
4280 	  insn = emit_insn (gen_save_register_window (reg));
4281 	}
4282 
4283       RTX_FRAME_RELATED_P (insn) = 1;
4284       for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
4285         RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, i)) = 1;
4286     }
4287 
4288   if (num_gfregs)
4289     emit_save_or_restore_regs (SORR_SAVE);
4290 
4291   /* Load the GOT register if needed.  */
4292   if (crtl->uses_pic_offset_table)
4293     load_got_register ();
4294 }
4295 
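/* For reference, the three size ranges above yield the following
   prologue shapes in a non-leaf function (sketch):

     actual_fsize <= 4096:     save  %sp, -fsize, %sp

     actual_fsize <= 8192:     save  %sp, -4096, %sp
                               add   %sp, 4096-fsize, %sp

     otherwise:                sethi %hi(-fsize), %g1
                               or    %g1, %lo(-fsize), %g1
                               save  %sp, %g1, %sp

   A leaf function adjusts %sp with plain add instructions instead of
   save, since it does not allocate a new register window.  */
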
4296 /* This function generates the assembly code for function entry, which boils
4297    down to emitting the necessary .register directives.  */
4298 
4299 static void
4300 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4301 {
4302   /* Check that the assumption we made in sparc_expand_prologue is valid.  */
4303   gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs);
4304 
4305   sparc_output_scratch_registers (file);
4306 }
4307 
4308 /* Expand the function epilogue, either normal or part of a sibcall.
4309    We emit all the instructions except the return or the call.  */
4310 
4311 void
4312 sparc_expand_epilogue (void)
4313 {
4314   if (num_gfregs)
4315     emit_save_or_restore_regs (SORR_RESTORE);
4316 
4317   if (actual_fsize == 0)
4318     /* do nothing.  */ ;
4319   else if (sparc_leaf_function_p)
4320     {
4321       if (actual_fsize <= 4096)
4322 	emit_insn (gen_stack_pointer_dec (GEN_INT (- actual_fsize)));
4323       else if (actual_fsize <= 8192)
4324 	{
4325 	  emit_insn (gen_stack_pointer_dec (GEN_INT (-4096)));
4326 	  emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - actual_fsize)));
4327 	}
4328       else
4329 	{
4330 	  rtx reg = gen_rtx_REG (Pmode, 1);
4331 	  emit_move_insn (reg, GEN_INT (-actual_fsize));
4332 	  emit_insn (gen_stack_pointer_dec (reg));
4333 	}
4334     }
4335 }
4336 
4337 /* Return true if it is appropriate to emit `return' instructions in the
4338    body of a function.  */
4339 
4340 bool
4341 sparc_can_use_return_insn_p (void)
4342 {
4343   return sparc_prologue_data_valid_p
4344 	 && num_gfregs == 0
4345 	 && (actual_fsize == 0 || !sparc_leaf_function_p);
4346 }
4347 
4348 /* This function generates the assembly code for function exit.  */
4349 
4350 static void
4351 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4352 {
4353   /* If code does not drop into the epilogue, we still have to output
4354      a dummy nop for the sake of sane backtraces.  Otherwise, if the
4355      last two instructions of a function were "call foo; dslot;" this
4356      can make the return PC of foo (i.e. address of call instruction
4357      plus 8) point to the first instruction in the next function.  */
4358 
4359   rtx insn, last_real_insn;
4360 
4361   insn = get_last_insn ();
4362 
4363   last_real_insn = prev_real_insn (insn);
4364   if (last_real_insn
4365       && GET_CODE (last_real_insn) == INSN
4366       && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
4367     last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
4368 
4369   if (last_real_insn && GET_CODE (last_real_insn) == CALL_INSN)
4370     fputs("\tnop\n", file);
4371 
4372   sparc_output_deferred_case_vectors ();
4373 }
4374 
4375 /* Output a 'restore' instruction.  */
4376 
4377 static void
4378 output_restore (rtx pat)
4379 {
4380   rtx operands[3];
4381 
4382   if (! pat)
4383     {
4384       fputs ("\t restore\n", asm_out_file);
4385       return;
4386     }
4387 
4388   gcc_assert (GET_CODE (pat) == SET);
4389 
4390   operands[0] = SET_DEST (pat);
4391   pat = SET_SRC (pat);
4392 
4393   switch (GET_CODE (pat))
4394     {
4395       case PLUS:
4396 	operands[1] = XEXP (pat, 0);
4397 	operands[2] = XEXP (pat, 1);
4398 	output_asm_insn (" restore %r1, %2, %Y0", operands);
4399 	break;
4400       case LO_SUM:
4401 	operands[1] = XEXP (pat, 0);
4402 	operands[2] = XEXP (pat, 1);
4403 	output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
4404 	break;
4405       case ASHIFT:
4406 	operands[1] = XEXP (pat, 0);
4407 	gcc_assert (XEXP (pat, 1) == const1_rtx);
4408 	output_asm_insn (" restore %r1, %r1, %Y0", operands);
4409 	break;
4410       default:
4411 	operands[1] = pat;
4412 	output_asm_insn (" restore %%g0, %1, %Y0", operands);
4413 	break;
4414     }
4415 }
4416 
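/* An illustrative example: if the delay slot holds
   (set (reg %i0) (plus (reg %i0) (const_int 16))), the PLUS case below
   folds it into the window restore as

     restore %i0, 16, %o0

   The source operands are read in the callee's window while the
   destination is written in the caller's window, where our %i0 is
   visible as %o0; the %Y0 output modifier performs that renaming.  */
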
4417 /* Output a return.  */
4418 
4419 const char *
4420 output_return (rtx insn)
4421 {
4422   if (sparc_leaf_function_p)
4423     {
4424       /* This is a leaf function so we don't have to bother restoring the
4425 	 register window, which frees us from dealing with the convoluted
4426 	 semantics of restore/return.  We simply output the jump to the
4427 	 return address and the insn in the delay slot (if any).  */
4428 
4429       gcc_assert (! crtl->calls_eh_return);
4430 
4431       return "jmp\t%%o7+%)%#";
4432     }
4433   else
4434     {
4435       /* This is a regular function so we have to restore the register window.
4436 	 We may have a pending insn for the delay slot, which will be either
4437 	 combined with the 'restore' instruction or put in the delay slot of
4438 	 the 'return' instruction.  */
4439 
4440       if (crtl->calls_eh_return)
4441 	{
4442 	  /* If the function uses __builtin_eh_return, the eh_return
4443 	     machinery occupies the delay slot.  */
4444 	  gcc_assert (! final_sequence);
4445 
4446           if (flag_delayed_branch)
4447 	    {
4448 	      if (TARGET_V9)
4449 		fputs ("\treturn\t%i7+8\n", asm_out_file);
4450 	      else
4451 		fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file);
4452 
4453 	      fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
4454 	    }
4455 	  else
4456 	    {
4457 	      fputs ("\trestore\n\tadd\t%sp, %g1, %sp\n", asm_out_file);
4458 	      fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
4459 	    }
4460 	}
4461       else if (final_sequence)
4462 	{
4463 	  rtx delay, pat;
4464 
4465 	  delay = NEXT_INSN (insn);
4466 	  gcc_assert (delay);
4467 
4468 	  pat = PATTERN (delay);
4469 
4470 	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
4471 	    {
4472 	      epilogue_renumber (&pat, 0);
4473 	      return "return\t%%i7+%)%#";
4474 	    }
4475 	  else
4476 	    {
4477 	      output_asm_insn ("jmp\t%%i7+%)", NULL);
4478 	      output_restore (pat);
4479 	      PATTERN (delay) = gen_blockage ();
4480 	      INSN_CODE (delay) = -1;
4481 	    }
4482 	}
4483       else
4484         {
4485 	  /* The delay slot is empty.  */
4486 	  if (TARGET_V9)
4487 	    return "return\t%%i7+%)\n\t nop";
4488 	  else if (flag_delayed_branch)
4489 	    return "jmp\t%%i7+%)\n\t restore";
4490 	  else
4491 	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
4492 	}
4493     }
4494 
4495   return "";
4496 }
4497 
4498 /* Output a sibling call.  */
4499 
4500 const char *
4501 output_sibcall (rtx insn, rtx call_operand)
4502 {
4503   rtx operands[1];
4504 
4505   gcc_assert (flag_delayed_branch);
4506 
4507   operands[0] = call_operand;
4508 
4509   if (sparc_leaf_function_p)
4510     {
4511       /* This is a leaf function so we don't have to bother restoring the
4512 	 register window.  We simply output the jump to the function and
4513 	 the insn in the delay slot (if any).  */
4514 
4515       gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
4516 
4517       if (final_sequence)
4518 	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
4519 			 operands);
4520       else
4521 	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
4522 	   it into a branch if possible.  */
4523 	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
4524 			 operands);
4525     }
4526   else
4527     {
4528       /* This is a regular function so we have to restore the register window.
4529 	 We may have a pending insn for the delay slot, which will be combined
4530 	 with the 'restore' instruction.  */
4531 
4532       output_asm_insn ("call\t%a0, 0", operands);
4533 
4534       if (final_sequence)
4535 	{
4536 	  rtx delay = NEXT_INSN (insn);
4537 	  gcc_assert (delay);
4538 
4539 	  output_restore (PATTERN (delay));
4540 
4541 	  PATTERN (delay) = gen_blockage ();
4542 	  INSN_CODE (delay) = -1;
4543 	}
4544       else
4545 	output_restore (NULL_RTX);
4546     }
4547 
4548   return "";
4549 }
4550 
4551 /* Functions for handling argument passing.
4552 
4553    For 32-bit, the first 6 args are normally in registers and the rest are
4554    pushed.  Any arg that starts within the first 6 words is at least
4555    partially passed in a register unless its data type forbids.
4556 
4557    For 64-bit, the argument registers are laid out as an array of 16 elements
4558    and arguments are added sequentially.  The first 6 int args and up to the
4559    first 16 fp args (depending on size) are passed in regs.
4560 
4561    Slot    Stack   Integral   Float   Float in structure   Double   Long Double
4562    ----    -----   --------   -----   ------------------   ------   -----------
4563     15   [SP+248]              %f31       %f30,%f31         %d30
4564     14   [SP+240]              %f29       %f28,%f29         %d28       %q28
4565     13   [SP+232]              %f27       %f26,%f27         %d26
4566     12   [SP+224]              %f25       %f24,%f25         %d24       %q24
4567     11   [SP+216]              %f23       %f22,%f23         %d22
4568     10   [SP+208]              %f21       %f20,%f21         %d20       %q20
4569      9   [SP+200]              %f19       %f18,%f19         %d18
4570      8   [SP+192]              %f17       %f16,%f17         %d16       %q16
4571      7   [SP+184]              %f15       %f14,%f15         %d14
4572      6   [SP+176]              %f13       %f12,%f13         %d12       %q12
4573      5   [SP+168]     %o5      %f11       %f10,%f11         %d10
4574      4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
4575      3   [SP+152]     %o3       %f7        %f6,%f7           %d6
4576      2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
4577      1   [SP+136]     %o1       %f3        %f2,%f3           %d2
4578      0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
4579 
4580    Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
4581 
4582    Integral arguments are always passed as 64-bit quantities appropriately
4583    extended.
4584 
4585    Passing of floating point values is handled as follows.
4586    If a prototype is in scope:
4587      If the value is a named argument (i.e. it is not part of the `...'
4588      of a stdarg function) then the value is passed in the appropriate
4589      fp reg.
4590      If the value is part of the `...' and is passed in one of the first 6
4591      slots then the value is passed in the appropriate int reg.
4592      If the value is part of the `...' and is not passed in one of the first 6
4593      slots then the value is passed in memory.
4594    If a prototype is not in scope:
4595      If the value is one of the first 6 arguments the value is passed in the
4596      appropriate integer reg and the appropriate fp reg.
4597      If the value is not one of the first 6 arguments the value is passed in
4598      the appropriate fp reg and in memory.
4599 
4600 
4601    Summary of the calling conventions implemented by GCC on the SPARC:
4602 
4603    32-bit ABI:
4604                                 size      argument     return value
4605 
4606       small integer              <4       int. reg.      int. reg.
4607       word                        4       int. reg.      int. reg.
4608       double word                 8       int. reg.      int. reg.
4609 
4610       _Complex small integer     <8       int. reg.      int. reg.
4611       _Complex word               8       int. reg.      int. reg.
4612       _Complex double word       16        memory        int. reg.
4613 
4614       vector integer            <=8       int. reg.       FP reg.
4615       vector integer             >8        memory         memory
4616 
4617       float                       4       int. reg.       FP reg.
4618       double                      8       int. reg.       FP reg.
4619       long double                16        memory         memory
4620 
4621       _Complex float              8        memory         FP reg.
4622       _Complex double            16        memory         FP reg.
4623       _Complex long double       32        memory         FP reg.
4624 
4625       vector float              any        memory         memory
4626 
4627       aggregate                 any        memory         memory
4628 
4629 
4630 
4631     64-bit ABI:
4632                                 size      argument     return value
4633 
4634       small integer              <8       int. reg.      int. reg.
4635       word                        8       int. reg.      int. reg.
4636       double word                16       int. reg.      int. reg.
4637 
4638       _Complex small integer    <16       int. reg.      int. reg.
4639       _Complex word              16       int. reg.      int. reg.
4640       _Complex double word       32        memory        int. reg.
4641 
4642       vector integer           <=16        FP reg.        FP reg.
4643       vector integer       16<s<=32        memory         FP reg.
4644       vector integer            >32        memory         memory
4645 
4646       float                       4        FP reg.        FP reg.
4647       double                      8        FP reg.        FP reg.
4648       long double                16        FP reg.        FP reg.
4649 
4650       _Complex float              8        FP reg.        FP reg.
4651       _Complex double            16        FP reg.        FP reg.
4652       _Complex long double       32        memory         FP reg.
4653 
4654       vector float             <=16        FP reg.        FP reg.
4655       vector float         16<s<=32        memory         FP reg.
4656       vector float              >32        memory         memory
4657 
4658       aggregate                <=16         reg.           reg.
4659       aggregate            16<s<=32        memory          reg.
4660       aggregate                 >32        memory         memory
4661 
4662 
4663 
4664 Note #1: complex floating-point types follow the extended SPARC ABIs as
4665 implemented by the Sun compiler.
4666 
4667 Note #2: integral vector types follow the scalar floating-point types
4668 conventions to match what is implemented by the Sun VIS SDK.
4669 
4670 Note #3: floating-point vector types follow the aggregate types
4671 conventions.  */
4672 
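/* For instance, given `double f (int i, double d);' the tables above
   give: under the 32-bit ABI, I is passed in %o0 and D in the integer
   registers %o1/%o2 (assignment is by word-sized slot), with the
   result returned in the FP register pair %f0/%f1; under the 64-bit
   ABI, I goes in %o0, D in the FP register %d2 (slot 1), and the
   result comes back in %d0.  The precise slot assignment is computed
   by function_arg_slotno below.  */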
4673 
4674 /* Maximum number of int regs for args.  */
4675 #define SPARC_INT_ARG_MAX 6
4676 /* Maximum number of fp regs for args.  */
4677 #define SPARC_FP_ARG_MAX 16
4678 
4679 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
4680 
4681 /* Handle the INIT_CUMULATIVE_ARGS macro.
4682    Initialize a variable CUM of type CUMULATIVE_ARGS
4683    for a call to a function whose data type is FNTYPE.
4684    For a library call, FNTYPE is 0.  */
4685 
4686 void
4687 init_cumulative_args (struct sparc_args *cum, tree fntype,
4688 		      rtx libname ATTRIBUTE_UNUSED,
4689 		      tree fndecl ATTRIBUTE_UNUSED)
4690 {
4691   cum->words = 0;
4692   cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
4693   cum->libcall_p = fntype == 0;
4694 }
4695 
4696 /* Handle the TARGET_PROMOTE_PROTOTYPES target hook.
4697    When a prototype says `char' or `short', really pass an `int'.  */
4698 
4699 static bool
4700 sparc_promote_prototypes (const_tree fntype ATTRIBUTE_UNUSED)
4701 {
4702   return TARGET_ARCH32 ? true : false;
4703 }
4704 
4705 /* Handle promotion of pointer and integer arguments.  */
4706 
4707 static enum machine_mode
4708 sparc_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
4709                              enum machine_mode mode,
4710                              int *punsignedp ATTRIBUTE_UNUSED,
4711                              const_tree fntype ATTRIBUTE_UNUSED,
4712                              int for_return ATTRIBUTE_UNUSED)
4713 {
4714   if (POINTER_TYPE_P (type))
4715     {
4716       *punsignedp = POINTERS_EXTEND_UNSIGNED;
4717       return Pmode;
4718     }
4719 
4720   /* For TARGET_ARCH64 we need this, as we don't have instructions
4721      for arithmetic operations which do zero/sign extension at the same
4722      time, so without this we would end up with a srl/sra after every
4723      assignment to a user variable, which means very bad code.  */
4724   if (TARGET_ARCH64
4725       && GET_MODE_CLASS (mode) == MODE_INT
4726       && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4727     return word_mode;
4728 
4729   return mode;
4730 }
4731 
4732 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.  */
4733 
4734 static bool
4735 sparc_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
4736 {
4737   return TARGET_ARCH64 ? true : false;
4738 }
4739 
4740 /* Scan the record type TYPE and return the following predicates:
4741     - INTREGS_P: the record contains at least one field or sub-field
4742       that is eligible for promotion in integer registers.
4743     - FP_REGS_P: the record contains at least one field or sub-field
4744       that is eligible for promotion in floating-point registers.
4745     - PACKED_P: the record contains at least one field that is packed.
4746 
4747    Sub-fields are not taken into account for the PACKED_P predicate.  */
4748 
4749 static void
4750 scan_record_type (tree type, int *intregs_p, int *fpregs_p, int *packed_p)
4751 {
4752   tree field;
4753 
4754   for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4755     {
4756       if (TREE_CODE (field) == FIELD_DECL)
4757 	{
4758 	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4759 	    scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
4760 	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4761 		   || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4762 		  && TARGET_FPU)
4763 	    *fpregs_p = 1;
4764 	  else
4765 	    *intregs_p = 1;
4766 
4767 	  if (packed_p && DECL_PACKED (field))
4768 	    *packed_p = 1;
4769 	}
4770     }
4771 }
4772 
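/* An illustrative example: scanning

     struct s { float f; struct { int i; } inner; };

   sets *FPREGS_P because of the float field (given TARGET_FPU) and
   *INTREGS_P because of the integer sub-field, while a DECL_PACKED
   field at the top level of S would additionally set *PACKED_P.  */
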
4773 /* Compute the slot number to pass an argument in.
4774    Return the slot number or -1 if passing on the stack.
4775 
4776    CUM is a variable of type CUMULATIVE_ARGS which gives info about
4777     the preceding args and about the function being called.
4778    MODE is the argument's machine mode.
4779    TYPE is the data type of the argument (as a tree).
4780     This is null for libcalls where that information may
4781     not be available.
4782    NAMED is nonzero if this argument is a named parameter
4783     (otherwise it is an extra parameter matching an ellipsis).
4784    INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
4785    *PREGNO records the register number to use if scalar type.
4786    *PPADDING records the amount of padding needed in words.  */
4787 
4788 static int
4789 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
4790 		     tree type, int named, int incoming_p,
4791 		     int *pregno, int *ppadding)
4792 {
4793   int regbase = (incoming_p
4794 		 ? SPARC_INCOMING_INT_ARG_FIRST
4795 		 : SPARC_OUTGOING_INT_ARG_FIRST);
4796   int slotno = cum->words;
4797   enum mode_class mclass;
4798   int regno;
4799 
4800   *ppadding = 0;
4801 
4802   if (type && TREE_ADDRESSABLE (type))
4803     return -1;
4804 
4805   if (TARGET_ARCH32
4806       && mode == BLKmode
4807       && type
4808       && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
4809     return -1;
4810 
4811   /* For SPARC64, objects requiring 16-byte alignment get it.  */
4812   if (TARGET_ARCH64
4813       && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
4814       && (slotno & 1) != 0)
4815     slotno++, *ppadding = 1;
4816 
4817   mclass = GET_MODE_CLASS (mode);
4818   if (type && TREE_CODE (type) == VECTOR_TYPE)
4819     {
4820       /* Vector types deserve special treatment because they are
4821 	 polymorphic wrt their mode, depending upon whether VIS
4822 	 instructions are enabled.  */
4823       if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4824 	{
4825 	  /* The SPARC port defines no floating-point vector modes.  */
4826 	  gcc_assert (mode == BLKmode);
4827 	}
4828       else
4829 	{
4830 	  /* Integral vector types should either have a vector
4831 	     mode or an integral mode, because we are guaranteed
4832 	     by pass_by_reference that their size is not greater
4833 	     than 16 bytes and TImode is 16-byte wide.  */
4834 	  gcc_assert (mode != BLKmode);
4835 
4836 	  /* Vector integers are handled like floats according to
4837 	     the Sun VIS SDK.  */
4838 	  mclass = MODE_FLOAT;
4839 	}
4840     }
4841 
4842   switch (mclass)
4843     {
4844     case MODE_FLOAT:
4845     case MODE_COMPLEX_FLOAT:
4846     case MODE_VECTOR_INT:
4847       if (TARGET_ARCH64 && TARGET_FPU && named)
4848 	{
4849 	  if (slotno >= SPARC_FP_ARG_MAX)
4850 	    return -1;
4851 	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
4852 	  /* Arguments filling only one single FP register are
4853 	     right-justified in the outer double FP register.  */
4854 	  if (GET_MODE_SIZE (mode) <= 4)
4855 	    regno++;
4856 	  break;
4857 	}
4858       /* fallthrough */
4859 
4860     case MODE_INT:
4861     case MODE_COMPLEX_INT:
4862       if (slotno >= SPARC_INT_ARG_MAX)
4863 	return -1;
4864       regno = regbase + slotno;
4865       break;
4866 
4867     case MODE_RANDOM:
4868       if (mode == VOIDmode)
4869 	/* MODE is VOIDmode when generating the actual call.  */
4870 	return -1;
4871 
4872       gcc_assert (mode == BLKmode);
4873 
4874       if (TARGET_ARCH32
4875 	  || !type
4876 	  || (TREE_CODE (type) != VECTOR_TYPE
4877 	      && TREE_CODE (type) != RECORD_TYPE))
4878 	{
4879 	  if (slotno >= SPARC_INT_ARG_MAX)
4880 	    return -1;
4881 	  regno = regbase + slotno;
4882 	}
4883       else  /* TARGET_ARCH64 && type */
4884 	{
4885 	  int intregs_p = 0, fpregs_p = 0, packed_p = 0;
4886 
4887 	  /* First see what kinds of registers we would need.  */
4888 	  if (TREE_CODE (type) == VECTOR_TYPE)
4889 	    fpregs_p = 1;
4890 	  else
4891 	    scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
4892 
4893 	  /* The ABI obviously doesn't specify how packed structures
4894 	     are passed.  These are defined to be passed in int regs
4895 	     if possible, otherwise memory.  */
4896 	  if (packed_p || !named)
4897 	    fpregs_p = 0, intregs_p = 1;
4898 
4899 	  /* If all arg slots are filled, then must pass on stack.  */
4900 	  if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
4901 	    return -1;
4902 
4903 	  /* If there are only int args and all int arg slots are filled,
4904 	     then must pass on stack.  */
4905 	  if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
4906 	    return -1;
4907 
4908 	  /* Note that even if all int arg slots are filled, fp members may
4909 	     still be passed in regs if such regs are available.
4910 	     *PREGNO isn't set because there may be more than one, it's up
4911 	     to the caller to compute them.  */
4912 	  return slotno;
4913 	}
4914       break;
4915 
4916     default :
4917       gcc_unreachable ();
4918     }
4919 
4920   *pregno = regno;
4921   return slotno;
4922 }
4923 
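/* A worked example (illustrative): for a 64-bit call to
   `void g (int i, double d, float f)' with all arguments named:
   I gets slot 0 and an int reg (regbase + 0, i.e. %o0); D gets slot 1
   and, being MODE_FLOAT, regno = SPARC_FP_ARG_FIRST + 2, i.e. %d2;
   F gets slot 2 and, being only 4 bytes wide, is right-justified into
   the odd half: SPARC_FP_ARG_FIRST + 4 + 1, i.e. %f5.  This matches
   the slot table given before the calling-convention summary.  */
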
4924 /* Handle recursive register counting for structure field layout.  */
4925 
4926 struct function_arg_record_value_parms
4927 {
4928   rtx ret;		/* return expression being built.  */
4929   int slotno;		/* slot number of the argument.  */
4930   int named;		/* whether the argument is named.  */
4931   int regbase;		/* regno of the base register.  */
4932   int stack;		/* 1 if part of the argument is on the stack.  */
4933   int intoffset;	/* offset of the first pending integer field.  */
4934   unsigned int nregs;	/* number of words passed in registers.  */
4935 };
4936 
4937 static void function_arg_record_value_3
4938  (HOST_WIDE_INT, struct function_arg_record_value_parms *);
4939 static void function_arg_record_value_2
4940  (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
4941 static void function_arg_record_value_1
4942  (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
4943 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
4944 static rtx function_arg_union_value (int, enum machine_mode, int, int);
4945 
4946 /* A subroutine of function_arg_record_value.  Traverse the structure
4947    recursively and determine how many registers will be required.  */
4948 
4949 static void
4950 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
4951 			     struct function_arg_record_value_parms *parms,
4952 			     bool packed_p)
4953 {
4954   tree field;
4955 
4956   /* We need to compute how many registers are needed so we can
4957      allocate the PARALLEL but before we can do that we need to know
4958      whether there are any packed fields.  The ABI obviously doesn't
4959      specify how structures are passed in this case, so they are
4960      defined to be passed in int regs if possible, otherwise memory,
4961      regardless of whether there are fp values present.  */
4962 
4963   if (! packed_p)
4964     for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4965       {
4966 	if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
4967 	  {
4968 	    packed_p = true;
4969 	    break;
4970 	  }
4971       }
4972 
4973   /* Compute how many registers we need.  */
4974   for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4975     {
4976       if (TREE_CODE (field) == FIELD_DECL)
4977 	{
4978 	  HOST_WIDE_INT bitpos = startbitpos;
4979 
4980 	  if (DECL_SIZE (field) != 0)
4981 	    {
4982 	      if (integer_zerop (DECL_SIZE (field)))
4983 		continue;
4984 
4985 	      if (host_integerp (bit_position (field), 1))
4986 		bitpos += int_bit_position (field);
4987 	    }
4988 
4989 	  /* ??? FIXME: else assume zero offset.  */
4990 
4991 	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4992 	    function_arg_record_value_1 (TREE_TYPE (field),
4993 	    				 bitpos,
4994 					 parms,
4995 					 packed_p);
4996 	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4997 		    || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4998 		   && TARGET_FPU
4999 		   && parms->named
5000 		   && ! packed_p)
5001 	    {
5002 	      if (parms->intoffset != -1)
5003 		{
5004 		  unsigned int startbit, endbit;
5005 		  int intslots, this_slotno;
5006 
5007 		  startbit = parms->intoffset & -BITS_PER_WORD;
5008 		  endbit   = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5009 
5010 		  intslots = (endbit - startbit) / BITS_PER_WORD;
5011 		  this_slotno = parms->slotno + parms->intoffset
5012 		    / BITS_PER_WORD;
5013 
5014 		  if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
5015 		    {
5016 		      intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
5017 		      /* We need to pass this field on the stack.  */
5018 		      parms->stack = 1;
5019 		    }
5020 
5021 		  parms->nregs += intslots;
5022 		  parms->intoffset = -1;
5023 		}
5024 
	      /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
		 If that weren't true, we wouldn't be here.  */
5027 	      if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
5028 		  && DECL_MODE (field) == BLKmode)
5029 		parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
5030 	      else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
5031 		parms->nregs += 2;
5032 	      else
5033 		parms->nregs += 1;
5034 	    }
5035 	  else
5036 	    {
5037 	      if (parms->intoffset == -1)
5038 		parms->intoffset = bitpos;
5039 	    }
5040 	}
5041     }
5042 }
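
/* For illustration (not from the ABI text): given

     struct { int a; float b; };

   passed on SPARC64 as a named argument with TARGET_FPU, the traversal
   above counts one integer word for A (flushed when the FP field B is
   reached) and one FP register for B, so parms->nregs ends up as 2.  */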
5043 
5044 /* A subroutine of function_arg_record_value.  Assign the bits of the
5045    structure between parms->intoffset and bitpos to integer registers.  */
5046 
5047 static void
5048 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
5049 			     struct function_arg_record_value_parms *parms)
5050 {
5051   enum machine_mode mode;
5052   unsigned int regno;
5053   unsigned int startbit, endbit;
5054   int this_slotno, intslots, intoffset;
5055   rtx reg;
5056 
5057   if (parms->intoffset == -1)
5058     return;
5059 
5060   intoffset = parms->intoffset;
5061   parms->intoffset = -1;
5062 
5063   startbit = intoffset & -BITS_PER_WORD;
5064   endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5065   intslots = (endbit - startbit) / BITS_PER_WORD;
5066   this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
5067 
5068   intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
5069   if (intslots <= 0)
5070     return;
5071 
5072   /* If this is the trailing part of a word, only load that much into
5073      the register.  Otherwise load the whole register.  Note that in
5074      the latter case we may pick up unwanted bits.  It's not a problem
	     at the moment, but we may wish to revisit this.  */
5076 
5077   if (intoffset % BITS_PER_WORD != 0)
5078     mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
5079 			  	   MODE_INT);
5080   else
5081     mode = word_mode;
5082 
5083   intoffset /= BITS_PER_UNIT;
5084   do
5085     {
5086       regno = parms->regbase + this_slotno;
5087       reg = gen_rtx_REG (mode, regno);
5088       XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5089 	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
5090 
5091       this_slotno += 1;
5092       intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
5093       mode = word_mode;
5094       parms->nregs += 1;
5095       intslots -= 1;
5096     }
5097   while (intslots > 0);
5098 }
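
/* Example (a sketch): with BITS_PER_WORD == 64 and parms->intoffset == 32,
   the first iteration above uses SImode so that only the trailing four
   bytes of the word are loaded; later iterations fall back to word_mode.  */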
5099 
5100 /* A subroutine of function_arg_record_value.  Traverse the structure
5101    recursively and assign bits to floating point registers.  Track which
5102    bits in between need integer registers; invoke function_arg_record_value_3
5103    to make that happen.  */
5104 
5105 static void
5106 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
5107 			     struct function_arg_record_value_parms *parms,
5108 			     bool packed_p)
5109 {
5110   tree field;
5111 
5112   if (! packed_p)
5113     for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5114       {
5115 	if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
5116 	  {
5117 	    packed_p = true;
5118 	    break;
5119 	  }
5120       }
5121 
5122   for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5123     {
5124       if (TREE_CODE (field) == FIELD_DECL)
5125 	{
5126 	  HOST_WIDE_INT bitpos = startbitpos;
5127 
5128 	  if (DECL_SIZE (field) != 0)
5129 	    {
5130 	      if (integer_zerop (DECL_SIZE (field)))
5131 		continue;
5132 
5133 	      if (host_integerp (bit_position (field), 1))
5134 		bitpos += int_bit_position (field);
5135 	    }
5136 
5137 	  /* ??? FIXME: else assume zero offset.  */
5138 
5139 	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5140 	    function_arg_record_value_2 (TREE_TYPE (field),
5141 	    				 bitpos,
5142 					 parms,
5143 					 packed_p);
5144 	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5145 		    || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5146 		   && TARGET_FPU
5147 		   && parms->named
5148 		   && ! packed_p)
5149 	    {
5150 	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
5151 	      int regno, nregs, pos;
5152 	      enum machine_mode mode = DECL_MODE (field);
5153 	      rtx reg;
5154 
5155 	      function_arg_record_value_3 (bitpos, parms);
5156 
5157 	      if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
5158 		  && mode == BLKmode)
5159 	        {
5160 		  mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5161 		  nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
5162 		}
5163 	      else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
5164 	        {
5165 		  mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5166 		  nregs = 2;
5167 		}
5168 	      else
5169 	        nregs = 1;
5170 
5171 	      regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
5172 	      if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
5173 		regno++;
5174 	      reg = gen_rtx_REG (mode, regno);
5175 	      pos = bitpos / BITS_PER_UNIT;
5176 	      XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5177 		= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5178 	      parms->nregs += 1;
5179 	      while (--nregs > 0)
5180 		{
5181 		  regno += GET_MODE_SIZE (mode) / 4;
5182 	  	  reg = gen_rtx_REG (mode, regno);
5183 		  pos += GET_MODE_SIZE (mode);
5184 		  XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5185 		    = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5186 		  parms->nregs += 1;
5187 		}
5188 	    }
5189 	  else
5190 	    {
5191 	      if (parms->intoffset == -1)
5192 		parms->intoffset = bitpos;
5193 	    }
5194 	}
5195     }
5196 }
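
/* Worked example (illustrative): a float at byte offset 4 of the first
   slot has bitpos == 32, so this_slotno == 0 and regno starts at
   SPARC_FP_ARG_FIRST; because GET_MODE_SIZE (SFmode) <= 4 and bit 32 of
   bitpos is set, regno is bumped to the odd register, i.e. %f1.  */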
5197 
5198 /* Used by function_arg and function_value to implement the complex
5199    conventions of the 64-bit ABI for passing and returning structures.
5200    Return an expression valid as a return value for the two macros
5201    FUNCTION_ARG and FUNCTION_VALUE.
5202 
5203    TYPE is the data type of the argument (as a tree).
5204     This is null for libcalls where that information may
5205     not be available.
5206    MODE is the argument's machine mode.
5207    SLOTNO is the index number of the argument's slot in the parameter array.
5208    NAMED is nonzero if this argument is a named parameter
5209     (otherwise it is an extra parameter matching an ellipsis).
5210    REGBASE is the regno of the base register for the parameter array.  */
5211 
5212 static rtx
5213 function_arg_record_value (const_tree type, enum machine_mode mode,
5214 			   int slotno, int named, int regbase)
5215 {
5216   HOST_WIDE_INT typesize = int_size_in_bytes (type);
5217   struct function_arg_record_value_parms parms;
5218   unsigned int nregs;
5219 
5220   parms.ret = NULL_RTX;
5221   parms.slotno = slotno;
5222   parms.named = named;
5223   parms.regbase = regbase;
5224   parms.stack = 0;
5225 
5226   /* Compute how many registers we need.  */
5227   parms.nregs = 0;
5228   parms.intoffset = 0;
5229   function_arg_record_value_1 (type, 0, &parms, false);
5230 
5231   /* Take into account pending integer fields.  */
5232   if (parms.intoffset != -1)
5233     {
5234       unsigned int startbit, endbit;
5235       int intslots, this_slotno;
5236 
5237       startbit = parms.intoffset & -BITS_PER_WORD;
5238       endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5239       intslots = (endbit - startbit) / BITS_PER_WORD;
5240       this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
5241 
5242       if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
5243         {
5244 	  intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
5245 	  /* We need to pass this field on the stack.  */
5246 	  parms.stack = 1;
5247         }
5248 
5249       parms.nregs += intslots;
5250     }
5251   nregs = parms.nregs;
5252 
5253   /* Allocate the vector and handle some annoying special cases.  */
5254   if (nregs == 0)
5255     {
5256       /* ??? Empty structure has no value?  Duh?  */
5257       if (typesize <= 0)
5258 	{
5259 	  /* Though there's nothing really to store, return a word register
5260 	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
5261 	     leads to breakage due to the fact that there are zero bytes to
5262 	     load.  */
5263 	  return gen_rtx_REG (mode, regbase);
5264 	}
5265       else
5266 	{
5267 	  /* ??? C++ has structures with no fields, and yet a size.  Give up
5268 	     for now and pass everything back in integer registers.  */
5269 	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5270 	}
5271       if (nregs + slotno > SPARC_INT_ARG_MAX)
5272 	nregs = SPARC_INT_ARG_MAX - slotno;
5273     }
5274   gcc_assert (nregs != 0);
5275 
5276   parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
5277 
5278   /* If at least one field must be passed on the stack, generate
5279      (parallel [(expr_list (nil) ...) ...]) so that all fields will
5280      also be passed on the stack.  We can't do much better because the
5281      semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
5282      of structures for which the fields passed exclusively in registers
5283      are not at the beginning of the structure.  */
5284   if (parms.stack)
5285     XVECEXP (parms.ret, 0, 0)
5286       = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5287 
5288   /* Fill in the entries.  */
5289   parms.nregs = 0;
5290   parms.intoffset = 0;
5291   function_arg_record_value_2 (type, 0, &parms, false);
5292   function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
5293 
5294   gcc_assert (parms.nregs == nregs);
5295 
5296   return parms.ret;
5297 }
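
/* As a sketch of the result (assuming SPARC64, slot 0 and the outgoing
   int register base), struct { int a; float b; } yields roughly

     (parallel [(expr_list (reg:DI %o0) (const_int 0))
                (expr_list (reg:SF %f1) (const_int 4))])

   where the DImode word also covers B's bytes; see the note about
   unwanted bits in function_arg_record_value_3.  */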
5298 
5299 /* Used by function_arg and function_value to implement the conventions
5300    of the 64-bit ABI for passing and returning unions.
5301    Return an expression valid as a return value for the two macros
5302    FUNCTION_ARG and FUNCTION_VALUE.
5303 
5304    SIZE is the size in bytes of the union.
   MODE is the argument's machine mode.
   SLOTNO is the index number of the argument's slot in the parameter array.
   REGNO is the hard register the union will be passed in.  */
5307 
5308 static rtx
5309 function_arg_union_value (int size, enum machine_mode mode, int slotno,
5310 			  int regno)
5311 {
5312   int nwords = ROUND_ADVANCE (size), i;
5313   rtx regs;
5314 
5315   /* See comment in previous function for empty structures.  */
5316   if (nwords == 0)
5317     return gen_rtx_REG (mode, regno);
5318 
5319   if (slotno == SPARC_INT_ARG_MAX - 1)
5320     nwords = 1;
5321 
5322   regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
5323 
5324   for (i = 0; i < nwords; i++)
5325     {
5326       /* Unions are passed left-justified.  */
5327       XVECEXP (regs, 0, i)
5328 	= gen_rtx_EXPR_LIST (VOIDmode,
5329 			     gen_rtx_REG (word_mode, regno),
5330 			     GEN_INT (UNITS_PER_WORD * i));
5331       regno++;
5332     }
5333 
5334   return regs;
5335 }
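
/* E.g. (illustrative) a 16-byte union passed in slots 2 and 3 yields

     (parallel [(expr_list (reg:DI %o2) (const_int 0))
                (expr_list (reg:DI %o3) (const_int 8))])

   unless the first word lands in the last slot (slotno ==
   SPARC_INT_ARG_MAX - 1), in which case only one word goes in a
   register.  */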
5336 
5337 /* Used by function_arg and function_value to implement the conventions
5338    for passing and returning large (BLKmode) vectors.
5339    Return an expression valid as a return value for the two macros
5340    FUNCTION_ARG and FUNCTION_VALUE.
5341 
5342    SIZE is the size in bytes of the vector (at least 8 bytes).
5343    REGNO is the FP hard register the vector will be passed in.  */
5344 
5345 static rtx
5346 function_arg_vector_value (int size, int regno)
5347 {
5348   int i, nregs = size / 8;
5349   rtx regs;
5350 
5351   regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
5352 
5353   for (i = 0; i < nregs; i++)
5354     {
5355       XVECEXP (regs, 0, i)
5356 	= gen_rtx_EXPR_LIST (VOIDmode,
5357 			     gen_rtx_REG (DImode, regno + 2*i),
5358 			     GEN_INT (i*8));
5359     }
5360 
5361   return regs;
5362 }
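
/* E.g. (illustrative) a 16-byte vector starting at %f0 becomes

     (parallel [(expr_list (reg:DI %f0) (const_int 0))
                (expr_list (reg:DI %f2) (const_int 8))])

   since each DImode piece occupies an even/odd FP register pair.  */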
5363 
5364 /* Handle the FUNCTION_ARG macro.
5365    Determine where to put an argument to a function.
5366    Value is zero to push the argument on the stack,
5367    or a hard register in which to store the argument.
5368 
5369    CUM is a variable of type CUMULATIVE_ARGS which gives info about
5370     the preceding args and about the function being called.
5371    MODE is the argument's machine mode.
5372    TYPE is the data type of the argument (as a tree).
5373     This is null for libcalls where that information may
5374     not be available.
5375    NAMED is nonzero if this argument is a named parameter
5376     (otherwise it is an extra parameter matching an ellipsis).
5377    INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.  */
5378 
5379 rtx
5380 function_arg (const struct sparc_args *cum, enum machine_mode mode,
5381 	      tree type, int named, int incoming_p)
5382 {
5383   int regbase = (incoming_p
5384 		 ? SPARC_INCOMING_INT_ARG_FIRST
5385 		 : SPARC_OUTGOING_INT_ARG_FIRST);
5386   int slotno, regno, padding;
5387   enum mode_class mclass = GET_MODE_CLASS (mode);
5388 
5389   slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
5390 				&regno, &padding);
5391   if (slotno == -1)
5392     return 0;
5393 
5394   /* Vector types deserve special treatment because they are polymorphic wrt
5395      their mode, depending upon whether VIS instructions are enabled.  */
5396   if (type && TREE_CODE (type) == VECTOR_TYPE)
5397     {
5398       HOST_WIDE_INT size = int_size_in_bytes (type);
5399       gcc_assert ((TARGET_ARCH32 && size <= 8)
5400 		  || (TARGET_ARCH64 && size <= 16));
5401 
5402       if (mode == BLKmode)
5403 	return function_arg_vector_value (size,
5404 					  SPARC_FP_ARG_FIRST + 2*slotno);
5405       else
5406 	mclass = MODE_FLOAT;
5407     }
5408 
5409   if (TARGET_ARCH32)
5410     return gen_rtx_REG (mode, regno);
5411 
5412   /* Structures up to 16 bytes in size are passed in arg slots on the stack
5413      and are promoted to registers if possible.  */
5414   if (type && TREE_CODE (type) == RECORD_TYPE)
5415     {
5416       HOST_WIDE_INT size = int_size_in_bytes (type);
5417       gcc_assert (size <= 16);
5418 
5419       return function_arg_record_value (type, mode, slotno, named, regbase);
5420     }
5421 
5422   /* Unions up to 16 bytes in size are passed in integer registers.  */
5423   else if (type && TREE_CODE (type) == UNION_TYPE)
5424     {
5425       HOST_WIDE_INT size = int_size_in_bytes (type);
5426       gcc_assert (size <= 16);
5427 
5428       return function_arg_union_value (size, mode, slotno, regno);
5429     }
5430 
  /* V9 FP args in reg slots beyond the int reg slots get passed in FP regs
     but also have the stack slot allocated for them.
     If no prototype is in scope, FP values in register slots get passed
     in two places: either FP regs and int regs, or FP regs and memory.  */
5435   else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
5436 	   && SPARC_FP_REG_P (regno))
5437     {
5438       rtx reg = gen_rtx_REG (mode, regno);
5439       if (cum->prototype_p || cum->libcall_p)
5440 	{
5441 	  /* "* 2" because fp reg numbers are recorded in 4 byte
5442 	     quantities.  */
5443 #if 0
5444 	  /* ??? This will cause the value to be passed in the fp reg and
5445 	     in the stack.  When a prototype exists we want to pass the
5446 	     value in the reg but reserve space on the stack.  That's an
5447 	     optimization, and is deferred [for a bit].  */
5448 	  if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
5449 	    return gen_rtx_PARALLEL (mode,
5450 			    gen_rtvec (2,
5451 				       gen_rtx_EXPR_LIST (VOIDmode,
5452 						NULL_RTX, const0_rtx),
5453 				       gen_rtx_EXPR_LIST (VOIDmode,
5454 						reg, const0_rtx)));
5455 	  else
5456 #else
5457 	  /* ??? It seems that passing back a register even when past
5458 	     the area declared by REG_PARM_STACK_SPACE will allocate
5459 	     space appropriately, and will not copy the data onto the
5460 	     stack, exactly as we desire.
5461 
5462 	     This is due to locate_and_pad_parm being called in
5463 	     expand_call whenever reg_parm_stack_space > 0, which
5464 	     while beneficial to our example here, would seem to be
5465 	     in error from what had been intended.  Ho hum...  -- r~ */
5466 #endif
5467 	    return reg;
5468 	}
5469       else
5470 	{
5471 	  rtx v0, v1;
5472 
5473 	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
5474 	    {
5475 	      int intreg;
5476 
5477 	      /* On incoming, we don't need to know that the value
5478 		 is passed in %f0 and %i0, and it confuses other parts
5479 		 causing needless spillage even on the simplest cases.  */
5480 	      if (incoming_p)
5481 		return reg;
5482 
5483 	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
5484 			+ (regno - SPARC_FP_ARG_FIRST) / 2);
5485 
5486 	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5487 	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
5488 				      const0_rtx);
5489 	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5490 	    }
5491 	  else
5492 	    {
5493 	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5494 	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5495 	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5496 	    }
5497 	}
5498     }
5499 
5500   /* All other aggregate types are passed in an integer register in a mode
5501      corresponding to the size of the type.  */
5502   else if (type && AGGREGATE_TYPE_P (type))
5503     {
5504       HOST_WIDE_INT size = int_size_in_bytes (type);
5505       gcc_assert (size <= 16);
5506 
5507       mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5508     }
5509 
5510   return gen_rtx_REG (mode, regno);
5511 }
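
/* For instance (a sketch, not from the ABI text): on SPARC64 an 8-byte
   array type falls through to the final aggregate clause above and is
   passed as a single DImode value in the slot's integer register.  */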
5512 
5513 /* For an arg passed partly in registers and partly in memory,
5514    this is the number of bytes of registers used.
5515    For args passed entirely in registers or entirely in memory, zero.
5516 
5517    Any arg that starts in the first 6 regs but won't entirely fit in them
5518    needs partial registers on v8.  On v9, structures with integer
5519    values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
5520    values that begin in the last fp reg [where "last fp reg" varies with the
5521    mode] will be split between that reg and memory.  */
5522 
5523 static int
5524 sparc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5525 			 tree type, bool named)
5526 {
5527   int slotno, regno, padding;
5528 
  /* We pass 0 for incoming_p here; it doesn't matter.  */
5530   slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5531 
5532   if (slotno == -1)
5533     return 0;
5534 
5535   if (TARGET_ARCH32)
5536     {
5537       if ((slotno + (mode == BLKmode
5538 		     ? ROUND_ADVANCE (int_size_in_bytes (type))
5539 		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
5540 	  > SPARC_INT_ARG_MAX)
5541 	return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
5542     }
5543   else
5544     {
5545       /* We are guaranteed by pass_by_reference that the size of the
5546 	 argument is not greater than 16 bytes, so we only need to return
5547 	 one word if the argument is partially passed in registers.  */
5548 
5549       if (type && AGGREGATE_TYPE_P (type))
5550 	{
5551 	  int size = int_size_in_bytes (type);
5552 
5553 	  if (size > UNITS_PER_WORD
5554 	      && slotno == SPARC_INT_ARG_MAX - 1)
5555 	    return UNITS_PER_WORD;
5556 	}
5557       else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
5558 	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5559 		   && ! (TARGET_FPU && named)))
5560 	{
5561 	  /* The complex types are passed as packed types.  */
5562 	  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
5563 	      && slotno == SPARC_INT_ARG_MAX - 1)
5564 	    return UNITS_PER_WORD;
5565 	}
5566       else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5567 	{
5568 	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
5569 	      > SPARC_FP_ARG_MAX)
5570 	    return UNITS_PER_WORD;
5571 	}
5572     }
5573 
5574   return 0;
5575 }
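
/* Example (illustrative): on SPARC64, a 16-byte struct whose first word
   lands in the last int reg slot (slotno == SPARC_INT_ARG_MAX - 1) has
   that word passed in %o5 and the rest in memory, so we return
   UNITS_PER_WORD.  */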
5576 
5577 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
5578    Specify whether to pass the argument by reference.  */
5579 
5580 static bool
5581 sparc_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5582 			 enum machine_mode mode, const_tree type,
5583 			 bool named ATTRIBUTE_UNUSED)
5584 {
5585   if (TARGET_ARCH32)
5586     /* Original SPARC 32-bit ABI says that structures and unions,
5587        and quad-precision floats are passed by reference.  For Pascal,
5588        also pass arrays by reference.  All other base types are passed
5589        in registers.
5590 
5591        Extended ABI (as implemented by the Sun compiler) says that all
5592        complex floats are passed by reference.  Pass complex integers
5593        in registers up to 8 bytes.  More generally, enforce the 2-word
5594        cap for passing arguments in registers.
5595 
5596        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5597        integers are passed like floats of the same size, that is in
5598        registers up to 8 bytes.  Pass all vector floats by reference
5599        like structure and unions.  */
5600     return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
5601 	    || mode == SCmode
5602 	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
5603 	    || GET_MODE_SIZE (mode) > 8
5604 	    || (type
5605 		&& TREE_CODE (type) == VECTOR_TYPE
5606 		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5607   else
5608     /* Original SPARC 64-bit ABI says that structures and unions
5609        smaller than 16 bytes are passed in registers, as well as
5610        all other base types.
5611 
5612        Extended ABI (as implemented by the Sun compiler) says that
5613        complex floats are passed in registers up to 16 bytes.  Pass
5614        all complex integers in registers up to 16 bytes.  More generally,
5615        enforce the 2-word cap for passing arguments in registers.
5616 
5617        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5618        integers are passed like floats of the same size, that is in
5619        registers (up to 16 bytes).  Pass all vector floats like structure
5620        and unions.  */
5621     return ((type
5622 	     && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
5623 	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
5624 	    /* Catch CTImode and TCmode.  */
5625 	    || GET_MODE_SIZE (mode) > 16);
5626 }
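
/* Illustrative cases: on ARCH32, 'long double' (TFmode, 16 bytes) and
   '_Complex float' (SCmode) are passed by reference; on ARCH64, a
   24-byte struct or '_Complex long double' (TCmode, 32 bytes) is passed
   by reference, while a 16-byte struct is not.  */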
5627 
5628 /* Handle the FUNCTION_ARG_ADVANCE macro.
5629    Update the data in CUM to advance over an argument
5630    of mode MODE and data type TYPE.
5631    TYPE is null for libcalls where that information may not be available.  */
5632 
5633 void
5634 function_arg_advance (struct sparc_args *cum, enum machine_mode mode,
5635 		      tree type, int named)
5636 {
5637   int regno, padding;
5638 
  /* We pass 0 for incoming_p here; it doesn't matter.  */
5640   function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5641 
5642   /* If argument requires leading padding, add it.  */
5643   cum->words += padding;
5644 
5645   if (TARGET_ARCH32)
5646     {
5647       cum->words += (mode != BLKmode
5648 		     ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5649 		     : ROUND_ADVANCE (int_size_in_bytes (type)));
5650     }
5651   else
5652     {
5653       if (type && AGGREGATE_TYPE_P (type))
5654 	{
5655 	  int size = int_size_in_bytes (type);
5656 
5657 	  if (size <= 8)
5658 	    ++cum->words;
5659 	  else if (size <= 16)
5660 	    cum->words += 2;
5661 	  else /* passed by reference */
5662 	    ++cum->words;
5663 	}
5664       else
5665 	{
5666 	  cum->words += (mode != BLKmode
5667 			 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5668 			 : ROUND_ADVANCE (int_size_in_bytes (type)));
5669 	}
5670     }
5671 }
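
/* E.g. (illustrative) on ARCH64 a 12-byte struct advances cum->words by
   two, while a 24-byte struct is passed by reference and advances it by
   one word for the pointer.  */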
5672 
5673 /* Handle the FUNCTION_ARG_PADDING macro.
   For the 64-bit ABI, structs are always stored left-justified in their
   argument slot.  */
5676 
5677 enum direction
5678 function_arg_padding (enum machine_mode mode, const_tree type)
5679 {
5680   if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
5681     return upward;
5682 
5683   /* Fall back to the default.  */
5684   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
5685 }
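
/* E.g. (illustrative) a 3-byte struct on ARCH64 occupies the most
   significant bytes of its big-endian slot (padded upward), whereas a
   scalar 'char' keeps the default downward padding.  */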
5686 
5687 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
5688    Specify whether to return the return value in memory.  */
5689 
5690 static bool
5691 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5692 {
5693   if (TARGET_ARCH32)
5694     /* Original SPARC 32-bit ABI says that structures and unions,
5695        and quad-precision floats are returned in memory.  All other
5696        base types are returned in registers.
5697 
5698        Extended ABI (as implemented by the Sun compiler) says that
5699        all complex floats are returned in registers (8 FP registers
5700        at most for '_Complex long double').  Return all complex integers
5701        in registers (4 at most for '_Complex long long').
5702 
5703        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5704        integers are returned like floats of the same size, that is in
5705        registers up to 8 bytes and in memory otherwise.  Return all
5706        vector floats in memory like structure and unions; note that
5707        they always have BLKmode like the latter.  */
5708     return (TYPE_MODE (type) == BLKmode
5709 	    || TYPE_MODE (type) == TFmode
5710 	    || (TREE_CODE (type) == VECTOR_TYPE
5711 		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5712   else
5713     /* Original SPARC 64-bit ABI says that structures and unions
5714        smaller than 32 bytes are returned in registers, as well as
5715        all other base types.
5716 
5717        Extended ABI (as implemented by the Sun compiler) says that all
5718        complex floats are returned in registers (8 FP registers at most
5719        for '_Complex long double').  Return all complex integers in
5720        registers (4 at most for '_Complex TItype').
5721 
5722        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5723        integers are returned like floats of the same size, that is in
5724        registers.  Return all vector floats like structure and unions;
5725        note that they always have BLKmode like the latter.  */
5726     return ((TYPE_MODE (type) == BLKmode
5727 	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32));
5728 }
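
/* Illustrative cases: '_Complex double' (DCmode, 16 bytes) is returned
   in registers on both ARCH32 and ARCH64, while a 40-byte struct
   (BLKmode) is returned in memory on both.  */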
5729 
5730 /* Handle the TARGET_STRUCT_VALUE target hook.
5731    Return where to find the structure return value address.  */
5732 
5733 static rtx
5734 sparc_struct_value_rtx (tree fndecl, int incoming)
5735 {
5736   if (TARGET_ARCH64)
5737     return 0;
5738   else
5739     {
5740       rtx mem;
5741 
5742       if (incoming)
5743 	mem = gen_rtx_MEM (Pmode, plus_constant (frame_pointer_rtx,
5744 						 STRUCT_VALUE_OFFSET));
5745       else
5746 	mem = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx,
5747 						 STRUCT_VALUE_OFFSET));
5748 
5749       /* Only follow the SPARC ABI for fixed-size structure returns.
         Variable-sized structure returns are handled by the normal
         procedures in GCC.  This is enabled by -mstd-struct-return.  */
5752       if (incoming == 2
5753 	  && sparc_std_struct_return
5754 	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
5755 	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
5756 	{
5757 	  /* We must check and adjust the return address, as it is
5758 	     optional as to whether the return object is really
5759 	     provided.  */
5760 	  rtx ret_rtx = gen_rtx_REG (Pmode, 31);
5761 	  rtx scratch = gen_reg_rtx (SImode);
5762 	  rtx endlab = gen_label_rtx ();
5763 
	  /* Calculate the return object size.  */
5765 	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
5766 	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
	  /* Construct a temporary return value.  */
5768 	  rtx temp_val = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
5769 
	  /* Implement the SPARC 32-bit psABI callee struct return checking
5771 	     requirements:
5772 
5773 	      Fetch the instruction where we will return to and see if
5774 	     it's an unimp instruction (the most significant 10 bits
5775 	     will be zero).  */
5776 	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
5777 						plus_constant (ret_rtx, 8)));
	  /* Assume the size is valid and pre-adjust.  */
5779 	  emit_insn (gen_add3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
5780 	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, 0, endlab);
5781 	  emit_insn (gen_sub3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
5782 	  /* Assign stack temp:
5783 	     Write the address of the memory pointed to by temp_val into
	     the memory pointed to by mem.  */
5785 	  emit_move_insn (mem, XEXP (temp_val, 0));
5786 	  emit_label (endlab);
5787 	}
5788 
5789       set_mem_alias_set (mem, struct_value_alias_set);
5790       return mem;
5791     }
5792 }
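
/* The 32-bit convention checked above looks roughly like (illustrative):

	call	foo
	 nop
	unimp	<size>		! low 12 bits hold the struct size

   A conforming callee that finds the matching unimp word returns to the
   call point + 12 instead of + 8, skipping the unimp instruction.  */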
5793 
5794 /* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
5795    For v9, function return values are subject to the same rules as arguments,
5796    except that up to 32 bytes may be returned in registers.  */
5797 
5798 rtx
5799 function_value (const_tree type, enum machine_mode mode, int incoming_p)
5800 {
5801   /* Beware that the two values are swapped here wrt function_arg.  */
5802   int regbase = (incoming_p
5803 		 ? SPARC_OUTGOING_INT_ARG_FIRST
5804 		 : SPARC_INCOMING_INT_ARG_FIRST);
5805   enum mode_class mclass = GET_MODE_CLASS (mode);
5806   int regno;
5807 
5808   /* Vector types deserve special treatment because they are polymorphic wrt
5809      their mode, depending upon whether VIS instructions are enabled.  */
5810   if (type && TREE_CODE (type) == VECTOR_TYPE)
5811     {
5812       HOST_WIDE_INT size = int_size_in_bytes (type);
5813       gcc_assert ((TARGET_ARCH32 && size <= 8)
5814 		  || (TARGET_ARCH64 && size <= 32));
5815 
5816       if (mode == BLKmode)
5817 	return function_arg_vector_value (size,
5818 					  SPARC_FP_ARG_FIRST);
5819       else
5820 	mclass = MODE_FLOAT;
5821     }
5822 
5823   if (TARGET_ARCH64 && type)
5824     {
5825       /* Structures up to 32 bytes in size are returned in registers.  */
5826       if (TREE_CODE (type) == RECORD_TYPE)
5827 	{
5828 	  HOST_WIDE_INT size = int_size_in_bytes (type);
5829 	  gcc_assert (size <= 32);
5830 
5831 	  return function_arg_record_value (type, mode, 0, 1, regbase);
5832 	}
5833 
5834       /* Unions up to 32 bytes in size are returned in integer registers.  */
5835       else if (TREE_CODE (type) == UNION_TYPE)
5836 	{
5837 	  HOST_WIDE_INT size = int_size_in_bytes (type);
5838 	  gcc_assert (size <= 32);
5839 
5840 	  return function_arg_union_value (size, mode, 0, regbase);
5841 	}
5842 
5843       /* Objects that require it are returned in FP registers.  */
5844       else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
5845 	;
5846 
5847       /* All other aggregate types are returned in an integer register in a
5848 	 mode corresponding to the size of the type.  */
5849       else if (AGGREGATE_TYPE_P (type))
5850 	{
5851 	  /* All other aggregate types are passed in an integer register
5852 	     in a mode corresponding to the size of the type.  */
5853 	  HOST_WIDE_INT size = int_size_in_bytes (type);
5854 	  gcc_assert (size <= 32);
5855 
5856 	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5857 
5858 	  /* ??? We probably should have made the same ABI change in
5859 	     3.4.0 as the one we made for unions.   The latter was
5860 	     required by the SCD though, while the former is not
5861 	     specified, so we favored compatibility and efficiency.
5862 
5863 	     Now we're stuck for aggregates larger than 16 bytes,
5864 	     because OImode vanished in the meantime.  Let's not
5865 	     try to be unduly clever, and simply follow the ABI
5866 	     for unions in that case.  */
5867 	  if (mode == BLKmode)
5868 	    return function_arg_union_value (size, mode, 0, regbase);
5869 	  else
5870 	    mclass = MODE_INT;
5871 	}
5872 
5873       /* This must match sparc_promote_function_mode.
5874 	 ??? Maybe 32-bit pointers should actually remain in Pmode?  */
5875       else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5876 	mode = word_mode;
5877     }
5878 
5879   if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
5880     regno = SPARC_FP_ARG_FIRST;
5881   else
5882     regno = regbase;
5883 
5884   return gen_rtx_REG (mode, regno);
5885 }
5886 
5887 /* Do what is necessary for `va_start'.  We look at the current function
5888    to determine if stdarg or varargs is used and return the address of
5889    the first unnamed parameter.  */
5890 
5891 static rtx
5892 sparc_builtin_saveregs (void)
5893 {
5894   int first_reg = crtl->args.info.words;
5895   rtx address;
5896   int regno;
5897 
5898   for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
5899     emit_move_insn (gen_rtx_MEM (word_mode,
5900 				 gen_rtx_PLUS (Pmode,
5901 					       frame_pointer_rtx,
5902 					       GEN_INT (FIRST_PARM_OFFSET (0)
5903 							+ (UNITS_PER_WORD
5904 							   * regno)))),
5905 		    gen_rtx_REG (word_mode,
5906 				 SPARC_INCOMING_INT_ARG_FIRST + regno));
5907 
5908   address = gen_rtx_PLUS (Pmode,
5909 			  frame_pointer_rtx,
5910 			  GEN_INT (FIRST_PARM_OFFSET (0)
5911 				   + UNITS_PER_WORD * first_reg));
5912 
5913   return address;
5914 }
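
/* E.g. (illustrative) for 'int f (int a, ...)' on ARCH32, first_reg is 1,
   so the loop stores %i1 through %i5 into their reserved stack slots and
   the returned address points at the slot of the first anonymous word.  */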
5915 
5916 /* Implement `va_start' for stdarg.  */
5917 
5918 static void
5919 sparc_va_start (tree valist, rtx nextarg)
5920 {
5921   nextarg = expand_builtin_saveregs ();
5922   std_expand_builtin_va_start (valist, nextarg);
5923 }
5924 
5925 /* Implement `va_arg' for stdarg.  */
5926 
5927 static tree
5928 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5929 		       gimple_seq *post_p)
5930 {
5931   HOST_WIDE_INT size, rsize, align;
5932   tree addr, incr;
5933   bool indirect;
5934   tree ptrtype = build_pointer_type (type);
5935 
5936   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5937     {
5938       indirect = true;
5939       size = rsize = UNITS_PER_WORD;
5940       align = 0;
5941     }
5942   else
5943     {
5944       indirect = false;
5945       size = int_size_in_bytes (type);
5946       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5947       align = 0;
5948 
5949       if (TARGET_ARCH64)
5950 	{
5951 	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
5952 	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
5953 	    align = 2 * UNITS_PER_WORD;
5954 
5955 	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
5956 	     are left-justified in their slots.  */
5957 	  if (AGGREGATE_TYPE_P (type))
5958 	    {
5959 	      if (size == 0)
5960 		size = rsize = UNITS_PER_WORD;
5961 	      else
5962 		size = rsize;
5963 	    }
5964 	}
5965     }
5966 
5967   incr = valist;
5968   if (align)
5969     {
5970       incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
5971 			  size_int (align - 1));
5972       incr = fold_convert (sizetype, incr);
5973       incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
5974 			  size_int (-align));
5975       incr = fold_convert (ptr_type_node, incr);
5976     }
5977 
5978   gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
5979   addr = incr;
5980 
5981   if (BYTES_BIG_ENDIAN && size < rsize)
5982     addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
5983 			size_int (rsize - size));
5984 
5985   if (indirect)
5986     {
5987       addr = fold_convert (build_pointer_type (ptrtype), addr);
5988       addr = build_va_arg_indirect_ref (addr);
5989     }
5990 
5991   /* If the address isn't aligned properly for the type, we need a temporary.
5992      FIXME: This is inefficient, usually we can do this in registers.  */
5993   else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
5994     {
5995       tree tmp = create_tmp_var (type, "va_arg_tmp");
5996       tree dest_addr = build_fold_addr_expr (tmp);
5997       tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
5998 				   3, dest_addr, addr, size_int (rsize));
5999       TREE_ADDRESSABLE (tmp) = 1;
6000       gimplify_and_add (copy, pre_p);
6001       addr = dest_addr;
6002     }
6003 
6004   else
6005     addr = fold_convert (ptrtype, addr);
6006 
6007   incr
6008     = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, size_int (rsize));
6009   gimplify_assign (valist, incr, post_p);
6010 
6011   return build_va_arg_indirect_ref (addr);
6012 }
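
/* E.g. (illustrative) on ARCH64 (big-endian), fetching an 'int' (size 4,
   rsize 8) reads from valist + 4, the tail of its 8-byte slot, and then
   advances valist by 8.  */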
6013 
6014 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
6015    Specify whether the vector mode is supported by the hardware.  */
6016 
6017 static bool
6018 sparc_vector_mode_supported_p (enum machine_mode mode)
6019 {
  return TARGET_VIS && VECTOR_MODE_P (mode);
6021 }
6022 
6023 /* Return the string to output an unconditional branch to LABEL, which is
6024    the operand number of the label.
6025 
6026    DEST is the destination insn (i.e. the label), INSN is the source.  */
6027 
6028 const char *
6029 output_ubranch (rtx dest, int label, rtx insn)
6030 {
6031   static char string[64];
6032   bool v9_form = false;
6033   char *p;
6034 
6035   if (TARGET_V9 && INSN_ADDRESSES_SET_P ())
6036     {
6037       int delta = (INSN_ADDRESSES (INSN_UID (dest))
6038 		   - INSN_ADDRESSES (INSN_UID (insn)));
6039       /* Leave some instructions for "slop".  */
6040       if (delta >= -260000 && delta < 260000)
6041 	v9_form = true;
6042     }
6043 
6044   if (v9_form)
6045     strcpy (string, "ba%*,pt\t%%xcc, ");
6046   else
6047     strcpy (string, "b%*\t");
6048 
6049   p = strchr (string, '\0');
6050   *p++ = '%';
6051   *p++ = 'l';
6052   *p++ = '0' + label;
6053   *p++ = '%';
6054   *p++ = '(';
6055   *p = '\0';
6056 
6057   return string;
6058 }
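
/* E.g. for a nearby target on V9 the routine above returns the template

     "ba%*,pt\t%%xcc, %l0%("

   which final substitutes into something like "ba,pt %xcc, .LC30", with
   %* and %( expanding to annul and delay-slot annotations as needed.  */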
6059 
6060 /* Return the string to output a conditional branch to LABEL, which is
6061    the operand number of the label.  OP is the conditional expression.
6062    XEXP (OP, 0) is assumed to be a condition code register (integer or
6063    floating point) and its mode specifies what kind of comparison we made.
6064 
6065    DEST is the destination insn (i.e. the label), INSN is the source.
6066 
6067    REVERSED is nonzero if we should reverse the sense of the comparison.
6068 
6069    ANNUL is nonzero if we should generate an annulling branch.  */
6070 
6071 const char *
6072 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
6073 		rtx insn)
6074 {
6075   static char string[64];
6076   enum rtx_code code = GET_CODE (op);
6077   rtx cc_reg = XEXP (op, 0);
6078   enum machine_mode mode = GET_MODE (cc_reg);
6079   const char *labelno, *branch;
6080   int spaces = 8, far;
6081   char *p;
6082 
6083   /* v9 branches are limited to +-1MB.  If it is too far away,
6084      change
6085 
6086      bne,pt %xcc, .LC30
6087 
6088      to
6089 
6090      be,pn %xcc, .+12
6091       nop
6092      ba .LC30
6093 
6094      and
6095 
6096      fbne,a,pn %fcc2, .LC29
6097 
6098      to
6099 
6100      fbe,pt %fcc2, .+16
6101       nop
6102      ba .LC29  */
6103 
6104   far = TARGET_V9 && (get_attr_length (insn) >= 3);
6105   if (reversed ^ far)
6106     {
      /* Reversal of FP compares takes extra care -- an ordered compare
6108 	 becomes an unordered compare and vice versa.  */
6109       if (mode == CCFPmode || mode == CCFPEmode)
6110 	code = reverse_condition_maybe_unordered (code);
6111       else
6112 	code = reverse_condition (code);
6113     }
6114 
6115   /* Start by writing the branch condition.  */
6116   if (mode == CCFPmode || mode == CCFPEmode)
6117     {
6118       switch (code)
6119 	{
6120 	case NE:
6121 	  branch = "fbne";
6122 	  break;
6123 	case EQ:
6124 	  branch = "fbe";
6125 	  break;
6126 	case GE:
6127 	  branch = "fbge";
6128 	  break;
6129 	case GT:
6130 	  branch = "fbg";
6131 	  break;
6132 	case LE:
6133 	  branch = "fble";
6134 	  break;
6135 	case LT:
6136 	  branch = "fbl";
6137 	  break;
6138 	case UNORDERED:
6139 	  branch = "fbu";
6140 	  break;
6141 	case ORDERED:
6142 	  branch = "fbo";
6143 	  break;
6144 	case UNGT:
6145 	  branch = "fbug";
6146 	  break;
6147 	case UNLT:
6148 	  branch = "fbul";
6149 	  break;
6150 	case UNEQ:
6151 	  branch = "fbue";
6152 	  break;
6153 	case UNGE:
6154 	  branch = "fbuge";
6155 	  break;
6156 	case UNLE:
6157 	  branch = "fbule";
6158 	  break;
6159 	case LTGT:
6160 	  branch = "fblg";
6161 	  break;
6162 
6163 	default:
6164 	  gcc_unreachable ();
6165 	}
6166 
6167       /* ??? !v9: FP branches cannot be preceded by another floating point
6168 	 insn.  Because there is currently no concept of pre-delay slots,
6169 	 we can fix this only by always emitting a nop before a floating
6170 	 point branch.  */
6171 
6172       string[0] = '\0';
6173       if (! TARGET_V9)
6174 	strcpy (string, "nop\n\t");
6175       strcat (string, branch);
6176     }
6177   else
6178     {
6179       switch (code)
6180 	{
6181 	case NE:
6182 	  branch = "bne";
6183 	  break;
6184 	case EQ:
6185 	  branch = "be";
6186 	  break;
6187 	case GE:
6188 	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6189 	    branch = "bpos";
6190 	  else
6191 	    branch = "bge";
6192 	  break;
6193 	case GT:
6194 	  branch = "bg";
6195 	  break;
6196 	case LE:
6197 	  branch = "ble";
6198 	  break;
6199 	case LT:
6200 	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6201 	    branch = "bneg";
6202 	  else
6203 	    branch = "bl";
6204 	  break;
6205 	case GEU:
6206 	  branch = "bgeu";
6207 	  break;
6208 	case GTU:
6209 	  branch = "bgu";
6210 	  break;
6211 	case LEU:
6212 	  branch = "bleu";
6213 	  break;
6214 	case LTU:
6215 	  branch = "blu";
6216 	  break;
6217 
6218 	default:
6219 	  gcc_unreachable ();
6220 	}
6221       strcpy (string, branch);
6222     }
6223   spaces -= strlen (branch);
6224   p = strchr (string, '\0');
6225 
  /* Now add the annulling, the label, and a possible nop.  */
6227   if (annul && ! far)
6228     {
6229       strcpy (p, ",a");
6230       p += 2;
6231       spaces -= 2;
6232     }
6233 
6234   if (TARGET_V9)
6235     {
6236       rtx note;
6237       int v8 = 0;
6238 
6239       if (! far && insn && INSN_ADDRESSES_SET_P ())
6240 	{
6241 	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
6242 		       - INSN_ADDRESSES (INSN_UID (insn)));
6243 	  /* Leave some instructions for "slop".  */
6244 	  if (delta < -260000 || delta >= 260000)
6245 	    v8 = 1;
6246 	}
6247 
6248       if (mode == CCFPmode || mode == CCFPEmode)
6249 	{
6250 	  static char v9_fcc_labelno[] = "%%fccX, ";
6251 	  /* Set the char indicating the number of the fcc reg to use.  */
6252 	  v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
6253 	  labelno = v9_fcc_labelno;
6254 	  if (v8)
6255 	    {
6256 	      gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
6257 	      labelno = "";
6258 	    }
6259 	}
6260       else if (mode == CCXmode || mode == CCX_NOOVmode)
6261 	{
6262 	  labelno = "%%xcc, ";
6263 	  gcc_assert (! v8);
6264 	}
6265       else
6266 	{
6267 	  labelno = "%%icc, ";
6268 	  if (v8)
6269 	    labelno = "";
6270 	}
6271 
6272       if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6273 	{
6274 	  strcpy (p,
6275 		  ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6276 		  ? ",pt" : ",pn");
6277 	  p += 3;
6278 	  spaces -= 3;
6279 	}
6280     }
6281   else
6282     labelno = "";
6283 
6284   if (spaces > 0)
6285     *p++ = '\t';
6286   else
6287     *p++ = ' ';
6288   strcpy (p, labelno);
6289   p = strchr (p, '\0');
6290   if (far)
6291     {
6292       strcpy (p, ".+12\n\t nop\n\tb\t");
6293       /* Skip the next insn if requested or
6294 	 if we know that it will be a nop.  */
6295       if (annul || ! final_sequence)
6296         p[3] = '6';
6297       p += 14;
6298     }
6299   *p++ = '%';
6300   *p++ = 'l';
6301   *p++ = label + '0';
6302   *p++ = '%';
6303   *p++ = '#';
6304   *p = '\0';
6305 
6306   return string;
6307 }
6308 
6309 /* Emit a library call comparison between floating point X and Y.
6310    COMPARISON is the operator to compare with (EQ, NE, GT, etc).
6311    Return the new operator to be used in the comparison sequence.
6312 
6313    TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
6314    values as arguments instead of the TFmode registers themselves,
6315    that's why we cannot call emit_float_lib_cmp.  */
6316 
6317 rtx
6318 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
6319 {
6320   const char *qpfunc;
6321   rtx slot0, slot1, result, tem, tem2, libfunc;
6322   enum machine_mode mode;
6323   enum rtx_code new_comparison;
6324 
6325   switch (comparison)
6326     {
6327     case EQ:
6328       qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
6329       break;
6330 
6331     case NE:
6332       qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
6333       break;
6334 
6335     case GT:
6336       qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
6337       break;
6338 
6339     case GE:
6340       qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
6341       break;
6342 
6343     case LT:
6344       qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
6345       break;
6346 
6347     case LE:
6348       qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
6349       break;
6350 
6351     case ORDERED:
6352     case UNORDERED:
6353     case UNGT:
6354     case UNLT:
6355     case UNEQ:
6356     case UNGE:
6357     case UNLE:
6358     case LTGT:
6359       qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
6360       break;
6361 
6362     default:
6363       gcc_unreachable ();
6364     }
6365 
6366   if (TARGET_ARCH64)
6367     {
6368       if (MEM_P (x))
6369 	slot0 = x;
6370       else
6371 	{
6372 	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6373 	  emit_move_insn (slot0, x);
6374 	}
6375 
6376       if (MEM_P (y))
6377 	slot1 = y;
6378       else
6379 	{
6380 	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6381 	  emit_move_insn (slot1, y);
6382 	}
6383 
6384       libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
6385       emit_library_call (libfunc, LCT_NORMAL,
6386 			 DImode, 2,
6387 			 XEXP (slot0, 0), Pmode,
6388 			 XEXP (slot1, 0), Pmode);
6389       mode = DImode;
6390     }
6391   else
6392     {
6393       libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
6394       emit_library_call (libfunc, LCT_NORMAL,
6395 			 SImode, 2,
6396 			 x, TFmode, y, TFmode);
6397       mode = SImode;
6398     }
6399 
6400 
6401   /* Immediately move the result of the libcall into a pseudo
6402      register so reload doesn't clobber the value if it needs
6403      the return register for a spill reg.  */
6404   result = gen_reg_rtx (mode);
6405   emit_move_insn (result, hard_libcall_value (mode, libfunc));
6406 
6407   switch (comparison)
6408     {
6409     default:
6410       return gen_rtx_NE (VOIDmode, result, const0_rtx);
6411     case ORDERED:
6412     case UNORDERED:
6413       new_comparison = (comparison == UNORDERED ? EQ : NE);
6414       return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
6415     case UNGT:
6416     case UNGE:
6417       new_comparison = (comparison == UNGT ? GT : NE);
6418       return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
6419     case UNLE:
6420       return gen_rtx_NE (VOIDmode, result, const2_rtx);
6421     case UNLT:
6422       tem = gen_reg_rtx (mode);
6423       if (TARGET_ARCH32)
6424 	emit_insn (gen_andsi3 (tem, result, const1_rtx));
6425       else
6426 	emit_insn (gen_anddi3 (tem, result, const1_rtx));
6427       return gen_rtx_NE (VOIDmode, tem, const0_rtx);
6428     case UNEQ:
6429     case LTGT:
6430       tem = gen_reg_rtx (mode);
6431       if (TARGET_ARCH32)
6432 	emit_insn (gen_addsi3 (tem, result, const1_rtx));
6433       else
6434 	emit_insn (gen_adddi3 (tem, result, const1_rtx));
6435       tem2 = gen_reg_rtx (mode);
6436       if (TARGET_ARCH32)
6437 	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
6438       else
6439 	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
6440       new_comparison = (comparison == UNEQ ? EQ : NE);
6441       return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
6442     }
6443 
6444   gcc_unreachable ();
6445 }
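
/* The _Q_cmp/_Qp_cmp result encoding relied on above is: 0 = equal,
   1 = less, 2 = greater, 3 = unordered.  Hence, e.g., UNLT tests
   (result & 1) != 0 and UNORDERED tests result == 3.  */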
6446 
6447 /* Generate an unsigned DImode to FP conversion.  This is the same code
6448    optabs would emit if we didn't have TFmode patterns.  */
6449 
6450 void
6451 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
6452 {
6453   rtx neglab, donelab, i0, i1, f0, in, out;
6454 
6455   out = operands[0];
6456   in = force_reg (DImode, operands[1]);
6457   neglab = gen_label_rtx ();
6458   donelab = gen_label_rtx ();
6459   i0 = gen_reg_rtx (DImode);
6460   i1 = gen_reg_rtx (DImode);
6461   f0 = gen_reg_rtx (mode);
6462 
6463   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
6464 
6465   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
6466   emit_jump_insn (gen_jump (donelab));
6467   emit_barrier ();
6468 
6469   emit_label (neglab);
6470 
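  /* IN is >= 2^63.  Halve it, folding the low bit back in (round to
     odd) so the conversion cannot double-round; convert, then double.  */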
6471   emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
6472   emit_insn (gen_anddi3 (i1, in, const1_rtx));
6473   emit_insn (gen_iordi3 (i0, i0, i1));
6474   emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
6475   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
6476 
6477   emit_label (donelab);
6478 }
6479 
6480 /* Generate an FP to unsigned DImode conversion.  This is the same code
6481    optabs would emit if we didn't have TFmode patterns.  */
6482 
6483 void
6484 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
6485 {
6486   rtx neglab, donelab, i0, i1, f0, in, out, limit;
6487 
6488   out = operands[0];
6489   in = force_reg (mode, operands[1]);
6490   neglab = gen_label_rtx ();
6491   donelab = gen_label_rtx ();
6492   i0 = gen_reg_rtx (DImode);
6493   i1 = gen_reg_rtx (DImode);
6494   limit = gen_reg_rtx (mode);
6495   f0 = gen_reg_rtx (mode);
6496 
6497   emit_move_insn (limit,
6498 		  CONST_DOUBLE_FROM_REAL_VALUE (
6499 		    REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
6500   emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
6501 
6502   emit_insn (gen_rtx_SET (VOIDmode,
6503 			  out,
6504 			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
6505   emit_jump_insn (gen_jump (donelab));
6506   emit_barrier ();
6507 
6508   emit_label (neglab);
6509 
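  /* IN is >= 2^63.  Subtract 2^63, convert, then add 2^63 back by
     XORing the sign bit into the DImode result.  */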
6510   emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
6511   emit_insn (gen_rtx_SET (VOIDmode,
6512 			  i0,
6513 			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
6514   emit_insn (gen_movdi (i1, const1_rtx));
6515   emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
6516   emit_insn (gen_xordi3 (out, i0, i1));
6517 
6518   emit_label (donelab);
6519 }
6520 
6521 /* Return the string to output a conditional branch to LABEL, testing
6522    register REG.  LABEL is the operand number of the label; REG is the
6523    operand number of the reg.  OP is the conditional expression.  The mode
6524    of REG says what kind of comparison we made.
6525 
6526    DEST is the destination insn (i.e. the label), INSN is the source.
6527 
6528    REVERSED is nonzero if we should reverse the sense of the comparison.
6529 
6530    ANNUL is nonzero if we should generate an annulling branch.  */
6531 
6532 const char *
6533 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
6534 		 int annul, rtx insn)
6535 {
6536   static char string[64];
6537   enum rtx_code code = GET_CODE (op);
6538   enum machine_mode mode = GET_MODE (XEXP (op, 0));
6539   rtx note;
6540   int far;
6541   char *p;
6542 
  /* Branches on registers are limited to +-128KB.  If the target is
     too far away, change
6545 
6546      brnz,pt %g1, .LC30
6547 
6548      to
6549 
6550      brz,pn %g1, .+12
6551       nop
6552      ba,pt %xcc, .LC30
6553 
6554      and
6555 
6556      brgez,a,pn %o1, .LC29
6557 
6558      to
6559 
6560      brlz,pt %o1, .+16
6561       nop
6562      ba,pt %xcc, .LC29  */
6563 
6564   far = get_attr_length (insn) >= 3;
6565 
6566   /* If not floating-point or if EQ or NE, we can just reverse the code.  */
6567   if (reversed ^ far)
6568     code = reverse_condition (code);
6569 
  /* Only 64-bit versions of these instructions exist.  */
6571   gcc_assert (mode == DImode);
6572 
6573   /* Start by writing the branch condition.  */
6574 
6575   switch (code)
6576     {
6577     case NE:
6578       strcpy (string, "brnz");
6579       break;
6580 
6581     case EQ:
6582       strcpy (string, "brz");
6583       break;
6584 
6585     case GE:
6586       strcpy (string, "brgez");
6587       break;
6588 
6589     case LT:
6590       strcpy (string, "brlz");
6591       break;
6592 
6593     case LE:
6594       strcpy (string, "brlez");
6595       break;
6596 
6597     case GT:
6598       strcpy (string, "brgz");
6599       break;
6600 
6601     default:
6602       gcc_unreachable ();
6603     }
6604 
6605   p = strchr (string, '\0');
6606 
6607   /* Now add the annulling, reg, label, and nop.  */
6608   if (annul && ! far)
6609     {
6610       strcpy (p, ",a");
6611       p += 2;
6612     }
6613 
6614   if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6615     {
6616       strcpy (p,
6617 	      ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6618 	      ? ",pt" : ",pn");
6619       p += 3;
6620     }
6621 
6622   *p = p < string + 8 ? '\t' : ' ';
6623   p++;
6624   *p++ = '%';
6625   *p++ = '0' + reg;
6626   *p++ = ',';
6627   *p++ = ' ';
6628   if (far)
6629     {
6630       int veryfar = 1, delta;
6631 
6632       if (INSN_ADDRESSES_SET_P ())
6633 	{
6634 	  delta = (INSN_ADDRESSES (INSN_UID (dest))
6635 		   - INSN_ADDRESSES (INSN_UID (insn)));
6636 	  /* Leave some instructions for "slop".  */
6637 	  if (delta >= -260000 && delta < 260000)
6638 	    veryfar = 0;
6639 	}
6640 
6641       strcpy (p, ".+12\n\t nop\n\t");
6642       /* Skip the next insn if requested or
6643 	 if we know that it will be a nop.  */
6644       if (annul || ! final_sequence)
6645         p[3] = '6';
6646       p += 12;
6647       if (veryfar)
6648 	{
6649 	  strcpy (p, "b\t");
6650 	  p += 2;
6651 	}
6652       else
6653 	{
6654 	  strcpy (p, "ba,pt\t%%xcc, ");
6655 	  p += 13;
6656 	}
6657     }
6658   *p++ = '%';
6659   *p++ = 'l';
6660   *p++ = '0' + label;
6661   *p++ = '%';
6662   *p++ = '#';
6663   *p = '\0';
6664 
6665   return string;
6666 }
6667 
/* Return 1 if any of the registers of the instruction are %l[0-7] or
   %o[0-7]; such instructions cannot be used in the delay slot of a return
   insn on V9.  If TEST is 0, also rename all %i[0-7] registers to their
   %o[0-7] counterparts.  */
6672 
6673 static int
6674 epilogue_renumber (register rtx *where, int test)
6675 {
6676   register const char *fmt;
6677   register int i;
6678   register enum rtx_code code;
6679 
6680   if (*where == 0)
6681     return 0;
6682 
6683   code = GET_CODE (*where);
6684 
6685   switch (code)
6686     {
6687     case REG:
6688       if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
6689 	return 1;
6690       if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
6691 	*where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
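      /* FALLTHROUGH */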
6692     case SCRATCH:
6693     case CC0:
6694     case PC:
6695     case CONST_INT:
6696     case CONST_DOUBLE:
6697       return 0;
6698 
6699       /* Do not replace the frame pointer with the stack pointer because
6700 	 it can cause the delayed instruction to load below the stack.
6701 	 This occurs when instructions like:
6702 
6703 	 (set (reg/i:SI 24 %i0)
6704 	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
6705                        (const_int -20 [0xffffffec])) 0))
6706 
6707 	 are in the return delayed slot.  */
6708     case PLUS:
6709       if (GET_CODE (XEXP (*where, 0)) == REG
6710 	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
6711 	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
6712 	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
6713 	return 1;
6714       break;
6715 
6716     case MEM:
6717       if (SPARC_STACK_BIAS
6718 	  && GET_CODE (XEXP (*where, 0)) == REG
6719 	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
6720 	return 1;
6721       break;
6722 
6723     default:
6724       break;
6725     }
6726 
6727   fmt = GET_RTX_FORMAT (code);
6728 
6729   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6730     {
6731       if (fmt[i] == 'E')
6732 	{
6733 	  register int j;
6734 	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
6735 	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
6736 	      return 1;
6737 	}
6738       else if (fmt[i] == 'e'
6739 	       && epilogue_renumber (&(XEXP (*where, i)), test))
6740 	return 1;
6741     }
6742   return 0;
6743 }
6744 
6745 /* Leaf functions and non-leaf functions have different needs.  */
6746 
6747 static const int
6748 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
6749 
6750 static const int
6751 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
6752 
6753 static const int *const reg_alloc_orders[] = {
6754   reg_leaf_alloc_order,
6755   reg_nonleaf_alloc_order};
6756 
6757 void
6758 order_regs_for_local_alloc (void)
6759 {
6760   static int last_order_nonleaf = 1;
6761 
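       /* Hard register 15 is %o7, which call insns clobber, so it is seen
          as live only in functions that make calls; a change in its df
          liveness signals a switch between leaf and non-leaf orders.  */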
6762   if (df_regs_ever_live_p (15) != last_order_nonleaf)
6763     {
6764       last_order_nonleaf = !last_order_nonleaf;
6765       memcpy ((char *) reg_alloc_order,
6766 	      (const char *) reg_alloc_orders[last_order_nonleaf],
6767 	      FIRST_PSEUDO_REGISTER * sizeof (int));
6768     }
6769 }
6770 
6771 /* Return 1 if REG and MEM are legitimate enough to allow the various
6772    mem<-->reg splits to be run.  */
6773 
6774 int
6775 sparc_splitdi_legitimate (rtx reg, rtx mem)
6776 {
6777   /* Punt if we are here by mistake.  */
6778   gcc_assert (reload_completed);
6779 
6780   /* We must have an offsettable memory reference.  */
6781   if (! offsettable_memref_p (mem))
6782     return 0;
6783 
6784   /* If we have legitimate args for ldd/std, we do not want
6785      the split to happen.  */
6786   if ((REGNO (reg) % 2) == 0
6787       && mem_min_alignment (mem, 8))
6788     return 0;
6789 
6790   /* Success.  */
6791   return 1;
6792 }
6793 
6794 /* Return 1 if x and y are some kind of REG and they refer to
6795    different hard registers.  This test is guaranteed to be
6796    run after reload.  */
6797 
6798 int
6799 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
6800 {
6801   if (GET_CODE (x) != REG)
6802     return 0;
6803   if (GET_CODE (y) != REG)
6804     return 0;
6805   if (REGNO (x) == REGNO (y))
6806     return 0;
6807   return 1;
6808 }
6809 
6810 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
6811    This makes them candidates for using ldd and std insns.
6812 
6813    Note reg1 and reg2 *must* be hard registers.  */
6814 
6815 int
6816 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
6817 {
6818   /* We might have been passed a SUBREG.  */
6819   if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
6820     return 0;
6821 
6822   if (REGNO (reg1) % 2 != 0)
6823     return 0;
6824 
6825   /* Integer ldd is deprecated in SPARC V9.  */
6826   if (TARGET_V9 && REGNO (reg1) < 32)
6827     return 0;
6828 
6829   return (REGNO (reg1) == REGNO (reg2) - 1);
6830 }
6831 
6832 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
6833    an ldd or std insn.
6834 
6835    This can only happen when addr1 and addr2, the addresses in mem1
6836    and mem2, are consecutive memory locations (addr1 + 4 == addr2).
6837    addr1 must also be aligned on a 64-bit boundary.
6838 
6839    Also, if dependent_reg_rtx is not null, it must not be used to
6840    compute the address for mem1, i.e. we cannot optimize a sequence
6841    like:
6842    	ld [%o0], %o0
6843 	ld [%o0 + 4], %o1
6844    to
6845    	ldd [%o0], %o0
6846    nor:
6847 	ld [%g3 + 4], %g3
6848 	ld [%g3], %g2
6849    to
6850         ldd [%g3], %g2
6851 
6852    But, note that the transformation from:
6853 	ld [%g2 + 4], %g3
6854         ld [%g2], %g2
6855    to
6856 	ldd [%g2], %g2
6857    is perfectly fine.  Thus, the peephole2 patterns always pass us
6858    the destination register of the first load, never the second one.
6859 
6860    For stores we don't have a similar problem, so dependent_reg_rtx is
6861    NULL_RTX.  */
6862 
6863 int
6864 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
6865 {
6866   rtx addr1, addr2;
6867   unsigned int reg1;
6868   HOST_WIDE_INT offset1;
6869 
6870   /* The mems cannot be volatile.  */
6871   if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
6872     return 0;
6873 
6874   /* MEM1 should be aligned on a 64-bit boundary.  */
6875   if (MEM_ALIGN (mem1) < 64)
6876     return 0;
6877 
6878   addr1 = XEXP (mem1, 0);
6879   addr2 = XEXP (mem2, 0);
6880 
6881   /* Extract a register number and offset (if used) from the first addr.  */
6882   if (GET_CODE (addr1) == PLUS)
6883     {
6884       /* If not a REG, return zero.  */
6885       if (GET_CODE (XEXP (addr1, 0)) != REG)
6886 	return 0;
6887       else
6888 	{
6889           reg1 = REGNO (XEXP (addr1, 0));
6890 	  /* The offset must be constant!  */
6891 	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
6892             return 0;
6893           offset1 = INTVAL (XEXP (addr1, 1));
6894 	}
6895     }
6896   else if (GET_CODE (addr1) != REG)
6897     return 0;
6898   else
6899     {
6900       reg1 = REGNO (addr1);
6901       /* This was a simple (mem (reg)) expression.  Offset is 0.  */
6902       offset1 = 0;
6903     }
6904 
6905   /* Make sure the second address is a (mem (plus (reg) (const_int))).  */
6906   if (GET_CODE (addr2) != PLUS)
6907     return 0;
6908 
6909   if (GET_CODE (XEXP (addr2, 0)) != REG
6910       || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
6911     return 0;
6912 
6913   if (reg1 != REGNO (XEXP (addr2, 0)))
6914     return 0;
6915 
6916   if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
6917     return 0;
6918 
6919   /* The first offset must be evenly divisible by 8 to ensure the
6920      address is 64-bit aligned.  */
6921   if (offset1 % 8 != 0)
6922     return 0;
6923 
6924   /* The offset for the second addr must be 4 more than the first addr.  */
6925   if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
6926     return 0;
6927 
6928   /* All the tests passed.  addr1 and addr2 are valid for ldd and std
6929      instructions.  */
6930   return 1;
6931 }
6932 
6933 /* Return 1 if reg is a pseudo, or is the first register in
6934    a hard register pair.  This makes it suitable for use in
6935    ldd and std insns.  */
6936 
6937 int
6938 register_ok_for_ldd (rtx reg)
6939 {
6940   /* We might have been passed a SUBREG.  */
6941   if (!REG_P (reg))
6942     return 0;
6943 
6944   if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
6945     return (REGNO (reg) % 2 == 0);
6946 
6947   return 1;
6948 }
6949 
6950 /* Return 1 if OP is a memory whose address is known to be
6951    aligned to an 8-byte boundary, or a pseudo during reload.
6952    This makes it suitable for use in ldd and std insns.  */
6953 
6954 int
6955 memory_ok_for_ldd (rtx op)
6956 {
6957   if (MEM_P (op))
6958     {
6959       /* In 64-bit mode, we assume that the address is word-aligned.  */
6960       if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
6961 	return 0;
6962 
6963       if ((reload_in_progress || reload_completed)
6964 	  && !strict_memory_address_p (Pmode, XEXP (op, 0)))
6965 	return 0;
6966     }
6967   else if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
6968     {
6969       if (!(reload_in_progress && reg_renumber[REGNO (op)] < 0))
6970 	return 0;
6971     }
6972   else
6973     return 0;
6974 
6975   return 1;
6976 }
6977 
6978 /* Print operand X (an rtx) in assembler syntax to file FILE.
6979    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
6980    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
6981 
6982 void
6983 print_operand (FILE *file, rtx x, int code)
6984 {
6985   switch (code)
6986     {
6987     case '#':
6988       /* Output an insn in a delay slot.  */
6989       if (final_sequence)
6990         sparc_indent_opcode = 1;
6991       else
6992 	fputs ("\n\t nop", file);
6993       return;
6994     case '*':
6995       /* Output an annul flag if there's nothing for the delay slot and we
6996 	 are optimizing.  This is always used with '(' below.
6997          Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
6998 	 this is a dbx bug.  So, we only do this when optimizing.
6999          On UltraSPARC, a branch in a delay slot causes a pipeline flush.
7000 	 Always emit a nop in case the next instruction is a branch.  */
7001       if (! final_sequence && (optimize && (int) sparc_cpu < PROCESSOR_V9))
7002 	fputs (",a", file);
7003       return;
7004     case '(':
7005       /* Output a 'nop' if there's nothing for the delay slot and we are
7006 	 not optimizing.  This is always used with '*' above.  */
7007       if (! final_sequence && ! (optimize && (int) sparc_cpu < PROCESSOR_V9))
7008 	fputs ("\n\t nop", file);
7009       else if (final_sequence)
7010         sparc_indent_opcode = 1;
7011       return;
7012     case ')':
7013       /* Output the right displacement from the saved PC on function return.
7014 	 The caller may have placed an "unimp" insn immediately after the call
7015 	 so we have to account for it.  This insn is used in the 32-bit ABI
7016 	 when calling a function that returns a non zero-sized structure.  The
7017 	 64-bit ABI doesn't have it.  Be careful to have this test be the same
7018 	 as that for the call.  The exception is when sparc_std_struct_return
7019 	 is enabled, the psABI is followed exactly and the adjustment is made
7020 	 by the code in sparc_struct_value_rtx.  The call emitted is the same
7021 	 when sparc_std_struct_return is enabled.  */
7022       if (!TARGET_ARCH64
7023 	 && cfun->returns_struct
7024 	 && !sparc_std_struct_return
7025 	 && DECL_SIZE (DECL_RESULT (current_function_decl))
7026 	 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
7027 	     == INTEGER_CST
7028 	 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
7029 	fputs ("12", file);
7030       else
7031         fputc ('8', file);
7032       return;
7033     case '_':
7034       /* Output the Embedded Medium/Anywhere code model base register.  */
7035       fputs (EMBMEDANY_BASE_REG, file);
7036       return;
7037     case '&':
7038       /* Print some local dynamic TLS name.  */
7039       assemble_name (file, get_some_local_dynamic_name ());
7040       return;
7041 
7042     case 'Y':
7043       /* Adjust the operand to take into account a RESTORE operation.  */
7044       if (GET_CODE (x) == CONST_INT)
7045 	break;
7046       else if (GET_CODE (x) != REG)
7047 	output_operand_lossage ("invalid %%Y operand");
7048       else if (REGNO (x) < 8)
7049 	fputs (reg_names[REGNO (x)], file);
7050       else if (REGNO (x) >= 24 && REGNO (x) < 32)
7051 	fputs (reg_names[REGNO (x)-16], file);
7052       else
7053 	output_operand_lossage ("invalid %%Y operand");
7054       return;
7055     case 'L':
7056       /* Print out the low order register name of a register pair.  */
7057       if (WORDS_BIG_ENDIAN)
7058 	fputs (reg_names[REGNO (x)+1], file);
7059       else
7060 	fputs (reg_names[REGNO (x)], file);
7061       return;
7062     case 'H':
7063       /* Print out the high order register name of a register pair.  */
7064       if (WORDS_BIG_ENDIAN)
7065 	fputs (reg_names[REGNO (x)], file);
7066       else
7067 	fputs (reg_names[REGNO (x)+1], file);
7068       return;
7069     case 'R':
7070       /* Print out the second register name of a register pair or quad.
7071 	 I.e., R (%o0) => %o1.  */
7072       fputs (reg_names[REGNO (x)+1], file);
7073       return;
7074     case 'S':
7075       /* Print out the third register name of a register quad.
7076 	 I.e., S (%o0) => %o2.  */
7077       fputs (reg_names[REGNO (x)+2], file);
7078       return;
7079     case 'T':
7080       /* Print out the fourth register name of a register quad.
7081 	 I.e., T (%o0) => %o3.  */
7082       fputs (reg_names[REGNO (x)+3], file);
7083       return;
7084     case 'x':
7085       /* Print a condition code register.  */
7086       if (REGNO (x) == SPARC_ICC_REG)
7087 	{
7088 	  /* We don't handle CC[X]_NOOVmode because they're not supposed
7089 	     to occur here.  */
7090 	  if (GET_MODE (x) == CCmode)
7091 	    fputs ("%icc", file);
7092 	  else if (GET_MODE (x) == CCXmode)
7093 	    fputs ("%xcc", file);
7094 	  else
7095 	    gcc_unreachable ();
7096 	}
7097       else
7098 	/* %fccN register */
7099 	fputs (reg_names[REGNO (x)], file);
7100       return;
7101     case 'm':
7102       /* Print the operand's address only.  */
7103       output_address (XEXP (x, 0));
7104       return;
7105     case 'r':
7106       /* In this case we need a register.  Use %g0 if the
7107 	 operand is const0_rtx.  */
7108       if (x == const0_rtx
7109 	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
7110 	{
7111 	  fputs ("%g0", file);
7112 	  return;
7113 	}
7114       else
7115 	break;
7116 
7117     case 'A':
7118       switch (GET_CODE (x))
7119 	{
7120 	case IOR: fputs ("or", file); break;
7121 	case AND: fputs ("and", file); break;
7122 	case XOR: fputs ("xor", file); break;
7123 	default: output_operand_lossage ("invalid %%A operand");
7124 	}
7125       return;
7126 
7127     case 'B':
7128       switch (GET_CODE (x))
7129 	{
7130 	case IOR: fputs ("orn", file); break;
7131 	case AND: fputs ("andn", file); break;
7132 	case XOR: fputs ("xnor", file); break;
7133 	default: output_operand_lossage ("invalid %%B operand");
7134 	}
7135       return;
7136 
7137       /* These are used by the conditional move instructions.  */
7138     case 'c':
7139     case 'C':
7140       {
7141 	enum rtx_code rc = GET_CODE (x);
7142 
7143 	if (code == 'c')
7144 	  {
7145 	    enum machine_mode mode = GET_MODE (XEXP (x, 0));
7146 	    if (mode == CCFPmode || mode == CCFPEmode)
7147 	      rc = reverse_condition_maybe_unordered (GET_CODE (x));
7148 	    else
7149 	      rc = reverse_condition (GET_CODE (x));
7150 	  }
7151 	switch (rc)
7152 	  {
7153 	  case NE: fputs ("ne", file); break;
7154 	  case EQ: fputs ("e", file); break;
7155 	  case GE: fputs ("ge", file); break;
7156 	  case GT: fputs ("g", file); break;
7157 	  case LE: fputs ("le", file); break;
7158 	  case LT: fputs ("l", file); break;
7159 	  case GEU: fputs ("geu", file); break;
7160 	  case GTU: fputs ("gu", file); break;
7161 	  case LEU: fputs ("leu", file); break;
7162 	  case LTU: fputs ("lu", file); break;
7163 	  case LTGT: fputs ("lg", file); break;
7164 	  case UNORDERED: fputs ("u", file); break;
7165 	  case ORDERED: fputs ("o", file); break;
7166 	  case UNLT: fputs ("ul", file); break;
7167 	  case UNLE: fputs ("ule", file); break;
7168 	  case UNGT: fputs ("ug", file); break;
7169 	  case UNGE: fputs ("uge", file); break;
7170 	  case UNEQ: fputs ("ue", file); break;
7171 	  default: output_operand_lossage (code == 'c'
7172 					   ? "invalid %%c operand"
7173 					   : "invalid %%C operand");
7174 	  }
7175 	return;
7176       }
7177 
7178       /* These are used by the movr instruction pattern.  */
7179     case 'd':
7180     case 'D':
7181       {
7182 	enum rtx_code rc = (code == 'd'
7183 			    ? reverse_condition (GET_CODE (x))
7184 			    : GET_CODE (x));
7185 	switch (rc)
7186 	  {
7187 	  case NE: fputs ("ne", file); break;
7188 	  case EQ: fputs ("e", file); break;
7189 	  case GE: fputs ("gez", file); break;
7190 	  case LT: fputs ("lz", file); break;
7191 	  case LE: fputs ("lez", file); break;
7192 	  case GT: fputs ("gz", file); break;
7193 	  default: output_operand_lossage (code == 'd'
7194 					   ? "invalid %%d operand"
7195 					   : "invalid %%D operand");
7196 	  }
7197 	return;
7198       }
7199 
7200     case 'b':
7201       {
7202 	/* Print a sign-extended character.  */
7203 	int i = trunc_int_for_mode (INTVAL (x), QImode);
7204 	fprintf (file, "%d", i);
7205 	return;
7206       }
7207 
7208     case 'f':
7209       /* Operand must be a MEM; write its address.  */
7210       if (GET_CODE (x) != MEM)
7211 	output_operand_lossage ("invalid %%f operand");
7212       output_address (XEXP (x, 0));
7213       return;
7214 
7215     case 's':
7216       {
7217 	/* Print a sign-extended 32-bit value.  */
7218 	HOST_WIDE_INT i;
7219 	if (GET_CODE (x) == CONST_INT)
7220 	  i = INTVAL (x);
7221 	else if (GET_CODE (x) == CONST_DOUBLE)
7222 	  i = CONST_DOUBLE_LOW (x);
7223 	else
7224 	  {
7225 	    output_operand_lossage ("invalid %%s operand");
7226 	    return;
7227 	  }
7228 	i = trunc_int_for_mode (i, SImode);
7229 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
7230 	return;
7231       }
7232 
7233     case 0:
7234       /* Do nothing special.  */
7235       break;
7236 
7237     default:
7238       /* Undocumented flag.  */
7239       output_operand_lossage ("invalid operand output code");
7240     }
7241 
7242   if (GET_CODE (x) == REG)
7243     fputs (reg_names[REGNO (x)], file);
7244   else if (GET_CODE (x) == MEM)
7245     {
7246       fputc ('[', file);
7247       /* Poor Sun assembler doesn't understand absolute addressing.  */
7248       if (CONSTANT_P (XEXP (x, 0)))
7249 	fputs ("%g0+", file);
7250       output_address (XEXP (x, 0));
7251       fputc (']', file);
7252     }
7253   else if (GET_CODE (x) == HIGH)
7254     {
7255       fputs ("%hi(", file);
7256       output_addr_const (file, XEXP (x, 0));
7257       fputc (')', file);
7258     }
7259   else if (GET_CODE (x) == LO_SUM)
7260     {
7261       print_operand (file, XEXP (x, 0), 0);
7262       if (TARGET_CM_MEDMID)
7263 	fputs ("+%l44(", file);
7264       else
7265 	fputs ("+%lo(", file);
7266       output_addr_const (file, XEXP (x, 1));
7267       fputc (')', file);
7268     }
7269   else if (GET_CODE (x) == CONST_DOUBLE
7270 	   && (GET_MODE (x) == VOIDmode
7271 	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
7272     {
7273       if (CONST_DOUBLE_HIGH (x) == 0)
7274 	fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
7275       else if (CONST_DOUBLE_HIGH (x) == -1
7276 	       && CONST_DOUBLE_LOW (x) < 0)
7277 	fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
7278       else
7279 	output_operand_lossage ("long long constant not a valid immediate operand");
7280     }
7281   else if (GET_CODE (x) == CONST_DOUBLE)
7282     output_operand_lossage ("floating point constant not a valid immediate operand");
7283   else { output_addr_const (file, x); }
7284 }
7285 
7286 /* Target hook for assembling integer objects.  The sparc version has
7287    special handling for aligned DI-mode objects.  */
7288 
7289 static bool
7290 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
7291 {
7292   /* ??? We only output .xword's for symbols and only then in environments
7293      where the assembler can handle them.  */
7294   if (aligned_p && size == 8
7295       && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
7296     {
7297       if (TARGET_V9)
7298 	{
7299 	  assemble_integer_with_op ("\t.xword\t", x);
7300 	  return true;
7301 	}
7302       else
7303 	{
7304 	  assemble_aligned_integer (4, const0_rtx);
7305 	  assemble_aligned_integer (4, x);
7306 	  return true;
7307 	}
7308     }
7309   return default_assemble_integer (x, size, aligned_p);
7310 }
7311 
7312 /* Return the value of a code used in the .proc pseudo-op that says
7313    what kind of result this function returns.  For non-C types, we pick
7314    the closest C type.  */
7315 
7316 #ifndef SHORT_TYPE_SIZE
7317 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
7318 #endif
7319 
7320 #ifndef INT_TYPE_SIZE
7321 #define INT_TYPE_SIZE BITS_PER_WORD
7322 #endif
7323 
7324 #ifndef LONG_TYPE_SIZE
7325 #define LONG_TYPE_SIZE BITS_PER_WORD
7326 #endif
7327 
7328 #ifndef LONG_LONG_TYPE_SIZE
7329 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
7330 #endif
7331 
7332 #ifndef FLOAT_TYPE_SIZE
7333 #define FLOAT_TYPE_SIZE BITS_PER_WORD
7334 #endif
7335 
7336 #ifndef DOUBLE_TYPE_SIZE
7337 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7338 #endif
7339 
7340 #ifndef LONG_DOUBLE_TYPE_SIZE
7341 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7342 #endif
7343 
7344 unsigned long
7345 sparc_type_code (register tree type)
7346 {
7347   register unsigned long qualifiers = 0;
7348   register unsigned shift;
7349 
7350   /* Only the first 30 bits of the qualifier are valid.  We must refrain from
7351      setting more, since some assemblers will give an error for this.  Also,
7352      we must be careful to avoid shifts of 32 bits or more to avoid getting
7353      unpredictable results.  */
7354 
7355   for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
7356     {
7357       switch (TREE_CODE (type))
7358 	{
7359 	case ERROR_MARK:
7360 	  return qualifiers;
7361 
7362 	case ARRAY_TYPE:
7363 	  qualifiers |= (3 << shift);
7364 	  break;
7365 
7366 	case FUNCTION_TYPE:
7367 	case METHOD_TYPE:
7368 	  qualifiers |= (2 << shift);
7369 	  break;
7370 
7371 	case POINTER_TYPE:
7372 	case REFERENCE_TYPE:
7373 	case OFFSET_TYPE:
7374 	  qualifiers |= (1 << shift);
7375 	  break;
7376 
7377 	case RECORD_TYPE:
7378 	  return (qualifiers | 8);
7379 
7380 	case UNION_TYPE:
7381 	case QUAL_UNION_TYPE:
7382 	  return (qualifiers | 9);
7383 
7384 	case ENUMERAL_TYPE:
7385 	  return (qualifiers | 10);
7386 
7387 	case VOID_TYPE:
7388 	  return (qualifiers | 16);
7389 
7390 	case INTEGER_TYPE:
7391 	  /* If this is a range type, consider it to be the underlying
7392 	     type.  */
7393 	  if (TREE_TYPE (type) != 0)
7394 	    break;
7395 
7396 	  /* Carefully distinguish all the standard types of C,
7397 	     without messing up if the language is not C.  We do this by
7398 	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
7399 	     look at both the names and the above fields, but that's redundant.
7400 	     Any type whose size is between two C types will be considered
7401 	     to be the wider of the two types.  Also, we do not have a
7402 	     special code to use for "long long", so anything wider than
7403 	     long is treated the same.  Note that we can't distinguish
7404 	     between "int" and "long" in this code if they are the same
7405 	     size, but that's fine, since neither can the assembler.  */
7406 
7407 	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
7408 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
7409 
7410 	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
7411 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
7412 
7413 	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
7414 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
7415 
7416 	  else
7417 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
7418 
7419 	case REAL_TYPE:
7420 	  /* If this is a range type, consider it to be the underlying
7421 	     type.  */
7422 	  if (TREE_TYPE (type) != 0)
7423 	    break;
7424 
7425 	  /* Carefully distinguish all the standard types of C,
7426 	     without messing up if the language is not C.  */
7427 
7428 	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
7429 	    return (qualifiers | 6);
7430 
7431 	  else
7432 	    return (qualifiers | 7);
7433 
7434 	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
7435 	  /* ??? We need to distinguish between double and float complex types,
7436 	     but I don't know how yet because I can't reach this code from
7437 	     existing front-ends.  */
7438 	  return (qualifiers | 7);	/* Who knows? */
7439 
7440 	case VECTOR_TYPE:
7441 	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
7442 	case LANG_TYPE:		/* ? */
7443 	  return qualifiers;
7444 
7445 	default:
7446 	  gcc_unreachable ();		/* Not a type! */
7447         }
7448     }
7449 
7450   return qualifiers;
7451 }
7452 
7453 /* Nested function support.  */
7454 
7455 /* Emit RTL insns to initialize the variable parts of a trampoline.
7456    FNADDR is an RTX for the address of the function's pure code.
7457    CXT is an RTX for the static chain value for the function.
7458 
7459    This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
7460    (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
7461    (to store insns).  This is a bit excessive.  Perhaps a different
7462    mechanism would be better here.
7463 
7464    Emit enough FLUSH insns to synchronize the data and instruction caches.  */
7465 
7466 static void
7467 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
7468 {
7469   /* SPARC 32-bit trampoline:
7470 
7471  	sethi	%hi(fn), %g1
7472  	sethi	%hi(static), %g2
7473  	jmp	%g1+%lo(fn)
7474  	or	%g2, %lo(static), %g2
7475 
7476     SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
7477     JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
7478    */
7479 
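       /* 0x03000000 is "sethi %hi(0), %g1" and 0x05000000 is "sethi %hi(0),
          %g2"; 0x81c06000 is "jmp %g1+0" and 0x8410a000 is "or %g2, 0, %g2".
          The address pieces are OR'd into the immediate fields below.  */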
7480   emit_move_insn
7481     (adjust_address (m_tramp, SImode, 0),
7482      expand_binop (SImode, ior_optab,
7483 		   expand_shift (RSHIFT_EXPR, SImode, fnaddr,
7484 				 size_int (10), 0, 1),
7485 		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
7486 		   NULL_RTX, 1, OPTAB_DIRECT));
7487 
7488   emit_move_insn
7489     (adjust_address (m_tramp, SImode, 4),
7490      expand_binop (SImode, ior_optab,
7491 		   expand_shift (RSHIFT_EXPR, SImode, cxt,
7492 				 size_int (10), 0, 1),
7493 		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
7494 		   NULL_RTX, 1, OPTAB_DIRECT));
7495 
7496   emit_move_insn
7497     (adjust_address (m_tramp, SImode, 8),
7498      expand_binop (SImode, ior_optab,
7499 		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
7500 		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
7501 		   NULL_RTX, 1, OPTAB_DIRECT));
7502 
7503   emit_move_insn
7504     (adjust_address (m_tramp, SImode, 12),
7505      expand_binop (SImode, ior_optab,
7506 		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
7507 		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
7508 		   NULL_RTX, 1, OPTAB_DIRECT));
7509 
7510   /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
7511      aligned on a 16 byte boundary so one flush clears it all.  */
7512   emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
7513   if (sparc_cpu != PROCESSOR_ULTRASPARC
7514       && sparc_cpu != PROCESSOR_ULTRASPARC3
7515       && sparc_cpu != PROCESSOR_NIAGARA
7516       && sparc_cpu != PROCESSOR_NIAGARA2)
7517     emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
7518 
7519   /* Call __enable_execute_stack after writing onto the stack to make sure
7520      the stack address is accessible.  */
7521 #ifdef ENABLE_EXECUTE_STACK
7522   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7523                      LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
7524 #endif
7525 
7526 }
7527 
7528 /* The 64-bit version is simpler because it makes more sense to load the
7529    values as "immediate" data out of the trampoline.  It's also easier since
7530    we can read the PC without clobbering a register.  */
7531 
7532 static void
7533 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
7534 {
7535   /* SPARC 64-bit trampoline:
7536 
7537 	rd	%pc, %g1
7538 	ldx	[%g1+24], %g5
7539 	jmp	%g5
7540 	ldx	[%g1+16], %g5
7541 	+16 bytes data
7542    */
7543 
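       /* 0x83414000 is "rd %pc, %g1", 0xca586018 is "ldx [%g1+24], %g5",
          0x81c14000 is "jmp %g5" and 0xca586010 is "ldx [%g1+16], %g5".  */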
7544   emit_move_insn (adjust_address (m_tramp, SImode, 0),
7545 		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
7546   emit_move_insn (adjust_address (m_tramp, SImode, 4),
7547 		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
7548   emit_move_insn (adjust_address (m_tramp, SImode, 8),
7549 		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
7550   emit_move_insn (adjust_address (m_tramp, SImode, 12),
7551 		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
7552   emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
7553   emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
7554   emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
7555 
7556   if (sparc_cpu != PROCESSOR_ULTRASPARC
7557       && sparc_cpu != PROCESSOR_ULTRASPARC3
7558       && sparc_cpu != PROCESSOR_NIAGARA
7559       && sparc_cpu != PROCESSOR_NIAGARA2)
7560     emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
7561 
7562   /* Call __enable_execute_stack after writing onto the stack to make sure
7563      the stack address is accessible.  */
7564 #ifdef ENABLE_EXECUTE_STACK
7565   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7566                      LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
7567 #endif
7568 }
7569 
7570 /* Worker for TARGET_TRAMPOLINE_INIT.  */
7571 
7572 static void
7573 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
7574 {
7575   rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
7576   cxt = force_reg (Pmode, cxt);
7577   if (TARGET_ARCH64)
7578     sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
7579   else
7580     sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
7581 }
7582 
7583 /* Adjust the cost of a scheduling dependency.  Return the new cost of
7584    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
7585 
7586 static int
7587 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7588 {
7589   enum attr_type insn_type;
7590 
7591   if (! recog_memoized (insn))
7592     return 0;
7593 
7594   insn_type = get_attr_type (insn);
7595 
7596   if (REG_NOTE_KIND (link) == 0)
7597     {
7598       /* Data dependency; DEP_INSN writes a register that INSN reads some
7599 	 cycles later.  */
7600 
7601       /* If a load, then the dependence must be on the memory address;
7602 	 add an extra "cycle".  Note that the cost could be two cycles
7603 	 if the reg was written late in an instruction group; we cannot tell
7604 	 here.  */
7605       if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
7606 	return cost + 3;
7607 
7608       /* Get the delay only if the address of the store is the dependence.  */
7609       if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
7610 	{
7611 	  rtx pat = PATTERN (insn);
7612 	  rtx dep_pat = PATTERN (dep_insn);
7613 
7614 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7615 	    return cost;  /* This should not happen!  */
7616 
7617 	  /* The dependency between the two instructions was on the data that
7618 	     is being stored.  Assume that this implies that the address of the
7619 	     store is not dependent.  */
7620 	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7621 	    return cost;
7622 
7623 	  return cost + 3;  /* An approximation.  */
7624 	}
7625 
7626       /* A shift instruction cannot receive its data from an instruction
7627 	 in the same cycle; add a one cycle penalty.  */
7628       if (insn_type == TYPE_SHIFT)
7629 	return cost + 3;   /* Split before cascade into shift.  */
7630     }
7631   else
7632     {
7633       /* Anti- or output- dependency; DEP_INSN reads/writes a register that
7634 	 INSN writes some cycles later.  */
7635 
7636       /* These are only significant for the fpu unit; writing a fp reg before
7637          the fpu has finished with it stalls the processor.  */
7638 
7639       /* Reusing an integer register causes no problems.  */
7640       if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7641 	return 0;
7642     }
7643 
7644   return cost;
7645 }
7646 
7647 static int
7648 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7649 {
7650   enum attr_type insn_type, dep_type;
7651   rtx pat = PATTERN (insn);
7652   rtx dep_pat = PATTERN (dep_insn);
7653 
7654   if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
7655     return cost;
7656 
7657   insn_type = get_attr_type (insn);
7658   dep_type = get_attr_type (dep_insn);
7659 
7660   switch (REG_NOTE_KIND (link))
7661     {
7662     case 0:
7663       /* Data dependency; DEP_INSN writes a register that INSN reads some
7664 	 cycles later.  */
7665 
7666       switch (insn_type)
7667 	{
7668 	case TYPE_STORE:
7669 	case TYPE_FPSTORE:
7670 	  /* Get the delay iff the address of the store is the dependence.  */
7671 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7672 	    return cost;
7673 
7674 	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7675 	    return cost;
7676 	  return cost + 3;
7677 
7678 	case TYPE_LOAD:
7679 	case TYPE_SLOAD:
7680 	case TYPE_FPLOAD:
7681 	  /* If a load, then the dependence must be on the memory address.  If
7682 	     the addresses aren't equal, then it might be a false dependency.  */
7683 	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
7684 	    {
7685 	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
7686 		  || GET_CODE (SET_DEST (dep_pat)) != MEM
7687 		  || GET_CODE (SET_SRC (pat)) != MEM
7688 		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
7689 				    XEXP (SET_SRC (pat), 0)))
7690 		return cost + 2;
7691 
7692 	      return cost + 8;
7693 	    }
7694 	  break;
7695 
7696 	case TYPE_BRANCH:
7697 	  /* Compare to branch latency is 0.  There is no benefit from
7698 	     separating compare and branch.  */
7699 	  if (dep_type == TYPE_COMPARE)
7700 	    return 0;
7701 	  /* Floating point compare to branch latency is less than
7702 	     compare to conditional move.  */
7703 	  if (dep_type == TYPE_FPCMP)
7704 	    return cost - 1;
7705 	  break;
7706 	default:
7707 	  break;
7708 	}
7709       break;
7710 
7711     case REG_DEP_ANTI:
7712       /* Anti-dependencies only penalize the fpu unit.  */
7713       if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7714         return 0;
7715       break;
7716 
7717     default:
7718       break;
7719     }
7720 
7721   return cost;
7722 }
7723 
7724 static int
7725 sparc_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7726 {
7727   switch (sparc_cpu)
7728     {
7729     case PROCESSOR_SUPERSPARC:
7730       cost = supersparc_adjust_cost (insn, link, dep, cost);
7731       break;
7732     case PROCESSOR_HYPERSPARC:
7733     case PROCESSOR_SPARCLITE86X:
7734       cost = hypersparc_adjust_cost (insn, link, dep, cost);
7735       break;
7736     default:
7737       break;
7738     }
7739   return cost;
7740 }
7741 
7742 static void
7743 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7744 		  int sched_verbose ATTRIBUTE_UNUSED,
7745 		  int max_ready ATTRIBUTE_UNUSED)
7746 {}
7747 
7748 static int
7749 sparc_use_sched_lookahead (void)
7750 {
7751   if (sparc_cpu == PROCESSOR_NIAGARA
7752       || sparc_cpu == PROCESSOR_NIAGARA2)
7753     return 0;
7754   if (sparc_cpu == PROCESSOR_ULTRASPARC
7755       || sparc_cpu == PROCESSOR_ULTRASPARC3)
7756     return 4;
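       /* Test sparc_cpu for membership in a small set with one bit mask.  */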
7757   if ((1 << sparc_cpu) &
7758       ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
7759        (1 << PROCESSOR_SPARCLITE86X)))
7760     return 3;
7761   return 0;
7762 }
7763 
7764 static int
7765 sparc_issue_rate (void)
7766 {
7767   switch (sparc_cpu)
7768     {
7769     case PROCESSOR_NIAGARA:
7770     case PROCESSOR_NIAGARA2:
7771     default:
7772       return 1;
7773     case PROCESSOR_V9:
7774       /* Assume V9 processors are capable of at least dual-issue.  */
7775       return 2;
7776     case PROCESSOR_SUPERSPARC:
7777       return 3;
7778     case PROCESSOR_HYPERSPARC:
7779     case PROCESSOR_SPARCLITE86X:
7780       return 2;
7781     case PROCESSOR_ULTRASPARC:
7782     case PROCESSOR_ULTRASPARC3:
7783       return 4;
7784     }
7785 }
7786 
7787 static int
7788 set_extends (rtx insn)
7789 {
7790   register rtx pat = PATTERN (insn);
7791 
7792   switch (GET_CODE (SET_SRC (pat)))
7793     {
7794       /* Load and some shift instructions zero extend.  */
7795     case MEM:
7796     case ZERO_EXTEND:
7797       /* sethi clears the high bits */
7798     case HIGH:
7799       /* LO_SUM is used with sethi.  sethi cleared the high
7800 	 bits and the values used with lo_sum are positive */
7801     case LO_SUM:
7802       /* Store flag stores 0 or 1 */
7803     case LT: case LTU:
7804     case GT: case GTU:
7805     case LE: case LEU:
7806     case GE: case GEU:
7807     case EQ:
7808     case NE:
7809       return 1;
7810     case AND:
7811       {
7812 	rtx op0 = XEXP (SET_SRC (pat), 0);
7813 	rtx op1 = XEXP (SET_SRC (pat), 1);
7814 	if (GET_CODE (op1) == CONST_INT)
7815 	  return INTVAL (op1) >= 0;
7816 	if (GET_CODE (op0) != REG)
7817 	  return 0;
7818 	if (sparc_check_64 (op0, insn) == 1)
7819 	  return 1;
7820 	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
7821       }
7822     case IOR:
7823     case XOR:
7824       {
7825 	rtx op0 = XEXP (SET_SRC (pat), 0);
7826 	rtx op1 = XEXP (SET_SRC (pat), 1);
7827 	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
7828 	  return 0;
7829 	if (GET_CODE (op1) == CONST_INT)
7830 	  return INTVAL (op1) >= 0;
7831 	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
7832       }
7833     case LSHIFTRT:
7834       return GET_MODE (SET_SRC (pat)) == SImode;
7835       /* Positive integers leave the high bits zero.  */
7836     case CONST_DOUBLE:
7837       return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
7838     case CONST_INT:
7839       return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
7840     case ASHIFTRT:
7841     case SIGN_EXTEND:
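           /* A negative result (-1) marks a sign-extended SImode value;
              see sparc_check_64.  */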
7842       return - (GET_MODE (SET_SRC (pat)) == SImode);
7843     case REG:
7844       return sparc_check_64 (SET_SRC (pat), insn);
7845     default:
7846       return 0;
7847     }
7848 }
7849 
7850 /* We _ought_ to have only one kind per function, but...  */
7851 static GTY(()) rtx sparc_addr_diff_list;
7852 static GTY(()) rtx sparc_addr_list;
7853 
7854 void
7855 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
7856 {
7857   vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7858   if (diff)
7859     sparc_addr_diff_list
7860       = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7861   else
7862     sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7863 }
7864 
7865 static void
7866 sparc_output_addr_vec (rtx vec)
7867 {
7868   rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7869   int idx, vlen = XVECLEN (body, 0);
7870 
7871 #ifdef ASM_OUTPUT_ADDR_VEC_START
7872   ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7873 #endif
7874 
7875 #ifdef ASM_OUTPUT_CASE_LABEL
7876   ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7877 			 NEXT_INSN (lab));
7878 #else
7879   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7880 #endif
7881 
7882   for (idx = 0; idx < vlen; idx++)
7883     {
7884       ASM_OUTPUT_ADDR_VEC_ELT
7885 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
7886     }
7887 
7888 #ifdef ASM_OUTPUT_ADDR_VEC_END
7889   ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7890 #endif
7891 }
7892 
7893 static void
7894 sparc_output_addr_diff_vec (rtx vec)
7895 {
7896   rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7897   rtx base = XEXP (XEXP (body, 0), 0);
7898   int idx, vlen = XVECLEN (body, 1);
7899 
7900 #ifdef ASM_OUTPUT_ADDR_VEC_START
7901   ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7902 #endif
7903 
7904 #ifdef ASM_OUTPUT_CASE_LABEL
7905   ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7906 			 NEXT_INSN (lab));
7907 #else
7908   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7909 #endif
7910 
7911   for (idx = 0; idx < vlen; idx++)
7912     {
7913       ASM_OUTPUT_ADDR_DIFF_ELT
7914         (asm_out_file,
7915          body,
7916          CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
7917          CODE_LABEL_NUMBER (base));
7918     }
7919 
7920 #ifdef ASM_OUTPUT_ADDR_VEC_END
7921   ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7922 #endif
7923 }
7924 
7925 static void
7926 sparc_output_deferred_case_vectors (void)
7927 {
7928   rtx t;
7929   int align;
7930 
7931   if (sparc_addr_list == NULL_RTX
7932       && sparc_addr_diff_list == NULL_RTX)
7933     return;
7934 
7935   /* Align to cache line in the function's code section.  */
7936   switch_to_section (current_function_section ());
7937 
7938   align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7939   if (align > 0)
7940     ASM_OUTPUT_ALIGN (asm_out_file, align);
7941 
7942   for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7943     sparc_output_addr_vec (XEXP (t, 0));
7944   for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7945     sparc_output_addr_diff_vec (XEXP (t, 0));
7946 
7947   sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7948 }
7949 
7950 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
7951    unknown.  Return 1 if the high bits are zero, -1 if the register is
7952    sign extended.  */
7953 int
7954 sparc_check_64 (rtx x, rtx insn)
7955 {
7956   /* If a register is set only once it is safe to ignore insns this
7957      code does not know how to handle.  The loop will either recognize
7958      the single set and return the correct value or fail to recognize
7959      it and return 0.  */
7960   int set_once = 0;
7961   rtx y = x;
7962 
7963   gcc_assert (GET_CODE (x) == REG);
7964 
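       /* For a DImode register, examine the SImode register that holds the
          low word; with big-endian word order that is REGNO + 1.  */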
7965   if (GET_MODE (x) == DImode)
7966     y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
7967 
7968   if (flag_expensive_optimizations
7969       && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
7970     set_once = 1;
7971 
7972   if (insn == 0)
7973     {
7974       if (set_once)
7975 	insn = get_last_insn_anywhere ();
7976       else
7977 	return 0;
7978     }
7979 
7980   while ((insn = PREV_INSN (insn)))
7981     {
7982       switch (GET_CODE (insn))
7983 	{
7984 	case JUMP_INSN:
7985 	case NOTE:
7986 	  break;
7987 	case CODE_LABEL:
7988 	case CALL_INSN:
7989 	default:
7990 	  if (! set_once)
7991 	    return 0;
7992 	  break;
7993 	case INSN:
7994 	  {
7995 	    rtx pat = PATTERN (insn);
7996 	    if (GET_CODE (pat) != SET)
7997 	      return 0;
7998 	    if (rtx_equal_p (x, SET_DEST (pat)))
7999 	      return set_extends (insn);
8000 	    if (y && rtx_equal_p (y, SET_DEST (pat)))
8001 	      return set_extends (insn);
8002 	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
8003 	      return 0;
8004 	  }
8005 	}
8006     }
8007   return 0;
8008 }
8009 
8010 /* Return the assembly code to perform a DImode shift using
8011    a 64-bit global or out register on SPARC-V8+.  */
8012 const char *
8013 output_v8plus_shift (rtx *operands, rtx insn, const char *opcode)
8014 {
8015   static char asm_code[60];
8016 
8017   /* The scratch register is only required when the destination
8018      register is not a 64-bit global or out register.  */
8019   if (which_alternative != 2)
8020     operands[3] = operands[0];
8021 
8022   /* We can only shift by constants <= 63.  */
8023   if (GET_CODE (operands[2]) == CONST_INT)
8024     operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
8025 
8026   if (GET_CODE (operands[1]) == CONST_INT)
8027     {
8028       output_asm_insn ("mov\t%1, %3", operands);
8029     }
8030   else
8031     {
8032       output_asm_insn ("sllx\t%H1, 32, %3", operands);
8033       if (sparc_check_64 (operands[1], insn) <= 0)
8034 	output_asm_insn ("srl\t%L1, 0, %L1", operands);
8035       output_asm_insn ("or\t%L1, %3, %3", operands);
8036     }
8037 
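       /* operands[3] now holds the full 64-bit source; append the shift
          opcode and the code that splits the result into %H0 and %L0.  */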
8038   strcpy (asm_code, opcode);
8039 
8040   if (which_alternative != 2)
8041     return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
8042   else
8043     return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
8044 }
8045 
8046 /* Output rtl to increment the profile counter at label LABELNO
8047    for profiling a function entry.  */
8048 
8049 void
8050 sparc_profile_hook (int labelno)
8051 {
8052   char buf[32];
8053   rtx lab, fun;
8054 
8055   fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
8056   if (NO_PROFILE_COUNTERS)
8057     {
8058       emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
8059     }
8060   else
8061     {
8062       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
8063       lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
8064       emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
8065     }
8066 }
8067 
8068 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
8069 
8070 static void
8071 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
8072 				     tree decl ATTRIBUTE_UNUSED)
8073 {
8074   fprintf (asm_out_file, "\t.section\t\"%s\"", name);
8075 
8076   if (!(flags & SECTION_DEBUG))
8077     fputs (",#alloc", asm_out_file);
8078   if (flags & SECTION_WRITE)
8079     fputs (",#write", asm_out_file);
8080   if (flags & SECTION_TLS)
8081     fputs (",#tls", asm_out_file);
8082   if (flags & SECTION_CODE)
8083     fputs (",#execinstr", asm_out_file);
8084 
8085   /* ??? Handle SECTION_BSS.  */
8086 
8087   fputc ('\n', asm_out_file);
8088 }
8089 
8090 /* We do not allow indirect calls to be optimized into sibling calls.
8091 
8092    We cannot use sibling calls when delayed branches are disabled
8093    because they will likely require the call delay slot to be filled.
8094 
8095    Also, on SPARC 32-bit we cannot emit a sibling call when the
8096    current function returns a structure.  This is because the "unimp
8097    after call" convention would cause the callee to return to the
8098    wrong place.  The generic code already disallows cases where the
8099    function being called returns a structure.
8100 
8101    It may seem strange how this last case could occur.  Usually there
8102    is code after the call which jumps to epilogue code which dumps the
8103    return value into the struct return area.  That ought to invalidate
8104    the sibling call right?  Well, in the C++ case we can end up passing
8105    the pointer to the struct return area to a constructor (which returns
8106    void) and then nothing else happens.  Such a sibling call would look
8107    valid without the added check here.
8108 
8109    VxWorks PIC PLT entries require the global pointer to be initialized
8110    on entry.  We therefore can't emit sibling calls to them.  */
8111 static bool
8112 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8113 {
8114   return (decl
8115 	  && flag_delayed_branch
8116 	  && (TARGET_ARCH64 || ! cfun->returns_struct)
8117 	  && !(TARGET_VXWORKS_RTP
8118 	       && flag_pic
8119 	       && !targetm.binds_local_p (decl)));
8120 }
8121 
8122 /* libfunc renaming.  */
8123 #include "config/gofast.h"
8124 
8125 static void
8126 sparc_init_libfuncs (void)
8127 {
8128   if (TARGET_ARCH32)
8129     {
8130       /* Use the subroutines that Sun's library provides for integer
8131 	 multiply and divide.  The `*' prevents an underscore from
8132 	 being prepended by the compiler. .umul is a little faster
8133 	 than .mul.  */
8134       set_optab_libfunc (smul_optab, SImode, "*.umul");
8135       set_optab_libfunc (sdiv_optab, SImode, "*.div");
8136       set_optab_libfunc (udiv_optab, SImode, "*.udiv");
8137       set_optab_libfunc (smod_optab, SImode, "*.rem");
8138       set_optab_libfunc (umod_optab, SImode, "*.urem");
8139 
8140       /* TFmode arithmetic.  These names are part of the SPARC 32-bit ABI.  */
8141       set_optab_libfunc (add_optab, TFmode, "_Q_add");
8142       set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
8143       set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
8144       set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
8145       set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
8146 
8147       /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
8148 	 is because with soft-float, the SFmode and DFmode sqrt
8149 	 instructions will be absent, and the compiler will notice and
8150 	 try to use the TFmode sqrt instruction for calls to the
8151 	 builtin function sqrt, but this fails.  */
8152       if (TARGET_FPU)
8153 	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
8154 
8155       set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
8156       set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
8157       set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
8158       set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
8159       set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
8160       set_optab_libfunc (le_optab, TFmode, "_Q_fle");
8161 
8162       set_conv_libfunc (sext_optab,   TFmode, SFmode, "_Q_stoq");
8163       set_conv_libfunc (sext_optab,   TFmode, DFmode, "_Q_dtoq");
8164       set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_Q_qtos");
8165       set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_Q_qtod");
8166 
8167       set_conv_libfunc (sfix_optab,   SImode, TFmode, "_Q_qtoi");
8168       set_conv_libfunc (ufix_optab,   SImode, TFmode, "_Q_qtou");
8169       set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
8170       set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
8171 
8172       if (DITF_CONVERSION_LIBFUNCS)
8173 	{
8174 	  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_Q_qtoll");
8175 	  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_Q_qtoull");
8176 	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
8177 	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
8178 	}
8179 
8180       if (SUN_CONVERSION_LIBFUNCS)
8181 	{
8182 	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
8183 	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
8184 	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
8185 	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
8186 	}
8187     }
8188   if (TARGET_ARCH64)
8189     {
8190       /* In the SPARC 64-bit ABI, SImode multiply and divide functions
8191 	 do not exist in the library.  Make sure the compiler does not
8192 	 emit calls to them by accident.  (It should always use the
8193          hardware instructions.)  */
8194       set_optab_libfunc (smul_optab, SImode, 0);
8195       set_optab_libfunc (sdiv_optab, SImode, 0);
8196       set_optab_libfunc (udiv_optab, SImode, 0);
8197       set_optab_libfunc (smod_optab, SImode, 0);
8198       set_optab_libfunc (umod_optab, SImode, 0);
8199 
8200       if (SUN_INTEGER_MULTIPLY_64)
8201 	{
8202 	  set_optab_libfunc (smul_optab, DImode, "__mul64");
8203 	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
8204 	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
8205 	  set_optab_libfunc (smod_optab, DImode, "__rem64");
8206 	  set_optab_libfunc (umod_optab, DImode, "__urem64");
8207 	}
8208 
8209       if (SUN_CONVERSION_LIBFUNCS)
8210 	{
8211 	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
8212 	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
8213 	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
8214 	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
8215 	}
8216     }
8217 
8218   gofast_maybe_init_libfuncs ();
8219 }
8220 
8221 #define def_builtin(NAME, CODE, TYPE) \
8222   add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \
8223                         NULL_TREE)
8224 
8225 /* Implement the TARGET_INIT_BUILTINS target hook.
8226    Create builtin functions for special SPARC instructions.  */
8227 
8228 static void
8229 sparc_init_builtins (void)
8230 {
8231   if (TARGET_VIS)
8232     sparc_vis_init_builtins ();
8233 }
8234 
8235 /* Create builtin functions for VIS 1.0 instructions.  */
8236 
8237 static void
8238 sparc_vis_init_builtins (void)
8239 {
8240   tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
8241   tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
8242   tree v4hi = build_vector_type (intHI_type_node, 4);
8243   tree v2hi = build_vector_type (intHI_type_node, 2);
8244   tree v2si = build_vector_type (intSI_type_node, 2);
8245 
8246   tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
8247   tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
8248   tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
8249   tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
8250   tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
8251   tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
8252   tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
8253   tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
8254   tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
8255   tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
8256   tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
8257   tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
8258   tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
8259 							 v8qi, v8qi,
8260 							 intDI_type_node, 0);
8261   tree di_ftype_di_di = build_function_type_list (intDI_type_node,
8262 						  intDI_type_node,
8263 						  intDI_type_node, 0);
8264   tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
8265 		        			    ptr_type_node,
8266 					            intSI_type_node, 0);
8267   tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
8268 		        			    ptr_type_node,
8269 					            intDI_type_node, 0);
8270 
8271   /* Packing and expanding vectors.  */
8272   def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi);
8273   def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
8274 	       v8qi_ftype_v2si_v8qi);
8275   def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
8276 	       v2hi_ftype_v2si);
8277   def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi);
8278   def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
8279 	       v8qi_ftype_v4qi_v4qi);
8280 
8281   /* Multiplications.  */
8282   def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
8283 	       v4hi_ftype_v4qi_v4hi);
8284   def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
8285 	       v4hi_ftype_v4qi_v2hi);
8286   def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
8287 	       v4hi_ftype_v4qi_v2hi);
8288   def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
8289 	       v4hi_ftype_v8qi_v4hi);
8290   def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
8291 	       v4hi_ftype_v8qi_v4hi);
8292   def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
8293 	       v2si_ftype_v4qi_v2hi);
8294   def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
8295 	       v2si_ftype_v4qi_v2hi);
8296 
8297   /* Data aligning.  */
8298   def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
8299 	       v4hi_ftype_v4hi_v4hi);
8300   def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
8301 	       v8qi_ftype_v8qi_v8qi);
8302   def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
8303 	       v2si_ftype_v2si_v2si);
8304   def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
8305                di_ftype_di_di);
8306   if (TARGET_ARCH64)
8307     def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
8308 	         ptr_ftype_ptr_di);
8309   else
8310     def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
8311 	         ptr_ftype_ptr_si);
8312 
8313   /* Pixel distance.  */
8314   def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
8315 	       di_ftype_v8qi_v8qi_di);
8316 }
8317 
8318 /* Handle TARGET_EXPAND_BUILTIN target hook.
8319    Expand builtin functions for SPARC intrinsics.  */
8320 
8321 static rtx
8322 sparc_expand_builtin (tree exp, rtx target,
8323 		      rtx subtarget ATTRIBUTE_UNUSED,
8324 		      enum machine_mode tmode ATTRIBUTE_UNUSED,
8325 		      int ignore ATTRIBUTE_UNUSED)
8326 {
8327   tree arg;
8328   call_expr_arg_iterator iter;
8329   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
8330   unsigned int icode = DECL_FUNCTION_CODE (fndecl);
8331   rtx pat, op[4];
8332   enum machine_mode mode[4];
8333   int arg_count = 0;
8334 
8335   mode[0] = insn_data[icode].operand[0].mode;
8336   if (!target
8337       || GET_MODE (target) != mode[0]
8338       || ! (*insn_data[icode].operand[0].predicate) (target, mode[0]))
8339     op[0] = gen_reg_rtx (mode[0]);
8340   else
8341     op[0] = target;
8342 
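       /* Expand each argument, copying it into a fresh register whenever it
          does not satisfy the predicate of its insn operand.  */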
8343   FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
8344     {
8345       arg_count++;
8346       mode[arg_count] = insn_data[icode].operand[arg_count].mode;
8347       op[arg_count] = expand_normal (arg);
8348 
8349       if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
8350 							      mode[arg_count]))
8351 	op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
8352     }
8353 
8354   switch (arg_count)
8355     {
8356     case 1:
8357       pat = GEN_FCN (icode) (op[0], op[1]);
8358       break;
8359     case 2:
8360       pat = GEN_FCN (icode) (op[0], op[1], op[2]);
8361       break;
8362     case 3:
8363       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
8364       break;
8365     default:
8366       gcc_unreachable ();
8367     }
8368 
8369   if (!pat)
8370     return NULL_RTX;
8371 
8372   emit_insn (pat);
8373 
8374   return op[0];
8375 }
8376 
8377 static int
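     /* Scale the product of an unsigned 8-bit element E8 and a signed 16-bit
        element E16 down by 256 with rounding, as the VIS fmul8x16 family of
        instructions does.  */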
8378 sparc_vis_mul8x16 (int e8, int e16)
8379 {
8380   return (e8 * e16 + 128) / 256;
8381 }
8382 
8383 /* Multiply the vector elements in ELTS0 by the elements in ELTS1 as specified
8384    by FNCODE.  All of the elements in the ELTS0 and ELTS1 lists must be integer
8385    constants.  A tree list with the results of the multiplications is returned,
8386    and each element in the list is of INNER_TYPE.  */
8387 
8388 static tree
8389 sparc_handle_vis_mul8x16 (int fncode, tree inner_type, tree elts0, tree elts1)
8390 {
8391   tree n_elts = NULL_TREE;
8392   int scale;
8393 
8394   switch (fncode)
8395     {
8396     case CODE_FOR_fmul8x16_vis:
8397       for (; elts0 && elts1;
8398 	   elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8399 	{
8400 	  int val
8401 	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8402 				 TREE_INT_CST_LOW (TREE_VALUE (elts1)));
8403 	  n_elts = tree_cons (NULL_TREE,
8404 			      build_int_cst (inner_type, val),
8405 			      n_elts);
8406 	}
8407       break;
8408 
8409     case CODE_FOR_fmul8x16au_vis:
8410       scale = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8411 
8412       for (; elts0; elts0 = TREE_CHAIN (elts0))
8413 	{
8414 	  int val
8415 	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8416 				 scale);
8417 	  n_elts = tree_cons (NULL_TREE,
8418 			      build_int_cst (inner_type, val),
8419 			      n_elts);
8420 	}
8421       break;
8422 
8423     case CODE_FOR_fmul8x16al_vis:
8424       scale = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (elts1)));
8425 
8426       for (; elts0; elts0 = TREE_CHAIN (elts0))
8427 	{
8428 	  int val
8429 	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8430 				 scale);
8431 	  n_elts = tree_cons (NULL_TREE,
8432 			      build_int_cst (inner_type, val),
8433 			      n_elts);
8434 	}
8435       break;
8436 
8437     default:
8438       gcc_unreachable ();
8439     }
8440 
  return nreverse (n_elts);
}

/* Handle TARGET_FOLD_BUILTIN target hook.
8445    Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
8446    result of the function call is ignored.  NULL_TREE is returned if the
8447    function could not be folded.  */
8448 
8449 static tree
8450 sparc_fold_builtin (tree fndecl, tree arglist, bool ignore)
8451 {
8452   tree arg0, arg1, arg2;
8453   tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
8454   enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
8455 
8456   if (ignore
8457       && icode != CODE_FOR_alignaddrsi_vis
8458       && icode != CODE_FOR_alignaddrdi_vis)
8459     return fold_convert (rtype, integer_zero_node);
8460 
8461   switch (icode)
8462     {
8463     case CODE_FOR_fexpand_vis:
8464       arg0 = TREE_VALUE (arglist);
8465       STRIP_NOPS (arg0);
8466 
8467       if (TREE_CODE (arg0) == VECTOR_CST)
8468 	{
8469 	  tree inner_type = TREE_TYPE (rtype);
8470 	  tree elts = TREE_VECTOR_CST_ELTS (arg0);
8471 	  tree n_elts = NULL_TREE;
8472 
8473 	  for (; elts; elts = TREE_CHAIN (elts))
8474 	    {
8475 	      unsigned int val = TREE_INT_CST_LOW (TREE_VALUE (elts)) << 4;
8476 	      n_elts = tree_cons (NULL_TREE,
8477 				  build_int_cst (inner_type, val),
8478 				  n_elts);
8479 	    }
8480 	  return build_vector (rtype, nreverse (n_elts));
8481 	}
8482       break;
8483 
8484     case CODE_FOR_fmul8x16_vis:
8485     case CODE_FOR_fmul8x16au_vis:
8486     case CODE_FOR_fmul8x16al_vis:
8487       arg0 = TREE_VALUE (arglist);
8488       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8489       STRIP_NOPS (arg0);
8490       STRIP_NOPS (arg1);
8491 
8492       if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8493 	{
8494 	  tree inner_type = TREE_TYPE (rtype);
8495 	  tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8496 	  tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8497 	  tree n_elts = sparc_handle_vis_mul8x16 (icode, inner_type, elts0,
8498 						  elts1);
8499 
8500 	  return build_vector (rtype, n_elts);
8501 	}
8502       break;
8503 
8504     case CODE_FOR_fpmerge_vis:
8505       arg0 = TREE_VALUE (arglist);
8506       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8507       STRIP_NOPS (arg0);
8508       STRIP_NOPS (arg1);
8509 
8510       if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8511 	{
8512 	  tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8513 	  tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8514 	  tree n_elts = NULL_TREE;
8515 
8516 	  for (; elts0 && elts1;
8517 	       elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8518 	    {
8519 	      n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts0), n_elts);
8520 	      n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts1), n_elts);
8521 	    }
8522 
8523 	  return build_vector (rtype, nreverse (n_elts));
8524 	}
8525       break;
8526 
8527     case CODE_FOR_pdist_vis:
8528       arg0 = TREE_VALUE (arglist);
8529       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8530       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8531       STRIP_NOPS (arg0);
8532       STRIP_NOPS (arg1);
8533       STRIP_NOPS (arg2);
8534 
8535       if (TREE_CODE (arg0) == VECTOR_CST
8536 	  && TREE_CODE (arg1) == VECTOR_CST
8537 	  && TREE_CODE (arg2) == INTEGER_CST)
8538 	{
8539 	  int overflow = 0;
8540 	  unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2);
8541 	  HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2);
8542 	  tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8543 	  tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8544 
8545 	  for (; elts0 && elts1;
8546 	       elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8547 	    {
8548 	      unsigned HOST_WIDE_INT
8549 		low0 = TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8550 		low1 = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8551 	      HOST_WIDE_INT high0 = TREE_INT_CST_HIGH (TREE_VALUE (elts0));
8552 	      HOST_WIDE_INT high1 = TREE_INT_CST_HIGH (TREE_VALUE (elts1));
8553 
8554 	      unsigned HOST_WIDE_INT l;
8555 	      HOST_WIDE_INT h;
8556 
8557 	      overflow |= neg_double (low1, high1, &l, &h);
8558 	      overflow |= add_double (low0, high0, l, h, &l, &h);
8559 	      if (h < 0)
8560 		overflow |= neg_double (l, h, &l, &h);
8561 
8562 	      overflow |= add_double (low, high, l, h, &low, &high);
8563 	    }
8564 
8565 	  gcc_assert (overflow == 0);
8566 
8567 	  return build_int_cst_wide (rtype, low, high);
	}
      break;

    default:
8571       break;
8572     }
8573 
8574   return NULL_TREE;
8575 }
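
/* With constant arguments the folder above eliminates the call entirely.
   A compiled-out sketch (v4qi/v4hi mirror the vector types the builtins
   were registered with):  */
#if 0
typedef char v4qi __attribute__ ((vector_size (4)));
typedef short v4hi __attribute__ ((vector_size (8)));

static v4hi
fexpand_folds (void)
{
  v4qi x = { 1, 2, 3, 4 };
  /* Each element is widened and shifted left by 4, so the call folds to
     the vector constant { 16, 32, 48, 64 } at compile time.  */
  return __builtin_vis_fexpand (x);
}
#endif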
8576 
8577 /* ??? This duplicates information provided to the compiler by the
8578    ??? scheduler description.  Some day, teach genautomata to output
8579    ??? the latencies and then CSE will just use that.  */
8580 
8581 static bool
8582 sparc_rtx_costs (rtx x, int code, int outer_code, int *total,
8583 		 bool speed ATTRIBUTE_UNUSED)
8584 {
8585   enum machine_mode mode = GET_MODE (x);
8586   bool float_mode_p = FLOAT_MODE_P (mode);
8587 
8588   switch (code)
8589     {
8590     case CONST_INT:
8591       if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
8592 	{
8593 	  *total = 0;
8594 	  return true;
8595 	}
8596       /* FALLTHRU */
8597 
8598     case HIGH:
8599       *total = 2;
8600       return true;
8601 
8602     case CONST:
8603     case LABEL_REF:
8604     case SYMBOL_REF:
8605       *total = 4;
8606       return true;
8607 
8608     case CONST_DOUBLE:
8609       if (GET_MODE (x) == VOIDmode
8610 	  && ((CONST_DOUBLE_HIGH (x) == 0
8611 	       && CONST_DOUBLE_LOW (x) < 0x1000)
8612 	      || (CONST_DOUBLE_HIGH (x) == -1
8613 		  && CONST_DOUBLE_LOW (x) < 0
8614 		  && CONST_DOUBLE_LOW (x) >= -0x1000)))
8615 	*total = 0;
8616       else
8617 	*total = 8;
8618       return true;
8619 
8620     case MEM:
8621       /* If outer-code was a sign or zero extension, a cost
8622 	 of COSTS_N_INSNS (1) was already added in.  This is
8623 	 why we are subtracting it back out.  */
8624       if (outer_code == ZERO_EXTEND)
8625 	{
8626 	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
8627 	}
8628       else if (outer_code == SIGN_EXTEND)
8629 	{
8630 	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
8631 	}
8632       else if (float_mode_p)
8633 	{
8634 	  *total = sparc_costs->float_load;
8635 	}
8636       else
8637 	{
8638 	  *total = sparc_costs->int_load;
8639 	}
8640 
8641       return true;
8642 
8643     case PLUS:
8644     case MINUS:
8645       if (float_mode_p)
8646 	*total = sparc_costs->float_plusminus;
8647       else
8648 	*total = COSTS_N_INSNS (1);
8649       return false;
8650 
8651     case MULT:
8652       if (float_mode_p)
8653 	*total = sparc_costs->float_mul;
8654       else if (! TARGET_HARD_MUL)
8655 	*total = COSTS_N_INSNS (25);
8656       else
8657 	{
8658 	  int bit_cost;
8659 
8660 	  bit_cost = 0;
	  if (sparc_costs->int_mul_bit_factor)
	    {
	      /* The cost of a multiply by a constant is modeled as growing
		 with the operand's population count; the loops below count
		 the set bits by clearing the lowest one on each iteration
		 (Kernighan's method).  */
	      int nbits;

8665 	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8666 		{
8667 		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
8668 		  for (nbits = 0; value != 0; value &= value - 1)
8669 		    nbits++;
8670 		}
8671 	      else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
8672 		       && GET_MODE (XEXP (x, 1)) == VOIDmode)
8673 		{
8674 		  rtx x1 = XEXP (x, 1);
8675 		  unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
8676 		  unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
8677 
8678 		  for (nbits = 0; value1 != 0; value1 &= value1 - 1)
8679 		    nbits++;
8680 		  for (; value2 != 0; value2 &= value2 - 1)
8681 		    nbits++;
8682 		}
8683 	      else
8684 		nbits = 7;
8685 
8686 	      if (nbits < 3)
8687 		nbits = 3;
8688 	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
8689 	      bit_cost = COSTS_N_INSNS (bit_cost);
8690 	    }
8691 
8692 	  if (mode == DImode)
8693 	    *total = sparc_costs->int_mulX + bit_cost;
8694 	  else
8695 	    *total = sparc_costs->int_mul + bit_cost;
8696 	}
8697       return false;
8698 
8699     case ASHIFT:
8700     case ASHIFTRT:
8701     case LSHIFTRT:
8702       *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
8703       return false;
8704 
8705     case DIV:
8706     case UDIV:
8707     case MOD:
8708     case UMOD:
8709       if (float_mode_p)
8710 	{
8711 	  if (mode == DFmode)
8712 	    *total = sparc_costs->float_div_df;
8713 	  else
8714 	    *total = sparc_costs->float_div_sf;
8715 	}
8716       else
8717 	{
8718 	  if (mode == DImode)
8719 	    *total = sparc_costs->int_divX;
8720 	  else
8721 	    *total = sparc_costs->int_div;
8722 	}
8723       return false;
8724 
8725     case NEG:
8726       if (! float_mode_p)
8727 	{
8728 	  *total = COSTS_N_INSNS (1);
8729 	  return false;
8730 	}
8731       /* FALLTHRU */
8732 
8733     case ABS:
8734     case FLOAT:
8735     case UNSIGNED_FLOAT:
8736     case FIX:
8737     case UNSIGNED_FIX:
8738     case FLOAT_EXTEND:
8739     case FLOAT_TRUNCATE:
8740       *total = sparc_costs->float_move;
8741       return false;
8742 
8743     case SQRT:
8744       if (mode == DFmode)
8745 	*total = sparc_costs->float_sqrt_df;
8746       else
8747 	*total = sparc_costs->float_sqrt_sf;
8748       return false;
8749 
8750     case COMPARE:
8751       if (float_mode_p)
8752 	*total = sparc_costs->float_cmp;
8753       else
8754 	*total = COSTS_N_INSNS (1);
8755       return false;
8756 
8757     case IF_THEN_ELSE:
8758       if (float_mode_p)
8759 	*total = sparc_costs->float_cmove;
8760       else
8761 	*total = sparc_costs->int_cmove;
8762       return false;
8763 
8764     case IOR:
8765       /* Handle the NAND vector patterns.  */
8766       if (sparc_vector_mode_supported_p (GET_MODE (x))
8767 	  && GET_CODE (XEXP (x, 0)) == NOT
8768 	  && GET_CODE (XEXP (x, 1)) == NOT)
8769 	{
8770 	  *total = COSTS_N_INSNS (1);
8771 	  return true;
8772 	}
8773       else
8774         return false;
8775 
8776     default:
8777       return false;
8778     }
8779 }
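
/* A compiled-out, standalone rendering of the multiply cost estimate
   above: the extra cost charged on top of the base multiply latency grows
   with the population count of a constant operand.  */
#if 0
static int
mul_bit_cost (unsigned long long value, int bit_factor)
{
  int nbits = 0;

  /* Kernighan's method: each iteration clears the lowest set bit.  */
  for (; value != 0; value &= value - 1)
    nbits++;

  if (nbits < 3)
    nbits = 3;
  /* Raw instruction count; the caller wraps it in COSTS_N_INSNS.  */
  return (nbits - 3) / bit_factor;
}
#endif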
8780 
8781 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
8782    This is achieved by means of a manual dynamic stack space allocation in
8783    the current frame.  We make the assumption that SEQ doesn't contain any
8784    function calls, with the possible exception of calls to the GOT helper.  */
8785 
8786 static void
8787 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
8788 {
8789   /* We must preserve the lowest 16 words for the register save area.  */
8790   HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
8791   /* We really need only 2 words of fresh stack space.  */
8792   HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
8793 
8794   rtx slot
8795     = gen_rtx_MEM (word_mode, plus_constant (stack_pointer_rtx,
8796 					     SPARC_STACK_BIAS + offset));
8797 
8798   emit_insn (gen_stack_pointer_dec (GEN_INT (size)));
8799   emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
8800   if (reg2)
8801     emit_insn (gen_rtx_SET (VOIDmode,
8802 			    adjust_address (slot, word_mode, UNITS_PER_WORD),
8803 			    reg2));
8804   emit_insn (seq);
8805   if (reg2)
8806     emit_insn (gen_rtx_SET (VOIDmode,
8807 			    reg2,
8808 			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
8809   emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
8810   emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
8811 }
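
/* The emitted sequence thus has the following shape (stack bias and exact
   offsets elided; st/ld become stx/ldx in 64-bit mode):

	sub	%sp, SIZE, %sp
	st	REG, [%sp + OFFSET]
	st	REG2, [%sp + OFFSET + wordsize]	! if REG2
	<SEQ>
	ld	[%sp + OFFSET + wordsize], REG2	! if REG2
	ld	[%sp + OFFSET], REG
	add	%sp, SIZE, %sp
*/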
8812 
8813 /* Output the assembler code for a thunk function.  THUNK_DECL is the
8814    declaration for the thunk function itself, FUNCTION is the decl for
8815    the target function.  DELTA is an immediate constant offset to be
8816    added to THIS.  If VCALL_OFFSET is nonzero, the word at address
8817    (*THIS + VCALL_OFFSET) should be additionally added to THIS.  */
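
/* In C terms the thunk computes the following (a compiled-out sketch; the
   real code below emits RTL and ends with a tail call to FUNCTION on the
   adjusted pointer):  */
#if 0
static void *
adjust_this (void *this_ptr, long delta, long vcall_offset)
{
  this_ptr = (char *) this_ptr + delta;
  if (vcall_offset != 0)
    {
      /* *THIS_PTR is the vtable pointer; fetch the adjustment stored
	 VCALL_OFFSET bytes into the vtable and add it as well.  */
      char *vptr = *(char **) this_ptr;
      this_ptr = (char *) this_ptr + *(long *) (vptr + vcall_offset);
    }
  return this_ptr;
}
#endif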
8818 
8819 static void
8820 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8821 		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8822 		       tree function)
8823 {
8824   rtx this_rtx, insn, funexp;
8825   unsigned int int_arg_first;
8826 
8827   reload_completed = 1;
8828   epilogue_completed = 1;
8829 
8830   emit_note (NOTE_INSN_PROLOGUE_END);
8831 
8832   if (flag_delayed_branch)
8833     {
8834       /* We will emit a regular sibcall below, so we need to instruct
8835 	 output_sibcall that we are in a leaf function.  */
8836       sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1;
8837 
8838       /* This will cause final.c to invoke leaf_renumber_regs so we
8839 	 must behave as if we were in a not-yet-leafified function.  */
8840       int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
8841     }
8842   else
8843     {
8844       /* We will emit the sibcall manually below, so we will need to
8845 	 manually spill non-leaf registers.  */
8846       sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0;
8847 
8848       /* We really are in a leaf function.  */
8849       int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
8850     }
8851 
  /* Find the "this" pointer.  Normally it is in the first integer argument
     register, but if ARCH64 and the function returns an aggregate in memory,
     the hidden structure return pointer is passed there instead and "this"
     moves to the next argument register.  */
8854   if (TARGET_ARCH64
8855       && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8856     this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
8857   else
8858     this_rtx = gen_rtx_REG (Pmode, int_arg_first);
8859 
8860   /* Add DELTA.  When possible use a plain add, otherwise load it into
8861      a register first.  */
8862   if (delta)
8863     {
8864       rtx delta_rtx = GEN_INT (delta);
8865 
8866       if (! SPARC_SIMM13_P (delta))
8867 	{
8868 	  rtx scratch = gen_rtx_REG (Pmode, 1);
8869 	  emit_move_insn (scratch, delta_rtx);
8870 	  delta_rtx = scratch;
8871 	}
8872 
8873       /* THIS_RTX += DELTA.  */
8874       emit_insn (gen_add2_insn (this_rtx, delta_rtx));
8875     }
8876 
8877   /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
8878   if (vcall_offset)
8879     {
8880       rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8881       rtx scratch = gen_rtx_REG (Pmode, 1);
8882 
8883       gcc_assert (vcall_offset < 0);
8884 
8885       /* SCRATCH = *THIS_RTX.  */
8886       emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
8887 
8888       /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
8889 	 may not have any available scratch register at this point.  */
8890       if (SPARC_SIMM13_P (vcall_offset))
8891 	;
8892       /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
8893       else if (! fixed_regs[5]
8894 	       /* The below sequence is made up of at least 2 insns,
8895 		  while the default method may need only one.  */
8896 	       && vcall_offset < -8192)
8897 	{
8898 	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
8899 	  emit_move_insn (scratch2, vcall_offset_rtx);
8900 	  vcall_offset_rtx = scratch2;
8901 	}
8902       else
8903 	{
8904 	  rtx increment = GEN_INT (-4096);
8905 
8906 	  /* VCALL_OFFSET is a negative number whose typical range can be
8907 	     estimated as -32768..0 in 32-bit mode.  In almost all cases
8908 	     it is therefore cheaper to emit multiple add insns than
8909 	     spilling and loading the constant into a register (at least
8910 	     6 insns).  */
8911 	  while (! SPARC_SIMM13_P (vcall_offset))
8912 	    {
8913 	      emit_insn (gen_add2_insn (scratch, increment));
8914 	      vcall_offset += 4096;
8915 	    }
8916 	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
8917 	}
8918 
8919       /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
8920       emit_move_insn (scratch, gen_rtx_MEM (Pmode,
8921 					    gen_rtx_PLUS (Pmode,
8922 							  scratch,
8923 							  vcall_offset_rtx)));
8924 
8925       /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
8926       emit_insn (gen_add2_insn (this_rtx, scratch));
8927     }
8928 
8929   /* Generate a tail call to the target function.  */
8930   if (! TREE_USED (function))
8931     {
8932       assemble_external (function);
8933       TREE_USED (function) = 1;
8934     }
8935   funexp = XEXP (DECL_RTL (function), 0);
8936 
8937   if (flag_delayed_branch)
8938     {
8939       funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8940       insn = emit_call_insn (gen_sibcall (funexp));
8941       SIBLING_CALL_P (insn) = 1;
8942     }
8943   else
8944     {
8945       /* The hoops we have to jump through in order to generate a sibcall
8946 	 without using delay slots...  */
8947       rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
8948 
8949       if (flag_pic)
8950         {
8951 	  spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
8952 	  start_sequence ();
8953 	  /* Delay emitting the GOT helper function because it needs to
8954 	     change the section and we are emitting assembly code.  */
8955 	  load_got_register ();  /* clobbers %o7 */
8956 	  scratch = legitimize_pic_address (funexp, scratch);
8957 	  seq = get_insns ();
8958 	  end_sequence ();
8959 	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
8960 	}
8961       else if (TARGET_ARCH32)
8962 	{
8963 	  emit_insn (gen_rtx_SET (VOIDmode,
8964 				  scratch,
8965 				  gen_rtx_HIGH (SImode, funexp)));
8966 	  emit_insn (gen_rtx_SET (VOIDmode,
8967 				  scratch,
8968 				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
8969 	}
8970       else  /* TARGET_ARCH64 */
8971         {
8972 	  switch (sparc_cmodel)
8973 	    {
8974 	    case CM_MEDLOW:
8975 	    case CM_MEDMID:
8976 	      /* The destination can serve as a temporary.  */
8977 	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
8978 	      break;
8979 
8980 	    case CM_MEDANY:
8981 	    case CM_EMBMEDANY:
8982 	      /* The destination cannot serve as a temporary.  */
8983 	      spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
8984 	      start_sequence ();
8985 	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
8986 	      seq = get_insns ();
8987 	      end_sequence ();
8988 	      emit_and_preserve (seq, spill_reg, 0);
8989 	      break;
8990 
8991 	    default:
8992 	      gcc_unreachable ();
8993 	    }
8994 	}
8995 
8996       emit_jump_insn (gen_indirect_jump (scratch));
8997     }
8998 
8999   emit_barrier ();
9000 
9001   /* Run just enough of rest_of_compilation to get the insns emitted.
9002      There's not really enough bulk here to make other passes such as
     instruction scheduling worthwhile.  Note that use_thunk calls
9004      assemble_start_function and assemble_end_function.  */
9005   insn = get_insns ();
9006   insn_locators_alloc ();
9007   shorten_branches (insn);
9008   final_start_function (insn, file, 1);
9009   final (insn, file, 1);
9010   final_end_function ();
9011 
9012   reload_completed = 0;
9013   epilogue_completed = 0;
9014 }
9015 
9016 /* Return true if sparc_output_mi_thunk would be able to output the
9017    assembler code for the thunk function specified by the arguments
   it is passed, and false otherwise.  */

static bool
9020 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
9021 			   HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
9022 			   HOST_WIDE_INT vcall_offset,
9023 			   const_tree function ATTRIBUTE_UNUSED)
9024 {
9025   /* Bound the loop used in the default method above.  */
9026   return (vcall_offset >= -32768 || ! fixed_regs[5]);
9027 }
9028 
9029 /* How to allocate a 'struct machine_function'.  */
9030 
9031 static struct machine_function *
9032 sparc_init_machine_status (void)
9033 {
9034   return GGC_CNEW (struct machine_function);
9035 }
9036 
9037 /* Locate some local-dynamic symbol still in use by this function
9038    so that we can print its name in local-dynamic base patterns.  */
9039 
9040 static const char *
9041 get_some_local_dynamic_name (void)
9042 {
9043   rtx insn;
9044 
9045   if (cfun->machine->some_ld_name)
9046     return cfun->machine->some_ld_name;
9047 
9048   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9049     if (INSN_P (insn)
9050 	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9051       return cfun->machine->some_ld_name;
9052 
9053   gcc_unreachable ();
9054 }
9055 
9056 static int
9057 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
9058 {
9059   rtx x = *px;
9060 
9061   if (x
9062       && GET_CODE (x) == SYMBOL_REF
9063       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9064     {
9065       cfun->machine->some_ld_name = XSTR (x, 0);
9066       return 1;
9067     }
9068 
9069   return 0;
9070 }
9071 
/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
   This is called from dwarf2out.c to emit call frame instructions
   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.  */

static void
9076 sparc_dwarf_handle_frame_unspec (const char *label,
9077 				 rtx pattern ATTRIBUTE_UNUSED,
9078 				 int index ATTRIBUTE_UNUSED)
9079 {
9080   gcc_assert (index == UNSPECV_SAVEW);
9081   dwarf2out_window_save (label);
9082 }
9083 
9084 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9085    We need to emit DTP-relative relocations.  */
9086 
9087 static void
9088 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
9089 {
9090   switch (size)
9091     {
9092     case 4:
9093       fputs ("\t.word\t%r_tls_dtpoff32(", file);
9094       break;
9095     case 8:
9096       fputs ("\t.xword\t%r_tls_dtpoff64(", file);
9097       break;
9098     default:
9099       gcc_unreachable ();
9100     }
9101   output_addr_const (file, x);
9102   fputs (")", file);
9103 }
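
/* For example, with SIZE == 4 and X a SYMBOL_REF for "foo", the hook above
   emits

	.word	%r_tls_dtpoff32(foo)

   which resolves to foo's offset from the base of its module's TLS
   block.  */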
9104 
9105 /* Do whatever processing is required at the end of a file.  */
9106 
9107 static void
9108 sparc_file_end (void)
9109 {
9110   /* If we need to emit the special GOT helper function, do so now.  */
9111   if (got_helper_rtx)
9112     {
9113       const char *name = XSTR (got_helper_rtx, 0);
9114       const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
9115 #ifdef DWARF2_UNWIND_INFO
9116       bool do_cfi;
9117 #endif
9118 
9119       if (USE_HIDDEN_LINKONCE)
9120 	{
9121 	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9122 				  get_identifier (name),
9123 				  build_function_type (void_type_node,
9124 						       void_list_node));
9125 	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9126 					   NULL_TREE, void_type_node);
9127 	  TREE_PUBLIC (decl) = 1;
9128 	  TREE_STATIC (decl) = 1;
9129 	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
9130 	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
9131 	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
9132 	  resolve_unique_section (decl, 0, flag_function_sections);
9133 	  allocate_struct_function (decl, true);
9134 	  cfun->is_thunk = 1;
9135 	  current_function_decl = decl;
9136 	  init_varasm_status ();
9137 	  assemble_start_function (decl, name);
9138 	}
9139       else
9140 	{
9141 	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9142           switch_to_section (text_section);
9143 	  if (align > 0)
9144 	    ASM_OUTPUT_ALIGN (asm_out_file, align);
9145 	  ASM_OUTPUT_LABEL (asm_out_file, name);
9146 	}
9147 
9148 #ifdef DWARF2_UNWIND_INFO
9149       do_cfi = dwarf2out_do_cfi_asm ();
9150       if (do_cfi)
9151 	fprintf (asm_out_file, "\t.cfi_startproc\n");
9152 #endif
9153       if (flag_delayed_branch)
9154 	fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
9155 		 reg_name, reg_name);
9156       else
9157 	fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
9158 		 reg_name, reg_name);
9159 #ifdef DWARF2_UNWIND_INFO
9160       if (do_cfi)
9161 	fprintf (asm_out_file, "\t.cfi_endproc\n");
9162 #endif
9163     }
9164 
9165   if (NEED_INDICATE_EXEC_STACK)
9166     file_end_indicate_exec_stack ();
9167 }
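
/* Assuming the GOT register is %l7 (the usual choice) and delayed branches
   are enabled, the helper body emitted above is just

	jmp	%o7+8
	 add	%o7, %l7, %l7

   i.e. the helper adds its caller's PC (left in %o7 by the call) to the
   GOT register, which the caller has preloaded with the PC-relative
   offset of the GOT.  */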
9168 
9169 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  With 128-bit long double on 32-bit SPARC,
   mangle long double as "g" (the mangling used for __float128) instead of
   the default "e".  */
9171 
9172 static const char *
9173 sparc_mangle_type (const_tree type)
9174 {
9175   if (!TARGET_64BIT
9176       && TYPE_MAIN_VARIANT (type) == long_double_type_node
9177       && TARGET_LONG_DOUBLE_128)
9178     return "g";
9179 
9180   /* For all other types, use normal C++ mangling.  */
9181   return NULL;
9182 }
9183 #endif
9184 
/* Expand code to perform an 8 or 16-bit compare-and-swap by doing a 32-bit
   compare-and-swap on the word containing the byte or half-word.  */
9187 
9188 void
9189 sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
9190 {
9191   rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
9192   rtx addr = gen_reg_rtx (Pmode);
9193   rtx off = gen_reg_rtx (SImode);
9194   rtx oldv = gen_reg_rtx (SImode);
9195   rtx newv = gen_reg_rtx (SImode);
9196   rtx oldvalue = gen_reg_rtx (SImode);
9197   rtx newvalue = gen_reg_rtx (SImode);
9198   rtx res = gen_reg_rtx (SImode);
9199   rtx resv = gen_reg_rtx (SImode);
9200   rtx memsi, val, mask, end_label, loop_label, cc;
9201 
9202   emit_insn (gen_rtx_SET (VOIDmode, addr,
9203 			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
9204 
9205   if (Pmode != SImode)
9206     addr1 = gen_lowpart (SImode, addr1);
9207   emit_insn (gen_rtx_SET (VOIDmode, off,
9208 			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));
9209 
9210   memsi = gen_rtx_MEM (SImode, addr);
9211   set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
9212   MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
9213 
9214   val = force_reg (SImode, memsi);
9215 
9216   emit_insn (gen_rtx_SET (VOIDmode, off,
9217 			  gen_rtx_XOR (SImode, off,
9218 				       GEN_INT (GET_MODE (mem) == QImode
9219 						? 3 : 2))));
9220 
9221   emit_insn (gen_rtx_SET (VOIDmode, off,
9222 			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
9223 
9224   if (GET_MODE (mem) == QImode)
9225     mask = force_reg (SImode, GEN_INT (0xff));
9226   else
9227     mask = force_reg (SImode, GEN_INT (0xffff));
9228 
9229   emit_insn (gen_rtx_SET (VOIDmode, mask,
9230 			  gen_rtx_ASHIFT (SImode, mask, off)));
9231 
9232   emit_insn (gen_rtx_SET (VOIDmode, val,
9233 			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9234 				       val)));
9235 
9236   oldval = gen_lowpart (SImode, oldval);
9237   emit_insn (gen_rtx_SET (VOIDmode, oldv,
9238 			  gen_rtx_ASHIFT (SImode, oldval, off)));
9239 
9240   newval = gen_lowpart_common (SImode, newval);
9241   emit_insn (gen_rtx_SET (VOIDmode, newv,
9242 			  gen_rtx_ASHIFT (SImode, newval, off)));
9243 
9244   emit_insn (gen_rtx_SET (VOIDmode, oldv,
9245 			  gen_rtx_AND (SImode, oldv, mask)));
9246 
9247   emit_insn (gen_rtx_SET (VOIDmode, newv,
9248 			  gen_rtx_AND (SImode, newv, mask)));
9249 
9250   end_label = gen_label_rtx ();
9251   loop_label = gen_label_rtx ();
9252   emit_label (loop_label);
9253 
9254   emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
9255 			  gen_rtx_IOR (SImode, oldv, val)));
9256 
9257   emit_insn (gen_rtx_SET (VOIDmode, newvalue,
9258 			  gen_rtx_IOR (SImode, newv, val)));
9259 
9260   emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue));
9261 
9262   emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
9263 
9264   emit_insn (gen_rtx_SET (VOIDmode, resv,
9265 			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9266 				       res)));
9267 
9268   cc = gen_compare_reg_1 (NE, resv, val);
9269   emit_insn (gen_rtx_SET (VOIDmode, val, resv));
9270 
9271   /* Use cbranchcc4 to separate the compare and branch!  */
9272   emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
9273 				  cc, const0_rtx, loop_label));
9274 
9275   emit_label (end_label);
9276 
9277   emit_insn (gen_rtx_SET (VOIDmode, res,
9278 			  gen_rtx_AND (SImode, res, mask)));
9279 
9280   emit_insn (gen_rtx_SET (VOIDmode, res,
9281 			  gen_rtx_LSHIFTRT (SImode, res, off)));
9282 
9283   emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
9284 }
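
/* A compiled-out C rendering of the expansion above for the QImode case
   on a big-endian 32-bit word.  cas32 stands in for the 32-bit
   compare-and-swap instruction and is modeled here with a GCC builtin.  */
#if 0
#include <stdint.h>

static unsigned int
cas32 (unsigned int *p, unsigned int expected, unsigned int desired)
{
  return __sync_val_compare_and_swap (p, expected, desired);
}

static unsigned char
cas8 (unsigned char *mem, unsigned char oldval, unsigned char newval)
{
  unsigned int *wordp = (unsigned int *) ((uintptr_t) mem & ~(uintptr_t) 3);
  int off = (((uintptr_t) mem & 3) ^ 3) << 3;	/* bit offset of the byte */
  unsigned int mask = 0xffU << off;
  unsigned int oldv = (unsigned int) oldval << off;
  unsigned int newv = (unsigned int) newval << off;
  unsigned int val = *wordp & ~mask;		/* the surrounding bytes */
  unsigned int res;

  for (;;)
    {
      res = cas32 (wordp, oldv | val, newv | val);
      if (res == (oldv | val))
	break;			/* swap succeeded */
      if ((res & ~mask) == val)
	break;			/* our byte differed: genuine failure */
      val = res & ~mask;	/* surrounding bytes changed: retry */
    }

  /* Return the old value of the addressed byte.  */
  return (unsigned char) ((res & mask) >> off);
}
#endif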
9285 
9286 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
9287 
9288 bool
9289 sparc_frame_pointer_required (void)
9290 {
9291   return !(current_function_is_leaf && only_leaf_regs_used ());
9292 }
9293 
9294 /* The way this is structured, we can't eliminate SFP in favor of SP
9295    if the frame pointer is required: we want to use the SFP->HFP elimination
9296    in that case.  But the test in update_eliminables doesn't know we are
9297    assuming below that we only do the former elimination.  */
9298 
9299 bool
9300 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
9301 {
9302   return (to == HARD_FRAME_POINTER_REGNUM
9303           || !targetm.frame_pointer_required ());
9304 }
9305 
9306 #include "gt-sparc.h"
9307