/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2019 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "tree-vector-builder.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;

  /* cost of a (predictable) branch.  */
  const int branch_cost;
};
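
/* A worked example of the variable multiply cost above: with the
   ultrasparc values further down (int_mul = COSTS_N_INSNS (4) and
   int_mul_bit_factor = 2), a multiply whose rs1 has its highest set bit
   at position 11 costs

	COSTS_N_INSNS (4) + (11 - 3) / 2

   i.e. four cost units over the base multiply, while a bit factor of
   zero means the flat int_mul{,X} cost regardless of the operand.  */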

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  4 /* branch cost */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};

static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
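
/* For instance, %i0 (hard register 24) is remapped to %o0 (hard register 8)
   above: a leaf function executes no SAVE instruction, so values that the
   register allocator placed in the in registers actually live in the
   corresponding out registers and are renamed at output time.  */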

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};
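
/* In other words, a function qualifies for leaf treatment only if it uses
   nothing but the global registers, %sp, the in registers other than %fp
   (which leaf_reg_remap above renames to out registers), and the FP
   registers.  */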

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						      int *, const_tree, int);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static bool sparc_return_in_memory (const_tree, const_tree);
static rtx sparc_struct_value_rtx (tree, int);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
static bool sparc_use_pseudo_pic_reg (void);
static void sparc_init_pic_reg (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
					   reg_class_t);
static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
					 reg_class_t);
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					    const vec_perm_indices &);
static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       do_diagnostic, handler, exclude } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
#endif

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef  TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG sparc_init_pic_reg

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P sparc_lra_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_MIN_ARITHMETIC_PRECISION
#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const

#undef TARGET_CAN_FOLLOW_JUMP
#define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* True if any of INSN's source register(s) is REG.  */

static bool
insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
{
  extract_insn (insn);
  return ((REG_P (recog_data.operand[1])
	   && REGNO (recog_data.operand[1]) == reg)
	  || (recog_data.n_operands == 3
	      && REG_P (recog_data.operand[2])
	      && REGNO (recog_data.operand[2]) == reg));
}

/* True if INSN is a floating-point division or square-root.  */

static bool
div_sqrt_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is a floating-point instruction.  */

static bool
fpop_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPMOVE:
    case TYPE_FPCMOVE:
    case TYPE_FP:
    case TYPE_FPCMP:
    case TYPE_FPMUL:
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is an atomic instruction.  */

static bool
atomic_insn_for_leon3_p (rtx_insn *insn)
{
  switch (INSN_CODE (insn))
    {
    case CODE_FOR_swapsi:
    case CODE_FOR_ldstub:
    case CODE_FOR_atomic_compare_and_swap_leon3_1:
      return true;
    default:
      return false;
    }
}

/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

/* True if INSN is an md pattern or asm statement.  */
#define USEFUL_INSN_P(INSN)						\
  (NONDEBUG_INSN_P (INSN)						\
   && GET_CODE (PATTERN (INSN)) != USE					\
   && GET_CODE (PATTERN (INSN)) != CLOBBER)
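
/* USE and CLOBBER patterns are compiler bookkeeping and emit no machine
   code, so they can never take part in one of the hardware hazards handled
   below; only real instructions and asm statements need to be inspected.  */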

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;
      rtx_insn *jump;
      rtx_sequence *seq;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn)
	  && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
	{
	  jump = seq->insn (0);
	  insn = seq->insn (1);
	}
      else if (JUMP_P (insn))
	jump = insn;
      else
	jump = NULL;

      /* Place a NOP at the branch target of an integer branch if it is a
	 floating-point operation or a floating-point branch.  */
      if (sparc_fix_gr712rc
	  && jump
	  && jump_to_label_p (jump)
	  && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
	{
	  rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	  if (target
	      && (fpop_insn_p (target)
		  || (JUMP_P (target)
		      && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
	    emit_insn_before (gen_nop (), target);
	}

      /* Insert a NOP between a load instruction and an atomic instruction.
	 Insert a NOP at the branch target if there is a load in the delay
	 slot and an atomic instruction at the branch target.  */
      if (sparc_fix_ut700
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && mem_ref (SET_SRC (set))
	  && REG_P (SET_DEST (set)))
	{
	  if (jump && jump_to_label_p (jump))
	    {
	      rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	      if (target && atomic_insn_for_leon3_p (target))
		emit_insn_before (gen_nop (), target);
	    }

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  if (atomic_insn_for_leon3_p (next))
	    insert_nop = true;
	}

      /* Look for a sequence that starts with an fdiv or fsqrt instruction and
	 ends with another fdiv or fsqrt instruction with no dependencies on
	 the former, along with an appropriate pattern in between.  */
      if (sparc_fix_lost_divsqrt
	  && NONJUMP_INSN_P (insn)
	  && div_sqrt_insn_p (insn))
	{
	  int i;
	  int fp_found = 0;
	  rtx_insn *after;

	  const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 4; i++)
	    {
	      /* Count floating-point operations.  */
	      if (i != 3 && fpop_insn_p (after))
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (insn_uses_reg_p (after, dest_reg))
		    break;
		  fp_found++;
		}

	      /* Count floating-point loads.  */
	      if (i != 3
		  && (set = single_set (after)) != NULL_RTX
		  && REG_P (SET_DEST (set))
		  && REGNO (SET_DEST (set)) > 31)
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (REGNO (SET_DEST (set)) == dest_reg)
		    break;
		  fp_found++;
		}

	      /* Check if this is a problematic sequence.  */
	      if (i > 1
		  && fp_found >= 2
		  && div_sqrt_insn_p (after))
		{
		  /* If this is the short version of the problematic
		     sequence we add two NOPs in a row to also prevent
		     the long version.  */
		  if (i == 2)
		    emit_insn_before (gen_nop (), next);
		  insert_nop = true;
		  break;
		}

	      /* No need to scan past a second div/sqrt.  */
	      if (div_sqrt_insn_p (after))
		break;

	      /* Insert NOP before branch.  */
	      if (i < 3
		  && (!NONJUMP_INSN_P (after)
		      || GET_CODE (PATTERN (after)) == SEQUENCE))
		{
		  insert_nop = true;
		  break;
		}

	      after = next_active_insn (after);
	      if (!after)
		break;
	    }
	}

      /* Look for either of these two sequences:

	 Sequence A:
	 1. store of word size or less (e.g. st / stb / sth / stf)
	 2. any single instruction that is not a load or store
	 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

	 Sequence B:
	 1. store of double word size (e.g. std / stdf)
	 2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
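      /* As an illustration (hypothetical instructions, not taken from real
	 output), a back-to-back pair such as

	     std  %f0, [%o0]	! 1. double-word store
	     st   %g1, [%o1]	! 2. any store instruction

	 matches sequence B above, so a NOP gets inserted between the two
	 stores.  */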
      if (sparc_fix_b2bst
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && MEM_P (SET_DEST (set)))
	{
	  /* Sequence B begins with a double-word store.  */
	  bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 2; i++)
	    {
	      /* Skip empty assembly statements.  */
	      if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
		  || (USEFUL_INSN_P (after)
		      && (asm_noperands (PATTERN (after)) >= 0)
		      && !strcmp (decode_asm_operands (PATTERN (after),
						       NULL, NULL, NULL,
						       NULL, NULL), "")))
		after = next_active_insn (after);
	      if (!after)
		break;

	      /* If the insn is a branch, then it cannot be problematic.  */
	      if (!NONJUMP_INSN_P (after)
		  || GET_CODE (PATTERN (after)) == SEQUENCE)
		break;

	      /* Sequence B is only two instructions long.  */
	      if (seq_b)
		{
		  /* Add NOP if followed by a store.  */
		  if ((set = single_set (after)) != NULL_RTX
		      && MEM_P (SET_DEST (set)))
		    insert_nop = true;

		  /* Otherwise it is ok.  */
		  break;
		}

	      /* If the second instruction is a load or a store,
		 then the sequence cannot be problematic.  */
	      if (i == 0)
		{
		  if ((set = single_set (after)) != NULL_RTX
		      && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
		    break;

		  after = next_active_insn (after);
		  if (!after)
		    break;
		}

	      /* Add NOP if third instruction is a store.  */
	      if (i == 1
		  && (set = single_set (after)) != NULL_RTX
		  && MEM_P (SET_DEST (set)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load into an odd-numbered FP register.  */
      else if (sparc_fix_at697f
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && mem_ref (SET_SRC (set))
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31
	       && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
				 ld [address], %fx+1
				 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			     ld [address], %fx+1
			     FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
			 ld [address], %fx+1
			 fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && (mem_ref (SET_SRC (set)) != NULL_RTX
		   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;

	      /* GOT accesses use LD.  */
	      else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
		       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
		         dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f
	     || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
	     || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}
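
/* A sketch of how such a pass is typically registered from
   sparc_option_override (the "dbr" reference pass, i.e. delayed-branch
   scheduling, is an assumption here: the insn stream must already be in
   its final, delay-slot-filled form when the pass runs):

     opt_pass *errata_pass = make_pass_work_around_errata (g);
     struct register_pass_info insert_pass_work_around_errata =
       { errata_pass, "dbr", 1, PASS_POS_INSERT_AFTER };
     register_pass (&insert_pass_work_around_errata);  */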
1565 
1566 /* Helpers for TARGET_DEBUG_OPTIONS.  */
1567 static void
1568 dump_target_flag_bits (const int flags)
1569 {
1570   if (flags & MASK_64BIT)
1571     fprintf (stderr, "64BIT ");
1572   if (flags & MASK_APP_REGS)
1573     fprintf (stderr, "APP_REGS ");
1574   if (flags & MASK_FASTER_STRUCTS)
1575     fprintf (stderr, "FASTER_STRUCTS ");
1576   if (flags & MASK_FLAT)
1577     fprintf (stderr, "FLAT ");
1578   if (flags & MASK_FMAF)
1579     fprintf (stderr, "FMAF ");
1580   if (flags & MASK_FSMULD)
1581     fprintf (stderr, "FSMULD ");
1582   if (flags & MASK_FPU)
1583     fprintf (stderr, "FPU ");
1584   if (flags & MASK_HARD_QUAD)
1585     fprintf (stderr, "HARD_QUAD ");
1586   if (flags & MASK_POPC)
1587     fprintf (stderr, "POPC ");
1588   if (flags & MASK_PTR64)
1589     fprintf (stderr, "PTR64 ");
1590   if (flags & MASK_STACK_BIAS)
1591     fprintf (stderr, "STACK_BIAS ");
1592   if (flags & MASK_UNALIGNED_DOUBLES)
1593     fprintf (stderr, "UNALIGNED_DOUBLES ");
1594   if (flags & MASK_V8PLUS)
1595     fprintf (stderr, "V8PLUS ");
1596   if (flags & MASK_VIS)
1597     fprintf (stderr, "VIS ");
1598   if (flags & MASK_VIS2)
1599     fprintf (stderr, "VIS2 ");
1600   if (flags & MASK_VIS3)
1601     fprintf (stderr, "VIS3 ");
1602   if (flags & MASK_VIS4)
1603     fprintf (stderr, "VIS4 ");
1604   if (flags & MASK_VIS4B)
1605     fprintf (stderr, "VIS4B ");
1606   if (flags & MASK_CBCOND)
1607     fprintf (stderr, "CBCOND ");
1608   if (flags & MASK_DEPRECATED_V8_INSNS)
1609     fprintf (stderr, "DEPRECATED_V8_INSNS ");
1610   if (flags & MASK_SPARCLET)
1611     fprintf (stderr, "SPARCLET ");
1612   if (flags & MASK_SPARCLITE)
1613     fprintf (stderr, "SPARCLITE ");
1614   if (flags & MASK_V8)
1615     fprintf (stderr, "V8 ");
1616   if (flags & MASK_V9)
1617     fprintf (stderr, "V9 ");
1618 }
1619 
1620 static void
1621 dump_target_flags (const char *prefix, const int flags)
1622 {
1623   fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1624   dump_target_flag_bits (flags);
1625   fprintf (stderr, "]\n");
1626 }
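
/* A call like dump_target_flags ("Initial target_flags", target_flags)
   prints, for example, a line of the form

     Initial target_flags: (00000101) [ 64BIT FPU ]

   where the hexadecimal mask value shown here is only illustrative; the
   actual bit positions depend on how the MASK_* macros are defined.  */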
1627 
1628 /* Validate and override various options, and do some machine dependent
1629    initialization.  */
1630 
1631 static void
1632 sparc_option_override (void)
1633 {
1634   /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
1635   static struct cpu_default {
1636     const int cpu;
1637     const enum sparc_processor_type processor;
1638   } const cpu_default[] = {
1639     /* There must be one entry here for each TARGET_CPU value.  */
1640     { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1641     { TARGET_CPU_v8, PROCESSOR_V8 },
1642     { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1643     { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1644     { TARGET_CPU_leon, PROCESSOR_LEON },
1645     { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1646     { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1647     { TARGET_CPU_sparclite, PROCESSOR_F930 },
1648     { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1649     { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1650     { TARGET_CPU_v9, PROCESSOR_V9 },
1651     { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1652     { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1653     { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1654     { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1655     { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1656     { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1657     { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1658     { TARGET_CPU_m8, PROCESSOR_M8 },
1659     { -1, PROCESSOR_V7 }
1660   };
1661   const struct cpu_default *def;
1662   /* Table of values for -m{cpu,tune}=.  This must match the order of
1663      the enum processor_type in sparc-opts.h.  */
1664   static struct cpu_table {
1665     const char *const name;
1666     const int disable;
1667     const int enable;
1668   } const cpu_table[] = {
1669     { "v7",		MASK_ISA, 0 },
1670     { "cypress",	MASK_ISA, 0 },
1671     { "v8",		MASK_ISA, MASK_V8 },
1672     /* TI TMS390Z55 supersparc */
1673     { "supersparc",	MASK_ISA, MASK_V8 },
1674     { "hypersparc",	MASK_ISA, MASK_V8 },
1675     { "leon",		MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1676     { "leon3",		MASK_ISA, MASK_V8|MASK_LEON3 },
1677     { "leon3v7",	MASK_ISA, MASK_LEON3 },
1678     { "sparclite",	MASK_ISA, MASK_SPARCLITE },
1679     /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
1680     { "f930",		MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1681     /* The Fujitsu MB86934 is the more recent sparclite chip, with an FPU.  */
1682     { "f934",		MASK_ISA, MASK_SPARCLITE },
1683     { "sparclite86x",	MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1684     { "sparclet",	MASK_ISA, MASK_SPARCLET },
1685     /* TEMIC sparclet */
1686     { "tsc701",		MASK_ISA, MASK_SPARCLET },
1687     { "v9",		MASK_ISA, MASK_V9 },
1688     /* UltraSPARC I, II, IIi */
1689     { "ultrasparc",	MASK_ISA,
1690     /* Although insns using %y are deprecated, it is a clear win.  */
1691       MASK_V9|MASK_DEPRECATED_V8_INSNS },
1692     /* UltraSPARC III */
1693     /* ??? Check if %y issue still holds true.  */
1694     { "ultrasparc3",	MASK_ISA,
1695       MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1696     /* UltraSPARC T1 */
1697     { "niagara",	MASK_ISA,
1698       MASK_V9|MASK_DEPRECATED_V8_INSNS },
1699     /* UltraSPARC T2 */
1700     { "niagara2",	MASK_ISA,
1701       MASK_V9|MASK_POPC|MASK_VIS2 },
1702     /* UltraSPARC T3 */
1703     { "niagara3",	MASK_ISA,
1704       MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1705     /* UltraSPARC T4 */
1706     { "niagara4",	MASK_ISA,
1707       MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1708     /* UltraSPARC M7 */
1709     { "niagara7",	MASK_ISA,
1710       MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1711     /* UltraSPARC M8 */
1712     { "m8",		MASK_ISA,
1713       MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1714   };
1715   const struct cpu_table *cpu;
1716   unsigned int i;
1717 
1718   if (sparc_debug_string != NULL)
1719     {
1720       const char *q;
1721       char *p;
1722 
1723       p = ASTRDUP (sparc_debug_string);
1724       while ((q = strtok (p, ",")) != NULL)
1725 	{
1726 	  bool invert;
1727 	  int mask;
1728 
1729 	  p = NULL;
1730 	  if (*q == '!')
1731 	    {
1732 	      invert = true;
1733 	      q++;
1734 	    }
1735 	  else
1736 	    invert = false;
1737 
1738 	  if (! strcmp (q, "all"))
1739 	    mask = MASK_DEBUG_ALL;
1740 	  else if (! strcmp (q, "options"))
1741 	    mask = MASK_DEBUG_OPTIONS;
1742 	  else
1743 	    error ("unknown %<-mdebug-%s%> switch", q);
1744 
1745 	  if (invert)
1746 	    sparc_debug &= ~mask;
1747 	  else
1748 	    sparc_debug |= mask;
1749 	}
1750     }
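
  /* As a usage sketch: -mdebug=all sets every debug bit, while a
     combination such as -mdebug=all,!options sets everything and then
     clears MASK_DEBUG_OPTIONS again, because the comma-separated items
     are processed from left to right.  */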
1751 
1752   /* Enable the FsMULd instruction by default if not explicitly specified by
1753      the user.  It may later be disabled by the CPU (explicitly or not).  */
1754   if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1755     target_flags |= MASK_FSMULD;
1756 
1757   if (TARGET_DEBUG_OPTIONS)
1758     {
1759       dump_target_flags ("Initial target_flags", target_flags);
1760       dump_target_flags ("target_flags_explicit", target_flags_explicit);
1761     }
1762 
1763 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1764   SUBTARGET_OVERRIDE_OPTIONS;
1765 #endif
1766 
1767 #ifndef SPARC_BI_ARCH
1768   /* Check for unsupported architecture size.  */
1769   if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1770     error ("%s is not supported by this configuration",
1771 	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
1772 #endif
1773 
1774   /* We force all 64-bit archs to use 128-bit long double.  */
1775   if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1776     {
1777       error ("%<-mlong-double-64%> not allowed with %<-m64%>");
1778       target_flags |= MASK_LONG_DOUBLE_128;
1779     }
1780 
1781   /* Check that -fcall-saved-REG wasn't specified for out registers.  */
1782   for (i = 8; i < 16; i++)
1783     if (!call_used_regs [i])
1784       {
1785 	error ("%<-fcall-saved-REG%> is not supported for out registers");
1786         call_used_regs [i] = 1;
1787       }
1788 
1789   /* Set the default CPU if no -mcpu option was specified.  */
1790   if (!global_options_set.x_sparc_cpu_and_features)
1791     {
1792       for (def = &cpu_default[0]; def->cpu != -1; ++def)
1793 	if (def->cpu == TARGET_CPU_DEFAULT)
1794 	  break;
1795       gcc_assert (def->cpu != -1);
1796       sparc_cpu_and_features = def->processor;
1797     }
1798 
1799   /* Set the CPU to tune for if no -mtune option was specified.  */
1800   if (!global_options_set.x_sparc_cpu)
1801     sparc_cpu = sparc_cpu_and_features;
1802 
1803   cpu = &cpu_table[(int) sparc_cpu_and_features];
1804 
1805   if (TARGET_DEBUG_OPTIONS)
1806     {
1807       fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1808       dump_target_flags ("cpu->disable", cpu->disable);
1809       dump_target_flags ("cpu->enable", cpu->enable);
1810     }
1811 
1812   target_flags &= ~cpu->disable;
1813   target_flags |= (cpu->enable
1814 #ifndef HAVE_AS_FMAF_HPC_VIS3
1815 		   & ~(MASK_FMAF | MASK_VIS3)
1816 #endif
1817 #ifndef HAVE_AS_SPARC4
1818 		   & ~MASK_CBCOND
1819 #endif
1820 #ifndef HAVE_AS_SPARC5_VIS4
1821 		   & ~(MASK_VIS4 | MASK_SUBXC)
1822 #endif
1823 #ifndef HAVE_AS_SPARC6
1824 		   & ~(MASK_VIS4B)
1825 #endif
1826 #ifndef HAVE_AS_LEON
1827 		   & ~(MASK_LEON | MASK_LEON3)
1828 #endif
1829 		   & ~(target_flags_explicit & MASK_FEATURES)
1830 		   );
1831 
1832   /* FsMULd is a V8 instruction.  */
1833   if (!TARGET_V8 && !TARGET_V9)
1834     target_flags &= ~MASK_FSMULD;
1835 
1836   /* -mvis2 implies -mvis.  */
1837   if (TARGET_VIS2)
1838     target_flags |= MASK_VIS;
1839 
1840   /* -mvis3 implies -mvis2 and -mvis.  */
1841   if (TARGET_VIS3)
1842     target_flags |= MASK_VIS2 | MASK_VIS;
1843 
1844   /* -mvis4 implies -mvis3, -mvis2 and -mvis.  */
1845   if (TARGET_VIS4)
1846     target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1847 
1848   /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis.  */
1849   if (TARGET_VIS4B)
1850     target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1851 
1852   /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1853      FPU is disabled.  */
1854   if (!TARGET_FPU)
1855     target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1856 		      | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1857 
1858   /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1859      are available; -m64 also implies v9.  */
1860   if (TARGET_VIS || TARGET_ARCH64)
1861     {
1862       target_flags |= MASK_V9;
1863       target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1864     }
1865 
1866   /* -mvis also implies -mv8plus on 32-bit.  */
1867   if (TARGET_VIS && !TARGET_ARCH64)
1868     target_flags |= MASK_V8PLUS;
1869 
1870   /* Use the deprecated v8 insns for sparc64 in 32-bit mode.  */
1871   if (TARGET_V9 && TARGET_ARCH32)
1872     target_flags |= MASK_DEPRECATED_V8_INSNS;
1873 
1874   /* V8PLUS requires V9 and makes no sense in 64-bit mode.  */
1875   if (!TARGET_V9 || TARGET_ARCH64)
1876     target_flags &= ~MASK_V8PLUS;
1877 
1878   /* Don't use stack biasing in 32-bit mode.  */
1879   if (TARGET_ARCH32)
1880     target_flags &= ~MASK_STACK_BIAS;
1881 
1882   /* Use LRA instead of reload, unless otherwise instructed.  */
1883   if (!(target_flags_explicit & MASK_LRA))
1884     target_flags |= MASK_LRA;
1885 
1886   /* Enable applicable errata workarounds for LEON3FT.  */
1887   if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1888     {
1889       sparc_fix_b2bst = 1;
1890       sparc_fix_lost_divsqrt = 1;
1891     }
1892 
1893   /* Disable FsMULd for the UT699 since it doesn't work correctly.  */
1894   if (sparc_fix_ut699)
1895     target_flags &= ~MASK_FSMULD;
1896 
1897 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1898   if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1899     target_flags |= MASK_LONG_DOUBLE_128;
1900 #endif
1901 
1902   if (TARGET_DEBUG_OPTIONS)
1903     dump_target_flags ("Final target_flags", target_flags);
1904 
1905   /* Set the code model if no -mcmodel option was specified.  */
1906   if (global_options_set.x_sparc_code_model)
1907     {
1908       if (TARGET_ARCH32)
1909 	error ("%<-mcmodel=%> is not supported in 32-bit mode");
1910     }
1911   else
1912     {
1913       if (TARGET_ARCH32)
1914 	sparc_code_model = CM_32;
1915       else
1916 	sparc_code_model = SPARC_DEFAULT_CMODEL;
1917     }
1918 
1919   /* Set the memory model if no -mmemory-model option was specified.  */
1920   if (!global_options_set.x_sparc_memory_model)
1921     {
1922       /* Choose the memory model for the operating system.  */
1923       enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1924       if (os_default != SMM_DEFAULT)
1925 	sparc_memory_model = os_default;
1926       /* Choose the most relaxed model for the processor.  */
1927       else if (TARGET_V9)
1928 	sparc_memory_model = SMM_RMO;
1929       else if (TARGET_LEON3)
1930 	sparc_memory_model = SMM_TSO;
1931       else if (TARGET_LEON)
1932 	sparc_memory_model = SMM_SC;
1933       else if (TARGET_V8)
1934 	sparc_memory_model = SMM_PSO;
1935       else
1936 	sparc_memory_model = SMM_SC;
1937     }
1938 
1939   /* Supply a default value for align_functions.  */
1940   if (flag_align_functions && !str_align_functions)
1941     {
1942       if (sparc_cpu == PROCESSOR_ULTRASPARC
1943 	  || sparc_cpu == PROCESSOR_ULTRASPARC3
1944 	  || sparc_cpu == PROCESSOR_NIAGARA
1945 	  || sparc_cpu == PROCESSOR_NIAGARA2
1946 	  || sparc_cpu == PROCESSOR_NIAGARA3
1947 	  || sparc_cpu == PROCESSOR_NIAGARA4)
1948 	str_align_functions = "32";
1949       else if (sparc_cpu == PROCESSOR_NIAGARA7
1950 	       || sparc_cpu == PROCESSOR_M8)
1951 	str_align_functions = "64";
1952     }
1953 
1954   /* Validate PCC_STRUCT_RETURN.  */
1955   if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1956     flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1957 
1958   /* Only use .uaxword when compiling for a 64-bit target.  */
1959   if (!TARGET_ARCH64)
1960     targetm.asm_out.unaligned_op.di = NULL;
1961 
1962   /* Set the processor costs.  */
1963   switch (sparc_cpu)
1964     {
1965     case PROCESSOR_V7:
1966     case PROCESSOR_CYPRESS:
1967       sparc_costs = &cypress_costs;
1968       break;
1969     case PROCESSOR_V8:
1970     case PROCESSOR_SPARCLITE:
1971     case PROCESSOR_SUPERSPARC:
1972       sparc_costs = &supersparc_costs;
1973       break;
1974     case PROCESSOR_F930:
1975     case PROCESSOR_F934:
1976     case PROCESSOR_HYPERSPARC:
1977     case PROCESSOR_SPARCLITE86X:
1978       sparc_costs = &hypersparc_costs;
1979       break;
1980     case PROCESSOR_LEON:
1981       sparc_costs = &leon_costs;
1982       break;
1983     case PROCESSOR_LEON3:
1984     case PROCESSOR_LEON3V7:
1985       sparc_costs = &leon3_costs;
1986       break;
1987     case PROCESSOR_SPARCLET:
1988     case PROCESSOR_TSC701:
1989       sparc_costs = &sparclet_costs;
1990       break;
1991     case PROCESSOR_V9:
1992     case PROCESSOR_ULTRASPARC:
1993       sparc_costs = &ultrasparc_costs;
1994       break;
1995     case PROCESSOR_ULTRASPARC3:
1996       sparc_costs = &ultrasparc3_costs;
1997       break;
1998     case PROCESSOR_NIAGARA:
1999       sparc_costs = &niagara_costs;
2000       break;
2001     case PROCESSOR_NIAGARA2:
2002       sparc_costs = &niagara2_costs;
2003       break;
2004     case PROCESSOR_NIAGARA3:
2005       sparc_costs = &niagara3_costs;
2006       break;
2007     case PROCESSOR_NIAGARA4:
2008       sparc_costs = &niagara4_costs;
2009       break;
2010     case PROCESSOR_NIAGARA7:
2011       sparc_costs = &niagara7_costs;
2012       break;
2013     case PROCESSOR_M8:
2014       sparc_costs = &m8_costs;
2015       break;
2016     case PROCESSOR_NATIVE:
2017       gcc_unreachable ();
2018     }
2019 
2020   /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
2021      can run at the same time.  More importantly, it is the threshold
2022      defining when additional prefetches will be dropped by the
2023      hardware.
2024 
2025      The UltraSPARC-III features a documented prefetch queue with a
2026      size of 8.  Additional prefetches issued in the cpu are
2027      dropped.
2028 
2029      Niagara processors are different.  In these processors prefetches
2030      are handled much like regular loads.  The L1 miss buffer is 32
2031      entries, but prefetches start getting affected when 30 entries
2032      become occupied.  That occupation could be a mix of regular loads
2033      and prefetches though.  And that buffer is shared by all threads.
2034      Once the threshold is reached, if the core is running a single
2035      thread the prefetch will retry.  If more than one thread is
2036      running, the prefetch will be dropped.
2037 
2038      All this makes it very difficult to determine how many prefetches
2039      can be issued simultaneously, even in a single-threaded program.
2040      Experimental results show that setting
2041      this parameter to 32 works well when the number of threads is not
2042      high.  */
2043   maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2044 			 ((sparc_cpu == PROCESSOR_ULTRASPARC
2045 			   || sparc_cpu == PROCESSOR_NIAGARA
2046 			   || sparc_cpu == PROCESSOR_NIAGARA2
2047 			   || sparc_cpu == PROCESSOR_NIAGARA3
2048 			   || sparc_cpu == PROCESSOR_NIAGARA4)
2049 			  ? 2
2050 			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
2051 			     ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2052 				     || sparc_cpu == PROCESSOR_M8)
2053 				    ? 32 : 3))),
2054 			 global_options.x_param_values,
2055 			 global_options_set.x_param_values);
2056 
2057   /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
2058      bytes.
2059 
2060      The Oracle SPARC Architecture (previously the UltraSPARC
2061      Architecture) specification states that when a PREFETCH[A]
2062      instruction is executed an implementation-specific amount of data
2063      is prefetched, and that it is at least 64 bytes long (aligned to
2064      at least 64 bytes).
2065 
2066      However, this is not correct.  The M7 (and implementations prior
2067      to that) does not guarantee a 64B prefetch into a cache if the
2068      line size is smaller.  A single cache line is all that is ever
2069      prefetched.  So for the M7, where the L1D$ has 32B lines and the
2070      L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2071      L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2072      is a read_n prefetch, which is the only type which allocates to
2073      the L1.)  */
2074   maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2075 			 (sparc_cpu == PROCESSOR_M8
2076 			  ? 64 : 32),
2077 			 global_options.x_param_values,
2078 			 global_options_set.x_param_values);
2079 
2080   /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
2081      Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
2082      Niagara processors feature an L1D$ of 16KB.  */
2083   maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2084 			 ((sparc_cpu == PROCESSOR_ULTRASPARC
2085 			   || sparc_cpu == PROCESSOR_ULTRASPARC3
2086 			   || sparc_cpu == PROCESSOR_NIAGARA
2087 			   || sparc_cpu == PROCESSOR_NIAGARA2
2088 			   || sparc_cpu == PROCESSOR_NIAGARA3
2089 			   || sparc_cpu == PROCESSOR_NIAGARA4
2090 			   || sparc_cpu == PROCESSOR_NIAGARA7
2091 			   || sparc_cpu == PROCESSOR_M8)
2092 			  ? 16 : 64),
2093 			 global_options.x_param_values,
2094 			 global_options_set.x_param_values);
2095 
2096 
2097   /* PARAM_L2_CACHE_SIZE is the size of the L2 cache in kilobytes.  Note
2098      that 512 is the default in params.def.  */
2099   maybe_set_param_value (PARAM_L2_CACHE_SIZE,
2100 			 ((sparc_cpu == PROCESSOR_NIAGARA4
2101 			   || sparc_cpu == PROCESSOR_M8)
2102 			  ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2103 				   ? 256 : 512)),
2104 			 global_options.x_param_values,
2105 			 global_options_set.x_param_values);
2106 
2107 
2108   /* Disable save slot sharing for call-clobbered registers by default.
2109      The IRA sharing algorithm works on single registers only and this
2110      pessimizes for double floating-point registers.  */
2111   if (!global_options_set.x_flag_ira_share_save_slots)
2112     flag_ira_share_save_slots = 0;
2113 
2114   /* Only enable REE by default in 64-bit mode where it helps to eliminate
2115      redundant 32-to-64-bit extensions.  */
2116   if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2117     flag_ree = 0;
2118 
2119   /* Do various machine dependent initializations.  */
2120   sparc_init_modes ();
2121 
2122   /* Set up function hooks.  */
2123   init_machine_status = sparc_init_machine_status;
2124 }
2125 
2126 /* Miscellaneous utilities.  */
2127 
2128 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2129    or branch on register contents instructions.  */
2130 
2131 int
2132 v9_regcmp_p (enum rtx_code code)
2133 {
2134   return (code == EQ || code == NE || code == GE || code == LT
2135 	  || code == LE || code == GT);
2136 }
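
/* These are exactly the conditions that the V9 branch-on-register-contents
   instructions can test directly against zero: brz (EQ), brnz (NE),
   brgez (GE), brlz (LT), brlez (LE) and brgz (GT).  */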
2137 
2138 /* Nonzero if OP is a floating point constant which can
2139    be loaded into an integer register using a single
2140    sethi instruction.  */
2141 
2142 int
2143 fp_sethi_p (rtx op)
2144 {
2145   if (GET_CODE (op) == CONST_DOUBLE)
2146     {
2147       long i;
2148 
2149       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2150       return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2151     }
2152 
2153   return 0;
2154 }
2155 
2156 /* Nonzero if OP is a floating point constant which can
2157    be loaded into an integer register using a single
2158    mov instruction.  */
2159 
2160 int
2161 fp_mov_p (rtx op)
2162 {
2163   if (GET_CODE (op) == CONST_DOUBLE)
2164     {
2165       long i;
2166 
2167       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2168       return SPARC_SIMM13_P (i);
2169     }
2170 
2171   return 0;
2172 }
2173 
2174 /* Nonzero if OP is a floating point constant which can
2175    be loaded into an integer register using a high/losum
2176    instruction sequence.  */
2177 
2178 int
2179 fp_high_losum_p (rtx op)
2180 {
2181   /* The constraints calling this should only be in
2182      SFmode move insns, so any constant which cannot
2183      be moved using a single insn will do.  */
2184   if (GET_CODE (op) == CONST_DOUBLE)
2185     {
2186       long i;
2187 
2188       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2189       return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2190     }
2191 
2192   return 0;
2193 }
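
/* To make the three predicates above concrete: 1.5f has the bit pattern
   0x3fc00000, whose low 10 bits are clear, so fp_sethi_p accepts it (a
   single sethi suffices).  A pattern like 0x3f800001, just above 1.0f,
   has low bits set and is far too big for a 13-bit immediate, so only
   fp_high_losum_p accepts it, yielding a sethi/or pair.  */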
2194 
2195 /* Return true if the address of LABEL can be loaded by means of the
2196    mov{si,di}_pic_label_ref patterns in PIC mode.  */
2197 
2198 static bool
2199 can_use_mov_pic_label_ref (rtx label)
2200 {
2201   /* VxWorks does not impose a fixed gap between segments; the run-time
2202      gap can be different from the object-file gap.  We therefore can't
2203      assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2204      are absolutely sure that X is in the same segment as the GOT.
2205      Unfortunately, the flexibility of linker scripts means that we
2206      can't be sure of that in general, so assume that GOT-relative
2207      accesses are never valid on VxWorks.  */
2208   if (TARGET_VXWORKS_RTP)
2209     return false;
2210 
2211   /* Similarly, if the label is non-local, it might end up being placed
2212      in a different section than the current one, and mov_pic_label_ref
2213      requires the label and the code to be in the same section.  */
2214   if (LABEL_REF_NONLOCAL_P (label))
2215     return false;
2216 
2217   /* Finally, if we are reordering basic blocks and partitioning into hot
2218      and cold sections, this might happen for any label.  */
2219   if (flag_reorder_blocks_and_partition)
2220     return false;
2221 
2222   return true;
2223 }
2224 
2225 /* Expand a move instruction.  Return true if all work is done.  */
2226 
2227 bool
2228 sparc_expand_move (machine_mode mode, rtx *operands)
2229 {
2230   /* Handle sets of MEM first.  */
2231   if (GET_CODE (operands[0]) == MEM)
2232     {
2233       /* 0 is a register (or a pair of registers) on SPARC.  */
2234       if (register_or_zero_operand (operands[1], mode))
2235 	return false;
2236 
2237       if (!reload_in_progress)
2238 	{
2239 	  operands[0] = validize_mem (operands[0]);
2240 	  operands[1] = force_reg (mode, operands[1]);
2241 	}
2242     }
2243 
2244   /* Fix up TLS cases.  */
2245   if (TARGET_HAVE_TLS
2246       && CONSTANT_P (operands[1])
2247       && sparc_tls_referenced_p (operands [1]))
2248     {
2249       operands[1] = sparc_legitimize_tls_address (operands[1]);
2250       return false;
2251     }
2252 
2253   /* Fix up PIC cases.  */
2254   if (flag_pic && CONSTANT_P (operands[1]))
2255     {
2256       if (pic_address_needs_scratch (operands[1]))
2257 	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2258 
2259       /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
2260       if ((GET_CODE (operands[1]) == LABEL_REF
2261 	   && can_use_mov_pic_label_ref (operands[1]))
2262 	  || (GET_CODE (operands[1]) == CONST
2263 	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
2264 	      && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2265 	      && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2266 	      && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2267 	{
2268 	  if (mode == SImode)
2269 	    {
2270 	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2271 	      return true;
2272 	    }
2273 
2274 	  if (mode == DImode)
2275 	    {
2276 	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2277 	      return true;
2278 	    }
2279 	}
2280 
2281       if (symbolic_operand (operands[1], mode))
2282 	{
2283 	  operands[1]
2284 	    = sparc_legitimize_pic_address (operands[1],
2285 					    reload_in_progress
2286 					    ? operands[0] : NULL_RTX);
2287 	  return false;
2288 	}
2289     }
2290 
2291   /* If we are trying to toss an integer constant into FP registers,
2292      or to load an FP or vector constant, force it into memory.  */
2293   if (CONSTANT_P (operands[1])
2294       && REG_P (operands[0])
2295       && (SPARC_FP_REG_P (REGNO (operands[0]))
2296 	  || SCALAR_FLOAT_MODE_P (mode)
2297 	  || VECTOR_MODE_P (mode)))
2298     {
2299       /* emit_group_store will send such bogosity to us when it is
2300          not storing directly into memory.  So fix this up to avoid
2301          crashes in output_constant_pool.  */
2302       if (operands [1] == const0_rtx)
2303 	operands[1] = CONST0_RTX (mode);
2304 
2305       /* We can clear FP registers or set them to all-ones if TARGET_VIS,
2306 	 and we can always do so for non-FP registers.  */
2307       if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2308 	  && (const_zero_operand (operands[1], mode)
2309 	      || const_all_ones_operand (operands[1], mode)))
2310 	return false;
2311 
2312       if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2313 	  /* We are able to build any SF constant in integer registers
2314 	     with at most 2 instructions.  */
2315 	  && (mode == SFmode
2316 	      /* And any DF constant in integer registers if needed.  */
2317 	      || (mode == DFmode && !can_create_pseudo_p ())))
2318 	return false;
2319 
2320       operands[1] = force_const_mem (mode, operands[1]);
2321       if (!reload_in_progress)
2322 	operands[1] = validize_mem (operands[1]);
2323       return false;
2324     }
2325 
2326   /* Accept non-constants and valid constants unmodified.  */
2327   if (!CONSTANT_P (operands[1])
2328       || GET_CODE (operands[1]) == HIGH
2329       || input_operand (operands[1], mode))
2330     return false;
2331 
2332   switch (mode)
2333     {
2334     case E_QImode:
2335       /* All QImode constants require only one insn, so proceed.  */
2336       break;
2337 
2338     case E_HImode:
2339     case E_SImode:
2340       sparc_emit_set_const32 (operands[0], operands[1]);
2341       return true;
2342 
2343     case E_DImode:
2344       /* input_operand should have filtered out 32-bit mode.  */
2345       sparc_emit_set_const64 (operands[0], operands[1]);
2346       return true;
2347 
2348     case E_TImode:
2349       {
2350 	rtx high, low;
2351 	/* TImode isn't available in 32-bit mode.  */
2352 	split_double (operands[1], &high, &low);
2353 	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2354 			      high));
2355 	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2356 			      low));
2357       }
2358       return true;
2359 
2360     default:
2361       gcc_unreachable ();
2362     }
2363 
2364   return false;
2365 }
2366 
2367 /* Load OP1, a 32-bit constant, into OP0, a register.
2368    We know it can't be done in one insn when we get
2369    here; the move expander guarantees this.  */
2370 
2371 static void
2372 sparc_emit_set_const32 (rtx op0, rtx op1)
2373 {
2374   machine_mode mode = GET_MODE (op0);
2375   rtx temp = op0;
2376 
2377   if (can_create_pseudo_p ())
2378     temp = gen_reg_rtx (mode);
2379 
2380   if (GET_CODE (op1) == CONST_INT)
2381     {
2382       gcc_assert (!small_int_operand (op1, mode)
2383 		  && !const_high_operand (op1, mode));
2384 
2385       /* Emit them as real moves instead of a HIGH/LO_SUM;
2386 	 this way CSE can see everything and reuse intermediate
2387 	 values if it wants.  */
2388       emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2389 					     & ~(HOST_WIDE_INT) 0x3ff)));
2390 
2391       emit_insn (gen_rtx_SET (op0,
2392 			      gen_rtx_IOR (mode, temp,
2393 					   GEN_INT (INTVAL (op1) & 0x3ff))));
2394     }
2395   else
2396     {
2397       /* A symbol, emit in the traditional way.  */
2398       emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2399       emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2400     }
2401 }
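
/* For example, loading the constant 0x12345678 this way produces a
   sequence along the lines of

     sethi  %hi(0x12345400), %temp   ! 0x12345678 & ~0x3ff
     or     %temp, 0x278, %reg       ! 0x12345678 & 0x3ff

   with both intermediate values visible to CSE as ordinary moves.  */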
2402 
2403 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2404    If TEMP is nonzero, we are forbidden to use any other scratch
2405    registers.  Otherwise, we are allowed to generate them as needed.
2406 
2407    Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2408    or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */
2409 
2410 void
2411 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2412 {
2413   rtx cst, temp1, temp2, temp3, temp4, temp5;
2414   rtx ti_temp = 0;
2415 
2416   /* Deal with too large offsets.  */
2417   if (GET_CODE (op1) == CONST
2418       && GET_CODE (XEXP (op1, 0)) == PLUS
2419       && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2420       && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2421     {
2422       gcc_assert (!temp);
2423       temp1 = gen_reg_rtx (DImode);
2424       temp2 = gen_reg_rtx (DImode);
2425       sparc_emit_set_const64 (temp2, cst);
2426       sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2427 				       NULL_RTX);
2428       emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2429       return;
2430     }
2431 
2432   if (temp && GET_MODE (temp) == TImode)
2433     {
2434       ti_temp = temp;
2435       temp = gen_rtx_REG (DImode, REGNO (temp));
2436     }
2437 
2438   /* SPARC-V9 code model support.  */
2439   switch (sparc_code_model)
2440     {
2441     case CM_MEDLOW:
2442       /* The range spanned by all instructions in the object is less
2443 	 than 2^31 bytes (2GB) and the distance from any instruction
2444 	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2445 	 than 2^31 bytes (2GB).
2446 
2447 	 The executable must be in the low 4TB of the virtual address
2448 	 space.
2449 
2450 	 sethi	%hi(symbol), %temp1
2451 	 or	%temp1, %lo(symbol), %reg  */
2452       if (temp)
2453 	temp1 = temp;  /* op0 is allowed.  */
2454       else
2455 	temp1 = gen_reg_rtx (DImode);
2456 
2457       emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2458       emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2459       break;
2460 
2461     case CM_MEDMID:
2462       /* The range spanned by all instructions in the object is less
2463 	 than 2^31 bytes (2GB) and the distance from any instruction
2464 	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2465 	 than 2^31 bytes (2GB).
2466 
2467 	 The executable must be in the low 16TB of the virtual address
2468 	 space.
2469 
2470 	 sethi	%h44(symbol), %temp1
2471 	 or	%temp1, %m44(symbol), %temp2
2472 	 sllx	%temp2, 12, %temp3
2473 	 or	%temp3, %l44(symbol), %reg  */
2474       if (temp)
2475 	{
2476 	  temp1 = op0;
2477 	  temp2 = op0;
2478 	  temp3 = temp;  /* op0 is allowed.  */
2479 	}
2480       else
2481 	{
2482 	  temp1 = gen_reg_rtx (DImode);
2483 	  temp2 = gen_reg_rtx (DImode);
2484 	  temp3 = gen_reg_rtx (DImode);
2485 	}
2486 
2487       emit_insn (gen_seth44 (temp1, op1));
2488       emit_insn (gen_setm44 (temp2, temp1, op1));
2489       emit_insn (gen_rtx_SET (temp3,
2490 			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2491       emit_insn (gen_setl44 (op0, temp3, op1));
2492       break;
2493 
2494     case CM_MEDANY:
2495       /* The range spanned by all instructions in the object is less
2496 	 than 2^31 bytes (2GB) and the distance from any instruction
2497 	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2498 	 than 2^31 bytes (2GB).
2499 
2500 	 The executable can be placed anywhere in the virtual address
2501 	 space.
2502 
2503 	 sethi	%hh(symbol), %temp1
2504 	 sethi	%lm(symbol), %temp2
2505 	 or	%temp1, %hm(symbol), %temp3
2506 	 sllx	%temp3, 32, %temp4
2507 	 or	%temp4, %temp2, %temp5
2508 	 or	%temp5, %lo(symbol), %reg  */
2509       if (temp)
2510 	{
2511 	  /* It is possible that one of the registers we got for operands[2]
2512 	     might coincide with that of operands[0] (which is why we made
2513 	     it TImode).  Pick the other one to use as our scratch.  */
2514 	  if (rtx_equal_p (temp, op0))
2515 	    {
2516 	      gcc_assert (ti_temp);
2517 	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2518 	    }
2519 	  temp1 = op0;
2520 	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
2521 	  temp3 = op0;
2522 	  temp4 = op0;
2523 	  temp5 = op0;
2524 	}
2525       else
2526 	{
2527 	  temp1 = gen_reg_rtx (DImode);
2528 	  temp2 = gen_reg_rtx (DImode);
2529 	  temp3 = gen_reg_rtx (DImode);
2530 	  temp4 = gen_reg_rtx (DImode);
2531 	  temp5 = gen_reg_rtx (DImode);
2532 	}
2533 
2534       emit_insn (gen_sethh (temp1, op1));
2535       emit_insn (gen_setlm (temp2, op1));
2536       emit_insn (gen_sethm (temp3, temp1, op1));
2537       emit_insn (gen_rtx_SET (temp4,
2538 			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2539       emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2540       emit_insn (gen_setlo (op0, temp5, op1));
2541       break;
2542 
2543     case CM_EMBMEDANY:
2544       /* Old old old backwards compatibility kruft here.
2545 	 Essentially it is MEDLOW with a fixed 64-bit
2546 	 virtual base added to all data segment addresses.
2547 	 Text-segment stuff is computed like MEDANY, we can't
2548 	 reuse the code above because the relocation knobs
2549 	 look different.
2550 
2551 	 Data segment:	sethi	%hi(symbol), %temp1
2552 			add	%temp1, EMBMEDANY_BASE_REG, %temp2
2553 			or	%temp2, %lo(symbol), %reg  */
2554       if (data_segment_operand (op1, GET_MODE (op1)))
2555 	{
2556 	  if (temp)
2557 	    {
2558 	      temp1 = temp;  /* op0 is allowed.  */
2559 	      temp2 = op0;
2560 	    }
2561 	  else
2562 	    {
2563 	      temp1 = gen_reg_rtx (DImode);
2564 	      temp2 = gen_reg_rtx (DImode);
2565 	    }
2566 
2567 	  emit_insn (gen_embmedany_sethi (temp1, op1));
2568 	  emit_insn (gen_embmedany_brsum (temp2, temp1));
2569 	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
2570 	}
2571 
2572       /* Text segment:	sethi	%uhi(symbol), %temp1
2573 			sethi	%hi(symbol), %temp2
2574 			or	%temp1, %ulo(symbol), %temp3
2575 			sllx	%temp3, 32, %temp4
2576 			or	%temp4, %temp2, %temp5
2577 			or	%temp5, %lo(symbol), %reg  */
2578       else
2579 	{
2580 	  if (temp)
2581 	    {
2582 	      /* It is possible that one of the registers we got for operands[2]
2583 		 might coincide with that of operands[0] (which is why we made
2584 		 it TImode).  Pick the other one to use as our scratch.  */
2585 	      if (rtx_equal_p (temp, op0))
2586 		{
2587 		  gcc_assert (ti_temp);
2588 		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2589 		}
2590 	      temp1 = op0;
2591 	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
2592 	      temp3 = op0;
2593 	      temp4 = op0;
2594 	      temp5 = op0;
2595 	    }
2596 	  else
2597 	    {
2598 	      temp1 = gen_reg_rtx (DImode);
2599 	      temp2 = gen_reg_rtx (DImode);
2600 	      temp3 = gen_reg_rtx (DImode);
2601 	      temp4 = gen_reg_rtx (DImode);
2602 	      temp5 = gen_reg_rtx (DImode);
2603 	    }
2604 
2605 	  emit_insn (gen_embmedany_textuhi (temp1, op1));
2606 	  emit_insn (gen_embmedany_texthi  (temp2, op1));
2607 	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2608 	  emit_insn (gen_rtx_SET (temp4,
2609 				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2610 	  emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2611 	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
2612 	}
2613       break;
2614 
2615     default:
2616       gcc_unreachable ();
2617     }
2618 }
2619 
2620 /* These avoid problems when cross compiling.  If we do not
2621    go through all this hair then the optimizer will see
2622    invalid REG_EQUAL notes or in some cases none at all.  */
2623 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2624 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2625 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2626 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2627 
2628 /* The optimizer is not to assume anything about exactly
2629    which bits are set for a HIGH; they are unspecified.
2630    Unfortunately this leads to many missed optimizations
2631    during CSE.  We mask out the non-HIGH bits so that the
2632    result matches a plain movdi, to alleviate this problem.  */
2633 static rtx
2634 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2635 {
2636   return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2637 }
2638 
2639 static rtx
2640 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2641 {
2642   return gen_rtx_SET (dest, GEN_INT (val));
2643 }
2644 
2645 static rtx
2646 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2647 {
2648   return gen_rtx_IOR (DImode, src, GEN_INT (val));
2649 }
2650 
2651 static rtx
2652 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2653 {
2654   return gen_rtx_XOR (DImode, src, GEN_INT (val));
2655 }
2656 
2657 /* Worker routines for 64-bit constant formation on arch64.
2658    One of the key things to do in these emissions is
2659    to create as many temp REGs as possible.  This makes it
2660    possible for half-built constants to be reused when
2661    similar values are required later on.
2662    Without doing this, the optimizer cannot see such
2663    opportunities.  */
2664 
2665 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2666 					   unsigned HOST_WIDE_INT, int);
2667 
2668 static void
2669 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2670 			       unsigned HOST_WIDE_INT low_bits, int is_neg)
2671 {
2672   unsigned HOST_WIDE_INT high_bits;
2673 
2674   if (is_neg)
2675     high_bits = (~low_bits) & 0xffffffff;
2676   else
2677     high_bits = low_bits;
2678 
2679   emit_insn (gen_safe_HIGH64 (temp, high_bits));
2680   if (!is_neg)
2681     {
2682       emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2683     }
2684   else
2685     {
2686       /* If we are XOR'ing with -1, then we should emit a one's complement
2687 	 instead.  This way the combiner will notice logical operations
2688 	 such as ANDN later on and substitute.  */
2689       if ((low_bits & 0x3ff) == 0x3ff)
2690 	{
2691 	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2692 	}
2693       else
2694 	{
2695 	  emit_insn (gen_rtx_SET (op0,
2696 				  gen_safe_XOR64 (temp,
2697 						  (-(HOST_WIDE_INT)0x400
2698 						   | (low_bits & 0x3ff)))));
2699 	}
2700     }
2701 }
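
/* A worked example of the is_neg case: for 0xfffffffffffff000 we get
   low_bits = 0xfffff000, hence high_bits = ~low_bits & 0xffffffff = 0xfff,
   and the sequence emitted is

     sethi  %hi(0xc00), %temp        ! 0xfff & ~0x3ff
     xor    %temp, -1024, %reg       ! -0x400 | (0xfffff000 & 0x3ff)

   since 0xc00 ^ 0xfffffffffffffc00 gives back 0xfffffffffffff000.  */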
2702 
2703 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2704 					   unsigned HOST_WIDE_INT, int);
2705 
2706 static void
2707 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2708 			       unsigned HOST_WIDE_INT high_bits,
2709 			       unsigned HOST_WIDE_INT low_immediate,
2710 			       int shift_count)
2711 {
2712   rtx temp2 = op0;
2713 
2714   if ((high_bits & 0xfffffc00) != 0)
2715     {
2716       emit_insn (gen_safe_HIGH64 (temp, high_bits));
2717       if ((high_bits & ~0xfffffc00) != 0)
2718 	emit_insn (gen_rtx_SET (op0,
2719 				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2720       else
2721 	temp2 = temp;
2722     }
2723   else
2724     {
2725       emit_insn (gen_safe_SET64 (temp, high_bits));
2726       temp2 = temp;
2727     }
2728 
2729   /* Now shift it up into place.  */
2730   emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2731 					       GEN_INT (shift_count))));
2732 
2733   /* If there is a low immediate part piece, finish up by
2734      putting that in as well.  */
2735   if (low_immediate != 0)
2736     emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2737 }
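
/* Worked example: high_bits = 0x12345678, low_immediate = 0x234 and
   shift_count = 32 yield

     sethi  %hi(0x12345400), %temp
     or     %temp, 0x278, %reg       ! %reg = 0x12345678
     sllx   %reg, 32, %reg           ! %reg = 0x1234567800000000
     or     %reg, 0x234, %reg        ! %reg = 0x1234567800000234  */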
2738 
2739 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2740 					    unsigned HOST_WIDE_INT);
2741 
2742 /* Full 64-bit constant decomposition.  Even though this is the
2743    'worst' case, we still optimize a few things away.  */
2744 static void
2745 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2746 				unsigned HOST_WIDE_INT high_bits,
2747 				unsigned HOST_WIDE_INT low_bits)
2748 {
2749   rtx sub_temp = op0;
2750 
2751   if (can_create_pseudo_p ())
2752     sub_temp = gen_reg_rtx (DImode);
2753 
2754   if ((high_bits & 0xfffffc00) != 0)
2755     {
2756       emit_insn (gen_safe_HIGH64 (temp, high_bits));
2757       if ((high_bits & ~0xfffffc00) != 0)
2758 	emit_insn (gen_rtx_SET (sub_temp,
2759 				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2760       else
2761 	sub_temp = temp;
2762     }
2763   else
2764     {
2765       emit_insn (gen_safe_SET64 (temp, high_bits));
2766       sub_temp = temp;
2767     }
2768 
2769   if (can_create_pseudo_p ())
2770     {
2771       rtx temp2 = gen_reg_rtx (DImode);
2772       rtx temp3 = gen_reg_rtx (DImode);
2773       rtx temp4 = gen_reg_rtx (DImode);
2774 
2775       emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2776 						     GEN_INT (32))));
2777 
2778       emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2779       if ((low_bits & ~0xfffffc00) != 0)
2780 	{
2781 	  emit_insn (gen_rtx_SET (temp3,
2782 				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2783 	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2784 	}
2785       else
2786 	{
2787 	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2788 	}
2789     }
2790   else
2791     {
2792       rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
2793       rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
2794       rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2795       int to_shift = 12;
2796 
2797       /* We are in the middle of reload, so this is really
2798 	 painful.  However, we still make an attempt to
2799 	 avoid emitting truly stupid code.  */
2800       if (low1 != const0_rtx)
2801 	{
2802 	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2803 						       GEN_INT (to_shift))));
2804 	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2805 	  sub_temp = op0;
2806 	  to_shift = 12;
2807 	}
2808       else
2809 	{
2810 	  to_shift += 12;
2811 	}
2812       if (low2 != const0_rtx)
2813 	{
2814 	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2815 						       GEN_INT (to_shift))));
2816 	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2817 	  sub_temp = op0;
2818 	  to_shift = 8;
2819 	}
2820       else
2821 	{
2822 	  to_shift += 8;
2823 	}
2824       emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2825 						   GEN_INT (to_shift))));
2826       if (low3 != const0_rtx)
2827 	emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2828       /* phew...  */
2829     }
2830 }
2831 
2832 /* Analyze a 64-bit constant for certain properties.  */
2833 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2834 				    unsigned HOST_WIDE_INT,
2835 				    int *, int *, int *);
2836 
2837 static void
2838 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2839 			unsigned HOST_WIDE_INT low_bits,
2840 			int *hbsp, int *lbsp, int *abbasp)
2841 {
2842   int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2843   int i;
2844 
2845   lowest_bit_set = highest_bit_set = -1;
2846   i = 0;
2847   do
2848     {
2849       if ((lowest_bit_set == -1)
2850 	  && ((low_bits >> i) & 1))
2851 	lowest_bit_set = i;
2852       if ((highest_bit_set == -1)
2853 	  && ((high_bits >> (32 - i - 1)) & 1))
2854 	highest_bit_set = (64 - i - 1);
2855     }
2856   while (++i < 32
2857 	 && ((highest_bit_set == -1)
2858 	     || (lowest_bit_set == -1)));
2859   if (i == 32)
2860     {
2861       i = 0;
2862       do
2863 	{
2864 	  if ((lowest_bit_set == -1)
2865 	      && ((high_bits >> i) & 1))
2866 	    lowest_bit_set = i + 32;
2867 	  if ((highest_bit_set == -1)
2868 	      && ((low_bits >> (32 - i - 1)) & 1))
2869 	    highest_bit_set = 32 - i - 1;
2870 	}
2871       while (++i < 32
2872 	     && ((highest_bit_set == -1)
2873 		 || (lowest_bit_set == -1)));
2874     }
2875   /* If there are no bits set, this should have gone out
2876      as one instruction!  */
2877   gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2878   all_bits_between_are_set = 1;
2879   for (i = lowest_bit_set; i <= highest_bit_set; i++)
2880     {
2881       if (i < 32)
2882 	{
2883 	  if ((low_bits & (1 << i)) != 0)
2884 	    continue;
2885 	}
2886       else
2887 	{
2888 	  if ((high_bits & (1 << (i - 32))) != 0)
2889 	    continue;
2890 	}
2891       all_bits_between_are_set = 0;
2892       break;
2893     }
2894   *hbsp = highest_bit_set;
2895   *lbsp = lowest_bit_set;
2896   *abbasp = all_bits_between_are_set;
2897 }
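
/* For instance, for the constant 0x0000000007f80000 (bits 19..26 set)
   the scan above yields lowest_bit_set = 19, highest_bit_set = 26 and
   all_bits_between_are_set = 1; clearing any bit inside that range
   would turn the latter into 0.  */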
2898 
2899 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2900 
2901 static int
2902 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2903 		   unsigned HOST_WIDE_INT low_bits)
2904 {
2905   int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2906 
2907   if (high_bits == 0
2908       || high_bits == 0xffffffff)
2909     return 1;
2910 
2911   analyze_64bit_constant (high_bits, low_bits,
2912 			  &highest_bit_set, &lowest_bit_set,
2913 			  &all_bits_between_are_set);
2914 
2915   if ((highest_bit_set == 63
2916        || lowest_bit_set == 0)
2917       && all_bits_between_are_set != 0)
2918     return 1;
2919 
2920   if ((highest_bit_set - lowest_bit_set) < 21)
2921     return 1;
2922 
2923   return 0;
2924 }
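
/* E.g. 0xfff0000000000000 (a run of set bits reaching bit 63) and
   0x000003fc00000000 (an 8-bit field spanning bits 34..41, well below
   the 21-bit limit) are both 2-insn constants, whereas something like
   0x8000000000000001 is not.  */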
2925 
2926 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2927 							unsigned HOST_WIDE_INT,
2928 							int, int);
2929 
2930 static unsigned HOST_WIDE_INT
2931 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2932 			  unsigned HOST_WIDE_INT low_bits,
2933 			  int lowest_bit_set, int shift)
2934 {
2935   HOST_WIDE_INT hi, lo;
2936 
2937   if (lowest_bit_set < 32)
2938     {
2939       lo = (low_bits >> lowest_bit_set) << shift;
2940       hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2941     }
2942   else
2943     {
2944       lo = 0;
2945       hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2946     }
2947   gcc_assert (! (hi & lo));
2948   return (hi | lo);
2949 }
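
/* E.g. high_bits = 0x3fc, low_bits = 0 (bits 34..41 of the constant),
   lowest_bit_set = 34 and shift = 10 give (0x3fc >> 2) << 10 = 0x3fc00,
   i.e. the bit field moved down so that it now starts at bit 10.  */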
2950 
2951 /* Here we are sure to be arch64 and this is an integer constant
2952    being loaded into a register.  Emit the most efficient
2953    insn sequence possible.  Detection of all the 1-insn cases
2954    has been done already.  */
2955 static void
2956 sparc_emit_set_const64 (rtx op0, rtx op1)
2957 {
2958   unsigned HOST_WIDE_INT high_bits, low_bits;
2959   int lowest_bit_set, highest_bit_set;
2960   int all_bits_between_are_set;
2961   rtx temp = 0;
2962 
2963   /* Sanity check that we know what we are working with.  */
2964   gcc_assert (TARGET_ARCH64
2965 	      && (GET_CODE (op0) == SUBREG
2966 		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2967 
2968   if (! can_create_pseudo_p ())
2969     temp = op0;
2970 
2971   if (GET_CODE (op1) != CONST_INT)
2972     {
2973       sparc_emit_set_symbolic_const64 (op0, op1, temp);
2974       return;
2975     }
2976 
2977   if (! temp)
2978     temp = gen_reg_rtx (DImode);
2979 
2980   high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2981   low_bits = (INTVAL (op1) & 0xffffffff);
2982 
2983   /* low_bits	bits 0  --> 31
2984      high_bits	bits 32 --> 63  */
2985 
2986   analyze_64bit_constant (high_bits, low_bits,
2987 			  &highest_bit_set, &lowest_bit_set,
2988 			  &all_bits_between_are_set);
2989 
2990   /* First try for a 2-insn sequence.  */
2991 
2992   /* These situations are preferred because the optimizer can
2993    * do more things with them:
2994    * 1) mov	-1, %reg
2995    *    sllx	%reg, shift, %reg
2996    * 2) mov	-1, %reg
2997    *    srlx	%reg, shift, %reg
2998    * 3) mov	some_small_const, %reg
2999    *    sllx	%reg, shift, %reg
3000    */
3001   if (((highest_bit_set == 63
3002 	|| lowest_bit_set == 0)
3003        && all_bits_between_are_set != 0)
3004       || ((highest_bit_set - lowest_bit_set) < 12))
3005     {
3006       HOST_WIDE_INT the_const = -1;
3007       int shift = lowest_bit_set;
3008 
3009       if ((highest_bit_set != 63
3010 	   && lowest_bit_set != 0)
3011 	  || all_bits_between_are_set == 0)
3012 	{
3013 	  the_const =
3014 	    create_simple_focus_bits (high_bits, low_bits,
3015 				      lowest_bit_set, 0);
3016 	}
3017       else if (lowest_bit_set == 0)
3018 	shift = -(63 - highest_bit_set);
3019 
3020       gcc_assert (SPARC_SIMM13_P (the_const));
3021       gcc_assert (shift != 0);
3022 
3023       emit_insn (gen_safe_SET64 (temp, the_const));
3024       if (shift > 0)
3025 	emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3026 						     GEN_INT (shift))));
3027       else if (shift < 0)
3028 	emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3029 						       GEN_INT (-shift))));
3030       return;
3031     }
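
  /* As a concrete case, 0xff00000000000000 has bits 56..63 set, so the
     code above emits

       mov   -1, %temp
       sllx  %temp, 56, %reg

     because shifting the all-ones value left by 56 leaves exactly that
     run of bits set.  */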
3032 
3033   /* Now a range of 22 or fewer bits set somewhere.
3034    * 1) sethi	%hi(focus_bits), %reg
3035    *    sllx	%reg, shift, %reg
3036    * 2) sethi	%hi(focus_bits), %reg
3037    *    srlx	%reg, shift, %reg
3038    */
3039   if ((highest_bit_set - lowest_bit_set) < 21)
3040     {
3041       unsigned HOST_WIDE_INT focus_bits =
3042 	create_simple_focus_bits (high_bits, low_bits,
3043 				  lowest_bit_set, 10);
3044 
3045       gcc_assert (SPARC_SETHI_P (focus_bits));
3046       gcc_assert (lowest_bit_set != 10);
3047 
3048       emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3049 
3050       /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
3051       if (lowest_bit_set < 10)
3052 	emit_insn (gen_rtx_SET (op0,
3053 				gen_rtx_LSHIFTRT (DImode, temp,
3054 						  GEN_INT (10 - lowest_bit_set))));
3055       else if (lowest_bit_set > 10)
3056 	emit_insn (gen_rtx_SET (op0,
3057 				gen_rtx_ASHIFT (DImode, temp,
3058 						GEN_INT (lowest_bit_set - 10))));
3059       return;
3060     }
3061 
3062   /* 1) sethi	%hi(low_bits), %reg
3063    *    or	%reg, %lo(low_bits), %reg
3064    * 2) sethi	%hi(~low_bits), %reg
3065    *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3066    */
3067   if (high_bits == 0
3068       || high_bits == 0xffffffff)
3069     {
3070       sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3071 				     (high_bits == 0xffffffff));
3072       return;
3073     }
3074 
3075   /* Now, try 3-insn sequences.  */
3076 
3077   /* 1) sethi	%hi(high_bits), %reg
3078    *    or	%reg, %lo(high_bits), %reg
3079    *    sllx	%reg, 32, %reg
3080    */
3081   if (low_bits == 0)
3082     {
3083       sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3084       return;
3085     }
3086 
3087   /* We may be able to do something quick
3088      when the constant is negated, so try that.  */
3089   if (const64_is_2insns ((~high_bits) & 0xffffffff,
3090 			 (~low_bits) & 0xfffffc00))
3091     {
3092       /* NOTE: The trailing bits get XOR'd so we need the
3093 	 non-negated bits, not the negated ones.  */
3094       unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3095 
3096       if ((((~high_bits) & 0xffffffff) == 0
3097 	   && ((~low_bits) & 0x80000000) == 0)
3098 	  || (((~high_bits) & 0xffffffff) == 0xffffffff
3099 	      && ((~low_bits) & 0x80000000) != 0))
3100 	{
3101 	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3102 
3103 	  if ((SPARC_SETHI_P (fast_int)
3104 	       && (~high_bits & 0xffffffff) == 0)
3105 	      || SPARC_SIMM13_P (fast_int))
3106 	    emit_insn (gen_safe_SET64 (temp, fast_int));
3107 	  else
3108 	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3109 	}
3110       else
3111 	{
3112 	  rtx negated_const;
3113 	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3114 				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3115 	  sparc_emit_set_const64 (temp, negated_const);
3116 	}
3117 
3118       /* If we are XOR'ing with -1, then we should emit a one's complement
3119 	 instead.  This way the combiner will notice logical operations
3120 	 such as ANDN later on and substitute.  */
3121       if (trailing_bits == 0x3ff)
3122 	{
3123 	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3124 	}
3125       else
3126 	{
3127 	  emit_insn (gen_rtx_SET (op0,
3128 				  gen_safe_XOR64 (temp,
3129 						  (-0x400 | trailing_bits))));
3130 	}
3131       return;
3132     }
3133 
3134   /* 1) sethi	%hi(xxx), %reg
3135    *    or	%reg, %lo(xxx), %reg
3136    *	sllx	%reg, yyy, %reg
3137    *
3138    * ??? This is just a generalized version of the low_bits==0
3139    * thing above, FIXME...
3140    */
3141   if ((highest_bit_set - lowest_bit_set) < 32)
3142     {
3143       unsigned HOST_WIDE_INT focus_bits =
3144 	create_simple_focus_bits (high_bits, low_bits,
3145 				  lowest_bit_set, 0);
3146 
3147       /* We can't get here in this state.  */
3148       gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3149 
3150       /* So what we know is that the set bits straddle the
3151 	 middle of the 64-bit word.  */
3152       sparc_emit_set_const64_quick2 (op0, temp,
3153 				     focus_bits, 0,
3154 				     lowest_bit_set);
3155       return;
3156     }
3157 
3158   /* 1) sethi	%hi(high_bits), %reg
3159    *    or	%reg, %lo(high_bits), %reg
3160    *    sllx	%reg, 32, %reg
3161    *	or	%reg, low_bits, %reg
3162    */
3163   if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3164     {
3165       sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3166       return;
3167     }
3168 
3169   /* The easiest way, when all else fails, is full decomposition.  */
3170   sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3171 }
3172 
3173 /* Implement TARGET_FIXED_CONDITION_CODE_REGS.  */
3174 
3175 static bool
3176 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3177 {
3178   *p1 = SPARC_ICC_REG;
3179   *p2 = SPARC_FCC_REG;
3180   return true;
3181 }
3182 
3183 /* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
3184 
3185 static unsigned int
3186 sparc_min_arithmetic_precision (void)
3187 {
3188   return 32;
3189 }
3190 
3191 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3192    return the mode to be used for the comparison.  For floating-point,
3193    CCFP[E]mode is used.  CCNZmode should be used when the first operand
3194    is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
3195    processing is needed.  */
3196 
3197 machine_mode
3198 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3199 {
3200   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3201     {
3202       switch (op)
3203 	{
3204 	case EQ:
3205 	case NE:
3206 	case UNORDERED:
3207 	case ORDERED:
3208 	case UNLT:
3209 	case UNLE:
3210 	case UNGT:
3211 	case UNGE:
3212 	case UNEQ:
3213 	case LTGT:
3214 	  return CCFPmode;
3215 
3216 	case LT:
3217 	case LE:
3218 	case GT:
3219 	case GE:
3220 	  return CCFPEmode;
3221 
3222 	default:
3223 	  gcc_unreachable ();
3224 	}
3225     }
3226   else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3227 	    || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3228 	   && y == const0_rtx)
3229     {
3230       if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3231 	return CCXNZmode;
3232       else
3233 	return CCNZmode;
3234     }
3235   else
3236     {
3237       /* This is for the cmp<mode>_sne pattern.  */
3238       if (GET_CODE (x) == NOT && y == constm1_rtx)
3239 	{
3240 	  if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3241 	    return CCXCmode;
3242 	  else
3243 	    return CCCmode;
3244 	}
3245 
3246       /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns.  */
3247       if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3248 	{
3249 	  if (GET_CODE (y) == UNSPEC
3250 	      && (XINT (y, 1) == UNSPEC_ADDV
3251 		 || XINT (y, 1) == UNSPEC_SUBV
3252 	         || XINT (y, 1) == UNSPEC_NEGV))
3253 	    return CCVmode;
3254 	  else
3255 	    return CCCmode;
3256 	}
3257 
3258       if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3259 	return CCXmode;
3260       else
3261 	return CCmode;
3262     }
3263 }
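
/* For example, a DImode comparison (compare (plus (reg x) (reg y))
   (const_int 0)) selects CCXNZmode on 64-bit targets: after an addcc-style
   instruction, only the tests that depend solely on the N and Z condition
   bits (EQ, NE, LT, GE against zero) remain meaningful, which is what the
   CC[X]NZ modes express.  */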
3264 
3265 /* Emit the compare insn and return the CC reg for a CODE comparison
3266    with operands X and Y.  */
3267 
3268 static rtx
3269 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3270 {
3271   machine_mode mode;
3272   rtx cc_reg;
3273 
3274   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3275     return x;
3276 
3277   mode = SELECT_CC_MODE (code, x, y);
3278 
  /* ??? We don't have movcc patterns, so we cannot generate pseudo regs for
     the fcc regs (cse can't tell they're really call-clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call; it will then try to reload the cc reg via an int reg, which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would
     be to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */
3288 
  /* ??? This is an experiment.  Rather than making changes to cse, which
     may or may not be easy/clean, we do our own cse.  This is possible
     because we will generate hard registers.  Cse knows they're call
     clobbered (it doesn't know the same thing about pseudos).  If we guess
     wrong, no big deal, but if we win, great!  */
3294 
3295   if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3296 #if 1 /* experiment */
3297     {
3298       int reg;
3299       /* We cycle through the registers to ensure they're all exercised.  */
3300       static int next_fcc_reg = 0;
3301       /* Previous x,y for each fcc reg.  */
3302       static rtx prev_args[4][2];
3303 
3304       /* Scan prev_args for x,y.  */
3305       for (reg = 0; reg < 4; reg++)
3306 	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3307 	  break;
3308       if (reg == 4)
3309 	{
3310 	  reg = next_fcc_reg;
3311 	  prev_args[reg][0] = x;
3312 	  prev_args[reg][1] = y;
3313 	  next_fcc_reg = (next_fcc_reg + 1) & 3;
3314 	}
3315       cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3316     }
3317 #else
3318     cc_reg = gen_reg_rtx (mode);
3319 #endif /* ! experiment */
3320   else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3321     cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3322   else
3323     cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3324 
  /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD.  If we do, this
     will only result in an unrecognizable insn, so there is no point in
     asserting.  */
3327   emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3328 
3329   return cc_reg;
3330 }
3331 
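/* For example, two back-to-back float comparisons with different operands
   get %fcc0 and %fcc1 from the cycling scheme above, so both results can
   stay live at once, while repeating an earlier comparison reuses its
   register; this is the opportunistic cse the comments describe (a
   description of observed behavior, not a guarantee).  */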
3332 
3333 /* Emit the compare insn and return the CC reg for the comparison in CMP.  */
3334 
3335 rtx
3336 gen_compare_reg (rtx cmp)
3337 {
3338   return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3339 }
3340 
3341 /* This function is used for v9 only.
3342    DEST is the target of the Scc insn.
3343    CODE is the code for an Scc's comparison.
3344    X and Y are the values we compare.
3345 
3346    This function is needed to turn
3347 
3348 	   (set (reg:SI 110)
3349 	       (gt (reg:CCX 100 %icc)
3350 	           (const_int 0)))
3351    into
3352 	   (set (reg:SI 110)
3353 	       (gt:DI (reg:CCX 100 %icc)
3354 	           (const_int 0)))
3355 
   I.e. the instruction recognizer needs to see the mode of the comparison
   to find the right instruction.  We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.  */
3359 
3360 static int
3361 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3362 {
3363   if (! TARGET_ARCH64
3364       && (GET_MODE (x) == DImode
3365 	  || GET_MODE (dest) == DImode))
3366     return 0;
3367 
3368   /* Try to use the movrCC insns.  */
3369   if (TARGET_ARCH64
3370       && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3371       && y == const0_rtx
3372       && v9_regcmp_p (compare_code))
3373     {
3374       rtx op0 = x;
3375       rtx temp;
3376 
3377       /* Special case for op0 != 0.  This can be done with one instruction if
3378 	 dest == x.  */
3379 
3380       if (compare_code == NE
3381 	  && GET_MODE (dest) == DImode
3382 	  && rtx_equal_p (op0, dest))
3383 	{
3384 	  emit_insn (gen_rtx_SET (dest,
3385 			      gen_rtx_IF_THEN_ELSE (DImode,
3386 				       gen_rtx_fmt_ee (compare_code, DImode,
3387 						       op0, const0_rtx),
3388 				       const1_rtx,
3389 				       dest)));
3390 	  return 1;
3391 	}
3392 
3393       if (reg_overlap_mentioned_p (dest, op0))
3394 	{
3395 	  /* Handle the case where dest == x.
3396 	     We "early clobber" the result.  */
3397 	  op0 = gen_reg_rtx (GET_MODE (x));
3398 	  emit_move_insn (op0, x);
3399 	}
3400 
3401       emit_insn (gen_rtx_SET (dest, const0_rtx));
3402       if (GET_MODE (op0) != DImode)
3403 	{
3404 	  temp = gen_reg_rtx (DImode);
3405 	  convert_move (temp, op0, 0);
3406 	}
3407       else
3408 	temp = op0;
3409       emit_insn (gen_rtx_SET (dest,
3410 			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3411 				   gen_rtx_fmt_ee (compare_code, DImode,
3412 						   temp, const0_rtx),
3413 				   const1_rtx,
3414 				   dest)));
3415       return 1;
3416     }
3417   else
3418     {
3419       x = gen_compare_reg_1 (compare_code, x, y);
3420       y = const0_rtx;
3421 
3422       emit_insn (gen_rtx_SET (dest, const0_rtx));
3423       emit_insn (gen_rtx_SET (dest,
3424 			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3425 				   gen_rtx_fmt_ee (compare_code,
3426 						   GET_MODE (x), x, y),
3427 				    const1_rtx, dest)));
3428       return 1;
3429     }
3430 }
3431 
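/* As an illustration of the conditional moves emitted above (a sketch;
   the movr patterns in sparc.md are authoritative), the special case
   "dest = (dest != 0)" becomes a single

	movrnz	%o0, 1, %o0

   while the general path for e.g. "dest = (x > 0)" clears the
   destination and then conditionally moves on the register condition:

	mov	0, %o1
	movrgz	%o0, 1, %o1

   with the exact registers chosen by register allocation.  */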
3432 
3433 /* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
3434    without jumps using the addx/subx instructions.  */
3435 
3436 bool
3437 emit_scc_insn (rtx operands[])
3438 {
3439   rtx tem, x, y;
3440   enum rtx_code code;
3441   machine_mode mode;
3442 
3443   /* The quad-word fp compare library routines all return nonzero to indicate
3444      true, which is different from the equivalent libgcc routines, so we must
3445      handle them specially here.  */
3446   if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3447     {
3448       operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3449 					      GET_CODE (operands[1]));
3450       operands[2] = XEXP (operands[1], 0);
3451       operands[3] = XEXP (operands[1], 1);
3452     }
3453 
3454   code = GET_CODE (operands[1]);
3455   x = operands[2];
3456   y = operands[3];
3457   mode = GET_MODE (x);
3458 
  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
     more applications).  The exception to this is "reg != 0", which can
     be done in one instruction on v9 (so we do it).  */
3462   if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3463     {
3464       if (y != const0_rtx)
3465 	x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3466 
3467       rtx pat = gen_rtx_SET (operands[0],
3468 			     gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3469 					     x, const0_rtx));
3470 
3471       /* If we can use addx/subx or addxc, add a clobber for CC.  */
3472       if (mode == SImode || (code == NE && TARGET_VIS3))
3473 	{
3474 	  rtx clobber
3475 	    = gen_rtx_CLOBBER (VOIDmode,
3476 			       gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3477 					    SPARC_ICC_REG));
3478 	  pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3479 	}
3480 
3481       emit_insn (pat);
3482       return true;
3483     }
3484 
3485   /* We can do LTU in DImode using the addxc instruction with VIS3.  */
3486   if (TARGET_ARCH64
3487       && mode == DImode
3488       && !((code == LTU || code == GTU) && TARGET_VIS3)
3489       && gen_v9_scc (operands[0], code, x, y))
3490     return true;
3491 
  /* We can do LTU and GEU using the addx/subx instructions too.  And
     for GTU/LEU, if both operands are registers, swap them and fall
     back to the easy case.  */
3495   if (code == GTU || code == LEU)
3496     {
3497       if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3498           && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3499         {
3500           tem = x;
3501           x = y;
3502           y = tem;
3503           code = swap_condition (code);
3504         }
3505     }
3506 
3507   if (code == LTU || code == GEU)
3508     {
3509       emit_insn (gen_rtx_SET (operands[0],
3510 			      gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3511 					      gen_compare_reg_1 (code, x, y),
3512 					      const0_rtx)));
3513       return true;
3514     }
3515 
  /* All the possibilities to use addx/subx-based sequences have been
     exhausted; try for a 3-instruction sequence using v9 conditional
     moves.  */
3519   if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3520     return true;
3521 
3522   /* Nope, do branches.  */
3523   return false;
3524 }
3525 
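/* As an illustration of the branch-free method above, "dest = (x == y)"
   in SImode can be computed as roughly (a sketch; the actual insns come
   from the seq/sne patterns):

	xor	%o0, %o1, %g1	! %g1 = x ^ y, zero iff x == y
	subcc	%g0, %g1, %g0	! set carry iff %g1 != 0
	subx	%g0, -1, %o2	! %o2 = 1 - carry

   so %o2 ends up 1 exactly when x == y.  */
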
3526 /* Emit a conditional jump insn for the v9 architecture using comparison code
3527    CODE and jump target LABEL.
3528    This function exists to take advantage of the v9 brxx insns.  */
3529 
3530 static void
3531 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3532 {
3533   emit_jump_insn (gen_rtx_SET (pc_rtx,
3534 			   gen_rtx_IF_THEN_ELSE (VOIDmode,
3535 				    gen_rtx_fmt_ee (code, GET_MODE (op0),
3536 						    op0, const0_rtx),
3537 				    gen_rtx_LABEL_REF (VOIDmode, label),
3538 				    pc_rtx)));
3539 }
3540 
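/* A branch emitted by the function above tests an integer register
   directly against zero, e.g. (illustrative only, with a made-up label):

	brz,pt	%o0, .LL3
	 nop

   saving the separate compare that a cmp/be sequence would need.  */
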
3541 /* Emit a conditional jump insn for the UA2011 architecture using
3542    comparison code CODE and jump target LABEL.  This function exists
3543    to take advantage of the UA2011 Compare and Branch insns.  */
3544 
3545 static void
3546 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3547 {
3548   rtx if_then_else;
3549 
3550   if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3551 				       gen_rtx_fmt_ee(code, GET_MODE(op0),
3552 						      op0, op1),
3553 				       gen_rtx_LABEL_REF (VOIDmode, label),
3554 				       pc_rtx);
3555 
3556   emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3557 }
3558 
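/* A compare-and-branch emitted by the function above might look like
   (an illustrative sketch with a made-up label):

	cwbne	%o0, 5, .LL4

   which fuses the comparison and the branch and has no delay slot; see
   emit_cbcond_nop below for the performance caveat.  */
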
3559 void
3560 emit_conditional_branch_insn (rtx operands[])
3561 {
3562   /* The quad-word fp compare library routines all return nonzero to indicate
3563      true, which is different from the equivalent libgcc routines, so we must
3564      handle them specially here.  */
3565   if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3566     {
3567       operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3568 					      GET_CODE (operands[0]));
3569       operands[1] = XEXP (operands[0], 0);
3570       operands[2] = XEXP (operands[0], 1);
3571     }
3572 
3573   /* If we can tell early on that the comparison is against a constant
3574      that won't fit in the 5-bit signed immediate field of a cbcond,
3575      use one of the other v9 conditional branch sequences.  */
3576   if (TARGET_CBCOND
3577       && GET_CODE (operands[1]) == REG
3578       && (GET_MODE (operands[1]) == SImode
3579 	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3580       && (GET_CODE (operands[2]) != CONST_INT
3581 	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
3582     {
3583       emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3584       return;
3585     }
3586 
3587   if (TARGET_ARCH64 && operands[2] == const0_rtx
3588       && GET_CODE (operands[1]) == REG
3589       && GET_MODE (operands[1]) == DImode)
3590     {
3591       emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3592       return;
3593     }
3594 
3595   operands[1] = gen_compare_reg (operands[0]);
3596   operands[2] = const0_rtx;
3597   operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3598 				operands[1], operands[2]);
3599   emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3600 				  operands[3]));
3601 }
3602 
3603 
/* Generate a DFmode part of a hard TFmode register.
   REG is the TFmode hard register, LOW is 1 for the
   low 64 bits of the register and 0 otherwise.  */
3608 rtx
3609 gen_df_reg (rtx reg, int low)
3610 {
3611   int regno = REGNO (reg);
3612 
3613   if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3614     regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3615   return gen_rtx_REG (DFmode, regno);
3616 }
3617 
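/* For example, on this big-endian target a TFmode value living in
   %f0-%f3 yields %f0 for LOW == 0 (the high 64 bits) and %f2 for
   LOW == 1 (the low 64 bits); when the TFmode value lives in integer
   registers on 64-bit, the step is a single register instead (a sketch
   of the logic above).  */
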
3618 /* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
3619    Unlike normal calls, TFmode operands are passed by reference.  It is
3620    assumed that no more than 3 operands are required.  */
3621 
3622 static void
3623 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3624 {
3625   rtx ret_slot = NULL, arg[3], func_sym;
3626   int i;
3627 
3628   /* We only expect to be called for conversions, unary, and binary ops.  */
3629   gcc_assert (nargs == 2 || nargs == 3);
3630 
3631   for (i = 0; i < nargs; ++i)
3632     {
3633       rtx this_arg = operands[i];
3634       rtx this_slot;
3635 
3636       /* TFmode arguments and return values are passed by reference.  */
3637       if (GET_MODE (this_arg) == TFmode)
3638 	{
3639 	  int force_stack_temp;
3640 
3641 	  force_stack_temp = 0;
3642 	  if (TARGET_BUGGY_QP_LIB && i == 0)
3643 	    force_stack_temp = 1;
3644 
3645 	  if (GET_CODE (this_arg) == MEM
3646 	      && ! force_stack_temp)
3647 	    {
3648 	      tree expr = MEM_EXPR (this_arg);
3649 	      if (expr)
3650 		mark_addressable (expr);
3651 	      this_arg = XEXP (this_arg, 0);
3652 	    }
3653 	  else if (CONSTANT_P (this_arg)
3654 		   && ! force_stack_temp)
3655 	    {
3656 	      this_slot = force_const_mem (TFmode, this_arg);
3657 	      this_arg = XEXP (this_slot, 0);
3658 	    }
3659 	  else
3660 	    {
3661 	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3662 
3663 	      /* Operand 0 is the return value.  We'll copy it out later.  */
3664 	      if (i > 0)
3665 		emit_move_insn (this_slot, this_arg);
3666 	      else
3667 		ret_slot = this_slot;
3668 
3669 	      this_arg = XEXP (this_slot, 0);
3670 	    }
3671 	}
3672 
3673       arg[i] = this_arg;
3674     }
3675 
3676   func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3677 
3678   if (GET_MODE (operands[0]) == TFmode)
3679     {
3680       if (nargs == 2)
3681 	emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3682 			   arg[0], GET_MODE (arg[0]),
3683 			   arg[1], GET_MODE (arg[1]));
3684       else
3685 	emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3686 			   arg[0], GET_MODE (arg[0]),
3687 			   arg[1], GET_MODE (arg[1]),
3688 			   arg[2], GET_MODE (arg[2]));
3689 
3690       if (ret_slot)
3691 	emit_move_insn (operands[0], ret_slot);
3692     }
3693   else
3694     {
3695       rtx ret;
3696 
3697       gcc_assert (nargs == 2);
3698 
3699       ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3700 				     GET_MODE (operands[0]),
3701 				     arg[1], GET_MODE (arg[1]));
3702 
3703       if (ret != operands[0])
3704 	emit_move_insn (operands[0], ret);
3705     }
3706 }
3707 
3708 /* Expand soft-float TFmode calls to sparc abi routines.  */
3709 
3710 static void
3711 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3712 {
3713   const char *func;
3714 
3715   switch (code)
3716     {
3717     case PLUS:
3718       func = "_Qp_add";
3719       break;
3720     case MINUS:
3721       func = "_Qp_sub";
3722       break;
3723     case MULT:
3724       func = "_Qp_mul";
3725       break;
3726     case DIV:
3727       func = "_Qp_div";
3728       break;
3729     default:
3730       gcc_unreachable ();
3731     }
3732 
3733   emit_soft_tfmode_libcall (func, 3, operands);
3734 }
3735 
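/* So, without hardware quad support, a TFmode addition ends up as a call
   along the lines of

	_Qp_add (&result, &x, &y);

   with all three TFmode values passed by reference, as the software quad
   routines of the 64-bit SPARC ABI require (a sketch; the argument
   staging is done by emit_soft_tfmode_libcall above).  */
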
3736 static void
3737 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3738 {
3739   const char *func;
3740 
3741   gcc_assert (code == SQRT);
3742   func = "_Qp_sqrt";
3743 
3744   emit_soft_tfmode_libcall (func, 2, operands);
3745 }
3746 
3747 static void
3748 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3749 {
3750   const char *func;
3751 
3752   switch (code)
3753     {
3754     case FLOAT_EXTEND:
3755       switch (GET_MODE (operands[1]))
3756 	{
3757 	case E_SFmode:
3758 	  func = "_Qp_stoq";
3759 	  break;
3760 	case E_DFmode:
3761 	  func = "_Qp_dtoq";
3762 	  break;
3763 	default:
3764 	  gcc_unreachable ();
3765 	}
3766       break;
3767 
3768     case FLOAT_TRUNCATE:
3769       switch (GET_MODE (operands[0]))
3770 	{
3771 	case E_SFmode:
3772 	  func = "_Qp_qtos";
3773 	  break;
3774 	case E_DFmode:
3775 	  func = "_Qp_qtod";
3776 	  break;
3777 	default:
3778 	  gcc_unreachable ();
3779 	}
3780       break;
3781 
3782     case FLOAT:
3783       switch (GET_MODE (operands[1]))
3784 	{
3785 	case E_SImode:
3786 	  func = "_Qp_itoq";
3787 	  if (TARGET_ARCH64)
3788 	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3789 	  break;
3790 	case E_DImode:
3791 	  func = "_Qp_xtoq";
3792 	  break;
3793 	default:
3794 	  gcc_unreachable ();
3795 	}
3796       break;
3797 
3798     case UNSIGNED_FLOAT:
3799       switch (GET_MODE (operands[1]))
3800 	{
3801 	case E_SImode:
3802 	  func = "_Qp_uitoq";
3803 	  if (TARGET_ARCH64)
3804 	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3805 	  break;
3806 	case E_DImode:
3807 	  func = "_Qp_uxtoq";
3808 	  break;
3809 	default:
3810 	  gcc_unreachable ();
3811 	}
3812       break;
3813 
3814     case FIX:
3815       switch (GET_MODE (operands[0]))
3816 	{
3817 	case E_SImode:
3818 	  func = "_Qp_qtoi";
3819 	  break;
3820 	case E_DImode:
3821 	  func = "_Qp_qtox";
3822 	  break;
3823 	default:
3824 	  gcc_unreachable ();
3825 	}
3826       break;
3827 
3828     case UNSIGNED_FIX:
3829       switch (GET_MODE (operands[0]))
3830 	{
3831 	case E_SImode:
3832 	  func = "_Qp_qtoui";
3833 	  break;
3834 	case E_DImode:
3835 	  func = "_Qp_qtoux";
3836 	  break;
3837 	default:
3838 	  gcc_unreachable ();
3839 	}
3840       break;
3841 
3842     default:
3843       gcc_unreachable ();
3844     }
3845 
3846   emit_soft_tfmode_libcall (func, 2, operands);
3847 }
3848 
3849 /* Expand a hard-float tfmode operation.  All arguments must be in
3850    registers.  */
3851 
3852 static void
3853 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3854 {
3855   rtx op, dest;
3856 
3857   if (GET_RTX_CLASS (code) == RTX_UNARY)
3858     {
3859       operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3860       op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3861     }
3862   else
3863     {
3864       operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3865       operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3866       op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3867 			   operands[1], operands[2]);
3868     }
3869 
3870   if (register_operand (operands[0], VOIDmode))
3871     dest = operands[0];
3872   else
3873     dest = gen_reg_rtx (GET_MODE (operands[0]));
3874 
3875   emit_insn (gen_rtx_SET (dest, op));
3876 
3877   if (dest != operands[0])
3878     emit_move_insn (operands[0], dest);
3879 }
3880 
3881 void
3882 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3883 {
3884   if (TARGET_HARD_QUAD)
3885     emit_hard_tfmode_operation (code, operands);
3886   else
3887     emit_soft_tfmode_binop (code, operands);
3888 }
3889 
3890 void
3891 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3892 {
3893   if (TARGET_HARD_QUAD)
3894     emit_hard_tfmode_operation (code, operands);
3895   else
3896     emit_soft_tfmode_unop (code, operands);
3897 }
3898 
3899 void
3900 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3901 {
3902   if (TARGET_HARD_QUAD)
3903     emit_hard_tfmode_operation (code, operands);
3904   else
3905     emit_soft_tfmode_cvt (code, operands);
3906 }
3907 
/* Return nonzero if a branch/jump/call instruction will be emitting a
   nop into its delay slot.  */
3910 
3911 int
3912 empty_delay_slot (rtx_insn *insn)
3913 {
3914   rtx seq;
3915 
  /* If there is no previous instruction (should not happen), return true.  */
3917   if (PREV_INSN (insn) == NULL)
3918     return 1;
3919 
3920   seq = NEXT_INSN (PREV_INSN (insn));
3921   if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3922     return 0;
3923 
3924   return 1;
3925 }
3926 
3927 /* Return nonzero if we should emit a nop after a cbcond instruction.
   The cbcond instruction does not have a delay slot; however, there is
3929    a severe performance penalty if a control transfer appears right
3930    after a cbcond.  Therefore we emit a nop when we detect this
3931    situation.  */
3932 
3933 int
3934 emit_cbcond_nop (rtx_insn *insn)
3935 {
3936   rtx next = next_active_insn (insn);
3937 
3938   if (!next)
3939     return 1;
3940 
3941   if (NONJUMP_INSN_P (next)
3942       && GET_CODE (PATTERN (next)) == SEQUENCE)
3943     next = XVECEXP (PATTERN (next), 0, 0);
3944   else if (CALL_P (next)
3945 	   && GET_CODE (PATTERN (next)) == PARALLEL)
3946     {
3947       rtx delay = XVECEXP (PATTERN (next), 0, 1);
3948 
3949       if (GET_CODE (delay) == RETURN)
3950 	{
3951 	  /* It's a sibling call.  Do not emit the nop if we're going
3952 	     to emit something other than the jump itself as the first
3953 	     instruction of the sibcall sequence.  */
3954 	  if (sparc_leaf_function_p || TARGET_FLAT)
3955 	    return 0;
3956 	}
3957     }
3958 
3959   if (NONJUMP_INSN_P (next))
3960     return 0;
3961 
3962   return 1;
3963 }
3964 
3965 /* Return nonzero if TRIAL can go into the call delay slot.  */
3966 
3967 int
3968 eligible_for_call_delay (rtx_insn *trial)
3969 {
3970   rtx pat;
3971 
3972   if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3973     return 0;
3974 
3975   /* The only problematic cases are TLS sequences with Sun as/ld.  */
3976   if ((TARGET_GNU_TLS && HAVE_GNU_LD) || !TARGET_TLS)
3977     return 1;
3978 
3979   pat = PATTERN (trial);
3980 
3981   /* We must reject tgd_add{32|64}, i.e.
3982        (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3983      and tldm_add{32|64}, i.e.
3984        (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3985      for Sun as/ld.  */
3986   if (GET_CODE (pat) == SET
3987       && GET_CODE (SET_SRC (pat)) == PLUS)
3988     {
3989       rtx unspec = XEXP (SET_SRC (pat), 1);
3990 
3991       if (GET_CODE (unspec) == UNSPEC
3992 	  && (XINT (unspec, 1) == UNSPEC_TLSGD
3993 	      || XINT (unspec, 1) == UNSPEC_TLSLDM))
3994 	return 0;
3995     }
3996 
3997   return 1;
3998 }
3999 
4000 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
4001    instruction.  RETURN_P is true if the v9 variant 'return' is to be
4002    considered in the test too.
4003 
4004    TRIAL must be a SET whose destination is a REG appropriate for the
4005    'restore' instruction or, if RETURN_P is true, for the 'return'
4006    instruction.  */
4007 
4008 static int
4009 eligible_for_restore_insn (rtx trial, bool return_p)
4010 {
4011   rtx pat = PATTERN (trial);
4012   rtx src = SET_SRC (pat);
4013   bool src_is_freg = false;
4014   rtx src_reg;
4015 
  /* Since we can now do moves between float and integer registers when
     VIS3 is enabled, we have to catch this case.  We can, however, allow
     such moves when doing a 'return'.  */
4019   src_reg = src;
4020   if (GET_CODE (src_reg) == SUBREG)
4021     src_reg = SUBREG_REG (src_reg);
4022   if (GET_CODE (src_reg) == REG
4023       && SPARC_FP_REG_P (REGNO (src_reg)))
4024     src_is_freg = true;
4025 
4026   /* The 'restore src,%g0,dest' pattern for word mode and below.  */
4027   if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4028       && arith_operand (src, GET_MODE (src))
4029       && ! src_is_freg)
4030     {
4031       if (TARGET_ARCH64)
4032         return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4033       else
4034         return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4035     }
4036 
4037   /* The 'restore src,%g0,dest' pattern for double-word mode.  */
4038   else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4039 	   && arith_double_operand (src, GET_MODE (src))
4040 	   && ! src_is_freg)
4041     return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4042 
4043   /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
4044   else if (! TARGET_FPU && register_operand (src, SFmode))
4045     return 1;
4046 
4047   /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
4048   else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4049     return 1;
4050 
4051   /* If we have the 'return' instruction, anything that does not use
4052      local or output registers and can go into a delay slot wins.  */
4053   else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4054     return 1;
4055 
4056   /* The 'restore src1,src2,dest' pattern for SImode.  */
4057   else if (GET_CODE (src) == PLUS
4058 	   && register_operand (XEXP (src, 0), SImode)
4059 	   && arith_operand (XEXP (src, 1), SImode))
4060     return 1;
4061 
4062   /* The 'restore src1,src2,dest' pattern for DImode.  */
4063   else if (GET_CODE (src) == PLUS
4064 	   && register_operand (XEXP (src, 0), DImode)
4065 	   && arith_double_operand (XEXP (src, 1), DImode))
4066     return 1;
4067 
4068   /* The 'restore src1,%lo(src2),dest' pattern.  */
4069   else if (GET_CODE (src) == LO_SUM
4070 	   && ! TARGET_CM_MEDMID
4071 	   && ((register_operand (XEXP (src, 0), SImode)
4072 	        && immediate_operand (XEXP (src, 1), SImode))
4073 	       || (TARGET_ARCH64
4074 		   && register_operand (XEXP (src, 0), DImode)
4075 		   && immediate_operand (XEXP (src, 1), DImode))))
4076     return 1;
4077 
4078   /* The 'restore src,src,dest' pattern.  */
4079   else if (GET_CODE (src) == ASHIFT
4080 	   && (register_operand (XEXP (src, 0), SImode)
4081 	       || register_operand (XEXP (src, 0), DImode))
4082 	   && XEXP (src, 1) == const1_rtx)
4083     return 1;
4084 
4085   return 0;
4086 }
4087 
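/* As an illustration, a positive answer from the function above lets the
   final arithmetic of e.g. "return x + y;" be folded into the epilogue
   as roughly (a sketch):

	ret
	 restore %i0, %i1, %o0

   where the restore both switches register windows and performs the
   addition, leaving the result in the caller's %o0.  */
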
4088 /* Return nonzero if TRIAL can go into the function return's delay slot.  */
4089 
4090 int
4091 eligible_for_return_delay (rtx_insn *trial)
4092 {
4093   int regno;
4094   rtx pat;
4095 
4096   /* If the function uses __builtin_eh_return, the eh_return machinery
4097      occupies the delay slot.  */
4098   if (crtl->calls_eh_return)
4099     return 0;
4100 
4101   if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4102     return 0;
4103 
4104   /* In the case of a leaf or flat function, anything can go into the slot.  */
4105   if (sparc_leaf_function_p || TARGET_FLAT)
4106     return 1;
4107 
4108   if (!NONJUMP_INSN_P (trial))
4109     return 0;
4110 
4111   pat = PATTERN (trial);
4112   if (GET_CODE (pat) == PARALLEL)
4113     {
4114       int i;
4115 
4116       if (! TARGET_V9)
4117 	return 0;
4118       for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4119 	{
4120 	  rtx expr = XVECEXP (pat, 0, i);
4121 	  if (GET_CODE (expr) != SET)
4122 	    return 0;
4123 	  if (GET_CODE (SET_DEST (expr)) != REG)
4124 	    return 0;
4125 	  regno = REGNO (SET_DEST (expr));
4126 	  if (regno >= 8 && regno < 24)
4127 	    return 0;
4128 	}
4129       return !epilogue_renumber (&pat, 1);
4130     }
4131 
4132   if (GET_CODE (pat) != SET)
4133     return 0;
4134 
4135   if (GET_CODE (SET_DEST (pat)) != REG)
4136     return 0;
4137 
4138   regno = REGNO (SET_DEST (pat));
4139 
4140   /* Otherwise, only operations which can be done in tandem with
4141      a `restore' or `return' insn can go into the delay slot.  */
4142   if (regno >= 8 && regno < 24)
4143     return 0;
4144 
  /* If this instruction sets up a floating-point register and we have a
     return instruction, it can probably go in.  But restore will not work
     with FP_REGS.  */
4148   if (! SPARC_INT_REG_P (regno))
4149     return TARGET_V9 && !epilogue_renumber (&pat, 1);
4150 
4151   return eligible_for_restore_insn (trial, true);
4152 }
4153 
4154 /* Return nonzero if TRIAL can go into the sibling call's delay slot.  */
4155 
4156 int
4157 eligible_for_sibcall_delay (rtx_insn *trial)
4158 {
4159   rtx pat;
4160 
4161   if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4162     return 0;
4163 
4164   if (!NONJUMP_INSN_P (trial))
4165     return 0;
4166 
4167   pat = PATTERN (trial);
4168 
4169   if (sparc_leaf_function_p || TARGET_FLAT)
4170     {
4171       /* If the tail call is done using the call instruction,
4172 	 we have to restore %o7 in the delay slot.  */
4173       if (LEAF_SIBCALL_SLOT_RESERVED_P)
4174 	return 0;
4175 
      /* %g1 is used to build the function address.  */
4177       if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4178 	return 0;
4179 
4180       return 1;
4181     }
4182 
4183   if (GET_CODE (pat) != SET)
4184     return 0;
4185 
4186   /* Otherwise, only operations which can be done in tandem with
4187      a `restore' insn can go into the delay slot.  */
4188   if (GET_CODE (SET_DEST (pat)) != REG
4189       || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4190       || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4191     return 0;
4192 
4193   /* If it mentions %o7, it can't go in, because sibcall will clobber it
4194      in most cases.  */
4195   if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4196     return 0;
4197 
4198   return eligible_for_restore_insn (trial, false);
4199 }
4200 
4201 /* Determine if it's legal to put X into the constant pool.  This
4202    is not possible if X contains the address of a symbol that is
4203    not constant (TLS) or not known at final link time (PIC).  */
4204 
4205 static bool
4206 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4207 {
4208   switch (GET_CODE (x))
4209     {
4210     case CONST_INT:
4211     case CONST_WIDE_INT:
4212     case CONST_DOUBLE:
4213     case CONST_VECTOR:
4214       /* Accept all non-symbolic constants.  */
4215       return false;
4216 
4217     case LABEL_REF:
4218       /* Labels are OK iff we are non-PIC.  */
4219       return flag_pic != 0;
4220 
4221     case SYMBOL_REF:
      /* 'Naked' TLS symbol references are never OK;
	 non-TLS symbols are OK iff we are non-PIC.  */
4224       if (SYMBOL_REF_TLS_MODEL (x))
4225 	return true;
4226       else
4227 	return flag_pic != 0;
4228 
4229     case CONST:
4230       return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4231     case PLUS:
4232     case MINUS:
4233       return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4234          || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4235     case UNSPEC:
4236       return true;
4237     default:
4238       gcc_unreachable ();
4239     }
4240 }
4241 
4242 /* Global Offset Table support.  */
4243 static GTY(()) rtx got_symbol_rtx = NULL_RTX;
4244 static GTY(()) rtx got_register_rtx = NULL_RTX;
4245 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4246 
4247 static GTY(()) bool got_helper_needed = false;
4248 
4249 /* Return the SYMBOL_REF for the Global Offset Table.  */
4250 
4251 static rtx
4252 sparc_got (void)
4253 {
4254   if (!got_symbol_rtx)
4255     got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4256 
4257   return got_symbol_rtx;
4258 }
4259 
4260 /* Wrapper around the load_pcrel_sym{si,di} patterns.  */
4261 
4262 static rtx
4263 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
4264 {
4265   int orig_flag_pic = flag_pic;
4266   rtx insn;
4267 
4268   /* The load_pcrel_sym{si,di} patterns require absolute addressing.  */
4269   flag_pic = 0;
4270   if (TARGET_ARCH64)
4271     insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
4272   else
4273     insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
4274   flag_pic = orig_flag_pic;
4275 
4276   return insn;
4277 }
4278 
4279 /* Output the load_pcrel_sym{si,di} patterns.  */
4280 
4281 const char *
4282 output_load_pcrel_sym (rtx *operands)
4283 {
4284   if (flag_delayed_branch)
4285     {
4286       output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
4287       output_asm_insn ("call\t%a2", operands);
4288       output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
4289     }
4290   else
4291     {
4292       output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
4293       output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
4294       output_asm_insn ("call\t%a2", operands);
4295       output_asm_insn (" nop", NULL);
4296     }
4297 
4298   if (operands[2] == got_helper_rtx)
4299     got_helper_needed = true;
4300 
4301   return "";
4302 }
4303 
4304 #ifdef HAVE_GAS_HIDDEN
4305 # define USE_HIDDEN_LINKONCE 1
4306 #else
4307 # define USE_HIDDEN_LINKONCE 0
4308 #endif
4309 
4310 /* Emit code to load the GOT register.  */
4311 
4312 void
4313 load_got_register (void)
4314 {
4315   rtx insn;
4316 
4317   if (TARGET_VXWORKS_RTP)
4318     {
4319       if (!got_register_rtx)
4320 	got_register_rtx = pic_offset_table_rtx;
4321 
4322       insn = gen_vxworks_load_got ();
4323     }
4324   else
4325     {
4326       if (!got_register_rtx)
4327 	got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4328 
      /* The GOT symbol is subject to a PC-relative relocation, so we need a
	 helper function to add the PC value and thus get the final value.  */
4331       if (!got_helper_rtx)
4332 	{
4333 	  char name[32];
4334 
4335 	  /* Skip the leading '%' as that cannot be used in a symbol name.  */
4336 	  if (USE_HIDDEN_LINKONCE)
4337 	    sprintf (name, "__sparc_get_pc_thunk.%s",
4338 		     reg_names[REGNO (got_register_rtx)] + 1);
4339 	  else
4340 	    ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
4341 					 REGNO (got_register_rtx));
4342 
4343 	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4344 	}
4345 
4346       insn
4347 	= gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx);
4348     }
4349 
4350   emit_insn (insn);
4351 }
4352 
4353 /* Ensure that we are not using patterns that are not OK with PIC.  */
4354 
4355 int
4356 check_pic (int i)
4357 {
4358   rtx op;
4359 
4360   switch (flag_pic)
4361     {
4362     case 1:
4363       op = recog_data.operand[i];
4364       gcc_assert (GET_CODE (op) != SYMBOL_REF
4365 	  	  && (GET_CODE (op) != CONST
4366 		      || (GET_CODE (XEXP (op, 0)) == MINUS
4367 			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
4368 			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4369       /* fallthrough */
4370     case 2:
4371     default:
4372       return 1;
4373     }
4374 }
4375 
4376 /* Return true if X is an address which needs a temporary register when
4377    reloaded while generating PIC code.  */
4378 
4379 int
4380 pic_address_needs_scratch (rtx x)
4381 {
  /* An address which is a symbolic operand plus a non-SMALL_INT needs a
     temp reg.  */
4383   if (GET_CODE (x) == CONST
4384       && GET_CODE (XEXP (x, 0)) == PLUS
4385       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4386       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4387       && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4388     return 1;
4389 
4390   return 0;
4391 }
4392 
4393 /* Determine if a given RTX is a valid constant.  We already know this
4394    satisfies CONSTANT_P.  */
4395 
4396 static bool
4397 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4398 {
4399   switch (GET_CODE (x))
4400     {
4401     case CONST:
4402     case SYMBOL_REF:
4403       if (sparc_tls_referenced_p (x))
4404 	return false;
4405       break;
4406 
4407     case CONST_DOUBLE:
      /* Floating-point constants are generally not OK.
	 The only exceptions are 0.0 and all-ones in VIS.  */
4410       if (TARGET_VIS
4411 	  && SCALAR_FLOAT_MODE_P (mode)
4412 	  && (const_zero_operand (x, mode)
4413 	      || const_all_ones_operand (x, mode)))
4414 	return true;
4415 
4416       return false;
4417 
4418     case CONST_VECTOR:
      /* Vector constants are generally not OK.
	 The only exceptions are 0 and -1 in VIS.  */
4421       if (TARGET_VIS
4422 	  && (const_zero_operand (x, mode)
4423 	      || const_all_ones_operand (x, mode)))
4424 	return true;
4425 
4426       return false;
4427 
4428     default:
4429       break;
4430     }
4431 
4432   return true;
4433 }
4434 
4435 /* Determine if a given RTX is a valid constant address.  */
4436 
4437 bool
4438 constant_address_p (rtx x)
4439 {
4440   switch (GET_CODE (x))
4441     {
4442     case LABEL_REF:
4443     case CONST_INT:
4444     case HIGH:
4445       return true;
4446 
4447     case CONST:
4448       if (flag_pic && pic_address_needs_scratch (x))
4449 	return false;
4450       return sparc_legitimate_constant_p (Pmode, x);
4451 
4452     case SYMBOL_REF:
4453       return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4454 
4455     default:
4456       return false;
4457     }
4458 }
4459 
4460 /* Nonzero if the constant value X is a legitimate general operand
4461    when generating PIC code.  It is given that flag_pic is on and
4462    that X satisfies CONSTANT_P.  */
4463 
4464 bool
4465 legitimate_pic_operand_p (rtx x)
4466 {
4467   if (pic_address_needs_scratch (x))
4468     return false;
4469   if (sparc_tls_referenced_p (x))
4470     return false;
4471   return true;
4472 }
4473 
4474 /* Return true if X is a representation of the PIC register.  */
4475 
4476 static bool
4477 sparc_pic_register_p (rtx x)
4478 {
4479   if (!REG_P (x) || !pic_offset_table_rtx)
4480     return false;
4481 
4482   if (x == pic_offset_table_rtx)
4483     return true;
4484 
4485   if (!HARD_REGISTER_P (pic_offset_table_rtx)
4486       && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4487       && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4488     return true;
4489 
4490   return false;
4491 }
4492 
4493 #define RTX_OK_FOR_OFFSET_P(X, MODE)			\
4494   (CONST_INT_P (X)					\
4495    && INTVAL (X) >= -0x1000				\
4496    && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4497 
4498 #define RTX_OK_FOR_OLO10_P(X, MODE)			\
4499   (CONST_INT_P (X)					\
4500    && INTVAL (X) >= -0x1000				\
4501    && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
4502 
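/* Both macros enforce the signed 13-bit immediate range of SPARC memory
   instructions while keeping the whole datum addressable: e.g. for a
   DImode access the plain offset must satisfy -4096 <= N <= 4088.  The
   tighter 0xc00 upper bound in the OLO10 case leaves room for the
   up-to-0x3ff contribution of the %lo() part of the LO_SUM it is
   combined with.  */
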
4503 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4504 
4505    On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4506    ordinarily.  This changes a bit when generating PIC.  */
4507 
4508 static bool
4509 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4510 {
4511   rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4512 
4513   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4514     rs1 = addr;
4515   else if (GET_CODE (addr) == PLUS)
4516     {
4517       rs1 = XEXP (addr, 0);
4518       rs2 = XEXP (addr, 1);
4519 
      /* Canonicalize: REG comes first; if there are no regs,
	 LO_SUM comes first.  */
4522       if (!REG_P (rs1)
4523 	  && GET_CODE (rs1) != SUBREG
4524 	  && (REG_P (rs2)
4525 	      || GET_CODE (rs2) == SUBREG
4526 	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4527 	{
4528 	  rs1 = XEXP (addr, 1);
4529 	  rs2 = XEXP (addr, 0);
4530 	}
4531 
4532       if ((flag_pic == 1
4533 	   && sparc_pic_register_p (rs1)
4534 	   && !REG_P (rs2)
4535 	   && GET_CODE (rs2) != SUBREG
4536 	   && GET_CODE (rs2) != LO_SUM
4537 	   && GET_CODE (rs2) != MEM
4538 	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4539 	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4540 	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4541 	  || ((REG_P (rs1)
4542 	       || GET_CODE (rs1) == SUBREG)
4543 	      && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4544 	{
4545 	  imm1 = rs2;
4546 	  rs2 = NULL;
4547 	}
4548       else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4549 	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4550 	{
4551 	  /* We prohibit REG + REG for TFmode when there are no quad move insns
4552 	     and we consequently need to split.  We do this because REG+REG
4553 	     is not an offsettable address.  If we get the situation in reload
4554 	     where source and destination of a movtf pattern are both MEMs with
4555 	     REG+REG address, then only one of them gets converted to an
4556 	     offsettable address.  */
4557 	  if (mode == TFmode
4558 	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4559 	    return 0;
4560 
4561 	  /* Likewise for TImode, but in all cases.  */
4562 	  if (mode == TImode)
4563 	    return 0;
4564 
	  /* We prohibit REG + REG for DFmode/DImode on ARCH32 when not
	     optimizing, because then mem_min_alignment is likely to be zero
	     after reload and the forced split would lack a matching splitter
	     pattern.  */
4569 	  if (TARGET_ARCH32 && !optimize
4570 	      && (mode == DFmode || mode == DImode))
4571 	    return 0;
4572 	}
4573       else if (USE_AS_OFFSETABLE_LO10
4574 	       && GET_CODE (rs1) == LO_SUM
4575 	       && TARGET_ARCH64
4576 	       && ! TARGET_CM_MEDMID
4577 	       && RTX_OK_FOR_OLO10_P (rs2, mode))
4578 	{
4579 	  rs2 = NULL;
4580 	  imm1 = XEXP (rs1, 1);
4581 	  rs1 = XEXP (rs1, 0);
4582 	  if (!CONSTANT_P (imm1)
4583 	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4584 	    return 0;
4585 	}
4586     }
4587   else if (GET_CODE (addr) == LO_SUM)
4588     {
4589       rs1 = XEXP (addr, 0);
4590       imm1 = XEXP (addr, 1);
4591 
4592       if (!CONSTANT_P (imm1)
4593 	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4594 	return 0;
4595 
4596       /* We can't allow TFmode in 32-bit mode, because an offset greater
4597 	 than the alignment (8) may cause the LO_SUM to overflow.  */
4598       if (mode == TFmode && TARGET_ARCH32)
4599 	return 0;
4600 
4601       /* During reload, accept the HIGH+LO_SUM construct generated by
4602 	 sparc_legitimize_reload_address.  */
4603       if (reload_in_progress
4604 	  && GET_CODE (rs1) == HIGH
4605 	  && XEXP (rs1, 0) == imm1)
4606 	return 1;
4607     }
4608   else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4609     return 1;
4610   else
4611     return 0;
4612 
4613   if (GET_CODE (rs1) == SUBREG)
4614     rs1 = SUBREG_REG (rs1);
4615   if (!REG_P (rs1))
4616     return 0;
4617 
4618   if (rs2)
4619     {
4620       if (GET_CODE (rs2) == SUBREG)
4621 	rs2 = SUBREG_REG (rs2);
4622       if (!REG_P (rs2))
4623 	return 0;
4624     }
4625 
4626   if (strict)
4627     {
4628       if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4629 	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4630 	return 0;
4631     }
4632   else
4633     {
4634       if ((! SPARC_INT_REG_P (REGNO (rs1))
4635 	   && REGNO (rs1) != FRAME_POINTER_REGNUM
4636 	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4637 	  || (rs2
4638 	      && (! SPARC_INT_REG_P (REGNO (rs2))
4639 		  && REGNO (rs2) != FRAME_POINTER_REGNUM
4640 		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4641 	return 0;
4642     }
4643   return 1;
4644 }
4645 
4646 /* Return the SYMBOL_REF for the tls_get_addr function.  */
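/* Examples of addresses accepted above, in assembly form (sketches):
   REG+REG as in "ld [%i0+%i1], %o0", REG+SIMM13 as in
   "ld [%i0+124], %o0", and LO_SUM as in "ld [%i0+%lo(sym)], %o0".  */
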
4647 
4648 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4649 
4650 static rtx
4651 sparc_tls_get_addr (void)
4652 {
4653   if (!sparc_tls_symbol)
4654     sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4655 
4656   return sparc_tls_symbol;
4657 }
4658 
4659 /* Return the Global Offset Table to be used in TLS mode.  */
4660 
4661 static rtx
4662 sparc_tls_got (void)
4663 {
4664   /* In PIC mode, this is just the PIC offset table.  */
4665   if (flag_pic)
4666     {
4667       crtl->uses_pic_offset_table = 1;
4668       return pic_offset_table_rtx;
4669     }
4670 
4671   /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4672      the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
4673   if (TARGET_SUN_TLS && TARGET_ARCH32)
4674     {
4675       load_got_register ();
4676       return got_register_rtx;
4677     }
4678 
4679   /* In all other cases, we load a new pseudo with the GOT symbol.  */
4680   return copy_to_reg (sparc_got ());
4681 }
4682 
4683 /* Return true if X contains a thread-local symbol.  */
4684 
4685 static bool
4686 sparc_tls_referenced_p (rtx x)
4687 {
4688   if (!TARGET_HAVE_TLS)
4689     return false;
4690 
4691   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4692     x = XEXP (XEXP (x, 0), 0);
4693 
4694   if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4695     return true;
4696 
4697   /* That's all we handle in sparc_legitimize_tls_address for now.  */
4698   return false;
4699 }
4700 
4701 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
4702    this (thread-local) address.  */
4703 
4704 static rtx
4705 sparc_legitimize_tls_address (rtx addr)
4706 {
4707   rtx temp1, temp2, temp3, ret, o0, got;
4708   rtx_insn *insn;
4709 
4710   gcc_assert (can_create_pseudo_p ());
4711 
4712   if (GET_CODE (addr) == SYMBOL_REF)
4713     /* Although the various sethi/or sequences generate SImode values, many of
4714        them can be transformed by the linker when relaxing and, if relaxing to
4715        local-exec, will become a sethi/xor pair, which is signed and therefore
4716        a full DImode value in 64-bit mode.  Thus we must use Pmode, lest these
4717        values be spilled onto the stack in 64-bit mode.  */
4718     switch (SYMBOL_REF_TLS_MODEL (addr))
4719       {
4720       case TLS_MODEL_GLOBAL_DYNAMIC:
4721 	start_sequence ();
4722 	temp1 = gen_reg_rtx (Pmode);
4723 	temp2 = gen_reg_rtx (Pmode);
4724 	ret = gen_reg_rtx (Pmode);
4725 	o0 = gen_rtx_REG (Pmode, 8);
4726 	got = sparc_tls_got ();
4727 	if (TARGET_ARCH32)
4728 	  {
4729 	    emit_insn (gen_tgd_hi22si (temp1, addr));
4730 	    emit_insn (gen_tgd_lo10si (temp2, temp1, addr));
4731 	    emit_insn (gen_tgd_addsi (o0, got, temp2, addr));
4732 	    insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (),
4733 						   addr, const1_rtx));
4734 	  }
4735 	else
4736 	  {
4737 	    emit_insn (gen_tgd_hi22di (temp1, addr));
4738 	    emit_insn (gen_tgd_lo10di (temp2, temp1, addr));
4739 	    emit_insn (gen_tgd_adddi (o0, got, temp2, addr));
4740 	    insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (),
4741 						   addr, const1_rtx));
4742 	  }
4743 	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4744 	RTL_CONST_CALL_P (insn) = 1;
4745 	insn = get_insns ();
4746 	end_sequence ();
4747 	emit_libcall_block (insn, ret, o0, addr);
4748 	break;
4749 
4750       case TLS_MODEL_LOCAL_DYNAMIC:
4751 	start_sequence ();
4752 	temp1 = gen_reg_rtx (Pmode);
4753 	temp2 = gen_reg_rtx (Pmode);
4754 	temp3 = gen_reg_rtx (Pmode);
4755 	ret = gen_reg_rtx (Pmode);
4756 	o0 = gen_rtx_REG (Pmode, 8);
4757 	got = sparc_tls_got ();
4758 	if (TARGET_ARCH32)
4759 	  {
4760 	    emit_insn (gen_tldm_hi22si (temp1));
4761 	    emit_insn (gen_tldm_lo10si (temp2, temp1));
4762 	    emit_insn (gen_tldm_addsi (o0, got, temp2));
4763 	    insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (),
4764 						    const1_rtx));
4765 	  }
4766 	else
4767 	  {
4768 	    emit_insn (gen_tldm_hi22di (temp1));
4769 	    emit_insn (gen_tldm_lo10di (temp2, temp1));
4770 	    emit_insn (gen_tldm_adddi (o0, got, temp2));
4771 	    insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (),
4772 						    const1_rtx));
4773 	  }
4774 	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4775 	RTL_CONST_CALL_P (insn) = 1;
4776 	insn = get_insns ();
4777 	end_sequence ();
	/* Attach a unique REG_EQUAL to allow the RTL optimizers to
	   share the LD_BASE result with other LD model accesses.  */
4780 	emit_libcall_block (insn, temp3, o0,
4781 			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4782 					    UNSPEC_TLSLD_BASE));
4783 	temp1 = gen_reg_rtx (Pmode);
4784 	temp2 = gen_reg_rtx (Pmode);
4785 	if (TARGET_ARCH32)
4786 	  {
4787 	    emit_insn (gen_tldo_hix22si (temp1, addr));
4788 	    emit_insn (gen_tldo_lox10si (temp2, temp1, addr));
4789 	    emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr));
4790 	  }
4791 	else
4792 	  {
4793 	    emit_insn (gen_tldo_hix22di (temp1, addr));
4794 	    emit_insn (gen_tldo_lox10di (temp2, temp1, addr));
4795 	    emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr));
4796 	  }
4797 	break;
4798 
4799       case TLS_MODEL_INITIAL_EXEC:
4800 	temp1 = gen_reg_rtx (Pmode);
4801 	temp2 = gen_reg_rtx (Pmode);
4802 	temp3 = gen_reg_rtx (Pmode);
4803 	got = sparc_tls_got ();
4804 	if (TARGET_ARCH32)
4805 	  {
4806 	    emit_insn (gen_tie_hi22si (temp1, addr));
4807 	    emit_insn (gen_tie_lo10si (temp2, temp1, addr));
4808 	    emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4809 	  }
4810 	else
4811 	  {
4812 	    emit_insn (gen_tie_hi22di (temp1, addr));
4813 	    emit_insn (gen_tie_lo10di (temp2, temp1, addr));
4814 	    emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4815 	  }
4816         if (TARGET_SUN_TLS)
4817 	  {
4818 	    ret = gen_reg_rtx (Pmode);
4819 	    if (TARGET_ARCH32)
4820 	      emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7),
4821 					temp3, addr));
4822 	    else
4823 	      emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7),
4824 					temp3, addr));
4825 	  }
4826 	else
4827 	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4828 	break;
4829 
4830       case TLS_MODEL_LOCAL_EXEC:
4831 	temp1 = gen_reg_rtx (Pmode);
4832 	temp2 = gen_reg_rtx (Pmode);
4833 	if (TARGET_ARCH32)
4834 	  {
4835 	    emit_insn (gen_tle_hix22si (temp1, addr));
4836 	    emit_insn (gen_tle_lox10si (temp2, temp1, addr));
4837 	  }
4838 	else
4839 	  {
4840 	    emit_insn (gen_tle_hix22di (temp1, addr));
4841 	    emit_insn (gen_tle_lox10di (temp2, temp1, addr));
4842 	  }
4843 	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4844 	break;
4845 
4846       default:
4847 	gcc_unreachable ();
4848       }
4849 
4850   else if (GET_CODE (addr) == CONST)
4851     {
4852       rtx base, offset;
4853 
4854       gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4855 
4856       base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4857       offset = XEXP (XEXP (addr, 0), 1);
4858 
4859       base = force_operand (base, NULL_RTX);
4860       if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4861 	offset = force_reg (Pmode, offset);
4862       ret = gen_rtx_PLUS (Pmode, base, offset);
4863     }
4864 
4865   else
4866     gcc_unreachable ();  /* for now ... */
4867 
4868   return ret;
4869 }
4870 
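/* For instance, the local-exec case above expands to a sequence that the
   assembler tags with TLS relocations, roughly (a sketch; the actual
   patterns are tle_hix22* and tle_lox10* in sparc.md):

	sethi	%tle_hix22(sym), %o0
	xor	%o0, %tle_lox10(sym), %o0
	add	%g7, %o0, %o0		! %g7 is the thread pointer

   which is the sethi/xor pair mentioned above in connection with linker
   relaxation.  */
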
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero; otherwise we allocate register(s) as
   necessary.  */
4875 
4876 static rtx
4877 sparc_legitimize_pic_address (rtx orig, rtx reg)
4878 {
4879   if (GET_CODE (orig) == SYMBOL_REF
4880       /* See the comment in sparc_expand_move.  */
4881       || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4882     {
4883       bool gotdata_op = false;
4884       rtx pic_ref, address;
4885       rtx_insn *insn;
4886 
4887       if (!reg)
4888 	{
4889 	  gcc_assert (can_create_pseudo_p ());
4890 	  reg = gen_reg_rtx (Pmode);
4891 	}
4892 
4893       if (flag_pic == 2)
4894 	{
4895 	  /* If not during reload, allocate another temp reg here for loading
4896 	     in the address, so that these instructions can be optimized
4897 	     properly.  */
4898 	  rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4899 
	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
4904 	  if (TARGET_ARCH64)
4905 	    {
4906 	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
4907 	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4908 	    }
4909 	  else
4910 	    {
4911 	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
4912 	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4913 	    }
4914 
4915 	  address = temp_reg;
4916 	  gotdata_op = true;
4917 	}
4918       else
4919 	address = orig;
4920 
4921       crtl->uses_pic_offset_table = 1;
4922       if (gotdata_op)
4923 	{
4924 	  if (TARGET_ARCH64)
4925 	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4926 							pic_offset_table_rtx,
4927 							address, orig));
4928 	  else
4929 	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4930 							pic_offset_table_rtx,
4931 							address, orig));
4932 	}
4933       else
4934 	{
4935 	  pic_ref
4936 	    = gen_const_mem (Pmode,
4937 			     gen_rtx_PLUS (Pmode,
4938 					   pic_offset_table_rtx, address));
4939 	  insn = emit_move_insn (reg, pic_ref);
4940 	}
4941 
      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by the loop optimizer.  */
4944       set_unique_reg_note (insn, REG_EQUAL, orig);
4945       return reg;
4946     }
4947   else if (GET_CODE (orig) == CONST)
4948     {
4949       rtx base, offset;
4950 
4951       if (GET_CODE (XEXP (orig, 0)) == PLUS
4952 	  && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4953 	return orig;
4954 
4955       if (!reg)
4956 	{
4957 	  gcc_assert (can_create_pseudo_p ());
4958 	  reg = gen_reg_rtx (Pmode);
4959 	}
4960 
4961       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4962       base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4963       offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4964 			 		     base == reg ? NULL_RTX : reg);
4965 
4966       if (GET_CODE (offset) == CONST_INT)
4967 	{
4968 	  if (SMALL_INT (offset))
4969 	    return plus_constant (Pmode, base, INTVAL (offset));
4970 	  else if (can_create_pseudo_p ())
4971 	    offset = force_reg (Pmode, offset);
4972 	  else
4973 	    /* If we reach here, then something is seriously wrong.  */
4974 	    gcc_unreachable ();
4975 	}
4976       return gen_rtx_PLUS (Pmode, base, offset);
4977     }
4978   else if (GET_CODE (orig) == LABEL_REF)
4979     /* ??? We ought to be checking that the register is live instead, in case
4980        it is eliminated.  */
4981     crtl->uses_pic_offset_table = 1;
4982 
4983   return orig;
4984 }
4985 
4986 /* Try machine-dependent ways of modifying an illegitimate address X
4987    to be legitimate.  If we find one, return the new, valid address.
4988 
4989    OLDX is the address as it was before break_out_memory_refs was called.
4990    In some cases it is useful to look at this to decide what needs to be done.
4991 
4992    MODE is the mode of the operand pointed to by X.
4993 
4994    On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */
4995 
4996 static rtx
4997 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4998 			  machine_mode mode)
4999 {
5000   rtx orig_x = x;
5001 
5002   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
5003     x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5004 		      force_operand (XEXP (x, 0), NULL_RTX));
5005   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
5006     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5007 		      force_operand (XEXP (x, 1), NULL_RTX));
5008   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
5009     x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
5010 		      XEXP (x, 1));
5011   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
5012     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5013 		      force_operand (XEXP (x, 1), NULL_RTX));
5014 
5015   if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
5016     return x;
5017 
5018   if (sparc_tls_referenced_p (x))
5019     x = sparc_legitimize_tls_address (x);
5020   else if (flag_pic)
5021     x = sparc_legitimize_pic_address (x, NULL_RTX);
5022   else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
5023     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5024 		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
5025   else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
5026     x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5027 		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
5028   else if (GET_CODE (x) == SYMBOL_REF
5029 	   || GET_CODE (x) == CONST
5030 	   || GET_CODE (x) == LABEL_REF)
5031     x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
5032 
5033   return x;
5034 }
5035 
5036 /* Delegitimize an address that was legitimized by the above function.  */
5037 
5038 static rtx
5039 sparc_delegitimize_address (rtx x)
5040 {
5041   x = delegitimize_mem_from_attrs (x);
5042 
5043   if (GET_CODE (x) == LO_SUM)
5044     x = XEXP (x, 1);
5045 
5046   if (GET_CODE (x) == UNSPEC)
5047     switch (XINT (x, 1))
5048       {
5049       case UNSPEC_MOVE_PIC:
5050       case UNSPEC_TLSLE:
5051 	x = XVECEXP (x, 0, 0);
5052 	gcc_assert (GET_CODE (x) == SYMBOL_REF);
5053 	break;
5054       case UNSPEC_MOVE_GOTDATA:
5055 	x = XVECEXP (x, 0, 2);
5056 	gcc_assert (GET_CODE (x) == SYMBOL_REF);
5057 	break;
5058       default:
5059 	break;
5060       }
5061 
5062   /* This is generated by mov{si,di}_pic_label_ref in PIC mode.  */
5063   if (GET_CODE (x) == MINUS
5064       && (XEXP (x, 0) == got_register_rtx
5065 	  || sparc_pic_register_p (XEXP (x, 0))))
5066     {
5067       rtx y = XEXP (x, 1);
5068 
5069       if (GET_CODE (y) == LO_SUM)
5070 	y = XEXP (y, 1);
5071 
5072       if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
5073 	{
5074 	  x = XVECEXP (y, 0, 0);
5075 	  gcc_assert (GET_CODE (x) == LABEL_REF
5076 		      || (GET_CODE (x) == CONST
5077 			  && GET_CODE (XEXP (x, 0)) == PLUS
5078 			  && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5079 			  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
5080 	}
5081     }
5082 
5083   return x;
5084 }
5085 
5086 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
5087    replace the input X, or the original X if no replacement is called for.
5088    The output parameter *WIN is 1 if the calling macro should goto WIN,
5089    0 if it should not.
5090 
5091    For SPARC, we wish to handle addresses by splitting them into
5092    HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
5093    This cuts the number of extra insns by one.
5094 
5095    Do nothing when generating PIC code and the address is a symbolic
5096    operand or requires a scratch register.  */
5097 
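/* For illustration, a sketch of the split this enables: a constant
   address SYM is rewritten as

       (lo_sum:SI (high:SI (symbol_ref SYM)) (symbol_ref SYM))

   so that only the HIGH part needs a scratch register while the LO_SUM
   stays in the memory reference, i.e. the classic sequence

       sethi	%hi(SYM), %g1
       ld	[%g1+%lo(SYM)], %reg

   instead of materializing the full address first.  */
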
5098 rtx
5099 sparc_legitimize_reload_address (rtx x, machine_mode mode,
5100 				 int opnum, int type,
5101 				 int ind_levels ATTRIBUTE_UNUSED, int *win)
5102 {
5103   /* Decompose SImode constants into HIGH+LO_SUM.  */
5104   if (CONSTANT_P (x)
5105       && (mode != TFmode || TARGET_ARCH64)
5106       && GET_MODE (x) == SImode
5107       && GET_CODE (x) != LO_SUM
5108       && GET_CODE (x) != HIGH
5109       && sparc_code_model <= CM_MEDLOW
5110       && !(flag_pic
5111 	   && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5112     {
5113       x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5114       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5115 		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5116 		   opnum, (enum reload_type)type);
5117       *win = 1;
5118       return x;
5119     }
5120 
5121   /* We have to recognize what we have already generated above.  */
5122   if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5123     {
5124       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5125 		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5126 		   opnum, (enum reload_type)type);
5127       *win = 1;
5128       return x;
5129     }
5130 
5131   *win = 0;
5132   return x;
5133 }
5134 
5135 /* Return true if ADDR (a legitimate address expression)
5136    has an effect that depends on the machine mode it is used for.
5137 
5138    In PIC mode,
5139 
5140       (mem:HI [%l7+a])
5141 
5142    is not equivalent to
5143 
5144       (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5145 
5146    because [%l7+a+1] is interpreted as the address of (a+1).  */
5147 
5148 
5149 static bool
5150 sparc_mode_dependent_address_p (const_rtx addr,
5151 				addr_space_t as ATTRIBUTE_UNUSED)
5152 {
5153   if (GET_CODE (addr) == PLUS
5154       && sparc_pic_register_p (XEXP (addr, 0))
5155       && symbolic_operand (XEXP (addr, 1), VOIDmode))
5156     return true;
5157 
5158   return false;
5159 }
5160 
5161 /* Emit a call instruction with the pattern given by PAT.  ADDR is the
5162    address of the call target.  */
5163 
5164 void
5165 sparc_emit_call_insn (rtx pat, rtx addr)
5166 {
5167   rtx_insn *insn;
5168 
5169   insn = emit_call_insn (pat);
5170 
5171   /* The PIC register is live on entry to VxWorks PIC PLT entries.  */
5172   if (TARGET_VXWORKS_RTP
5173       && flag_pic
5174       && GET_CODE (addr) == SYMBOL_REF
5175       && (SYMBOL_REF_DECL (addr)
5176 	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5177 	  : !SYMBOL_REF_LOCAL_P (addr)))
5178     {
5179       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5180       crtl->uses_pic_offset_table = 1;
5181     }
5182 }
5183 
5184 /* Return 1 if RTX is a MEM which is known to be aligned to at
5185    least a DESIRED byte boundary.  */
5186 
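/* For illustration: for (mem:DF (plus (reg %fp) (const_int -8))) and
   DESIRED == 8 in 32-bit mode (where SPARC_STACK_BIAS is 0), the
   frame/stack pointer branch below checks (-8 - 0) & 7 == 0 and
   returns 1, since the stack pointer is kept doubleword aligned.  */
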
5187 int
5188 mem_min_alignment (rtx mem, int desired)
5189 {
5190   rtx addr, base, offset;
5191 
5192   /* If it's not a MEM we can't accept it.  */
5193   if (GET_CODE (mem) != MEM)
5194     return 0;
5195 
5196   /* Obviously...  */
5197   if (!TARGET_UNALIGNED_DOUBLES
5198       && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5199     return 1;
5200 
5201   /* ??? The rest of the function predates MEM_ALIGN so
5202      there is probably a bit of redundancy.  */
5203   addr = XEXP (mem, 0);
5204   base = offset = NULL_RTX;
5205   if (GET_CODE (addr) == PLUS)
5206     {
5207       if (GET_CODE (XEXP (addr, 0)) == REG)
5208 	{
5209 	  base = XEXP (addr, 0);
5210 
5211 	  /* What we are saying here is that if the base
5212 	     REG is aligned properly, the compiler will make
5213 	     sure any REG-based index upon it is aligned
5214 	     as well.  */
5215 	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5216 	    offset = XEXP (addr, 1);
5217 	  else
5218 	    offset = const0_rtx;
5219 	}
5220     }
5221   else if (GET_CODE (addr) == REG)
5222     {
5223       base = addr;
5224       offset = const0_rtx;
5225     }
5226 
5227   if (base != NULL_RTX)
5228     {
5229       int regno = REGNO (base);
5230 
5231       if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5232 	{
5233 	  /* Check if the compiler has recorded some information
5234 	     about the alignment of the base REG.  If reload has
5235 	     completed, we already matched with proper alignments.
5236 	     If global_alloc has not been run, though, reload might
5237 	     give us an unaligned pointer to the local stack.  */
5238 	  if (((cfun != 0
5239 		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5240 	       || (optimize && reload_completed))
5241 	      && (INTVAL (offset) & (desired - 1)) == 0)
5242 	    return 1;
5243 	}
5244       else
5245 	{
5246 	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5247 	    return 1;
5248 	}
5249     }
5250   else if (! TARGET_UNALIGNED_DOUBLES
5251 	   || CONSTANT_P (addr)
5252 	   || GET_CODE (addr) == LO_SUM)
5253     {
5254       /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5255 	 is true, in which case we can only assume that an access is aligned if
5256 	 it is to a constant address, or the address involves a LO_SUM.  */
5257       return 1;
5258     }
5259 
5260   /* An obviously unaligned address.  */
5261   return 0;
5262 }
5263 
5264 
5265 /* Vectors to keep interesting information about registers where it can easily
5266    be got.  We used to use the actual mode value as the bit number, but there
5267    are more than 32 modes now.  Instead we use two tables: one indexed by
5268    hard register number, and one indexed by mode.  */
5269 
5270 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5271    they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
5272    mapped into one sparc_mode_class mode.  */
5273 
5274 enum sparc_mode_class {
5275   H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5276   SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5277   CC_MODE, CCFP_MODE
5278 };
5279 
5280 /* Modes for single-word and smaller quantities.  */
5281 #define S_MODES \
5282   ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5283 
5284 /* Modes for double-word and smaller quantities.  */
5285 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5286 
5287 /* Modes for quad-word and smaller quantities.  */
5288 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5289 
5290 /* Modes for 8-word and smaller quantities.  */
5291 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5292 
5293 /* Modes for single-float quantities.  */
5294 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5295 
5296 /* Modes for double-float and smaller quantities.  */
5297 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5298 
5299 /* Modes for quad-float and smaller quantities.  */
5300 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5301 
5302 /* Modes for quad-float pairs and smaller quantities.  */
5303 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5304 
5305 /* Modes for double-float only quantities.  */
5306 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5307 
5308 /* Modes for quad-float and double-float only quantities.  */
5309 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5310 
5311 /* Modes for quad-float pairs and double-float only quantities.  */
5312 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5313 
5314 /* Modes for condition codes.  */
5315 #define CC_MODES (1 << (int) CC_MODE)
5316 #define CCFP_MODES (1 << (int) CCFP_MODE)
5317 
5318 /* Value is 1 if register/mode pair is acceptable on sparc.
5319 
5320    The funny mixture of D and T modes is because integer operations
5321    do not specially operate on tetra quantities, so non-quad-aligned
5322    registers can hold quadword quantities (except %o4 and %i4 because
5323    they cross fixed registers).
5324 
5325    ??? Note that, despite the settings, non-double-aligned parameter
5326    registers can hold double-word quantities in 32-bit mode.  */
5327 
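/* For illustration, a sketch of how the two tables are combined by a
   simple membership test: a hard register REGNO can hold a value of
   mode MODE whenever

       (hard_regno_mode_classes[REGNO] & sparc_mode_class[MODE]) != 0

   E.g. %f1, an odd FP register, has class SF_MODES, which contains
   SF_MODE but not DF_MODE, so it can hold SFmode but not DFmode.  */
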
5328 /* This points to either the 32-bit or the 64-bit version.  */
5329 static const int *hard_regno_mode_classes;
5330 
5331 static const int hard_32bit_mode_classes[] = {
5332   S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5333   T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5334   T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5335   T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5336 
5337   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5338   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5339   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5340   OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5341 
5342   /* FP regs f32 to f63.  Only the even numbered registers actually exist,
5343      and none can hold SFmode/SImode values.  */
5344   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5345   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5346   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5347   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5348 
5349   /* %fcc[0123] */
5350   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5351 
5352   /* %icc, %sfp, %gsr */
5353   CC_MODES, 0, D_MODES
5354 };
5355 
5356 static const int hard_64bit_mode_classes[] = {
5357   D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5358   O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5359   T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5360   O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5361 
5362   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5363   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5364   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5365   OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5366 
5367   /* FP regs f32 to f63.  Only the even numbered registers actually exist,
5368      and none can hold SFmode/SImode values.  */
5369   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5370   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5371   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5372   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5373 
5374   /* %fcc[0123] */
5375   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5376 
5377   /* %icc, %sfp, %gsr */
5378   CC_MODES, 0, D_MODES
5379 };
5380 
5381 static int sparc_mode_class [NUM_MACHINE_MODES];
5382 
5383 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5384 
5385 static void
5386 sparc_init_modes (void)
5387 {
5388   int i;
5389 
5390   for (i = 0; i < NUM_MACHINE_MODES; i++)
5391     {
5392       machine_mode m = (machine_mode) i;
5393       unsigned int size = GET_MODE_SIZE (m);
5394 
5395       switch (GET_MODE_CLASS (m))
5396 	{
5397 	case MODE_INT:
5398 	case MODE_PARTIAL_INT:
5399 	case MODE_COMPLEX_INT:
5400 	  if (size < 4)
5401 	    sparc_mode_class[i] = 1 << (int) H_MODE;
5402 	  else if (size == 4)
5403 	    sparc_mode_class[i] = 1 << (int) S_MODE;
5404 	  else if (size == 8)
5405 	    sparc_mode_class[i] = 1 << (int) D_MODE;
5406 	  else if (size == 16)
5407 	    sparc_mode_class[i] = 1 << (int) T_MODE;
5408 	  else if (size == 32)
5409 	    sparc_mode_class[i] = 1 << (int) O_MODE;
5410 	  else
5411 	    sparc_mode_class[i] = 0;
5412 	  break;
5413 	case MODE_VECTOR_INT:
5414 	  if (size == 4)
5415 	    sparc_mode_class[i] = 1 << (int) SF_MODE;
5416 	  else if (size == 8)
5417 	    sparc_mode_class[i] = 1 << (int) DF_MODE;
5418 	  else
5419 	    sparc_mode_class[i] = 0;
5420 	  break;
5421 	case MODE_FLOAT:
5422 	case MODE_COMPLEX_FLOAT:
5423 	  if (size == 4)
5424 	    sparc_mode_class[i] = 1 << (int) SF_MODE;
5425 	  else if (size == 8)
5426 	    sparc_mode_class[i] = 1 << (int) DF_MODE;
5427 	  else if (size == 16)
5428 	    sparc_mode_class[i] = 1 << (int) TF_MODE;
5429 	  else if (size == 32)
5430 	    sparc_mode_class[i] = 1 << (int) OF_MODE;
5431 	  else
5432 	    sparc_mode_class[i] = 0;
5433 	  break;
5434 	case MODE_CC:
5435 	  if (m == CCFPmode || m == CCFPEmode)
5436 	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5437 	  else
5438 	    sparc_mode_class[i] = 1 << (int) CC_MODE;
5439 	  break;
5440 	default:
5441 	  sparc_mode_class[i] = 0;
5442 	  break;
5443 	}
5444     }
5445 
5446   if (TARGET_ARCH64)
5447     hard_regno_mode_classes = hard_64bit_mode_classes;
5448   else
5449     hard_regno_mode_classes = hard_32bit_mode_classes;
5450 
5451   /* Initialize the array used by REGNO_REG_CLASS.  */
5452   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5453     {
5454       if (i < 16 && TARGET_V8PLUS)
5455 	sparc_regno_reg_class[i] = I64_REGS;
5456       else if (i < 32 || i == FRAME_POINTER_REGNUM)
5457 	sparc_regno_reg_class[i] = GENERAL_REGS;
5458       else if (i < 64)
5459 	sparc_regno_reg_class[i] = FP_REGS;
5460       else if (i < 96)
5461 	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5462       else if (i < 100)
5463 	sparc_regno_reg_class[i] = FPCC_REGS;
5464       else
5465 	sparc_regno_reg_class[i] = NO_REGS;
5466     }
5467 }
5468 
5469 /* Return whether REGNO, a global or FP register, must be saved/restored.  */
5470 
5471 static inline bool
5472 save_global_or_fp_reg_p (unsigned int regno,
5473 			 int leaf_function ATTRIBUTE_UNUSED)
5474 {
5475   return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5476 }
5477 
5478 /* Return whether the return address register (%i7) is needed.  */
5479 
5480 static inline bool
5481 return_addr_reg_needed_p (int leaf_function)
5482 {
5483   /* If it is live, for example because of __builtin_return_address (0).  */
5484   if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5485     return true;
5486 
5487   /* Otherwise, it is needed as save register if %o7 is clobbered.  */
5488   if (!leaf_function
5489       /* Loading the GOT register clobbers %o7.  */
5490       || crtl->uses_pic_offset_table
5491       || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5492     return true;
5493 
5494   return false;
5495 }
5496 
5497 /* Return whether REGNO, a local or in register, must be saved/restored.  */
5498 
5499 static bool
5500 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5501 {
5502   /* General case: call-saved registers live at some point.  */
5503   if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5504     return true;
5505 
5506   /* Frame pointer register (%fp) if needed.  */
5507   if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5508     return true;
5509 
5510   /* Return address register (%i7) if needed.  */
5511   if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5512     return true;
5513 
5514   /* GOT register (%l7) if needed.  */
5515   if (got_register_rtx && regno == REGNO (got_register_rtx))
5516     return true;
5517 
5518   /* If the function accesses prior frames, the frame pointer and the return
5519      address of the previous frame must be saved on the stack.  */
5520   if (crtl->accesses_prior_frames
5521       && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5522     return true;
5523 
5524   return false;
5525 }
5526 
5527 /* Compute the frame size required by the function.  This function is called
5528    during the reload pass and also by sparc_expand_prologue.  */
5529 
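/* For illustration, for a nonempty frame the computation below boils
   down to (a restatement, not code used by the function):

       frame_size = SPARC_STACK_ALIGN (ROUND_UP (size, 8)
				       + n_global_fp_regs * 4
				       + ROUND_UP (args_size, 8)
				       + FIRST_PARM_OFFSET (cfun->decl));

   where the FIRST_PARM_OFFSET term reserves the register window save
   area of the frame.  */
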
5530 static HOST_WIDE_INT
5531 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5532 {
5533   HOST_WIDE_INT frame_size, apparent_frame_size;
5534   int args_size, n_global_fp_regs = 0;
5535   bool save_local_in_regs_p = false;
5536   unsigned int i;
5537 
5538   /* If the function allocates dynamic stack space, the dynamic offset is
5539      computed early and contains REG_PARM_STACK_SPACE, so we need to cope.  */
5540   if (leaf_function && !cfun->calls_alloca)
5541     args_size = 0;
5542   else
5543     args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5544 
5545   /* Calculate space needed for global registers.  */
5546   if (TARGET_ARCH64)
5547     {
5548       for (i = 0; i < 8; i++)
5549 	if (save_global_or_fp_reg_p (i, 0))
5550 	  n_global_fp_regs += 2;
5551     }
5552   else
5553     {
5554       for (i = 0; i < 8; i += 2)
5555 	if (save_global_or_fp_reg_p (i, 0)
5556 	    || save_global_or_fp_reg_p (i + 1, 0))
5557 	  n_global_fp_regs += 2;
5558     }
5559 
5560   /* In the flat window model, find out which local and in registers need to
5561      be saved.  We don't reserve space in the current frame for them as they
5562      will be spilled into the register window save area of the caller's frame.
5563      However, as soon as we use this register window save area, we must create
5564      that of the current frame to make it the live one.  */
5565   if (TARGET_FLAT)
5566     for (i = 16; i < 32; i++)
5567       if (save_local_or_in_reg_p (i, leaf_function))
5568 	{
5569 	 save_local_in_regs_p = true;
5570 	 break;
5571 	}
5572 
5573   /* Calculate space needed for FP registers.  */
5574   for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5575     if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5576       n_global_fp_regs += 2;
5577 
5578   if (size == 0
5579       && n_global_fp_regs == 0
5580       && args_size == 0
5581       && !save_local_in_regs_p)
5582     frame_size = apparent_frame_size = 0;
5583   else
5584     {
5585       /* Start from the apparent frame size.  */
5586       apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5587 
5588       /* We need to add the size of the outgoing argument area.  */
5589       frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5590 
5591       /* And that of the register window save area.  */
5592       frame_size += FIRST_PARM_OFFSET (cfun->decl);
5593 
5594       /* Finally, bump to the appropriate alignment.  */
5595       frame_size = SPARC_STACK_ALIGN (frame_size);
5596     }
5597 
5598   /* Set up values for use in prologue and epilogue.  */
5599   sparc_frame_size = frame_size;
5600   sparc_apparent_frame_size = apparent_frame_size;
5601   sparc_n_global_fp_regs = n_global_fp_regs;
5602   sparc_save_local_in_regs_p = save_local_in_regs_p;
5603 
5604   return frame_size;
5605 }
5606 
5607 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET.  */
5608 
5609 int
5610 sparc_initial_elimination_offset (int to)
5611 {
5612   int offset;
5613 
5614   if (to == STACK_POINTER_REGNUM)
5615     offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5616   else
5617     offset = 0;
5618 
5619   offset += SPARC_STACK_BIAS;
5620   return offset;
5621 }
5622 
5623 /* Output any necessary .register pseudo-ops.  */
5624 
5625 void
5626 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5627 {
5628   int i;
5629 
5630   if (TARGET_ARCH32)
5631     return;
5632 
5633   /* Check whether %g[2367] were used without a .register
5634      directive already having been printed for them.  */
5635   for (i = 2; i < 8; i++)
5636     {
5637       if (df_regs_ever_live_p (i)
5638 	  && ! sparc_hard_reg_printed [i])
5639 	{
5640 	  sparc_hard_reg_printed [i] = 1;
5641 	  /* %g7 is used as TLS base register, use #ignore
5642 	     for it instead of #scratch.  */
5643 	  fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5644 		   i == 7 ? "ignore" : "scratch");
5645 	}
5646       if (i == 3) i = 5;
5647     }
5648 }
5649 
5650 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5651 
5652 #if PROBE_INTERVAL > 4096
5653 #error Cannot use indexed addressing mode for stack probing
5654 #endif
5655 
5656 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5657    inclusive.  These are offsets from the current stack pointer.
5658 
5659    Note that we don't use the REG+REG addressing mode for the probes because
5660    of the stack bias in 64-bit mode.  And it doesn't really buy us anything
5661    so the advantage of having a single code path wins here.  */
5662 
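/* For illustration, a sketch (assuming the default 4096-byte
   PROBE_INTERVAL) of the simplest case below, size <= PROBE_INTERVAL:

       set	FIRST, %g1
       sub	%sp, %g1, %g1	! %g1 = %sp - FIRST
       st	%g0, [%g1-SIZE]	! the probe itself  */
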
5663 static void
5664 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5665 {
5666   rtx g1 = gen_rtx_REG (Pmode, 1);
5667 
5668   /* See if we have a constant small number of probes to generate.  If so,
5669      that's the easy case.  */
5670   if (size <= PROBE_INTERVAL)
5671     {
5672       emit_move_insn (g1, GEN_INT (first));
5673       emit_insn (gen_rtx_SET (g1,
5674 			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5675       emit_stack_probe (plus_constant (Pmode, g1, -size));
5676     }
5677 
5678   /* The run-time loop is made up of 9 insns in the generic case while the
5679      compile-time loop is made up of 4+2*(n-2) insns for n intervals.  */
5680   else if (size <= 4 * PROBE_INTERVAL)
5681     {
5682       HOST_WIDE_INT i;
5683 
5684       emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5685       emit_insn (gen_rtx_SET (g1,
5686 			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5687       emit_stack_probe (g1);
5688 
5689       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5690 	 it exceeds SIZE.  If only two probes are needed, this will not
5691 	 generate any code.  Then probe at FIRST + SIZE.  */
5692       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5693 	{
5694 	  emit_insn (gen_rtx_SET (g1,
5695 				  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5696 	  emit_stack_probe (g1);
5697 	}
5698 
5699       emit_stack_probe (plus_constant (Pmode, g1,
5700 				       (i - PROBE_INTERVAL) - size));
5701     }
5702 
5703   /* Otherwise, do the same as above, but in a loop.  Note that we must be
5704      extra careful with variables wrapping around because we might be at
5705      the very top (or the very bottom) of the address space and we have
5706      to be able to handle this case properly; in particular, we use an
5707      equality test for the loop condition.  */
5708   else
5709     {
5710       HOST_WIDE_INT rounded_size;
5711       rtx g4 = gen_rtx_REG (Pmode, 4);
5712 
5713       emit_move_insn (g1, GEN_INT (first));
5714 
5715 
5716       /* Step 1: round SIZE to the previous multiple of the interval.  */
5717 
5718       rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5719       emit_move_insn (g4, GEN_INT (rounded_size));
5720 
5721 
5722       /* Step 2: compute initial and final value of the loop counter.  */
5723 
5724       /* TEST_ADDR = SP + FIRST.  */
5725       emit_insn (gen_rtx_SET (g1,
5726 			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5727 
5728       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
5729       emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5730 
5731 
5732       /* Step 3: the loop
5733 
5734 	 while (TEST_ADDR != LAST_ADDR)
5735 	   {
5736 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5737 	     probe at TEST_ADDR
5738 	   }
5739 
5740 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5741 	 until it is equal to ROUNDED_SIZE.  */
5742 
5743       if (TARGET_ARCH64)
5744 	emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5745       else
5746 	emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5747 
5748 
5749       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5750 	 that SIZE is equal to ROUNDED_SIZE.  */
5751 
5752       if (size != rounded_size)
5753 	emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5754     }
5755 
5756   /* Make sure nothing is scheduled before we are done.  */
5757   emit_insn (gen_blockage ());
5758 }
5759 
5760 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
5761    absolute addresses.  */
5762 
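/* For illustration, with the default 4096-byte PROBE_INTERVAL this
   emits a loop of the form (32-bit variant, zero stack bias):

   .LPSRL0:
	add	%g1, -4096, %g1
	cmp	%g1, %g4
	bne	.LPSRL0
	 st	%g0, [%g1+0]	! probe in the delay slot  */
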
5763 const char *
5764 output_probe_stack_range (rtx reg1, rtx reg2)
5765 {
5766   static int labelno = 0;
5767   char loop_lab[32];
5768   rtx xops[2];
5769 
5770   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5771 
5772   /* Loop.  */
5773   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5774 
5775   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
5776   xops[0] = reg1;
5777   xops[1] = GEN_INT (-PROBE_INTERVAL);
5778   output_asm_insn ("add\t%0, %1, %0", xops);
5779 
5780   /* Test if TEST_ADDR == LAST_ADDR.  */
5781   xops[1] = reg2;
5782   output_asm_insn ("cmp\t%0, %1", xops);
5783 
5784   /* Probe at TEST_ADDR and branch.  */
5785   if (TARGET_ARCH64)
5786     fputs ("\tbne,pt\t%xcc,", asm_out_file);
5787   else
5788     fputs ("\tbne\t", asm_out_file);
5789   assemble_name_raw (asm_out_file, loop_lab);
5790   fputc ('\n', asm_out_file);
5791   xops[1] = GEN_INT (SPARC_STACK_BIAS);
5792   output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5793 
5794   return "";
5795 }
5796 
5797 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5798    needed.  LOW is supposed to be double-word aligned for 32-bit registers.
5799    SAVE_P decides whether a register must be saved/restored.  ACTION_TRUE
5800    is the action to be performed if SAVE_P returns true and ACTION_FALSE
5801    the action to be performed if it returns false.  Return the new offset.  */
5802 
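/* For illustration of the pairing logic below: if both registers of an
   even/odd pair must be handled, a single doubleword move (DImode or
   DFmode) covers both; if only the odd one must be, the offset is
   bumped by 4 first so the register still lands in its natural slot.  */
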
5803 typedef bool (*sorr_pred_t) (unsigned int, int);
5804 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5805 
5806 static int
5807 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5808 			   int offset, int leaf_function, sorr_pred_t save_p,
5809 			   sorr_act_t action_true, sorr_act_t action_false)
5810 {
5811   unsigned int i;
5812   rtx mem;
5813   rtx_insn *insn;
5814 
5815   if (TARGET_ARCH64 && high <= 32)
5816     {
5817       int fp_offset = -1;
5818 
5819       for (i = low; i < high; i++)
5820 	{
5821 	  if (save_p (i, leaf_function))
5822 	    {
5823 	      mem = gen_frame_mem (DImode, plus_constant (Pmode,
5824 							  base, offset));
5825 	      if (action_true == SORR_SAVE)
5826 		{
5827 		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5828 		  RTX_FRAME_RELATED_P (insn) = 1;
5829 		}
5830 	      else  /* action_true == SORR_RESTORE */
5831 		{
5832 		  /* The frame pointer must be restored last since its old
5833 		     value may be used as base address for the frame.  This
5834 		     is problematic in 64-bit mode only because of the lack
5835 		     of double-word load instruction.  */
5836 		  if (i == HARD_FRAME_POINTER_REGNUM)
5837 		    fp_offset = offset;
5838 		  else
5839 		    emit_move_insn (gen_rtx_REG (DImode, i), mem);
5840 		}
5841 	      offset += 8;
5842 	    }
5843 	  else if (action_false == SORR_ADVANCE)
5844 	    offset += 8;
5845 	}
5846 
5847       if (fp_offset >= 0)
5848 	{
5849 	  mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5850 	  emit_move_insn (hard_frame_pointer_rtx, mem);
5851 	}
5852     }
5853   else
5854     {
5855       for (i = low; i < high; i += 2)
5856 	{
5857 	  bool reg0 = save_p (i, leaf_function);
5858 	  bool reg1 = save_p (i + 1, leaf_function);
5859 	  machine_mode mode;
5860 	  int regno;
5861 
5862 	  if (reg0 && reg1)
5863 	    {
5864 	      mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5865 	      regno = i;
5866 	    }
5867 	  else if (reg0)
5868 	    {
5869 	      mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5870 	      regno = i;
5871 	    }
5872 	  else if (reg1)
5873 	    {
5874 	      mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5875 	      regno = i + 1;
5876 	      offset += 4;
5877 	    }
5878 	  else
5879 	    {
5880 	      if (action_false == SORR_ADVANCE)
5881 		offset += 8;
5882 	      continue;
5883 	    }
5884 
5885 	  mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5886 	  if (action_true == SORR_SAVE)
5887 	    {
5888 	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5889 	      RTX_FRAME_RELATED_P (insn) = 1;
5890 	      if (mode == DImode)
5891 		{
5892 		  rtx set1, set2;
5893 		  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5894 							      offset));
5895 		  set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5896 		  RTX_FRAME_RELATED_P (set1) = 1;
5897 		  mem
5898 		    = gen_frame_mem (SImode, plus_constant (Pmode, base,
5899 							    offset + 4));
5900 		  set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5901 		  RTX_FRAME_RELATED_P (set2) = 1;
5902 		  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5903 				gen_rtx_PARALLEL (VOIDmode,
5904 						  gen_rtvec (2, set1, set2)));
5905 		}
5906 	    }
5907 	  else  /* action_true == SORR_RESTORE */
5908 	    emit_move_insn (gen_rtx_REG (mode, regno), mem);
5909 
5910 	  /* Bump and round down to double word
5911 	     in case we already bumped by 4.  */
5912 	  offset = ROUND_DOWN (offset + 8, 8);
5913 	}
5914     }
5915 
5916   return offset;
5917 }
5918 
5919 /* Emit code to adjust BASE to OFFSET.  Return the new base.  */
5920 
5921 static rtx
5922 emit_adjust_base_to_offset (rtx base, int offset)
5923 {
5924   /* ??? This might be optimized a little as %g1 might already have a
5925      value close enough that a single add insn will do.  */
5926   /* ??? Although, all of this is probably only a temporary fix because
5927      if %g1 can hold a function result, then sparc_expand_epilogue will
5928      lose (the result will be clobbered).  */
5929   rtx new_base = gen_rtx_REG (Pmode, 1);
5930   emit_move_insn (new_base, GEN_INT (offset));
5931   emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5932   return new_base;
5933 }
5934 
5935 /* Emit code to save/restore call-saved global and FP registers.  */
5936 
5937 static void
5938 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5939 {
5940   if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5941     {
5942       base = emit_adjust_base_to_offset (base, offset);
5943       offset = 0;
5944     }
5945 
5946   offset
5947     = emit_save_or_restore_regs (0, 8, base, offset, 0,
5948 				 save_global_or_fp_reg_p, action, SORR_NONE);
5949   emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5950 			     save_global_or_fp_reg_p, action, SORR_NONE);
5951 }
5952 
5953 /* Emit code to save/restore call-saved local and in registers.  */
5954 
5955 static void
5956 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5957 {
5958   if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5959     {
5960       base = emit_adjust_base_to_offset (base, offset);
5961       offset = 0;
5962     }
5963 
5964   emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5965 			     save_local_or_in_reg_p, action, SORR_ADVANCE);
5966 }
5967 
5968 /* Emit a window_save insn.  */
5969 
5970 static rtx_insn *
5971 emit_window_save (rtx increment)
5972 {
5973   rtx_insn *insn = emit_insn (gen_window_save (increment));
5974   RTX_FRAME_RELATED_P (insn) = 1;
5975 
5976   /* The incoming return address (%o7) is saved in %i7.  */
5977   add_reg_note (insn, REG_CFA_REGISTER,
5978 		gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5979 			     gen_rtx_REG (Pmode,
5980 					  INCOMING_RETURN_ADDR_REGNUM)));
5981 
5982   /* The window save event.  */
5983   add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5984 
5985   /* The CFA is %fp, the hard frame pointer.  */
5986   add_reg_note (insn, REG_CFA_DEF_CFA,
5987 		plus_constant (Pmode, hard_frame_pointer_rtx,
5988 			       INCOMING_FRAME_SP_OFFSET));
5989 
5990   return insn;
5991 }
5992 
5993 /* Generate an increment for the stack pointer.  */
5994 
5995 static rtx
5996 gen_stack_pointer_inc (rtx increment)
5997 {
5998   return gen_rtx_SET (stack_pointer_rtx,
5999 		      gen_rtx_PLUS (Pmode,
6000 				    stack_pointer_rtx,
6001 				    increment));
6002 }
6003 
6004 /* Expand the function prologue.  The prologue is responsible for reserving
6005    storage for the frame, saving the call-saved registers and loading the
6006    GOT register if needed.  */
6007 
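/* For illustration, a sketch of the typical code emitted here for a
   non-leaf function with a small frame (size <= 4096):

       save	%sp, -SIZE, %sp

   whereas a leaf function merely decrements the stack pointer:

       add	%sp, -SIZE, %sp

   Larger frames need one or two extra insns to build the offset.  */
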
6008 void
6009 sparc_expand_prologue (void)
6010 {
6011   HOST_WIDE_INT size;
6012   rtx_insn *insn;
6013 
6014   /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
6015      on the final value of the flag means deferring the prologue/epilogue
6016      expansion until just before the second scheduling pass, which is too
6017      late to emit multiple epilogues or return insns.
6018 
6019      Of course we are making the assumption that the value of the flag
6020      will not change between now and its final value.  Of the three parts
6021      of the formula, only the last one can reasonably vary.  Let's take a
6022      closer look, after assuming that the first two ones are set to true
6023      (otherwise the last value is effectively silenced).
6024 
6025      If only_leaf_regs_used returns false, the global predicate will also
6026      be false so the actual frame size calculated below will be positive.
6027      As a consequence, the save_register_window insn will be emitted in
6028      the instruction stream; now this insn explicitly references %fp
6029      which is not a leaf register so only_leaf_regs_used will always
6030      return false subsequently.
6031 
6032      If only_leaf_regs_used returns true, we hope that the subsequent
6033      optimization passes won't cause non-leaf registers to pop up.  For
6034      example, the regrename pass has special provisions to not rename to
6035      non-leaf registers in a leaf function.  */
6036   sparc_leaf_function_p
6037     = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
6038 
6039   size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6040 
6041   if (flag_stack_usage_info)
6042     current_function_static_stack_size = size;
6043 
6044   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6045       || flag_stack_clash_protection)
6046     {
6047       if (crtl->is_leaf && !cfun->calls_alloca)
6048 	{
6049 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6050 	    sparc_emit_probe_stack_range (get_stack_check_protect (),
6051 					  size - get_stack_check_protect ());
6052 	}
6053       else if (size > 0)
6054 	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6055     }
6056 
6057   if (size == 0)
6058     ; /* do nothing.  */
6059   else if (sparc_leaf_function_p)
6060     {
6061       rtx size_int_rtx = GEN_INT (-size);
6062 
6063       if (size <= 4096)
6064 	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6065       else if (size <= 8192)
6066 	{
6067 	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6068 	  RTX_FRAME_RELATED_P (insn) = 1;
6069 
6070 	  /* %sp is still the CFA register.  */
6071 	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6072 	}
6073       else
6074 	{
6075 	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
6076 	  emit_move_insn (size_rtx, size_int_rtx);
6077 	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6078 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6079 			gen_stack_pointer_inc (size_int_rtx));
6080 	}
6081 
6082       RTX_FRAME_RELATED_P (insn) = 1;
6083     }
6084   else
6085     {
6086       rtx size_int_rtx = GEN_INT (-size);
6087 
6088       if (size <= 4096)
6089 	emit_window_save (size_int_rtx);
6090       else if (size <= 8192)
6091 	{
6092 	  emit_window_save (GEN_INT (-4096));
6093 
6094 	  /* %sp is not the CFA register anymore.  */
6095 	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6096 
6097 	  /* Make sure no %fp-based store is issued until after the frame is
6098 	     established.  The offset between the frame pointer and the stack
6099 	     pointer is calculated relative to the value of the stack pointer
6100 	     at the end of the function prologue, and moving instructions that
6101 	     access the stack via the frame pointer between the instructions
6102 	     that decrement the stack pointer could result in accessing the
6103 	     register window save area, which is volatile.  */
6104 	  emit_insn (gen_frame_blockage ());
6105 	}
6106       else
6107 	{
6108 	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
6109 	  emit_move_insn (size_rtx, size_int_rtx);
6110 	  emit_window_save (size_rtx);
6111 	}
6112     }
6113 
6114   if (sparc_leaf_function_p)
6115     {
6116       sparc_frame_base_reg = stack_pointer_rtx;
6117       sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6118     }
6119   else
6120     {
6121       sparc_frame_base_reg = hard_frame_pointer_rtx;
6122       sparc_frame_base_offset = SPARC_STACK_BIAS;
6123     }
6124 
6125   if (sparc_n_global_fp_regs > 0)
6126     emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6127 				         sparc_frame_base_offset
6128 					   - sparc_apparent_frame_size,
6129 					 SORR_SAVE);
6130 
6131   /* Advertise that the data calculated just above are now valid.  */
6132   sparc_prologue_data_valid_p = true;
6133 }
6134 
6135 /* Expand the function prologue in the flat window model.  The prologue is
6136    responsible for reserving storage for the frame, saving the call-saved
6137    registers and loading the GOT register if needed.  */
6138 
6139 void
6140 sparc_flat_expand_prologue (void)
6141 {
6142   HOST_WIDE_INT size;
6143   rtx_insn *insn;
6144 
6145   sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6146 
6147   size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6148 
6149   if (flag_stack_usage_info)
6150     current_function_static_stack_size = size;
6151 
6152   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6153       || flag_stack_clash_protection)
6154     {
6155       if (crtl->is_leaf && !cfun->calls_alloca)
6156 	{
6157 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6158 	    sparc_emit_probe_stack_range (get_stack_check_protect (),
6159 					  size - get_stack_check_protect ());
6160 	}
6161       else if (size > 0)
6162 	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6163     }
6164 
6165   if (sparc_save_local_in_regs_p)
6166     emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6167 					SORR_SAVE);
6168 
6169   if (size == 0)
6170     ; /* do nothing.  */
6171   else
6172     {
6173       rtx size_int_rtx, size_rtx;
6174 
6175       size_rtx = size_int_rtx = GEN_INT (-size);
6176 
6177       /* We establish the frame (i.e. decrement the stack pointer) first, even
6178 	 if we use a frame pointer, because we cannot clobber any call-saved
6179 	 registers, including the frame pointer, if we haven't created a new
6180 	 register save area, for the sake of compatibility with the ABI.  */
6181       if (size <= 4096)
6182 	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6183       else if (size <= 8192 && !frame_pointer_needed)
6184 	{
6185 	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6186 	  RTX_FRAME_RELATED_P (insn) = 1;
6187 	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6188 	}
6189       else
6190 	{
6191 	  size_rtx = gen_rtx_REG (Pmode, 1);
6192 	  emit_move_insn (size_rtx, size_int_rtx);
6193 	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6194 	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
6195 			gen_stack_pointer_inc (size_int_rtx));
6196 	}
6197       RTX_FRAME_RELATED_P (insn) = 1;
6198 
6199       /* Ensure nothing is scheduled until after the frame is established.  */
6200       emit_insn (gen_blockage ());
6201 
6202       if (frame_pointer_needed)
6203 	{
6204 	  insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6205 					 gen_rtx_MINUS (Pmode,
6206 							stack_pointer_rtx,
6207 							size_rtx)));
6208 	  RTX_FRAME_RELATED_P (insn) = 1;
6209 
6210 	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
6211 			gen_rtx_SET (hard_frame_pointer_rtx,
6212 				     plus_constant (Pmode, stack_pointer_rtx,
6213 						    size)));
6214 	}
6215 
6216       if (return_addr_reg_needed_p (sparc_leaf_function_p))
6217 	{
6218 	  rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6219 	  rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6220 
6221 	  insn = emit_move_insn (i7, o7);
6222 	  RTX_FRAME_RELATED_P (insn) = 1;
6223 
6224 	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6225 
6226 	  /* Prevent this instruction from ever being considered dead,
6227 	     even if this function has no epilogue.  */
6228 	  emit_use (i7);
6229 	}
6230     }
6231 
6232   if (frame_pointer_needed)
6233     {
6234       sparc_frame_base_reg = hard_frame_pointer_rtx;
6235       sparc_frame_base_offset = SPARC_STACK_BIAS;
6236     }
6237   else
6238     {
6239       sparc_frame_base_reg = stack_pointer_rtx;
6240       sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6241     }
6242 
6243   if (sparc_n_global_fp_regs > 0)
6244     emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6245 				         sparc_frame_base_offset
6246 					   - sparc_apparent_frame_size,
6247 					 SORR_SAVE);
6248 
6249   /* Advertise that the data calculated just above are now valid.  */
6250   sparc_prologue_data_valid_p = true;
6251 }
6252 
6253 /* This function generates the assembly code for function entry, which boils
6254    down to emitting the necessary .register directives.  */
6255 
6256 static void
6257 sparc_asm_function_prologue (FILE *file)
6258 {
6259   /* Check that the assumption we made in sparc_expand_prologue is valid.  */
6260   if (!TARGET_FLAT)
6261     gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6262 
6263   sparc_output_scratch_registers (file);
6264 }
6265 
6266 /* Expand the function epilogue, either normal or part of a sibcall.
6267    We emit all the instructions except the return or the call.  */
6268 
6269 void
6270 sparc_expand_epilogue (bool for_eh)
6271 {
6272   HOST_WIDE_INT size = sparc_frame_size;
6273 
6274   if (cfun->calls_alloca)
6275     emit_insn (gen_frame_blockage ());
6276 
6277   if (sparc_n_global_fp_regs > 0)
6278     emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6279 				         sparc_frame_base_offset
6280 					   - sparc_apparent_frame_size,
6281 					 SORR_RESTORE);
6282 
6283   if (size == 0 || for_eh)
6284     ; /* do nothing.  */
6285   else if (sparc_leaf_function_p)
6286     {
6287       if (size <= 4096)
6288 	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6289       else if (size <= 8192)
6290 	{
6291 	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6292 	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6293 	}
6294       else
6295 	{
6296 	  rtx reg = gen_rtx_REG (Pmode, 1);
6297 	  emit_move_insn (reg, GEN_INT (size));
6298 	  emit_insn (gen_stack_pointer_inc (reg));
6299 	}
6300     }
6301 }
6302 
6303 /* Expand the function epilogue in the flat window model, either normal or
6304    part of a sibcall.  We emit all the instructions except the return or the call.  */
6305 
6306 void
6307 sparc_flat_expand_epilogue (bool for_eh)
6308 {
6309   HOST_WIDE_INT size = sparc_frame_size;
6310 
6311   if (sparc_n_global_fp_regs > 0)
6312     emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6313 				         sparc_frame_base_offset
6314 					   - sparc_apparent_frame_size,
6315 					 SORR_RESTORE);
6316 
6317   /* If we have a frame pointer, we'll need both to restore it before the
6318      frame is destroyed and to use its current value while destroying the frame.
6319      Since we don't have an atomic way to do that in the flat window model,
6320      we save the current value into a temporary register (%g1).  */
6321   if (frame_pointer_needed && !for_eh)
6322     emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6323 
6324   if (return_addr_reg_needed_p (sparc_leaf_function_p))
6325     emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6326 		    gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6327 
6328   if (sparc_save_local_in_regs_p)
6329     emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6330 					sparc_frame_base_offset,
6331 					SORR_RESTORE);
6332 
6333   if (size == 0 || for_eh)
6334     ; /* do nothing.  */
6335   else if (frame_pointer_needed)
6336     {
6337       /* Make sure the frame is destroyed after everything else is done.  */
6338       emit_insn (gen_blockage ());
6339 
6340       emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6341     }
6342   else
6343     {
6344       /* Likewise.  */
6345       emit_insn (gen_blockage ());
6346 
6347       if (size <= 4096)
6348 	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6349       else if (size <= 8192)
6350 	{
6351 	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6352 	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6353 	}
6354       else
6355 	{
6356 	  rtx reg = gen_rtx_REG (Pmode, 1);
6357 	  emit_move_insn (reg, GEN_INT (size));
6358 	  emit_insn (gen_stack_pointer_inc (reg));
6359 	}
6360     }
6361 }
6362 
6363 /* Return true if it is appropriate to emit `return' instructions in the
6364    body of a function.  */
6365 
6366 bool
6367 sparc_can_use_return_insn_p (void)
6368 {
6369   return sparc_prologue_data_valid_p
6370 	 && sparc_n_global_fp_regs == 0
6371 	 && TARGET_FLAT
6372 	    ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6373 	    : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6374 }
6375 
6376 /* This function generates the assembly code for function exit.  */
6377 
6378 static void
6379 sparc_asm_function_epilogue (FILE *file)
6380 {
6381   /* If the last two instructions of a function are "call foo; dslot;"
6382      the return address might point to the first instruction in the next
6383      function and we have to output a dummy nop for the sake of sane
6384      backtraces in such cases.  This is pointless for sibling calls since
6385      the return address is explicitly adjusted.  */
6386 
6387   rtx_insn *insn = get_last_insn ();
6388 
6389   rtx last_real_insn = prev_real_insn (insn);
6390   if (last_real_insn
6391       && NONJUMP_INSN_P (last_real_insn)
6392       && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6393     last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6394 
6395   if (last_real_insn
6396       && CALL_P (last_real_insn)
6397       && !SIBLING_CALL_P (last_real_insn))
6398     fputs("\tnop\n", file);
6399 
6400   sparc_output_deferred_case_vectors ();
6401 }
6402 
6403 /* Output a 'restore' instruction.  */
6404 
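/* For illustration, a sketch of the PLUS case below: a pending
   delay-slot insn (set (reg %i0) (plus (reg %i0) (reg %i1))) is
   combined into

       restore	%i0, %i1, %o0

   i.e. the addition is performed by the restore itself, the %Y output
   modifier renaming the destination into the caller's window.  */
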
6405 static void
6406 output_restore (rtx pat)
6407 {
6408   rtx operands[3];
6409 
6410   if (! pat)
6411     {
6412       fputs ("\t restore\n", asm_out_file);
6413       return;
6414     }
6415 
6416   gcc_assert (GET_CODE (pat) == SET);
6417 
6418   operands[0] = SET_DEST (pat);
6419   pat = SET_SRC (pat);
6420 
6421   switch (GET_CODE (pat))
6422     {
6423       case PLUS:
6424 	operands[1] = XEXP (pat, 0);
6425 	operands[2] = XEXP (pat, 1);
6426 	output_asm_insn (" restore %r1, %2, %Y0", operands);
6427 	break;
6428       case LO_SUM:
6429 	operands[1] = XEXP (pat, 0);
6430 	operands[2] = XEXP (pat, 1);
6431 	output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6432 	break;
6433       case ASHIFT:
6434 	operands[1] = XEXP (pat, 0);
6435 	gcc_assert (XEXP (pat, 1) == const1_rtx);
6436 	output_asm_insn (" restore %r1, %r1, %Y0", operands);
6437 	break;
6438       default:
6439 	operands[1] = pat;
6440 	output_asm_insn (" restore %%g0, %1, %Y0", operands);
6441 	break;
6442     }
6443 }
6444 
6445 /* Output a return.  */
6446 
6447 const char *
6448 output_return (rtx_insn *insn)
6449 {
6450   if (crtl->calls_eh_return)
6451     {
6452       /* If the function uses __builtin_eh_return, the eh_return
6453 	 machinery occupies the delay slot.  */
6454       gcc_assert (!final_sequence);
6455 
6456       if (flag_delayed_branch)
6457 	{
6458 	  if (!TARGET_FLAT && TARGET_V9)
6459 	    fputs ("\treturn\t%i7+8\n", asm_out_file);
6460 	  else
6461 	    {
6462 	      if (!TARGET_FLAT)
6463 		fputs ("\trestore\n", asm_out_file);
6464 
6465 	      fputs ("\tjmp\t%o7+8\n", asm_out_file);
6466 	    }
6467 
6468 	  fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6469 	}
6470       else
6471 	{
6472 	  if (!TARGET_FLAT)
6473 	    fputs ("\trestore\n", asm_out_file);
6474 
6475 	  fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6476 	  fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6477 	}
6478     }
6479   else if (sparc_leaf_function_p || TARGET_FLAT)
6480     {
6481       /* This is a leaf or flat function so we don't have to bother restoring
6482 	 the register window, which frees us from dealing with the convoluted
6483 	 semantics of restore/return.  We simply output the jump to the
6484 	 return address and the insn in the delay slot (if any).  */
6485 
6486       return "jmp\t%%o7+%)%#";
6487     }
6488   else
6489     {
6490       /* This is a regular function so we have to restore the register window.
6491 	 We may have a pending insn for the delay slot, which will be either
6492 	 combined with the 'restore' instruction or put in the delay slot of
6493 	 the 'return' instruction.  */
6494 
6495       if (final_sequence)
6496 	{
6497 	  rtx_insn *delay;
6498 	  rtx pat;
6499 
6500 	  delay = NEXT_INSN (insn);
6501 	  gcc_assert (delay);
6502 
6503 	  pat = PATTERN (delay);
6504 
6505 	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6506 	    {
6507 	      epilogue_renumber (&pat, 0);
6508 	      return "return\t%%i7+%)%#";
6509 	    }
6510 	  else
6511 	    {
6512 	      output_asm_insn ("jmp\t%%i7+%)", NULL);
6513 
6514 	      /* We're going to output the insn in the delay slot manually.
6515 		 Make sure to output its source location first.  */
6516 	      PATTERN (delay) = gen_blockage ();
6517 	      INSN_CODE (delay) = -1;
6518 	      final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6519 	      INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6520 
6521 	      output_restore (pat);
6522 	    }
6523 	}
6524       else
6525         {
6526 	  /* The delay slot is empty.  */
6527 	  if (TARGET_V9)
6528 	    return "return\t%%i7+%)\n\t nop";
6529 	  else if (flag_delayed_branch)
6530 	    return "jmp\t%%i7+%)\n\t restore";
6531 	  else
6532 	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
6533 	}
6534     }
6535 
6536   return "";
6537 }
6538 
6539 /* Output a sibling call.  */
6540 
6541 const char *
6542 output_sibcall (rtx_insn *insn, rtx call_operand)
6543 {
6544   rtx operands[1];
6545 
6546   gcc_assert (flag_delayed_branch);
6547 
6548   operands[0] = call_operand;
6549 
6550   if (sparc_leaf_function_p || TARGET_FLAT)
6551     {
6552       /* This is a leaf or flat function so we don't have to bother restoring
6553 	 the register window.  We simply output the jump to the function and
6554 	 the insn in the delay slot (if any).  */
6555 
6556       gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6557 
6558       if (final_sequence)
6559 	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6560 			 operands);
6561       else
6562 	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6563 	   it into a branch if possible.  */
6564 	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6565 			 operands);
6566     }
6567   else
6568     {
6569       /* This is a regular function so we have to restore the register window.
6570 	 We may have a pending insn for the delay slot, which will be combined
6571 	 with the 'restore' instruction.  */
6572 
6573       output_asm_insn ("call\t%a0, 0", operands);
6574 
6575       if (final_sequence)
6576 	{
6577 	  rtx_insn *delay;
6578 	  rtx pat;
6579 
6580 	  delay = NEXT_INSN (insn);
6581 	  gcc_assert (delay);
6582 
6583 	  pat = PATTERN (delay);
6584 
6585 	  /* We're going to output the insn in the delay slot manually.
6586 	     Make sure to output its source location first.  */
6587 	  PATTERN (delay) = gen_blockage ();
6588 	  INSN_CODE (delay) = -1;
6589 	  final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6590 	  INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6591 
6592 	  output_restore (pat);
6593 	}
6594       else
6595 	output_restore (NULL_RTX);
6596     }
6597 
6598   return "";
6599 }
6600 
6601 /* Functions for handling argument passing.
6602 
6603    For 32-bit, the first 6 args are normally in registers and the rest are
6604    pushed.  Any arg that starts within the first 6 words is at least
6605    partially passed in a register unless its data type forbids it.
6606 
6607    For 64-bit, the argument registers are laid out as an array of 16 elements
6608    and arguments are added sequentially.  The first 6 int args and up to the
6609    first 16 fp args (depending on size) are passed in regs.
6610 
6611    Slot    Stack   Integral   Float   Float in structure   Double   Long Double
6612    ----    -----   --------   -----   ------------------   ------   -----------
6613     15   [SP+248]              %f31       %f30,%f31         %d30
6614     14   [SP+240]              %f29       %f28,%f29         %d28       %q28
6615     13   [SP+232]              %f27       %f26,%f27         %d26
6616     12   [SP+224]              %f25       %f24,%f25         %d24       %q24
6617     11   [SP+216]              %f23       %f22,%f23         %d22
6618     10   [SP+208]              %f21       %f20,%f21         %d20       %q20
6619      9   [SP+200]              %f19       %f18,%f19         %d18
6620      8   [SP+192]              %f17       %f16,%f17         %d16       %q16
6621      7   [SP+184]              %f15       %f14,%f15         %d14
6622      6   [SP+176]              %f13       %f12,%f13         %d12       %q12
6623      5   [SP+168]     %o5      %f11       %f10,%f11         %d10
6624      4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
6625      3   [SP+152]     %o3       %f7        %f6,%f7           %d6
6626      2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
6627      1   [SP+136]     %o1       %f3        %f2,%f3           %d2
6628      0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
6629 
6630    Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6631 
6632    Integral arguments are always passed as 64-bit quantities appropriately
6633    extended.
6634 
6635    Passing of floating point values is handled as follows.
6636    If a prototype is in scope:
6637      If the value is a named argument (i.e. not part of the `...' of a
6638      stdarg function) then the value is passed in the appropriate
6639      fp reg.
6640      If the value is part of the `...' and is passed in one of the first 6
6641      slots then the value is passed in the appropriate int reg.
6642      If the value is part of the `...' and is not passed in one of the first 6
6643      slots then the value is passed in memory.
6644    If a prototype is not in scope:
6645      If the value is one of the first 6 arguments the value is passed in the
6646      appropriate integer reg and the appropriate fp reg.
6647      If the value is not one of the first 6 arguments the value is passed in
6648      the appropriate fp reg and in memory.
6649 
6650 
6651    Summary of the calling conventions implemented by GCC on the SPARC:
6652 
6653    32-bit ABI:
6654                                 size      argument     return value
6655 
6656       small integer              <4       int. reg.      int. reg.
6657       word                        4       int. reg.      int. reg.
6658       double word                 8       int. reg.      int. reg.
6659 
6660       _Complex small integer     <8       int. reg.      int. reg.
6661       _Complex word               8       int. reg.      int. reg.
6662       _Complex double word       16        memory        int. reg.
6663 
6664       vector integer            <=8       int. reg.       FP reg.
6665       vector integer             >8        memory         memory
6666 
6667       float                       4       int. reg.       FP reg.
6668       double                      8       int. reg.       FP reg.
6669       long double                16        memory         memory
6670 
6671       _Complex float              8        memory         FP reg.
6672       _Complex double            16        memory         FP reg.
6673       _Complex long double       32        memory         FP reg.
6674 
6675       vector float              any        memory         memory
6676 
6677       aggregate                 any        memory         memory
6678 
6679 
6680 
6681     64-bit ABI:
6682                                 size      argument     return value
6683 
6684       small integer              <8       int. reg.      int. reg.
6685       word                        8       int. reg.      int. reg.
6686       double word                16       int. reg.      int. reg.
6687 
6688       _Complex small integer    <16       int. reg.      int. reg.
6689       _Complex word              16       int. reg.      int. reg.
6690       _Complex double word       32        memory        int. reg.
6691 
6692       vector integer           <=16        FP reg.        FP reg.
6693       vector integer       16<s<=32        memory         FP reg.
6694       vector integer            >32        memory         memory
6695 
6696       float                       4        FP reg.        FP reg.
6697       double                      8        FP reg.        FP reg.
6698       long double                16        FP reg.        FP reg.
6699 
6700       _Complex float              8        FP reg.        FP reg.
6701       _Complex double            16        FP reg.        FP reg.
6702       _Complex long double       32        memory         FP reg.
6703 
6704       vector float             <=16        FP reg.        FP reg.
6705       vector float         16<s<=32        memory         FP reg.
6706       vector float              >32        memory         memory
6707 
6708       aggregate                <=16         reg.           reg.
6709       aggregate            16<s<=32        memory          reg.
6710       aggregate                 >32        memory         memory
6711 
6712 
6713 
6714 Note #1: complex floating-point types follow the extended SPARC ABIs as
6715 implemented by the Sun compiler.
6716 
6717 Note #2: integer vector types follow the scalar floating-point types
6718 conventions to match what is implemented by the Sun VIS SDK.
6719 
6720 Note #3: floating-point vector types follow the aggregate types
6721 conventions.  */
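
/* For illustration (an editorial sketch, not authoritative): applying the
   64-bit tables above to a prototyped call

     void f (int i, double d, float s);

   I lands in slot 0 and is passed in %o0 (extended to 64 bits), D lands in
   slot 1 and is passed in %d2, and S lands in slot 2 and is passed in %f5,
   right-justified in the upper half of the %d4 slot.  */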
6722 
6723 
6724 /* Maximum number of int regs for args.  */
6725 #define SPARC_INT_ARG_MAX 6
6726 /* Maximum number of fp regs for args.  */
6727 #define SPARC_FP_ARG_MAX 16
6728 /* Number of words (partially) occupied for a given size in units.  */
6729 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
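
/* For illustration (editorial note): with UNITS_PER_WORD == 8 under the
   64-bit ABI, CEIL_NWORDS (12) and CEIL_NWORDS (16) are both 2, i.e. a
   12-byte object still occupies two argument slots.  */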
6730 
6731 /* Handle the INIT_CUMULATIVE_ARGS macro.
6732    Initialize a variable CUM of type CUMULATIVE_ARGS
6733    for a call to a function whose data type is FNTYPE.
6734    For a library call, FNTYPE is 0.  */
6735 
6736 void
6737 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6738 {
6739   cum->words = 0;
6740   cum->prototype_p = fntype && prototype_p (fntype);
6741   cum->libcall_p = !fntype;
6742 }
6743 
6744 /* Handle promotion of pointer and integer arguments.  */
6745 
6746 static machine_mode
6747 sparc_promote_function_mode (const_tree type, machine_mode mode,
6748 			     int *punsignedp, const_tree, int)
6749 {
6750   if (type && POINTER_TYPE_P (type))
6751     {
6752       *punsignedp = POINTERS_EXTEND_UNSIGNED;
6753       return Pmode;
6754     }
6755 
6756   /* Integral arguments are passed as full words, as per the ABI.  */
6757   if (GET_MODE_CLASS (mode) == MODE_INT
6758       && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6759     return word_mode;
6760 
6761   return mode;
6762 }
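
/* For illustration (editorial sketch): a `short' argument is therefore
   promoted to word_mode (SImode for -m32, DImode for -m64), while a
   pointer is promoted to Pmode with *PUNSIGNEDP set from
   POINTERS_EXTEND_UNSIGNED.  */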
6763 
6764 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.  */
6765 
6766 static bool
6767 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6768 {
6769   return TARGET_ARCH64;
6770 }
6771 
6772 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6773    Specify whether to pass the argument by reference.  */
6774 
6775 static bool
6776 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6777 			 machine_mode mode, const_tree type,
6778 			 bool named ATTRIBUTE_UNUSED)
6779 {
6780   if (TARGET_ARCH32)
6781     /* Original SPARC 32-bit ABI says that structures and unions,
6782        and quad-precision floats are passed by reference.
6783        All other base types are passed in registers.
6784 
6785        Extended ABI (as implemented by the Sun compiler) says that all
6786        complex floats are passed by reference.  Pass complex integers
6787        in registers up to 8 bytes.  More generally, enforce the 2-word
6788        cap for passing arguments in registers.
6789 
6790        Vector ABI (as implemented by the Sun VIS SDK) says that integer
6791        vectors are passed like floats of the same size, that is in
6792        registers up to 8 bytes.  Pass all vector floats by reference
6793        like structure and unions.  */
6794     return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6795 	    || mode == SCmode
6796 	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
6797 	    || GET_MODE_SIZE (mode) > 8
6798 	    || (type
6799 		&& VECTOR_TYPE_P (type)
6800 		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6801   else
6802     /* Original SPARC 64-bit ABI says that structures and unions
6803        smaller than 16 bytes are passed in registers, as well as
6804        all other base types.
6805 
6806        Extended ABI (as implemented by the Sun compiler) says that
6807        complex floats are passed in registers up to 16 bytes.  Pass
6808        all complex integers in registers up to 16 bytes.  More generally,
6809        enforce the 2-word cap for passing arguments in registers.
6810 
6811        Vector ABI (as implemented by the Sun VIS SDK) says that integer
6812        vectors are passed like floats of the same size, that is in
6813        registers (up to 16 bytes).  Pass all vector floats like structure
6814        and unions.  */
6815     return ((type
6816 	     && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6817 	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6818 	    /* Catch CTImode and TCmode.  */
6819 	    || GET_MODE_SIZE (mode) > 16);
6820 }
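
/* For illustration (editorial sketch, assuming the usual type sizes):
   under -m32, `long double' (TFmode, 16 bytes) and `_Complex float'
   (SCmode) are passed by reference, while `long long' (8 bytes) stays in
   registers; under -m64, only objects larger than 16 bytes, e.g. a 24-byte
   structure or `_Complex long double' (TCmode), go by reference.  */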
6821 
6822 /* Traverse the record TYPE recursively and call FUNC on its fields.
6823    NAMED is true if this is for a named parameter.  DATA is passed
6824    to FUNC for each field.  OFFSET is the starting position and
6825    PACKED is true if we are inside a packed record.  */
6826 
6827 template <typename T, void Func (const_tree, int, bool, T*)>
6828 static void
6829 traverse_record_type (const_tree type, bool named, T *data,
6830 		      int offset = 0, bool packed = false)
6831 {
6832   /* The ABI obviously doesn't specify how packed structures are passed.
6833      These are passed in integer regs if possible, otherwise memory.  */
6834   if (!packed)
6835     for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6836       if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6837 	{
6838 	  packed = true;
6839 	  break;
6840 	}
6841 
6842   /* Walk the real fields, but skip those with no size or a zero size.
6843      ??? Fields with variable offset are handled as having zero offset.  */
6844   for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6845     if (TREE_CODE (field) == FIELD_DECL)
6846       {
6847 	if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6848 	  continue;
6849 
6850 	int bitpos = offset;
6851 	if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6852 	  bitpos += int_bit_position (field);
6853 
6854 	tree field_type = TREE_TYPE (field);
6855 	if (TREE_CODE (field_type) == RECORD_TYPE)
6856 	  traverse_record_type<T, Func> (field_type, named, data, bitpos,
6857 					 packed);
6858 	else
6859 	  {
6860 	    const bool fp_type
6861 	      = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6862 	    Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6863 		  data);
6864 	  }
6865       }
6866 }
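
/* For illustration (editorial sketch): for `struct { int i; float f; }'
   passed as a named argument with TARGET_FPU, the walk above invokes FUNC
   twice, at bit positions 0 and 32, with FP eligibility false for I and
   true for F; marking either field packed would force fp == false for
   both calls.  */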
6867 
6868 /* Handle recursive register classifying for structure layout.  */
6869 
6870 typedef struct
6871 {
6872   bool fp_regs;		/* true if a field is eligible for FP registers.  */
6873   bool fp_regs_in_first_word;	/* true if such a field is in the first word.  */
6874 } classify_data_t;
6875 
6876 /* A subroutine of function_arg_slotno.  Classify the field.  */
6877 
6878 inline void
6879 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6880 {
6881   if (fp)
6882     {
6883       data->fp_regs = true;
6884       if (bitpos < BITS_PER_WORD)
6885 	data->fp_regs_in_first_word = true;
6886     }
6887 }
6888 
6889 /* Compute the slot number to pass an argument in.
6890    Return the slot number or -1 if passing on the stack.
6891 
6892    CUM is a variable of type CUMULATIVE_ARGS which gives info about
6893     the preceding args and about the function being called.
6894    MODE is the argument's machine mode.
6895    TYPE is the data type of the argument (as a tree).
6896     This is null for libcalls where that information may
6897     not be available.
6898    NAMED is nonzero if this argument is a named parameter
6899     (otherwise it is an extra parameter matching an ellipsis).
6900    INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6901    *PREGNO records the register number to use if scalar type.
6902    *PPADDING records the amount of padding needed in words.  */
6903 
6904 static int
6905 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6906 		     const_tree type, bool named, bool incoming,
6907 		     int *pregno, int *ppadding)
6908 {
6909   const int regbase
6910     = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6911   int slotno = cum->words, regno;
6912   enum mode_class mclass = GET_MODE_CLASS (mode);
6913 
6914   /* Silence warnings in the callers.  */
6915   *pregno = -1;
6916   *ppadding = -1;
6917 
6918   if (type && TREE_ADDRESSABLE (type))
6919     return -1;
6920 
6921   /* In 64-bit mode, objects requiring 16-byte alignment get it.  */
6922   if (TARGET_ARCH64
6923       && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6924       && (slotno & 1) != 0)
6925     {
6926       slotno++;
6927       *ppadding = 1;
6928     }
6929   else
6930     *ppadding = 0;
6931 
6932   /* Vector types deserve special treatment because they are polymorphic wrt
6933      their mode, depending upon whether VIS instructions are enabled.  */
6934   if (type && VECTOR_TYPE_P (type))
6935     {
6936       if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6937 	{
6938 	  /* The SPARC port defines no floating-point vector modes.  */
6939 	  gcc_assert (mode == BLKmode);
6940 	}
6941       else
6942 	{
6943 	  /* Integer vector types should either have a vector
6944 	     mode or an integral mode, because we are guaranteed
6945 	     by pass_by_reference that their size is not greater
6946 	     than 16 bytes and TImode is 16-byte wide.  */
6947 	  gcc_assert (mode != BLKmode);
6948 
6949 	  /* Integer vectors are handled like floats as per
6950 	     the Sun VIS SDK.  */
6951 	  mclass = MODE_FLOAT;
6952 	}
6953     }
6954 
6955   switch (mclass)
6956     {
6957     case MODE_FLOAT:
6958     case MODE_COMPLEX_FLOAT:
6959     case MODE_VECTOR_INT:
6960       if (TARGET_ARCH64 && TARGET_FPU && named)
6961 	{
6962 	  /* If all arg slots are filled, then must pass on stack.  */
6963 	  if (slotno >= SPARC_FP_ARG_MAX)
6964 	    return -1;
6965 
6966 	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
6967 	  /* Arguments filling only one single FP register are
6968 	     right-justified in the outer double FP register.  */
6969 	  if (GET_MODE_SIZE (mode) <= 4)
6970 	    regno++;
6971 	  break;
6972 	}
6973       /* fallthrough */
6974 
6975     case MODE_INT:
6976     case MODE_COMPLEX_INT:
6977       /* If all arg slots are filled, then must pass on stack.  */
6978       if (slotno >= SPARC_INT_ARG_MAX)
6979 	return -1;
6980 
6981       regno = regbase + slotno;
6982       break;
6983 
6984     case MODE_RANDOM:
6985       /* MODE is VOIDmode when generating the actual call.  */
6986       if (mode == VOIDmode)
6987 	return -1;
6988 
6989       if (TARGET_ARCH64 && TARGET_FPU && named
6990 	  && type
6991 	  && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
6992 	{
6993 	  /* If all arg slots are filled, then must pass on stack.  */
6994 	  if (slotno >= SPARC_FP_ARG_MAX)
6995 	    return -1;
6996 
6997 	  if (TREE_CODE (type) == RECORD_TYPE)
6998 	    {
6999 	      classify_data_t data = { false, false };
7000 	      traverse_record_type<classify_data_t, classify_registers>
7001 		(type, named, &data);
7002 
7003 	      if (data.fp_regs)
7004 		{
7005 		  /* If all FP slots are filled except for the last one and
7006 		     there is no FP field in the first word, then must pass
7007 		     on stack.  */
7008 		  if (slotno >= SPARC_FP_ARG_MAX - 1
7009 		      && !data.fp_regs_in_first_word)
7010 		    return -1;
7011 		}
7012 	      else
7013 		{
7014 		  /* If all int slots are filled, then must pass on stack.  */
7015 		  if (slotno >= SPARC_INT_ARG_MAX)
7016 		    return -1;
7017 		}
7018 
7019 	      /* PREGNO isn't set since both int and FP regs can be used.  */
7020 	      return slotno;
7021 	    }
7022 
7023 	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
7024 	}
7025       else
7026 	{
7027 	  /* If all arg slots are filled, then must pass on stack.  */
7028 	  if (slotno >= SPARC_INT_ARG_MAX)
7029 	    return -1;
7030 
7031 	  regno = regbase + slotno;
7032 	}
7033       break;
7034 
7035     default:
7036       gcc_unreachable ();
7037     }
7038 
7039   *pregno = regno;
7040   return slotno;
7041 }
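
/* For illustration (editorial sketch): in a prototyped 64-bit call, a
   third `double' argument preceded by two others gets slotno 2 and
   *PREGNO == SPARC_FP_ARG_FIRST + 4, i.e. the %d4 register pair; and if
   a 16-byte-aligned type would land at an odd slot, a padding word is
   first inserted as above.  */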
7042 
7043 /* Handle recursive register counting/assigning for structure layout.  */
7044 
7045 typedef struct
7046 {
7047   int slotno;		/* slot number of the argument.  */
7048   int regbase;		/* regno of the base register.  */
7049   int intoffset;	/* offset of the first pending integer field.  */
7050   int nregs;		/* number of words passed in registers.  */
7051   bool stack;		/* true if part of the argument is on the stack.  */
7052   rtx ret;		/* return expression being built.  */
7053 } assign_data_t;
7054 
7055 /* A subroutine of function_arg_record_value.  Compute the number of integer
7056    registers to be assigned between PARMS->intoffset and BITPOS.  Return
7057    true if at least one integer register is assigned or false otherwise.  */
7058 
7059 static bool
7060 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7061 {
7062   if (data->intoffset < 0)
7063     return false;
7064 
7065   const int intoffset = data->intoffset;
7066   data->intoffset = -1;
7067 
7068   const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7069   const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7070   const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7071   int nregs = (endbit - startbit) / BITS_PER_WORD;
7072 
7073   if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7074     {
7075       nregs = SPARC_INT_ARG_MAX - this_slotno;
7076 
7077       /* We need to pass this field (partly) on the stack.  */
7078       data->stack = true;
7079     }
7080 
7081   if (nregs <= 0)
7082     return false;
7083 
7084   *pnregs = nregs;
7085   return true;
7086 }
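
/* For illustration (editorial arithmetic check): with BITS_PER_WORD == 64,
   INTOFFSET == 0 and BITPOS == 128 yield startbit 0, endbit 128 and thus
   2 integer registers, capped at SPARC_INT_ARG_MAX - THIS_SLOTNO with the
   remainder spilled to the stack.  */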
7087 
7088 /* A subroutine of function_arg_record_value.  Compute the number and the mode
7089    of the FP registers to be assigned for FIELD.  Return true if at least one
7090    FP register is assigned or false otherwise.  */
7091 
7092 static bool
7093 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
7094 		   int *pnregs, machine_mode *pmode)
7095 {
7096   const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7097   machine_mode mode = DECL_MODE (field);
7098   int nregs, nslots;
7099 
7100   /* Slots are counted as words while regs are counted as having the size of
7101      the (inner) mode.  */
7102   if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
7103     {
7104       mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7105       nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
7106     }
7107   else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7108     {
7109       mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7110       nregs = 2;
7111     }
7112   else
7113     nregs = 1;
7114 
7115   nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7116 
7117   if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7118     {
7119       nslots = SPARC_FP_ARG_MAX - this_slotno;
7120       nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7121 
7122       /* We need to pass this field (partly) on the stack.  */
7123       data->stack = 1;
7124       data->stack = true;
7125       if (nregs <= 0)
7126 	return false;
7127     }
7128 
7129   *pnregs = nregs;
7130   *pmode = mode;
7131   return true;
7132 }
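
/* For illustration (editorial sketch): a `_Complex float' field has inner
   mode SFmode and NREGS == 2, so NSLOTS == CEIL_NWORDS (8) == 1; a BLKmode
   vector of 4 floats gets NREGS == 4 and NSLOTS == CEIL_NWORDS (16) == 2.  */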
7133 
7134 /* A subroutine of function_arg_record_value.  Count the number of registers
7135    to be assigned for FIELD and between PARMS->intoffset and BITPOS.  */
7136 
7137 inline void
7138 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7139 {
7140   if (fp)
7141     {
7142       int nregs;
7143       machine_mode mode;
7144 
7145       if (compute_int_layout (bitpos, data, &nregs))
7146 	data->nregs += nregs;
7147 
7148       if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7149 	data->nregs += nregs;
7150     }
7151   else
7152     {
7153       if (data->intoffset < 0)
7154 	data->intoffset = bitpos;
7155     }
7156 }
7157 
7158 /* A subroutine of function_arg_record_value.  Assign the bits of the
7159    structure between PARMS->intoffset and BITPOS to integer registers.  */
7160 
7161 static void
7162 assign_int_registers (int bitpos, assign_data_t *data)
7163 {
7164   int intoffset = data->intoffset;
7165   machine_mode mode;
7166   int nregs;
7167 
7168   if (!compute_int_layout (bitpos, data, &nregs))
7169     return;
7170 
7171   /* If this is the trailing part of a word, only load that much into
7172      the register.  Otherwise load the whole register.  Note that in
7173      the latter case we may pick up unwanted bits.  It's not a problem
7174      at the moment, but we may wish to revisit this.  */
7175   if (intoffset % BITS_PER_WORD != 0)
7176     mode = smallest_int_mode_for_size (BITS_PER_WORD
7177 				       - intoffset % BITS_PER_WORD);
7178   else
7179     mode = word_mode;
7180 
7181   const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7182   unsigned int regno = data->regbase + this_slotno;
7183   intoffset /= BITS_PER_UNIT;
7184 
7185   do
7186     {
7187       rtx reg = gen_rtx_REG (mode, regno);
7188       XVECEXP (data->ret, 0, data->stack + data->nregs)
7189 	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7190       data->nregs += 1;
7191       mode = word_mode;
7192       regno += 1;
7193       intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7194     }
7195   while (--nregs > 0);
7196 }
7197 
7198 /* A subroutine of function_arg_record_value.  Assign FIELD at position
7199    BITPOS to FP registers.  */
7200 
7201 static void
7202 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7203 {
7204   int nregs;
7205   machine_mode mode;
7206 
7207   if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7208     return;
7209 
7210   const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7211   int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7212   if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7213     regno++;
7214   int pos = bitpos / BITS_PER_UNIT;
7215 
7216   do
7217     {
7218       rtx reg = gen_rtx_REG (mode, regno);
7219       XVECEXP (data->ret, 0, data->stack + data->nregs)
7220 	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7221       data->nregs += 1;
7222       regno += GET_MODE_SIZE (mode) / 4;
7223       pos += GET_MODE_SIZE (mode);
7224     }
7225   while (--nregs > 0);
7226 }
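
/* For illustration (editorial sketch): a `double' field at bit position 64
   within an argument starting at slot 0 yields THIS_SLOTNO == 1 and regno
   SPARC_FP_ARG_FIRST + 2, i.e. %d2, recorded in the PARALLEL at byte
   offset 8; the regno += size / 4 step walks consecutive single-precision
   register numbers.  */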
7227 
7228 /* A subroutine of function_arg_record_value.  Assign FIELD and the bits of
7229    the structure between PARMS->intoffset and BITPOS to registers.  */
7230 
7231 inline void
7232 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7233 {
7234   if (fp)
7235     {
7236       assign_int_registers (bitpos, data);
7237 
7238       assign_fp_registers (field, bitpos, data);
7239     }
7240   else
7241     {
7242       if (data->intoffset < 0)
7243 	data->intoffset = bitpos;
7244     }
7245 }
7246 
7247 /* Used by function_arg and function_value to implement the complex
7248    conventions of the 64-bit ABI for passing and returning structures.
7249    Return an expression valid as a return value for the FUNCTION_ARG
7250    and TARGET_FUNCTION_VALUE.
7251 
7252    TYPE is the data type of the argument (as a tree).
7253     This is null for libcalls where that information may
7254     not be available.
7255    MODE is the argument's machine mode.
7256    SLOTNO is the index number of the argument's slot in the parameter array.
7257    NAMED is true if this argument is a named parameter
7258     (otherwise it is an extra parameter matching an ellipsis).
7259    REGBASE is the regno of the base register for the parameter array.  */
7260 
7261 static rtx
7262 function_arg_record_value (const_tree type, machine_mode mode,
7263 			   int slotno, bool named, int regbase)
7264 {
7265   const int size = int_size_in_bytes (type);
7266   assign_data_t data;
7267   int nregs;
7268 
7269   data.slotno = slotno;
7270   data.regbase = regbase;
7271 
7272   /* Count how many registers we need.  */
7273   data.nregs = 0;
7274   data.intoffset = 0;
7275   data.stack = false;
7276   traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7277 
7278   /* Take into account pending integer fields.  */
7279   if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7280     data.nregs += nregs;
7281 
7282   /* Allocate the vector and handle some annoying special cases.  */
7283   nregs = data.nregs;
7284 
7285   if (nregs == 0)
7286     {
7287       /* ??? Empty structure has no value?  Duh?  */
7288       if (size <= 0)
7289 	{
7290 	  /* Though there's nothing really to store, return a word register
7291 	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
7292 	     leads to breakage due to the fact that there are zero bytes to
7293 	     load.  */
7294 	  return gen_rtx_REG (mode, regbase);
7295 	}
7296 
7297       /* ??? C++ has structures with no fields, and yet a size.  Give up
7298 	 for now and pass everything back in integer registers.  */
7299       nregs = CEIL_NWORDS (size);
7300       if (nregs + slotno > SPARC_INT_ARG_MAX)
7301 	nregs = SPARC_INT_ARG_MAX - slotno;
7302     }
7303 
7304   gcc_assert (nregs > 0);
7305 
7306   data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7307 
7308   /* If at least one field must be passed on the stack, generate
7309      (parallel [(expr_list (nil) ...) ...]) so that all fields will
7310      also be passed on the stack.  We can't do much better because the
7311      semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7312      of structures for which the fields passed exclusively in registers
7313      are not at the beginning of the structure.  */
7314   if (data.stack)
7315     XVECEXP (data.ret, 0, 0)
7316       = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7317 
7318   /* Assign the registers.  */
7319   data.nregs = 0;
7320   data.intoffset = 0;
7321   traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7322 
7323   /* Assign pending integer fields.  */
7324   assign_int_registers (size * BITS_PER_UNIT, &data);
7325 
7326   gcc_assert (data.nregs == nregs);
7327 
7328   return data.ret;
7329 }
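
/* For illustration (editorial end-to-end sketch): `struct { long l;
   double d; }' passed named at slot 0 produces roughly

     (parallel [(expr_list (reg:DI %o0) (const_int 0))
		(expr_list (reg:DF %d2) (const_int 8))])

   combining the two assignment passes above; with DATA.stack set, a
   leading (expr_list (nil) (const_int 0)) forces the whole value onto the
   stack as well.  */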
7330 
7331 /* Used by function_arg and function_value to implement the conventions
7332    of the 64-bit ABI for passing and returning unions.
7333    Return an expression valid as a return value for the FUNCTION_ARG
7334    and TARGET_FUNCTION_VALUE.
7335 
7336    SIZE is the size in bytes of the union.
7337    MODE is the argument's machine mode.
7338    SLOTNO is the index number of the argument's slot in the parameter array.
7339    REGNO is the hard register the union will be passed in.  */
7340 
7341 static rtx
7342 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7343 {
7344   unsigned int nwords;
7345 
7346   /* See comment in function_arg_record_value for empty structures.  */
7347   if (size <= 0)
7348     return gen_rtx_REG (mode, regno);
7349 
7350   if (slotno == SPARC_INT_ARG_MAX - 1)
7351     nwords = 1;
7352   else
7353     nwords = CEIL_NWORDS (size);
7354 
7355   rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7356 
7357   /* Unions are passed left-justified.  */
7358   for (unsigned int i = 0; i < nwords; i++)
7359     XVECEXP (regs, 0, i)
7360       = gen_rtx_EXPR_LIST (VOIDmode,
7361 			   gen_rtx_REG (word_mode, regno + i),
7362 			   GEN_INT (UNITS_PER_WORD * i));
7363 
7364   return regs;
7365 }
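
/* For illustration (editorial sketch): a 16-byte union starting at slot 0
   becomes (parallel [(expr_list (reg:DI %o0) (const_int 0))
   (expr_list (reg:DI %o1) (const_int 8))]); starting at the last slot
   (SPARC_INT_ARG_MAX - 1), only the first word goes in a register and the
   rest travels on the stack.  */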
7366 
7367 /* Used by function_arg and function_value to implement the conventions
7368    of the 64-bit ABI for passing and returning BLKmode vectors.
7369    Return an expression valid as a return value for the FUNCTION_ARG
7370    and TARGET_FUNCTION_VALUE.
7371 
7372    SIZE is the size in bytes of the vector.
7373    SLOTNO is the index number of the argument's slot in the parameter array.
7374    NAMED is true if this argument is a named parameter
7375     (otherwise it is an extra parameter matching an ellipsis).
7376    REGNO is the hard register the vector will be passed in.  */
7377 
7378 static rtx
7379 function_arg_vector_value (int size, int slotno, bool named, int regno)
7380 {
7381   const int mult = (named ? 2 : 1);
7382   unsigned int nwords;
7383 
7384   if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7385     nwords = 1;
7386   else
7387     nwords = CEIL_NWORDS (size);
7388 
7389   rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7390 
7391   if (size < UNITS_PER_WORD)
7392     XVECEXP (regs, 0, 0)
7393       = gen_rtx_EXPR_LIST (VOIDmode,
7394 			   gen_rtx_REG (SImode, regno),
7395 			   const0_rtx);
7396   else
7397     for (unsigned int i = 0; i < nwords; i++)
7398       XVECEXP (regs, 0, i)
7399 	= gen_rtx_EXPR_LIST (VOIDmode,
7400 			     gen_rtx_REG (word_mode, regno + i * mult),
7401 			     GEN_INT (i * UNITS_PER_WORD));
7402 
7403   return regs;
7404 }
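
/* For illustration (editorial sketch): a named 16-byte BLKmode float
   vector starting in slot 1 is spread over two word registers REGNO and
   REGNO + 2 at byte offsets 0 and 8; MULT is 2 for named arguments so as
   to step over the odd FP register numbers.  */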
7405 
7406 /* Determine where to put an argument to a function.
7407    Value is zero to push the argument on the stack,
7408    or a hard register in which to store the argument.
7409 
7410    CUM is a variable of type CUMULATIVE_ARGS which gives info about
7411     the preceding args and about the function being called.
7412    MODE is the argument's machine mode.
7413    TYPE is the data type of the argument (as a tree).
7414     This is null for libcalls where that information may
7415     not be available.
7416    NAMED is true if this argument is a named parameter
7417     (otherwise it is an extra parameter matching an ellipsis).
7418    INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7419     TARGET_FUNCTION_INCOMING_ARG.  */
7420 
7421 static rtx
7422 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7423 		      const_tree type, bool named, bool incoming)
7424 {
7425   const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7426   const int regbase
7427     = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7428   int slotno, regno, padding;
7429   enum mode_class mclass = GET_MODE_CLASS (mode);
7430 
7431   slotno
7432     = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7433   if (slotno == -1)
7434     return 0;
7435 
7436   /* Integer vectors are handled like floats as per the Sun VIS SDK.  */
7437   if (type && VECTOR_INTEGER_TYPE_P (type))
7438     mclass = MODE_FLOAT;
7439 
7440   if (TARGET_ARCH32)
7441     return gen_rtx_REG (mode, regno);
7442 
7443   /* Structures up to 16 bytes in size are passed in arg slots on the stack
7444      and are promoted to registers if possible.  */
7445   if (type && TREE_CODE (type) == RECORD_TYPE)
7446     {
7447       const int size = int_size_in_bytes (type);
7448       gcc_assert (size <= 16);
7449 
7450       return function_arg_record_value (type, mode, slotno, named, regbase);
7451     }
7452 
7453   /* Unions up to 16 bytes in size are passed in integer registers.  */
7454   else if (type && TREE_CODE (type) == UNION_TYPE)
7455     {
7456       const int size = int_size_in_bytes (type);
7457       gcc_assert (size <= 16);
7458 
7459       return function_arg_union_value (size, mode, slotno, regno);
7460     }
7461 
7462   /* Floating-point vectors up to 16 bytes are passed in registers.  */
7463   else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7464     {
7465       const int size = int_size_in_bytes (type);
7466       gcc_assert (size <= 16);
7467 
7468       return function_arg_vector_value (size, slotno, named, regno);
7469     }
7470 
7471   /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7472      but also have the slot allocated for them.
7473      If no prototype is in scope fp values in register slots get passed
7474      in two places, either fp regs and int regs or fp regs and memory.  */
7475   else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7476 	   && SPARC_FP_REG_P (regno))
7477     {
7478       rtx reg = gen_rtx_REG (mode, regno);
7479       if (cum->prototype_p || cum->libcall_p)
7480 	return reg;
7481       else
7482 	{
7483 	  rtx v0, v1;
7484 
7485 	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7486 	    {
7487 	      int intreg;
7488 
7489 	      /* On incoming, we don't need to know that the value
7490 		 is passed in %f0 and %i0, and it confuses other parts
7491 		 causing needless spillage even on the simplest cases.  */
7492 	      if (incoming)
7493 		return reg;
7494 
7495 	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7496 			+ (regno - SPARC_FP_ARG_FIRST) / 2);
7497 
7498 	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7499 	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7500 				      const0_rtx);
7501 	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7502 	    }
7503 	  else
7504 	    {
7505 	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7506 	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7507 	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7508 	    }
7509 	}
7510     }
7511 
7512   /* All other aggregate types are passed in an integer register in a mode
7513      corresponding to the size of the type.  */
7514   else if (type && AGGREGATE_TYPE_P (type))
7515     {
7516       const int size = int_size_in_bytes (type);
7517       gcc_assert (size <= 16);
7518 
7519       mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7520     }
7521 
7522   return gen_rtx_REG (mode, regno);
7523 }
7524 
7525 /* Handle the TARGET_FUNCTION_ARG target hook.  */
7526 
7527 static rtx
7528 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7529 		    const_tree type, bool named)
7530 {
7531   return sparc_function_arg_1 (cum, mode, type, named, false);
7532 }
7533 
7534 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook.  */
7535 
7536 static rtx
7537 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7538 			     const_tree type, bool named)
7539 {
7540   return sparc_function_arg_1 (cum, mode, type, named, true);
7541 }
7542 
7543 /* For sparc64, objects requiring 16-byte alignment are passed that way.  */
7544 
7545 static unsigned int
7546 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7547 {
7548   return ((TARGET_ARCH64
7549 	   && (GET_MODE_ALIGNMENT (mode) == 128
7550 	       || (type && TYPE_ALIGN (type) == 128)))
7551 	  ? 128
7552 	  : PARM_BOUNDARY);
7553 }
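
/* For illustration (editorial sketch, assuming the usual PARM_BOUNDARY of
   64 bits for the 64-bit ABI): under -m64 a `long double' (TFmode, 128-bit
   alignment) is aligned to 128 bits in the parameter array, while an `int'
   falls back to PARM_BOUNDARY.  */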
7554 
7555 /* For an arg passed partly in registers and partly in memory,
7556    this is the number of bytes of registers used.
7557    For args passed entirely in registers or entirely in memory, zero.
7558 
7559    Any arg that starts in the first 6 regs but won't entirely fit in them
7560    needs partial registers on v8.  On v9, structures with integer
7561    values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7562    values in arg slots 5 and 6 will be passed in %o5 and SP+176, and complex fp
7563    mode] will be split between that reg and memory.  */
7564 
7565 static int
7566 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7567 			 tree type, bool named)
7568 {
7569   int slotno, regno, padding;
7570 
7571   /* We pass false for incoming here; it doesn't matter.  */
7572   slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7573 				false, &regno, &padding);
7574 
7575   if (slotno == -1)
7576     return 0;
7577 
7578   if (TARGET_ARCH32)
7579     {
7580       /* We are guaranteed by pass_by_reference that the size of the
7581 	 argument is not greater than 8 bytes, so we only need to return
7582 	 one word if the argument is partially passed in registers.  */
7583       const int size = GET_MODE_SIZE (mode);
7584 
7585       if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7586 	return UNITS_PER_WORD;
7587     }
7588   else
7589     {
7590       /* We are guaranteed by pass_by_reference that the size of the
7591 	 argument is not greater than 16 bytes, so we only need to return
7592 	 one word if the argument is partially passed in registers.  */
7593       if (type && AGGREGATE_TYPE_P (type))
7594 	{
7595 	  const int size = int_size_in_bytes (type);
7596 
7597 	  if (size > UNITS_PER_WORD
7598 	      && (slotno == SPARC_INT_ARG_MAX - 1
7599 		  || slotno == SPARC_FP_ARG_MAX - 1))
7600 	    return UNITS_PER_WORD;
7601 	}
7602       else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7603 	       || ((GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7604 		    || (type && VECTOR_TYPE_P (type)))
7605 		   && !(TARGET_FPU && named)))
7606 	{
7607 	  const int size = (type && VECTOR_FLOAT_TYPE_P (type))
7608 			   ? int_size_in_bytes (type)
7609 			   : GET_MODE_SIZE (mode);
7610 
7611 	  if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7612 	    return UNITS_PER_WORD;
7613 	}
7614       else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7615 	       || (type && VECTOR_TYPE_P (type)))
7616 	{
7617 	  const int size = (type && VECTOR_FLOAT_TYPE_P (type))
7618 			   ? int_size_in_bytes (type)
7619 			   : GET_MODE_SIZE (mode);
7620 
7621 	  if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7622 	    return UNITS_PER_WORD;
7623 	}
7624     }
7625 
7626   return 0;
7627 }
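
/* For illustration (editorial sketch): under -m32, a `long long' starting
   in the last integer slot (slot 5) returns UNITS_PER_WORD == 4 here: its
   first word travels in %o5 and its second on the stack, matching the
   split described in the comment above.  */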
7628 
7629 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7630    Update the data in CUM to advance over an argument
7631    of mode MODE and data type TYPE.
7632    TYPE is null for libcalls where that information may not be available.  */
7633 
7634 static void
7635 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7636 			    const_tree type, bool named)
7637 {
7638   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7639   int regno, padding;
7640 
7641   /* We pass false for incoming here; it doesn't matter.  */
7642   function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7643 
7644   /* If argument requires leading padding, add it.  */
7645   cum->words += padding;
7646 
7647   if (TARGET_ARCH32)
7648     cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7649   else
7650     {
7651       /* For types that can have BLKmode, get the size from the type.  */
7652       if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7653 	{
7654 	  const int size = int_size_in_bytes (type);
7655 
7656 	  /* See comment in function_arg_record_value for empty structures.  */
7657 	  if (size <= 0)
7658 	    cum->words++;
7659 	  else
7660 	    cum->words += CEIL_NWORDS (size);
7661 	}
7662       else
7663 	cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7664     }
7665 }
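
/* For illustration (editorial sketch): under -m64 an empty structure still
   advances CUM->words by one, a 12-byte structure by CEIL_NWORDS (12) == 2,
   and any preceding alignment hole adds PADDING on top.  */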
7666 
7667 /* Implement TARGET_FUNCTION_ARG_PADDING.  For the 64-bit ABI structs
7668    are always stored left shifted in their argument slot.  */
7669 
7670 static pad_direction
7671 sparc_function_arg_padding (machine_mode mode, const_tree type)
7672 {
7673   if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7674     return PAD_UPWARD;
7675 
7676   /* Fall back to the default.  */
7677   return default_function_arg_padding (mode, type);
7678 }
7679 
7680 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7681    Specify whether to return the return value in memory.  */
7682 
7683 static bool
7684 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7685 {
7686   if (TARGET_ARCH32)
7687     /* Original SPARC 32-bit ABI says that structures and unions, and
7688        quad-precision floats are returned in memory.  But note that the
7689        first part is implemented through -fpcc-struct-return being the
7690        default, so here we only implement -freg-struct-return instead.
7691        All other base types are returned in registers.
7692 
7693        Extended ABI (as implemented by the Sun compiler) says that
7694        all complex floats are returned in registers (8 FP registers
7695        at most for '_Complex long double').  Return all complex integers
7696        in registers (4 at most for '_Complex long long').
7697 
7698        Vector ABI (as implemented by the Sun VIS SDK) says that vector
7699        integers are returned like floats of the same size, that is in
7700        registers up to 8 bytes and in memory otherwise.  Return all
7701        vector floats in memory like structure and unions; note that
7702        they always have BLKmode like the latter.  */
7703     return (TYPE_MODE (type) == BLKmode
7704 	    || TYPE_MODE (type) == TFmode
7705 	    || (TREE_CODE (type) == VECTOR_TYPE
7706 		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7707   else
7708     /* Original SPARC 64-bit ABI says that structures and unions
7709        smaller than 32 bytes are returned in registers, as well as
7710        all other base types.
7711 
7712        Extended ABI (as implemented by the Sun compiler) says that all
7713        complex floats are returned in registers (8 FP registers at most
7714        for '_Complex long double').  Return all complex integers in
7715        registers (4 at most for '_Complex TItype').
7716 
7717        Vector ABI (as implemented by the Sun VIS SDK) says that vector
7718        integers are returned like floats of the same size, that is in
7719        registers.  Return all vector floats like structure and unions;
7720        note that they always have BLKmode like the latter.  */
7721     return (TYPE_MODE (type) == BLKmode
7722 	    && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7723 }
7724 
7725 /* Handle the TARGET_STRUCT_VALUE target hook.
7726    Return where to find the structure return value address.  */
7727 
7728 static rtx
7729 sparc_struct_value_rtx (tree fndecl, int incoming)
7730 {
7731   if (TARGET_ARCH64)
7732     return NULL_RTX;
7733   else
7734     {
7735       rtx mem;
7736 
7737       if (incoming)
7738 	mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7739 						   STRUCT_VALUE_OFFSET));
7740       else
7741 	mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7742 						   STRUCT_VALUE_OFFSET));
7743 
7744       /* Only follow the SPARC ABI for fixed-size structure returns.
7745          Variable-size structure returns are handled per the normal
7746          procedures in GCC.  This is enabled by -mstd-struct-return.  */
7747       if (incoming == 2
7748 	  && sparc_std_struct_return
7749 	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7750 	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7751 	{
7752 	  /* We must check and adjust the return address, as it is optional
7753 	     as to whether the return object is really provided.  */
7754 	  rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7755 	  rtx scratch = gen_reg_rtx (SImode);
7756 	  rtx_code_label *endlab = gen_label_rtx ();
7757 
7758 	  /* Calculate the return object size.  */
7759 	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7760 	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7761 	  /* Construct a temporary return value.  */
7762 	  rtx temp_val
7763 	    = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7764 
7765 	  /* Implement SPARC 32-bit psABI callee return struct checking:
7766 
7767 	     Fetch the instruction where we will return to and see if
7768 	     it's an unimp instruction (the most significant 10 bits
7769 	     will be zero).  */
7770 	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
7771 						plus_constant (Pmode,
7772 							       ret_reg, 8)));
7773 	  /* Assume the size is valid and pre-adjust.  */
7774 	  emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7775 	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7776 				   0, endlab);
7777 	  emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7778 	  /* Write the address of the memory pointed to by temp_val into
7779 	     the memory pointed to by mem.  */
7780 	  emit_move_insn (mem, XEXP (temp_val, 0));
7781 	  emit_label (endlab);
7782 	}
7783 
7784       return mem;
7785     }
7786 }
7787 
7788 /* Handle the TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE target hooks.
7789    For v9, function return values are subject to the same rules as arguments,
7790    except that up to 32 bytes may be returned in registers.  */
7791 
7792 static rtx
7793 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7794 {
7795   /* Beware that the two values are swapped here wrt function_arg.  */
7796   const int regbase
7797     = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7798   enum mode_class mclass = GET_MODE_CLASS (mode);
7799   int regno;
7800 
7801   /* Integer vectors are handled like floats as per the Sun VIS SDK.
7802      Note that integer vectors larger than 16 bytes have BLKmode so
7803      they need to be handled like floating-point vectors below.  */
7804   if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7805     mclass = MODE_FLOAT;
7806 
7807   if (TARGET_ARCH64 && type)
7808     {
7809       /* Structures up to 32 bytes in size are returned in registers.  */
7810       if (TREE_CODE (type) == RECORD_TYPE)
7811 	{
7812 	  const int size = int_size_in_bytes (type);
7813 	  gcc_assert (size <= 32);
7814 
7815 	  return function_arg_record_value (type, mode, 0, true, regbase);
7816 	}
7817 
7818       /* Unions up to 32 bytes in size are returned in integer registers.  */
7819       else if (TREE_CODE (type) == UNION_TYPE)
7820 	{
7821 	  const int size = int_size_in_bytes (type);
7822 	  gcc_assert (size <= 32);
7823 
7824 	  return function_arg_union_value (size, mode, 0, regbase);
7825 	}
7826 
7827       /* Vectors up to 32 bytes are returned in FP registers.  */
7828       else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7829 	{
7830 	  const int size = int_size_in_bytes (type);
7831 	  gcc_assert (size <= 32);
7832 
7833 	  return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7834 	}
7835 
7836       /* Objects that require it are returned in FP registers.  */
7837       else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7838 	;
7839 
7840       /* All other aggregate types are returned in an integer register in a
7841 	 mode corresponding to the size of the type.  */
7842       else if (AGGREGATE_TYPE_P (type))
7843 	{
7846 	  const int size = int_size_in_bytes (type);
7847 	  gcc_assert (size <= 32);
7848 
7849 	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7850 
7851 	  /* ??? We probably should have made the same ABI change in
7852 	     3.4.0 as the one we made for unions.   The latter was
7853 	     required by the SCD though, while the former is not
7854 	     specified, so we favored compatibility and efficiency.
7855 
7856 	     Now we're stuck for aggregates larger than 16 bytes,
7857 	     because OImode vanished in the meantime.  Let's not
7858 	     try to be unduly clever, and simply follow the ABI
7859 	     for unions in that case.  */
7860 	  if (mode == BLKmode)
7861 	    return function_arg_union_value (size, mode, 0, regbase);
7862 	  else
7863 	    mclass = MODE_INT;
7864 	}
7865 
7866       /* We should only have pointer and integer types at this point.  This
7867 	 must match sparc_promote_function_mode.  */
7868       else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7869 	mode = word_mode;
7870     }
7871 
7872   /* We should only have pointer and integer types at this point, except with
7873      -freg-struct-return.  This must match sparc_promote_function_mode.  */
7874   else if (TARGET_ARCH32
7875 	   && !(type && AGGREGATE_TYPE_P (type))
7876 	   && mclass == MODE_INT
7877 	   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7878     mode = word_mode;
7879 
7880   if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7881     regno = SPARC_FP_ARG_FIRST;
7882   else
7883     regno = regbase;
7884 
7885   return gen_rtx_REG (mode, regno);
7886 }
7887 
7888 /* Handle TARGET_FUNCTION_VALUE.
7889    On the SPARC, the value is found in the first "output" register, but the
7890    called function leaves it in the first "input" register.  */
7891 
7892 static rtx
7893 sparc_function_value (const_tree valtype,
7894 		      const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7895 		      bool outgoing)
7896 {
7897   return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7898 }
7899 
7900 /* Handle TARGET_LIBCALL_VALUE.  */
7901 
7902 static rtx
7903 sparc_libcall_value (machine_mode mode,
7904 		     const_rtx fun ATTRIBUTE_UNUSED)
7905 {
7906   return sparc_function_value_1 (NULL_TREE, mode, false);
7907 }
7908 
7909 /* Handle FUNCTION_VALUE_REGNO_P.
7910    On the SPARC, the first "output" reg is used for integer values, and the
7911    first floating point register is used for floating point values.  */
7912 
7913 static bool
7914 sparc_function_value_regno_p (const unsigned int regno)
7915 {
7916   return (regno == 8 || (TARGET_FPU && regno == 32));
7917 }
7918 
7919 /* Do what is necessary for `va_start'.  We look at the current function
7920    to determine if stdarg or varargs is used and return the address of
7921    the first unnamed parameter.  */
7922 
7923 static rtx
7924 sparc_builtin_saveregs (void)
7925 {
7926   int first_reg = crtl->args.info.words;
7927   rtx address;
7928   int regno;
7929 
7930   for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7931     emit_move_insn (gen_rtx_MEM (word_mode,
7932 				 gen_rtx_PLUS (Pmode,
7933 					       frame_pointer_rtx,
7934 					       GEN_INT (FIRST_PARM_OFFSET (0)
7935 							+ (UNITS_PER_WORD
7936 							   * regno)))),
7937 		    gen_rtx_REG (word_mode,
7938 				 SPARC_INCOMING_INT_ARG_FIRST + regno));
7939 
7940   address = gen_rtx_PLUS (Pmode,
7941 			  frame_pointer_rtx,
7942 			  GEN_INT (FIRST_PARM_OFFSET (0)
7943 				   + UNITS_PER_WORD * first_reg));
7944 
7945   return address;
7946 }
7947 
7948 /* Implement `va_start' for stdarg.  */
7949 
7950 static void
7951 sparc_va_start (tree valist, rtx nextarg)
7952 {
7953   nextarg = expand_builtin_saveregs ();
7954   std_expand_builtin_va_start (valist, nextarg);
7955 }
7956 
7957 /* Implement `va_arg' for stdarg.  */
7958 
7959 static tree
7960 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7961 		       gimple_seq *post_p)
7962 {
7963   HOST_WIDE_INT size, rsize, align;
7964   tree addr, incr;
7965   bool indirect;
7966   tree ptrtype = build_pointer_type (type);
7967 
7968   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7969     {
7970       indirect = true;
7971       size = rsize = UNITS_PER_WORD;
7972       align = 0;
7973     }
7974   else
7975     {
7976       indirect = false;
7977       size = int_size_in_bytes (type);
7978       rsize = ROUND_UP (size, UNITS_PER_WORD);
7979       align = 0;
7980 
7981       if (TARGET_ARCH64)
7982 	{
7983 	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
7984 	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7985 	    align = 2 * UNITS_PER_WORD;
7986 
7987 	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
7988 	     are left-justified in their slots.  */
7989 	  if (AGGREGATE_TYPE_P (type))
7990 	    {
7991 	      if (size == 0)
7992 		size = rsize = UNITS_PER_WORD;
7993 	      else
7994 		size = rsize;
7995 	    }
7996 	}
7997     }
7998 
7999   incr = valist;
8000   if (align)
8001     {
8002       incr = fold_build_pointer_plus_hwi (incr, align - 1);
8003       incr = fold_convert (sizetype, incr);
8004       incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
8005 			  size_int (-align));
8006       incr = fold_convert (ptr_type_node, incr);
8007     }
8008 
8009   gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
8010   addr = incr;
8011 
8012   if (BYTES_BIG_ENDIAN && size < rsize)
8013     addr = fold_build_pointer_plus_hwi (incr, rsize - size);
8014 
8015   if (indirect)
8016     {
8017       addr = fold_convert (build_pointer_type (ptrtype), addr);
8018       addr = build_va_arg_indirect_ref (addr);
8019     }
8020 
8021   /* If the address isn't aligned properly for the type, we need a temporary.
8022      FIXME: This is inefficient, usually we can do this in registers.  */
8023   else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
8024     {
8025       tree tmp = create_tmp_var (type, "va_arg_tmp");
8026       tree dest_addr = build_fold_addr_expr (tmp);
8027       tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8028 				   3, dest_addr, addr, size_int (rsize));
8029       TREE_ADDRESSABLE (tmp) = 1;
8030       gimplify_and_add (copy, pre_p);
8031       addr = dest_addr;
8032     }
8033 
8034   else
8035     addr = fold_convert (ptrtype, addr);
8036 
8037   incr = fold_build_pointer_plus_hwi (incr, rsize);
8038   gimplify_assign (valist, incr, post_p);
8039 
8040   return build_va_arg_indirect_ref (addr);
8041 }
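
/* For illustration (editorial sketch): under -m64, va_arg of a `long
   double' first rounds the argument pointer up to a 16-byte boundary
   (ALIGN == 16), reads 16 bytes, and advances INCR by RSIZE == 16; an
   8-byte `double' needs no such rounding.  */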
8042 
8043 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8044    Specify whether the vector mode is supported by the hardware.  */
8045 
8046 static bool
8047 sparc_vector_mode_supported_p (machine_mode mode)
8048 {
8049   return TARGET_VIS && VECTOR_MODE_P (mode);
8050 }
8051 
8052 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook.  */
8053 
8054 static machine_mode
8055 sparc_preferred_simd_mode (scalar_mode mode)
8056 {
8057   if (TARGET_VIS)
8058     switch (mode)
8059       {
8060       case E_SImode:
8061 	return V2SImode;
8062       case E_HImode:
8063 	return V4HImode;
8064       case E_QImode:
8065 	return V8QImode;
8066 
8067       default:;
8068       }
8069 
8070   return word_mode;
8071 }
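
/* For illustration (editorial sketch): with -mvis, a loop over `short'
   data is thus vectorized in V4HImode (four 16-bit lanes per 64-bit
   register); without VIS the vectorizer is told to use plain word_mode.  */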
8072 
8073 /* Implement TARGET_CAN_FOLLOW_JUMP.  */
8074 
8075 static bool
8076 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8077 {
8078   /* Do not fold unconditional jumps that have been created for crossing
8079      partition boundaries.  */
8080   if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8081     return false;
8082 
8083   return true;
8084 }
8085 
8086 /* Return the string to output an unconditional branch to LABEL, which is
8087    the operand number of the label.
8088 
8089    DEST is the destination insn (i.e. the label), INSN is the source.  */
8090 
8091 const char *
8092 output_ubranch (rtx dest, rtx_insn *insn)
8093 {
8094   static char string[64];
8095   bool v9_form = false;
8096   int delta;
8097   char *p;
8098 
8099   /* Even if we are trying to use cbcond for this, evaluate
8100      whether we can use V9 branches as our backup plan.  */
8101   delta = 5000000;
8102   if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
8103     delta = (INSN_ADDRESSES (INSN_UID (dest))
8104 	     - INSN_ADDRESSES (INSN_UID (insn)));
8105 
8106   /* Leave some instructions for "slop".  */
8107   if (TARGET_V9 && delta >= -260000 && delta < 260000)
8108     v9_form = true;
8109 
8110   if (TARGET_CBCOND)
8111     {
8112       bool emit_nop = emit_cbcond_nop (insn);
8113       bool far = false;
8114       const char *rval;
8115 
8116       if (delta < -500 || delta > 500)
8117 	far = true;
8118 
8119       if (far)
8120 	{
8121 	  if (v9_form)
8122 	    rval = "ba,a,pt\t%%xcc, %l0";
8123 	  else
8124 	    rval = "b,a\t%l0";
8125 	}
8126       else
8127 	{
8128 	  if (emit_nop)
8129 	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8130 	  else
8131 	    rval = "cwbe\t%%g0, %%g0, %l0";
8132 	}
8133       return rval;
8134     }
8135 
8136   if (v9_form)
8137     strcpy (string, "ba%*,pt\t%%xcc, ");
8138   else
8139     strcpy (string, "b%*\t");
8140 
8141   p = strchr (string, '\0');
8142   *p++ = '%';
8143   *p++ = 'l';
8144   *p++ = '0';
8145   *p++ = '%';
8146   *p++ = '(';
8147   *p = '\0';
8148 
8149   return string;
8150 }
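
/* For illustration (editorial sketch of the strings built above): a nearby
   target with TARGET_CBCOND yields "cwbe %g0, %g0, .L5", a distant one on
   V9 "ba,a,pt %xcc, .L5", and the generic fallback "b .L5", where the
   trailing %* and %( directives let the output machinery add the annul
   flag or a delay-slot nop as needed.  */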
8151 
8152 /* Return the string to output a conditional branch to LABEL, which is
8153    the operand number of the label.  OP is the conditional expression.
8154    XEXP (OP, 0) is assumed to be a condition code register (integer or
8155    floating point) and its mode specifies what kind of comparison we made.
8156 
8157    DEST is the destination insn (i.e. the label), INSN is the source.
8158 
8159    REVERSED is nonzero if we should reverse the sense of the comparison.
8160 
8161    ANNUL is nonzero if we should generate an annulling branch.  */
8162 
8163 const char *
8164 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8165 		rtx_insn *insn)
8166 {
8167   static char string[64];
8168   enum rtx_code code = GET_CODE (op);
8169   rtx cc_reg = XEXP (op, 0);
8170   machine_mode mode = GET_MODE (cc_reg);
8171   const char *labelno, *branch;
8172   int spaces = 8, far;
8173   char *p;
8174 
8175   /* v9 branches are limited to +-1MB.  If it is too far away,
8176      change
8177 
8178      bne,pt %xcc, .LC30
8179 
8180      to
8181 
8182      be,pn %xcc, .+12
8183       nop
8184      ba .LC30
8185 
8186      and
8187 
8188      fbne,a,pn %fcc2, .LC29
8189 
8190      to
8191 
8192      fbe,pt %fcc2, .+16
8193       nop
8194      ba .LC29  */
8195 
8196   far = TARGET_V9 && (get_attr_length (insn) >= 3);
8197   if (reversed ^ far)
8198     {
8199       /* Reversal of FP compares takes care -- an ordered compare
8200 	 becomes an unordered compare and vice versa.  */
8201       if (mode == CCFPmode || mode == CCFPEmode)
8202 	code = reverse_condition_maybe_unordered (code);
8203       else
8204 	code = reverse_condition (code);
8205     }
8206 
8207   /* Start by writing the branch condition.  */
8208   if (mode == CCFPmode || mode == CCFPEmode)
8209     {
8210       switch (code)
8211 	{
8212 	case NE:
8213 	  branch = "fbne";
8214 	  break;
8215 	case EQ:
8216 	  branch = "fbe";
8217 	  break;
8218 	case GE:
8219 	  branch = "fbge";
8220 	  break;
8221 	case GT:
8222 	  branch = "fbg";
8223 	  break;
8224 	case LE:
8225 	  branch = "fble";
8226 	  break;
8227 	case LT:
8228 	  branch = "fbl";
8229 	  break;
8230 	case UNORDERED:
8231 	  branch = "fbu";
8232 	  break;
8233 	case ORDERED:
8234 	  branch = "fbo";
8235 	  break;
8236 	case UNGT:
8237 	  branch = "fbug";
8238 	  break;
8239 	case UNLT:
8240 	  branch = "fbul";
8241 	  break;
8242 	case UNEQ:
8243 	  branch = "fbue";
8244 	  break;
8245 	case UNGE:
8246 	  branch = "fbuge";
8247 	  break;
8248 	case UNLE:
8249 	  branch = "fbule";
8250 	  break;
8251 	case LTGT:
8252 	  branch = "fblg";
8253 	  break;
8254 	default:
8255 	  gcc_unreachable ();
8256 	}
8257 
8258       /* ??? !v9: FP branches cannot be preceded by another floating point
8259 	 insn.  Because there is currently no concept of pre-delay slots,
8260 	 we can fix this only by always emitting a nop before a floating
8261 	 point branch.  */
8262 
8263       string[0] = '\0';
8264       if (! TARGET_V9)
8265 	strcpy (string, "nop\n\t");
8266       strcat (string, branch);
8267     }
8268   else
8269     {
8270       switch (code)
8271 	{
8272 	case NE:
8273 	  if (mode == CCVmode || mode == CCXVmode)
8274 	    branch = "bvs";
8275 	  else
8276 	    branch = "bne";
8277 	  break;
8278 	case EQ:
8279 	  if (mode == CCVmode || mode == CCXVmode)
8280 	    branch = "bvc";
8281 	  else
8282 	    branch = "be";
8283 	  break;
8284 	case GE:
8285 	  if (mode == CCNZmode || mode == CCXNZmode)
8286 	    branch = "bpos";
8287 	  else
8288 	    branch = "bge";
8289 	  break;
8290 	case GT:
8291 	  branch = "bg";
8292 	  break;
8293 	case LE:
8294 	  branch = "ble";
8295 	  break;
8296 	case LT:
8297 	  if (mode == CCNZmode || mode == CCXNZmode)
8298 	    branch = "bneg";
8299 	  else
8300 	    branch = "bl";
8301 	  break;
8302 	case GEU:
8303 	  branch = "bgeu";
8304 	  break;
8305 	case GTU:
8306 	  branch = "bgu";
8307 	  break;
8308 	case LEU:
8309 	  branch = "bleu";
8310 	  break;
8311 	case LTU:
8312 	  branch = "blu";
8313 	  break;
8314 	default:
8315 	  gcc_unreachable ();
8316 	}
8317       strcpy (string, branch);
8318     }
8319   spaces -= strlen (branch);
8320   p = strchr (string, '\0');
8321 
8322   /* Now add the annulling, the label, and a possible nop.  */
8323   if (annul && ! far)
8324     {
8325       strcpy (p, ",a");
8326       p += 2;
8327       spaces -= 2;
8328     }
8329 
8330   if (TARGET_V9)
8331     {
8332       rtx note;
8333       int v8 = 0;
8334 
8335       if (! far && insn && INSN_ADDRESSES_SET_P ())
8336 	{
8337 	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
8338 		       - INSN_ADDRESSES (INSN_UID (insn)));
8339 	  /* Leave some instructions for "slop".  */
8340 	  if (delta < -260000 || delta >= 260000)
8341 	    v8 = 1;
8342 	}
8343 
8344       switch (mode)
8345 	{
8346 	case E_CCmode:
8347 	case E_CCNZmode:
8348 	case E_CCCmode:
8349 	case E_CCVmode:
8350 	  labelno = "%%icc, ";
8351 	  if (v8)
8352 	    labelno = "";
8353 	  break;
8354 	case E_CCXmode:
8355 	case E_CCXNZmode:
8356 	case E_CCXCmode:
8357 	case E_CCXVmode:
8358 	  labelno = "%%xcc, ";
8359 	  gcc_assert (!v8);
8360 	  break;
8361 	case E_CCFPmode:
8362 	case E_CCFPEmode:
8363 	  {
8364 	    static char v9_fcc_labelno[] = "%%fccX, ";
8365 	    /* Set the char indicating the number of the fcc reg to use.  */
8366 	    v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8367 	    labelno = v9_fcc_labelno;
8368 	    if (v8)
8369 	      {
8370 		gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8371 		labelno = "";
8372 	      }
8373 	  }
8374 	  break;
8375 	default:
8376 	  gcc_unreachable ();
8377 	}
8378 
8379       if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8380 	{
8381 	  strcpy (p,
8382 		  ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8383 		   >= profile_probability::even ()) ^ far)
8384 		  ? ",pt" : ",pn");
8385 	  p += 3;
8386 	  spaces -= 3;
8387 	}
8388     }
8389   else
8390     labelno = "";
8391 
8392   if (spaces > 0)
8393     *p++ = '\t';
8394   else
8395     *p++ = ' ';
8396   strcpy (p, labelno);
8397   p = strchr (p, '\0');
8398   if (far)
8399     {
8400       strcpy (p, ".+12\n\t nop\n\tb\t");
8401       /* Skip the next insn if requested or
8402 	 if we know that it will be a nop.  */
8403       if (annul || ! final_sequence)
8404         p[3] = '6';
8405       p += 14;
8406     }
8407   *p++ = '%';
8408   *p++ = 'l';
8409   *p++ = label + '0';
8410   *p++ = '%';
8411   *p++ = '#';
8412   *p = '\0';
8413 
8414   return string;
8415 }
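
/* A small sketch (illustrative, kept out of the build) of the ".+12" patch
   performed above for far branches: index 3 of the buffer holds the '2' of
   "12", so storing '6' there makes the inverted branch also skip the insn
   in the delay slot.  */
#if 0
  char buf[32];
  strcpy (buf, ".+12\n\t nop\n\tb\t");
  if (annul_or_empty_delay_slot)	/* hypothetical stand-in for the test above */
    buf[3] = '6';			/* buffer now begins ".+16" */
#endif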
8416 
8417 /* Emit a library call comparison between floating point X and Y.
8418    COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8419    Return the new operator to be used in the comparison sequence.
8420 
8421    TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8422    values as arguments instead of the TFmode registers themselves,
8423    that's why we cannot call emit_float_lib_cmp.  */
8424 
8425 rtx
8426 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8427 {
8428   const char *qpfunc;
8429   rtx slot0, slot1, result, tem, tem2, libfunc;
8430   machine_mode mode;
8431   enum rtx_code new_comparison;
8432 
8433   switch (comparison)
8434     {
8435     case EQ:
8436       qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8437       break;
8438 
8439     case NE:
8440       qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8441       break;
8442 
8443     case GT:
8444       qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8445       break;
8446 
8447     case GE:
8448       qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8449       break;
8450 
8451     case LT:
8452       qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8453       break;
8454 
8455     case LE:
8456       qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8457       break;
8458 
8459     case ORDERED:
8460     case UNORDERED:
8461     case UNGT:
8462     case UNLT:
8463     case UNEQ:
8464     case UNGE:
8465     case UNLE:
8466     case LTGT:
8467       qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8468       break;
8469 
8470     default:
8471       gcc_unreachable ();
8472     }
8473 
8474   if (TARGET_ARCH64)
8475     {
8476       if (MEM_P (x))
8477 	{
8478 	  tree expr = MEM_EXPR (x);
8479 	  if (expr)
8480 	    mark_addressable (expr);
8481 	  slot0 = x;
8482 	}
8483       else
8484 	{
8485 	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8486 	  emit_move_insn (slot0, x);
8487 	}
8488 
8489       if (MEM_P (y))
8490 	{
8491 	  tree expr = MEM_EXPR (y);
8492 	  if (expr)
8493 	    mark_addressable (expr);
8494 	  slot1 = y;
8495 	}
8496       else
8497 	{
8498 	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8499 	  emit_move_insn (slot1, y);
8500 	}
8501 
8502       libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8503       emit_library_call (libfunc, LCT_NORMAL,
8504 			 DImode,
8505 			 XEXP (slot0, 0), Pmode,
8506 			 XEXP (slot1, 0), Pmode);
8507       mode = DImode;
8508     }
8509   else
8510     {
8511       libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8512       emit_library_call (libfunc, LCT_NORMAL,
8513 			 SImode,
8514 			 x, TFmode, y, TFmode);
8515       mode = SImode;
8516     }
8517 
8518 
8519   /* Immediately move the result of the libcall into a pseudo
8520      register so reload doesn't clobber the value if it needs
8521      the return register for a spill reg.  */
8522   result = gen_reg_rtx (mode);
8523   emit_move_insn (result, hard_libcall_value (mode, libfunc));
8524 
8525   switch (comparison)
8526     {
8527     default:
8528       return gen_rtx_NE (VOIDmode, result, const0_rtx);
8529     case ORDERED:
8530     case UNORDERED:
8531       new_comparison = (comparison == UNORDERED ? EQ : NE);
8532       return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8533     case UNGT:
8534     case UNGE:
8535       new_comparison = (comparison == UNGT ? GT : NE);
8536       return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8537     case UNLE:
8538       return gen_rtx_NE (VOIDmode, result, const2_rtx);
8539     case UNLT:
8540       tem = gen_reg_rtx (mode);
8541       if (TARGET_ARCH32)
8542 	emit_insn (gen_andsi3 (tem, result, const1_rtx));
8543       else
8544 	emit_insn (gen_anddi3 (tem, result, const1_rtx));
8545       return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8546     case UNEQ:
8547     case LTGT:
8548       tem = gen_reg_rtx (mode);
8549       if (TARGET_ARCH32)
8550 	emit_insn (gen_addsi3 (tem, result, const1_rtx));
8551       else
8552 	emit_insn (gen_adddi3 (tem, result, const1_rtx));
8553       tem2 = gen_reg_rtx (mode);
8554       if (TARGET_ARCH32)
8555 	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8556       else
8557 	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8558       new_comparison = (comparison == UNEQ ? EQ : NE);
8559       return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8560     }
8561 
8562   gcc_unreachable ();
8563 }
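
/* Worked example (an illustrative sketch, kept out of the build): how the
   single value returned by _Q_cmp/_Qp_cmp -- 0 for equal, 1 for less, 2 for
   greater, 3 for unordered -- is decoded by the arithmetic emitted above.  */
#if 0
static int demo_unordered (int res) { return res == 3; }
static int demo_ungt (int res) { return res > 1; }		/* {>, unordered} */
static int demo_unge (int res) { return res != 1; }		/* {=, >, unordered} */
static int demo_unle (int res) { return res != 2; }		/* {=, <, unordered} */
static int demo_unlt (int res) { return (res & 1) != 0; }	/* {<, unordered} */
static int demo_uneq (int res) { return ((res + 1) & 2) == 0; }	/* {=, unordered} */
static int demo_ltgt (int res) { return ((res + 1) & 2) != 0; }	/* {<, >} */
#endif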
8564 
8565 /* Generate an unsigned DImode to FP conversion.  This is the same code
8566    optabs would emit if we didn't have TFmode patterns.  */
8567 
8568 void
8569 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8570 {
8571   rtx i0, i1, f0, in, out;
8572 
8573   out = operands[0];
8574   in = force_reg (DImode, operands[1]);
8575   rtx_code_label *neglab = gen_label_rtx ();
8576   rtx_code_label *donelab = gen_label_rtx ();
8577   i0 = gen_reg_rtx (DImode);
8578   i1 = gen_reg_rtx (DImode);
8579   f0 = gen_reg_rtx (mode);
8580 
8581   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8582 
8583   emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8584   emit_jump_insn (gen_jump (donelab));
8585   emit_barrier ();
8586 
8587   emit_label (neglab);
8588 
8589   emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8590   emit_insn (gen_anddi3 (i1, in, const1_rtx));
8591   emit_insn (gen_iordi3 (i0, i0, i1));
8592   emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8593   emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8594 
8595   emit_label (donelab);
8596 }
8597 
8598 /* Generate an FP to unsigned DImode conversion.  This is the same code
8599    optabs would emit if we didn't have TFmode patterns.  */
8600 
8601 void
8602 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8603 {
8604   rtx i0, i1, f0, in, out, limit;
8605 
8606   out = operands[0];
8607   in = force_reg (mode, operands[1]);
8608   rtx_code_label *neglab = gen_label_rtx ();
8609   rtx_code_label *donelab = gen_label_rtx ();
8610   i0 = gen_reg_rtx (DImode);
8611   i1 = gen_reg_rtx (DImode);
8612   limit = gen_reg_rtx (mode);
8613   f0 = gen_reg_rtx (mode);
8614 
8615   emit_move_insn (limit,
8616 		  const_double_from_real_value (
8617 		    REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8618   emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8619 
8620   emit_insn (gen_rtx_SET (out,
8621 			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8622   emit_jump_insn (gen_jump (donelab));
8623   emit_barrier ();
8624 
8625   emit_label (neglab);
8626 
8627   emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8628   emit_insn (gen_rtx_SET (i0,
8629 			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8630   emit_insn (gen_movdi (i1, const1_rtx));
8631   emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8632   emit_insn (gen_xordi3 (out, i0, i1));
8633 
8634   emit_label (donelab);
8635 }
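
/* Likewise in plain C (an illustrative sketch): values below 2^63 go
   through the signed conversion directly; larger ones are biased down by
   2^63 first and the top bit is restored with an XOR afterwards.  */
#if 0
static unsigned long long demo_fixunsdi (double x)
{
  const double limit = 9223372036854775808.0;	/* 2^63 */
  if (x < limit)
    return (unsigned long long) (long long) x;
  return (unsigned long long) (long long) (x - limit) ^ (1ULL << 63);
}
#endif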
8636 
8637 /* Return the string to output a compare and branch instruction to DEST.
8638    DEST is the destination insn (i.e. the label), INSN is the source,
8639    and OP is the conditional expression.  */
8640 
8641 const char *
8642 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8643 {
8644   machine_mode mode = GET_MODE (XEXP (op, 0));
8645   enum rtx_code code = GET_CODE (op);
8646   const char *cond_str, *tmpl;
8647   int far, emit_nop, len;
8648   static char string[64];
8649   char size_char;
8650 
8651   /* Compare and Branch is limited to +-2KB.  If it is too far away,
8652      change
8653 
8654      cxbne X, Y, .LC30
8655 
8656      to
8657 
8658      cxbe X, Y, .+16
8659      nop
8660      ba,pt xcc, .LC30
8661       nop  */
8662 
8663   len = get_attr_length (insn);
8664 
8665   far = len == 4;
8666   emit_nop = len == 2;
8667 
8668   if (far)
8669     code = reverse_condition (code);
8670 
8671   size_char = ((mode == SImode) ? 'w' : 'x');
8672 
8673   switch (code)
8674     {
8675     case NE:
8676       cond_str = "ne";
8677       break;
8678 
8679     case EQ:
8680       cond_str = "e";
8681       break;
8682 
8683     case GE:
8684       cond_str = "ge";
8685       break;
8686 
8687     case GT:
8688       cond_str = "g";
8689       break;
8690 
8691     case LE:
8692       cond_str = "le";
8693       break;
8694 
8695     case LT:
8696       cond_str = "l";
8697       break;
8698 
8699     case GEU:
8700       cond_str = "cc";
8701       break;
8702 
8703     case GTU:
8704       cond_str = "gu";
8705       break;
8706 
8707     case LEU:
8708       cond_str = "leu";
8709       break;
8710 
8711     case LTU:
8712       cond_str = "cs";
8713       break;
8714 
8715     default:
8716       gcc_unreachable ();
8717     }
8718 
8719   if (far)
8720     {
8721       int veryfar = 1, delta;
8722 
8723       if (INSN_ADDRESSES_SET_P ())
8724 	{
8725 	  delta = (INSN_ADDRESSES (INSN_UID (dest))
8726 		   - INSN_ADDRESSES (INSN_UID (insn)));
8727 	  /* Leave some instructions for "slop".  */
8728 	  if (delta >= -260000 && delta < 260000)
8729 	    veryfar = 0;
8730 	}
8731 
8732       if (veryfar)
8733 	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8734       else
8735 	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8736     }
8737   else
8738     {
8739       if (emit_nop)
8740 	tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8741       else
8742 	tmpl = "c%cb%s\t%%1, %%2, %%3";
8743     }
8744 
8745   snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8746 
8747   return string;
8748 }
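
/* For instance (a sketch, not verbatim compiler output), an in-range SImode
   EQ that needs a trailing nop goes through the snprintf above as

	snprintf (buf, n, "c%cb%s\t%%1, %%2, %%3\n\tnop", 'w', "e");

   yielding "cwbe\t%1, %2, %3\n\tnop"; the doubled %% survives so that the
   operand escapes are expanded later by final.  */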
8749 
8750 /* Return the string to output a conditional branch to LABEL, testing
8751    register REG.  LABEL is the operand number of the label; REG is the
8752    operand number of the reg.  OP is the conditional expression.  The mode
8753    of REG says what kind of comparison we made.
8754 
8755    DEST is the destination insn (i.e. the label), INSN is the source.
8756 
8757    REVERSED is nonzero if we should reverse the sense of the comparison.
8758 
8759    ANNUL is nonzero if we should generate an annulling branch.  */
8760 
8761 const char *
8762 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8763 		 int annul, rtx_insn *insn)
8764 {
8765   static char string[64];
8766   enum rtx_code code = GET_CODE (op);
8767   machine_mode mode = GET_MODE (XEXP (op, 0));
8768   rtx note;
8769   int far;
8770   char *p;
8771 
8772   /* Branches on register are limited to +-128KB.  If it is too far away,
8773      change
8774 
8775      brnz,pt %g1, .LC30
8776 
8777      to
8778 
8779      brz,pn %g1, .+12
8780       nop
8781      ba,pt %xcc, .LC30
8782 
8783      and
8784 
8785      brgez,a,pn %o1, .LC29
8786 
8787      to
8788 
8789      brlz,pt %o1, .+16
8790       nop
8791      ba,pt %xcc, .LC29  */
8792 
8793   far = get_attr_length (insn) >= 3;
8794 
8795   /* If not floating-point or if EQ or NE, we can just reverse the code.  */
8796   if (reversed ^ far)
8797     code = reverse_condition (code);
8798 
8799   /* Only 64-bit versions of these instructions exist.  */
8800   gcc_assert (mode == DImode);
8801 
8802   /* Start by writing the branch condition.  */
8803 
8804   switch (code)
8805     {
8806     case NE:
8807       strcpy (string, "brnz");
8808       break;
8809 
8810     case EQ:
8811       strcpy (string, "brz");
8812       break;
8813 
8814     case GE:
8815       strcpy (string, "brgez");
8816       break;
8817 
8818     case LT:
8819       strcpy (string, "brlz");
8820       break;
8821 
8822     case LE:
8823       strcpy (string, "brlez");
8824       break;
8825 
8826     case GT:
8827       strcpy (string, "brgz");
8828       break;
8829 
8830     default:
8831       gcc_unreachable ();
8832     }
8833 
8834   p = strchr (string, '\0');
8835 
8836   /* Now add the annulling, reg, label, and nop.  */
8837   if (annul && ! far)
8838     {
8839       strcpy (p, ",a");
8840       p += 2;
8841     }
8842 
8843   if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8844     {
8845       strcpy (p,
8846 	      ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8847 	       >= profile_probability::even ()) ^ far)
8848 	      ? ",pt" : ",pn");
8849       p += 3;
8850     }
8851 
8852   *p = p < string + 8 ? '\t' : ' ';
8853   p++;
8854   *p++ = '%';
8855   *p++ = '0' + reg;
8856   *p++ = ',';
8857   *p++ = ' ';
8858   if (far)
8859     {
8860       int veryfar = 1, delta;
8861 
8862       if (INSN_ADDRESSES_SET_P ())
8863 	{
8864 	  delta = (INSN_ADDRESSES (INSN_UID (dest))
8865 		   - INSN_ADDRESSES (INSN_UID (insn)));
8866 	  /* Leave some instructions for "slop".  */
8867 	  if (delta >= -260000 && delta < 260000)
8868 	    veryfar = 0;
8869 	}
8870 
8871       strcpy (p, ".+12\n\t nop\n\t");
8872       /* Skip the next insn if requested or
8873 	 if we know that it will be a nop.  */
8874       if (annul || ! final_sequence)
8875         p[3] = '6';
8876       p += 12;
8877       if (veryfar)
8878 	{
8879 	  strcpy (p, "b\t");
8880 	  p += 2;
8881 	}
8882       else
8883 	{
8884 	  strcpy (p, "ba,pt\t%%xcc, ");
8885 	  p += 13;
8886 	}
8887     }
8888   *p++ = '%';
8889   *p++ = 'l';
8890   *p++ = '0' + label;
8891   *p++ = '%';
8892   *p++ = '#';
8893   *p = '\0';
8894 
8895   return string;
8896 }
8897 
8898 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8899    Such instructions cannot be used in the delay slot of a return insn on V9.
8900    If TEST is 0, also rename all %i[0-7] registers to their %o[0-7]
8901    counterparts.  */
8902 
8903 static int
8904 epilogue_renumber (register rtx *where, int test)
8905 {
8906   register const char *fmt;
8907   register int i;
8908   register enum rtx_code code;
8909 
8910   if (*where == 0)
8911     return 0;
8912 
8913   code = GET_CODE (*where);
8914 
8915   switch (code)
8916     {
8917     case REG:
8918       if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
8919 	return 1;
8920       if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8921 	*where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8922       /* fallthrough */
8923     case SCRATCH:
8924     case CC0:
8925     case PC:
8926     case CONST_INT:
8927     case CONST_WIDE_INT:
8928     case CONST_DOUBLE:
8929       return 0;
8930 
8931       /* Do not replace the frame pointer with the stack pointer because
8932 	 it can cause the delayed instruction to load below the stack.
8933 	 This occurs when instructions like:
8934 
8935 	 (set (reg/i:SI 24 %i0)
8936 	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8937                        (const_int -20 [0xffffffec])) 0))
8938 
8939 	 are in the return delayed slot.  */
8940     case PLUS:
8941       if (GET_CODE (XEXP (*where, 0)) == REG
8942 	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8943 	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8944 	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8945 	return 1;
8946       break;
8947 
8948     case MEM:
8949       if (SPARC_STACK_BIAS
8950 	  && GET_CODE (XEXP (*where, 0)) == REG
8951 	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8952 	return 1;
8953       break;
8954 
8955     default:
8956       break;
8957     }
8958 
8959   fmt = GET_RTX_FORMAT (code);
8960 
8961   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8962     {
8963       if (fmt[i] == 'E')
8964 	{
8965 	  register int j;
8966 	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8967 	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8968 	      return 1;
8969 	}
8970       else if (fmt[i] == 'e'
8971 	       && epilogue_renumber (&(XEXP (*where, i)), test))
8972 	return 1;
8973     }
8974   return 0;
8975 }
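
/* The renaming above relies on the fixed offset between incoming and
   outgoing windowed registers; a sketch of the mapping, assuming the
   standard SPARC numbering where %i0-%i7 are regnos 24-31 and %o0-%o7
   are regnos 8-15 (kept out of the build):  */
#if 0
static int demo_outgoing_regno (int regno)
{
  /* %iN (regno 24 + N) becomes %oN (regno 8 + N), i.e. regno - 16.  */
  return (regno >= 24 && regno < 32) ? regno - 16 : regno;
}
#endif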
8976 
8977 /* Leaf functions and non-leaf functions have different needs.  */
8978 
8979 static const int
8980 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8981 
8982 static const int
8983 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8984 
8985 static const int *const reg_alloc_orders[] = {
8986   reg_leaf_alloc_order,
8987   reg_nonleaf_alloc_order};
8988 
8989 void
8990 order_regs_for_local_alloc (void)
8991 {
8992   static int last_order_nonleaf = 1;
8993 
8994   if (df_regs_ever_live_p (15) != last_order_nonleaf)
8995     {
8996       last_order_nonleaf = !last_order_nonleaf;
8997       memcpy ((char *) reg_alloc_order,
8998 	      (const char *) reg_alloc_orders[last_order_nonleaf],
8999 	      FIRST_PSEUDO_REGISTER * sizeof (int));
9000     }
9001 }
9002 
9003 /* Return 1 if REG and MEM are legitimate enough to allow the various
9004    MEM<-->REG splits to be run.  */
9005 
9006 int
9007 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
9008 {
9009   /* Punt if we are here by mistake.  */
9010   gcc_assert (reload_completed);
9011 
9012   /* We must have an offsettable memory reference.  */
9013   if (!offsettable_memref_p (mem))
9014     return 0;
9015 
9016   /* If we have legitimate args for ldd/std, we do not want
9017      the split to happen.  */
9018   if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
9019     return 0;
9020 
9021   /* Success.  */
9022   return 1;
9023 }
9024 
9025 /* Split a REG <-- MEM move into a pair of moves in MODE.  */
9026 
9027 void
9028 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
9029 {
9030   rtx high_part = gen_highpart (mode, dest);
9031   rtx low_part = gen_lowpart (mode, dest);
9032   rtx word0 = adjust_address (src, mode, 0);
9033   rtx word1 = adjust_address (src, mode, 4);
9034 
9035   if (reg_overlap_mentioned_p (high_part, word1))
9036     {
9037       emit_move_insn_1 (low_part, word1);
9038       emit_move_insn_1 (high_part, word0);
9039     }
9040   else
9041     {
9042       emit_move_insn_1 (high_part, word0);
9043       emit_move_insn_1 (low_part, word1);
9044     }
9045 }
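
/* Why the overlap test above matters -- a sketch: when the destination
   pair includes the base register, e.g. "%g3:%g4 <- [%g3]", the base must
   only be clobbered by the final move:

	ld	[%g3+4], %g4	! low word first
	ld	[%g3], %g3	! base register overwritten last  */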
9046 
9047 /* Split a MEM <-- REG move into a pair of moves in MODE.  */
9048 
9049 void
9050 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9051 {
9052   rtx word0 = adjust_address (dest, mode, 0);
9053   rtx word1 = adjust_address (dest, mode, 4);
9054   rtx high_part = gen_highpart (mode, src);
9055   rtx low_part = gen_lowpart (mode, src);
9056 
9057   emit_move_insn_1 (word0, high_part);
9058   emit_move_insn_1 (word1, low_part);
9059 }
9060 
9061 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves.  */
9062 
9063 int
9064 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9065 {
9066   /* Punt if we are here by mistake.  */
9067   gcc_assert (reload_completed);
9068 
9069   if (GET_CODE (reg1) == SUBREG)
9070     reg1 = SUBREG_REG (reg1);
9071   if (GET_CODE (reg1) != REG)
9072     return 0;
9073   const int regno1 = REGNO (reg1);
9074 
9075   if (GET_CODE (reg2) == SUBREG)
9076     reg2 = SUBREG_REG (reg2);
9077   if (GET_CODE (reg2) != REG)
9078     return 0;
9079   const int regno2 = REGNO (reg2);
9080 
9081   if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9082     return 1;
9083 
9084   if (TARGET_VIS3)
9085     {
9086       if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9087 	  || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9088 	return 1;
9089     }
9090 
9091   return 0;
9092 }
9093 
9094 /* Split a REG <--> REG move into a pair of moves in MODE.  */
9095 
9096 void
9097 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9098 {
9099   rtx dest1 = gen_highpart (mode, dest);
9100   rtx dest2 = gen_lowpart (mode, dest);
9101   rtx src1 = gen_highpart (mode, src);
9102   rtx src2 = gen_lowpart (mode, src);
9103 
9104   /* Now emit using the real source and destination we found, swapping
9105      the order if we detect overlap.  */
9106   if (reg_overlap_mentioned_p (dest1, src2))
9107     {
9108       emit_move_insn_1 (dest2, src2);
9109       emit_move_insn_1 (dest1, src1);
9110     }
9111   else
9112     {
9113       emit_move_insn_1 (dest1, src1);
9114       emit_move_insn_1 (dest2, src2);
9115     }
9116 }
9117 
9118 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9119    This makes them candidates for using ldd and std insns.
9120 
9121    Note reg1 and reg2 *must* be hard registers.  */
9122 
9123 int
9124 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9125 {
9126   /* We might have been passed a SUBREG.  */
9127   if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9128     return 0;
9129 
9130   if (REGNO (reg1) % 2 != 0)
9131     return 0;
9132 
9133   /* Integer ldd is deprecated in SPARC V9.  */
9134   if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9135     return 0;
9136 
9137   return (REGNO (reg1) == REGNO (reg2) - 1);
9138 }
9139 
9140 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9141    an ldd or std insn.
9142 
9143    This can only happen when addr1 and addr2, the addresses in mem1
9144    and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9145    addr1 must also be aligned on a 64-bit boundary.
9146 
9147    Also, if dependent_reg_rtx is not null, it should not be used to
9148    compute the address for mem1, i.e. we cannot optimize a sequence
9149    like:
9150    	ld [%o0], %o0
9151 	ld [%o0 + 4], %o1
9152    to
9153    	ldd [%o0], %o0
9154    nor:
9155 	ld [%g3 + 4], %g3
9156 	ld [%g3], %g2
9157    to
9158         ldd [%g3], %g2
9159 
9160    But, note that the transformation from:
9161 	ld [%g2 + 4], %g3
9162         ld [%g2], %g2
9163    to
9164 	ldd [%g2], %g2
9165    is perfectly fine.  Thus, the peephole2 patterns always pass us
9166    the destination register of the first load, never the second one.
9167 
9168    For stores we don't have a similar problem, so dependent_reg_rtx is
9169    NULL_RTX.  */
9170 
9171 int
9172 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9173 {
9174   rtx addr1, addr2;
9175   unsigned int reg1;
9176   HOST_WIDE_INT offset1;
9177 
9178   /* The mems cannot be volatile.  */
9179   if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9180     return 0;
9181 
9182   /* MEM1 should be aligned on a 64-bit boundary.  */
9183   if (MEM_ALIGN (mem1) < 64)
9184     return 0;
9185 
9186   addr1 = XEXP (mem1, 0);
9187   addr2 = XEXP (mem2, 0);
9188 
9189   /* Extract a register number and offset (if used) from the first addr.  */
9190   if (GET_CODE (addr1) == PLUS)
9191     {
9192       /* If not a REG, return zero.  */
9193       if (GET_CODE (XEXP (addr1, 0)) != REG)
9194 	return 0;
9195       else
9196 	{
9197           reg1 = REGNO (XEXP (addr1, 0));
9198 	  /* The offset must be constant!  */
9199 	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9200             return 0;
9201           offset1 = INTVAL (XEXP (addr1, 1));
9202 	}
9203     }
9204   else if (GET_CODE (addr1) != REG)
9205     return 0;
9206   else
9207     {
9208       reg1 = REGNO (addr1);
9209       /* This was a simple (mem (reg)) expression.  Offset is 0.  */
9210       offset1 = 0;
9211     }
9212 
9213   /* Make sure the second address is a (mem (plus (reg) (const_int))).  */
9214   if (GET_CODE (addr2) != PLUS)
9215     return 0;
9216 
9217   if (GET_CODE (XEXP (addr2, 0)) != REG
9218       || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9219     return 0;
9220 
9221   if (reg1 != REGNO (XEXP (addr2, 0)))
9222     return 0;
9223 
9224   if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9225     return 0;
9226 
9227   /* The first offset must be evenly divisible by 8 to ensure the
9228      address is 64-bit aligned.  */
9229   if (offset1 % 8 != 0)
9230     return 0;
9231 
9232   /* The offset for the second addr must be 4 more than the first addr.  */
9233   if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9234     return 0;
9235 
9236   /* All the tests passed.  addr1 and addr2 are valid for ldd and std
9237      instructions.  */
9238   return 1;
9239 }
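
/* A few concrete cases for the checks above (illustrative):

	[%o1]   and [%o1+4]	accepted  (offset 0, same base)
	[%o1+8] and [%o1+12]	accepted  (first offset 0 mod 8)
	[%o1+4] and [%o1+8]	rejected  (first offset not 0 mod 8)
	[%o1]   and [%o2+4]	rejected  (different base registers)  */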
9240 
9241 /* Return the widened memory access made of MEM1 and MEM2 in MODE.  */
9242 
9243 rtx
9244 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9245 {
9246   rtx x = widen_memory_access (mem1, mode, 0);
9247   MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9248   return x;
9249 }
9250 
9251 /* Return 1 if reg is a pseudo, or is the first register in
9252    a hard register pair.  This makes it suitable for use in
9253    ldd and std insns.  */
9254 
9255 int
9256 register_ok_for_ldd (rtx reg)
9257 {
9258   /* We might have been passed a SUBREG.  */
9259   if (!REG_P (reg))
9260     return 0;
9261 
9262   if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9263     return (REGNO (reg) % 2 == 0);
9264 
9265   return 1;
9266 }
9267 
9268 /* Return 1 if OP, a MEM, has an address which is known to be
9269    aligned to an 8-byte boundary.  */
9270 
9271 int
9272 memory_ok_for_ldd (rtx op)
9273 {
9274   /* In 64-bit mode, we assume that the address is word-aligned.  */
9275   if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9276     return 0;
9277 
9278   if (! can_create_pseudo_p ()
9279       && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9280     return 0;
9281 
9282   return 1;
9283 }
9284 
9285 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
9286 
9287 static bool
9288 sparc_print_operand_punct_valid_p (unsigned char code)
9289 {
9290   if (code == '#'
9291       || code == '*'
9292       || code == '('
9293       || code == ')'
9294       || code == '_'
9295       || code == '&')
9296     return true;
9297 
9298   return false;
9299 }
9300 
9301 /* Implement TARGET_PRINT_OPERAND.
9302    Print operand X (an rtx) in assembler syntax to file FILE.
9303    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9304    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
9305 
9306 static void
9307 sparc_print_operand (FILE *file, rtx x, int code)
9308 {
9309   const char *s;
9310 
9311   switch (code)
9312     {
9313     case '#':
9314       /* Output an insn in a delay slot.  */
9315       if (final_sequence)
9316         sparc_indent_opcode = 1;
9317       else
9318 	fputs ("\n\t nop", file);
9319       return;
9320     case '*':
9321       /* Output an annul flag if there's nothing for the delay slot and we
9322 	 are optimizing.  This is always used with '(' below.
9323          Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9324 	 this is a dbx bug.  So, we only do this when optimizing.
9325          On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9326 	 Always emit a nop in case the next instruction is a branch.  */
9327       if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9328 	fputs (",a", file);
9329       return;
9330     case '(':
9331       /* Output a 'nop' if there's nothing for the delay slot and we are
9332 	 not optimizing.  This is always used with '*' above.  */
9333       if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9334 	fputs ("\n\t nop", file);
9335       else if (final_sequence)
9336         sparc_indent_opcode = 1;
9337       return;
9338     case ')':
9339       /* Output the right displacement from the saved PC on function return.
9340 	 The caller may have placed an "unimp" insn immediately after the call
9341 	 so we have to account for it.  This insn is used in the 32-bit ABI
9342 	 when calling a function that returns a non-zero-sized structure.  The
9343 	 64-bit ABI doesn't have it.  Be careful to have this test be the same
9344 	 as that for the call.  The exception is when sparc_std_struct_return
9345 	 is enabled, the psABI is followed exactly and the adjustment is made
9346 	 by the code in sparc_struct_value_rtx.  The call emitted is the same
9347 	 when sparc_std_struct_return is enabled. */
9348       if (!TARGET_ARCH64
9349 	 && cfun->returns_struct
9350 	 && !sparc_std_struct_return
9351 	 && DECL_SIZE (DECL_RESULT (current_function_decl))
9352 	 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9353 	     == INTEGER_CST
9354 	 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9355 	fputs ("12", file);
9356       else
9357         fputc ('8', file);
9358       return;
9359     case '_':
9360       /* Output the Embedded Medium/Anywhere code model base register.  */
9361       fputs (EMBMEDANY_BASE_REG, file);
9362       return;
9363     case '&':
9364       /* Print some local dynamic TLS name.  */
9365       if (const char *name = get_some_local_dynamic_name ())
9366 	assemble_name (file, name);
9367       else
9368 	output_operand_lossage ("'%%&' used without any "
9369 				"local dynamic TLS references");
9370       return;
9371 
9372     case 'Y':
9373       /* Adjust the operand to take into account a RESTORE operation.  */
9374       if (GET_CODE (x) == CONST_INT)
9375 	break;
9376       else if (GET_CODE (x) != REG)
9377 	output_operand_lossage ("invalid %%Y operand");
9378       else if (REGNO (x) < 8)
9379 	fputs (reg_names[REGNO (x)], file);
9380       else if (REGNO (x) >= 24 && REGNO (x) < 32)
9381 	fputs (reg_names[REGNO (x)-16], file);
9382       else
9383 	output_operand_lossage ("invalid %%Y operand");
9384       return;
9385     case 'L':
9386       /* Print out the low order register name of a register pair.  */
9387       if (WORDS_BIG_ENDIAN)
9388 	fputs (reg_names[REGNO (x)+1], file);
9389       else
9390 	fputs (reg_names[REGNO (x)], file);
9391       return;
9392     case 'H':
9393       /* Print out the high order register name of a register pair.  */
9394       if (WORDS_BIG_ENDIAN)
9395 	fputs (reg_names[REGNO (x)], file);
9396       else
9397 	fputs (reg_names[REGNO (x)+1], file);
9398       return;
9399     case 'R':
9400       /* Print out the second register name of a register pair or quad.
9401 	 I.e., R (%o0) => %o1.  */
9402       fputs (reg_names[REGNO (x)+1], file);
9403       return;
9404     case 'S':
9405       /* Print out the third register name of a register quad.
9406 	 I.e., S (%o0) => %o2.  */
9407       fputs (reg_names[REGNO (x)+2], file);
9408       return;
9409     case 'T':
9410       /* Print out the fourth register name of a register quad.
9411 	 I.e., T (%o0) => %o3.  */
9412       fputs (reg_names[REGNO (x)+3], file);
9413       return;
9414     case 'x':
9415       /* Print a condition code register.  */
9416       if (REGNO (x) == SPARC_ICC_REG)
9417 	{
9418 	  switch (GET_MODE (x))
9419 	    {
9420 	    case E_CCmode:
9421 	    case E_CCNZmode:
9422 	    case E_CCCmode:
9423 	    case E_CCVmode:
9424 	      s = "%icc";
9425 	      break;
9426 	    case E_CCXmode:
9427 	    case E_CCXNZmode:
9428 	    case E_CCXCmode:
9429 	    case E_CCXVmode:
9430 	      s = "%xcc";
9431 	      break;
9432 	    default:
9433 	      gcc_unreachable ();
9434 	    }
9435 	  fputs (s, file);
9436 	}
9437       else
9438 	/* %fccN register */
9439 	fputs (reg_names[REGNO (x)], file);
9440       return;
9441     case 'm':
9442       /* Print the operand's address only.  */
9443       output_address (GET_MODE (x), XEXP (x, 0));
9444       return;
9445     case 'r':
9446       /* In this case we need a register.  Use %g0 if the
9447 	 operand is const0_rtx.  */
9448       if (x == const0_rtx
9449 	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9450 	{
9451 	  fputs ("%g0", file);
9452 	  return;
9453 	}
9454       else
9455 	break;
9456 
9457     case 'A':
9458       switch (GET_CODE (x))
9459 	{
9460 	case IOR:
9461 	  s = "or";
9462 	  break;
9463 	case AND:
9464 	  s = "and";
9465 	  break;
9466 	case XOR:
9467 	  s = "xor";
9468 	  break;
9469 	default:
9470 	  output_operand_lossage ("invalid %%A operand");
9471 	  s = "";
9472 	  break;
9473 	}
9474       fputs (s, file);
9475       return;
9476 
9477     case 'B':
9478       switch (GET_CODE (x))
9479 	{
9480 	case IOR:
9481 	  s = "orn";
9482 	  break;
9483 	case AND:
9484 	  s = "andn";
9485 	  break;
9486 	case XOR:
9487 	  s = "xnor";
9488 	  break;
9489 	default:
9490 	  output_operand_lossage ("invalid %%B operand");
9491 	  s = "";
9492 	  break;
9493 	}
9494       fputs (s, file);
9495       return;
9496 
9497       /* This is used by the conditional move instructions.  */
9498     case 'C':
9499       {
9500 	machine_mode mode = GET_MODE (XEXP (x, 0));
9501 	switch (GET_CODE (x))
9502 	  {
9503 	  case NE:
9504 	    if (mode == CCVmode || mode == CCXVmode)
9505 	      s = "vs";
9506 	    else
9507 	      s = "ne";
9508 	    break;
9509 	  case EQ:
9510 	    if (mode == CCVmode || mode == CCXVmode)
9511 	      s = "vc";
9512 	    else
9513 	      s = "e";
9514 	    break;
9515 	  case GE:
9516 	    if (mode == CCNZmode || mode == CCXNZmode)
9517 	      s = "pos";
9518 	    else
9519 	      s = "ge";
9520 	    break;
9521 	  case GT:
9522 	    s = "g";
9523 	    break;
9524 	  case LE:
9525 	    s = "le";
9526 	    break;
9527 	  case LT:
9528 	    if (mode == CCNZmode || mode == CCXNZmode)
9529 	      s = "neg";
9530 	    else
9531 	      s = "l";
9532 	    break;
9533 	  case GEU:
9534 	    s = "geu";
9535 	    break;
9536 	  case GTU:
9537 	    s = "gu";
9538 	    break;
9539 	  case LEU:
9540 	    s = "leu";
9541 	    break;
9542 	  case LTU:
9543 	    s = "lu";
9544 	    break;
9545 	  case LTGT:
9546 	    s = "lg";
9547 	    break;
9548 	  case UNORDERED:
9549 	    s = "u";
9550 	    break;
9551 	  case ORDERED:
9552 	    s = "o";
9553 	    break;
9554 	  case UNLT:
9555 	    s = "ul";
9556 	    break;
9557 	  case UNLE:
9558 	    s = "ule";
9559 	    break;
9560 	  case UNGT:
9561 	    s = "ug";
9562 	    break;
9563 	  case UNGE:
9564 	    s = "uge";
9565 	    break;
9566 	  case UNEQ:
9567 	    s = "ue";
9568 	    break;
9569 	  default:
9570 	    output_operand_lossage ("invalid %%C operand");
9571 	    s = "";
9572 	    break;
9573 	  }
9574 	fputs (s, file);
9575 	return;
9576       }
9577 
9578       /* These are used by the movr instruction pattern.  */
9579     case 'D':
9580       {
9581 	switch (GET_CODE (x))
9582 	  {
9583 	  case NE:
9584 	    s = "ne";
9585 	    break;
9586 	  case EQ:
9587 	    s = "e";
9588 	    break;
9589 	  case GE:
9590 	    s = "gez";
9591 	    break;
9592 	  case LT:
9593 	    s = "lz";
9594 	    break;
9595 	  case LE:
9596 	    s = "lez";
9597 	    break;
9598 	  case GT:
9599 	    s = "gz";
9600 	    break;
9601 	  default:
9602 	    output_operand_lossage ("invalid %%D operand");
9603 	    s = "";
9604 	    break;
9605 	  }
9606 	fputs (s, file);
9607 	return;
9608       }
9609 
9610     case 'b':
9611       {
9612 	/* Print a sign-extended character.  */
9613 	int i = trunc_int_for_mode (INTVAL (x), QImode);
9614 	fprintf (file, "%d", i);
9615 	return;
9616       }
9617 
9618     case 'f':
9619       /* Operand must be a MEM; write its address.  */
9620       if (GET_CODE (x) != MEM)
9621 	output_operand_lossage ("invalid %%f operand");
9622       output_address (GET_MODE (x), XEXP (x, 0));
9623       return;
9624 
9625     case 's':
9626       {
9627 	/* Print a sign-extended 32-bit value.  */
9628 	HOST_WIDE_INT i;
9629 	if (GET_CODE(x) == CONST_INT)
9630 	  i = INTVAL (x);
9631 	else
9632 	  {
9633 	    output_operand_lossage ("invalid %%s operand");
9634 	    return;
9635 	  }
9636 	i = trunc_int_for_mode (i, SImode);
9637 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9638 	return;
9639       }
9640 
9641     case 0:
9642       /* Do nothing special.  */
9643       break;
9644 
9645     default:
9646       /* Undocumented flag.  */
9647       output_operand_lossage ("invalid operand output code");
9648     }
9649 
9650   if (GET_CODE (x) == REG)
9651     fputs (reg_names[REGNO (x)], file);
9652   else if (GET_CODE (x) == MEM)
9653     {
9654       fputc ('[', file);
9655       /* Poor Sun assembler doesn't understand absolute addressing.  */
9656       if (CONSTANT_P (XEXP (x, 0)))
9657 	fputs ("%g0+", file);
9658       output_address (GET_MODE (x), XEXP (x, 0));
9659       fputc (']', file);
9660     }
9661   else if (GET_CODE (x) == HIGH)
9662     {
9663       fputs ("%hi(", file);
9664       output_addr_const (file, XEXP (x, 0));
9665       fputc (')', file);
9666     }
9667   else if (GET_CODE (x) == LO_SUM)
9668     {
9669       sparc_print_operand (file, XEXP (x, 0), 0);
9670       if (TARGET_CM_MEDMID)
9671 	fputs ("+%l44(", file);
9672       else
9673 	fputs ("+%lo(", file);
9674       output_addr_const (file, XEXP (x, 1));
9675       fputc (')', file);
9676     }
9677   else if (GET_CODE (x) == CONST_DOUBLE)
9678     output_operand_lossage ("floating-point constant not a valid immediate operand");
9679   else
9680     output_addr_const (file, x);
9681 }
9682 
9683 /* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
9684 
9685 static void
9686 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9687 {
9688   register rtx base, index = 0;
9689   int offset = 0;
9690   register rtx addr = x;
9691 
9692   if (REG_P (addr))
9693     fputs (reg_names[REGNO (addr)], file);
9694   else if (GET_CODE (addr) == PLUS)
9695     {
9696       if (CONST_INT_P (XEXP (addr, 0)))
9697 	offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9698       else if (CONST_INT_P (XEXP (addr, 1)))
9699 	offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9700       else
9701 	base = XEXP (addr, 0), index = XEXP (addr, 1);
9702       if (GET_CODE (base) == LO_SUM)
9703 	{
9704 	  gcc_assert (USE_AS_OFFSETABLE_LO10
9705 		      && TARGET_ARCH64
9706 		      && ! TARGET_CM_MEDMID);
9707 	  output_operand (XEXP (base, 0), 0);
9708 	  fputs ("+%lo(", file);
9709 	  output_address (VOIDmode, XEXP (base, 1));
9710 	  fprintf (file, ")+%d", offset);
9711 	}
9712       else
9713 	{
9714 	  fputs (reg_names[REGNO (base)], file);
9715 	  if (index == 0)
9716 	    fprintf (file, "%+d", offset);
9717 	  else if (REG_P (index))
9718 	    fprintf (file, "+%s", reg_names[REGNO (index)]);
9719 	  else if (GET_CODE (index) == SYMBOL_REF
9720 		   || GET_CODE (index) == LABEL_REF
9721 		   || GET_CODE (index) == CONST)
9722 	    fputc ('+', file), output_addr_const (file, index);
9723 	  else gcc_unreachable ();
9724 	}
9725     }
9726   else if (GET_CODE (addr) == MINUS
9727 	   && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9728     {
9729       output_addr_const (file, XEXP (addr, 0));
9730       fputs ("-(", file);
9731       output_addr_const (file, XEXP (addr, 1));
9732       fputs ("-.)", file);
9733     }
9734   else if (GET_CODE (addr) == LO_SUM)
9735     {
9736       output_operand (XEXP (addr, 0), 0);
9737       if (TARGET_CM_MEDMID)
9738         fputs ("+%l44(", file);
9739       else
9740         fputs ("+%lo(", file);
9741       output_address (VOIDmode, XEXP (addr, 1));
9742       fputc (')', file);
9743     }
9744   else if (flag_pic
9745 	   && GET_CODE (addr) == CONST
9746 	   && GET_CODE (XEXP (addr, 0)) == MINUS
9747 	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9748 	   && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9749 	   && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9750     {
9751       addr = XEXP (addr, 0);
9752       output_addr_const (file, XEXP (addr, 0));
9753       /* Group the args of the second CONST in parenthesis.  */
9754       fputs ("-(", file);
9755       /* Skip past the second CONST--it does nothing for us.  */
9756       output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9757       /* Close the parenthesis.  */
9758       fputc (')', file);
9759     }
9760   else
9761     {
9762       output_addr_const (file, addr);
9763     }
9764 }
9765 
9766 /* Target hook for assembling integer objects.  The sparc version has
9767    special handling for aligned DI-mode objects.  */
9768 
9769 static bool
9770 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9771 {
9772   /* ??? We only output .xword's for symbols and only then in environments
9773      where the assembler can handle them.  */
9774   if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9775     {
9776       if (TARGET_V9)
9777 	{
9778 	  assemble_integer_with_op ("\t.xword\t", x);
9779 	  return true;
9780 	}
9781       else
9782 	{
9783 	  assemble_aligned_integer (4, const0_rtx);
9784 	  assemble_aligned_integer (4, x);
9785 	  return true;
9786 	}
9787     }
9788   return default_assemble_integer (x, size, aligned_p);
9789 }
9790 
9791 /* Return the value of a code used in the .proc pseudo-op that says
9792    what kind of result this function returns.  For non-C types, we pick
9793    the closest C type.  */
9794 
9795 #ifndef SHORT_TYPE_SIZE
9796 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9797 #endif
9798 
9799 #ifndef INT_TYPE_SIZE
9800 #define INT_TYPE_SIZE BITS_PER_WORD
9801 #endif
9802 
9803 #ifndef LONG_TYPE_SIZE
9804 #define LONG_TYPE_SIZE BITS_PER_WORD
9805 #endif
9806 
9807 #ifndef LONG_LONG_TYPE_SIZE
9808 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9809 #endif
9810 
9811 #ifndef FLOAT_TYPE_SIZE
9812 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9813 #endif
9814 
9815 #ifndef DOUBLE_TYPE_SIZE
9816 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9817 #endif
9818 
9819 #ifndef LONG_DOUBLE_TYPE_SIZE
9820 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9821 #endif
9822 
9823 unsigned long
9824 sparc_type_code (register tree type)
9825 {
9826   register unsigned long qualifiers = 0;
9827   register unsigned shift;
9828 
9829   /* Only the first 30 bits of the qualifier are valid.  We must refrain from
9830      setting more, since some assemblers will give an error for this.  Also,
9831      we must be careful to avoid shifts of 32 bits or more to avoid getting
9832      unpredictable results.  */
9833 
9834   for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9835     {
9836       switch (TREE_CODE (type))
9837 	{
9838 	case ERROR_MARK:
9839 	  return qualifiers;
9840 
9841 	case ARRAY_TYPE:
9842 	  qualifiers |= (3 << shift);
9843 	  break;
9844 
9845 	case FUNCTION_TYPE:
9846 	case METHOD_TYPE:
9847 	  qualifiers |= (2 << shift);
9848 	  break;
9849 
9850 	case POINTER_TYPE:
9851 	case REFERENCE_TYPE:
9852 	case OFFSET_TYPE:
9853 	  qualifiers |= (1 << shift);
9854 	  break;
9855 
9856 	case RECORD_TYPE:
9857 	  return (qualifiers | 8);
9858 
9859 	case UNION_TYPE:
9860 	case QUAL_UNION_TYPE:
9861 	  return (qualifiers | 9);
9862 
9863 	case ENUMERAL_TYPE:
9864 	  return (qualifiers | 10);
9865 
9866 	case VOID_TYPE:
9867 	  return (qualifiers | 16);
9868 
9869 	case INTEGER_TYPE:
9870 	  /* If this is a range type, consider it to be the underlying
9871 	     type.  */
9872 	  if (TREE_TYPE (type) != 0)
9873 	    break;
9874 
9875 	  /* Carefully distinguish all the standard types of C,
9876 	     without messing up if the language is not C.  We do this by
9877 	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
9878 	     look at both the names and the above fields, but that's redundant.
9879 	     Any type whose size is between two C types will be considered
9880 	     to be the wider of the two types.  Also, we do not have a
9881 	     special code to use for "long long", so anything wider than
9882 	     long is treated the same.  Note that we can't distinguish
9883 	     between "int" and "long" in this code if they are the same
9884 	     size, but that's fine, since neither can the assembler.  */
9885 
9886 	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9887 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9888 
9889 	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9890 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9891 
9892 	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9893 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9894 
9895 	  else
9896 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9897 
9898 	case REAL_TYPE:
9899 	  /* If this is a range type, consider it to be the underlying
9900 	     type.  */
9901 	  if (TREE_TYPE (type) != 0)
9902 	    break;
9903 
9904 	  /* Carefully distinguish all the standard types of C,
9905 	     without messing up if the language is not C.  */
9906 
9907 	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9908 	    return (qualifiers | 6);
9909 
9910 	  else
9911 	    return (qualifiers | 7);
9912 
9913 	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
9914 	  /* ??? We need to distinguish between double and float complex types,
9915 	     but I don't know how yet because I can't reach this code from
9916 	     existing front-ends.  */
9917 	  return (qualifiers | 7);	/* Who knows? */
9918 
9919 	case VECTOR_TYPE:
9920 	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
9921 	case LANG_TYPE:
9922 	case NULLPTR_TYPE:
9923 	  return qualifiers;
9924 
9925 	default:
9926 	  gcc_unreachable ();		/* Not a type! */
9927         }
9928     }
9929 
9930   return qualifiers;
9931 }
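
/* For example, "int **" encodes as base code 4 (int) in the low bits plus
   one pointer qualifier (1) at shift 6 and another at shift 8:

	(1 << 8) | (1 << 6) | 4 == 0x144  */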
9932 
9933 /* Nested function support.  */
9934 
9935 /* Emit RTL insns to initialize the variable parts of a trampoline.
9936    FNADDR is an RTX for the address of the function's pure code.
9937    CXT is an RTX for the static chain value for the function.
9938 
9939    This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9940    (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9941    (to store insns).  This is a bit excessive.  Perhaps a different
9942    mechanism would be better here.
9943 
9944    Emit enough FLUSH insns to synchronize the data and instruction caches.  */
9945 
9946 static void
9947 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9948 {
9949   /* SPARC 32-bit trampoline:
9950 
9951  	sethi	%hi(fn), %g1
9952  	sethi	%hi(static), %g2
9953  	jmp	%g1+%lo(fn)
9954  	or	%g2, %lo(static), %g2
9955 
9956     SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9957     JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9958    */
9959 
9960   emit_move_insn
9961     (adjust_address (m_tramp, SImode, 0),
9962      expand_binop (SImode, ior_optab,
9963 		   expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9964 		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9965 		   NULL_RTX, 1, OPTAB_DIRECT));
9966 
9967   emit_move_insn
9968     (adjust_address (m_tramp, SImode, 4),
9969      expand_binop (SImode, ior_optab,
9970 		   expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9971 		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9972 		   NULL_RTX, 1, OPTAB_DIRECT));
9973 
9974   emit_move_insn
9975     (adjust_address (m_tramp, SImode, 8),
9976      expand_binop (SImode, ior_optab,
9977 		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9978 		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9979 		   NULL_RTX, 1, OPTAB_DIRECT));
9980 
9981   emit_move_insn
9982     (adjust_address (m_tramp, SImode, 12),
9983      expand_binop (SImode, ior_optab,
9984 		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9985 		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9986 		   NULL_RTX, 1, OPTAB_DIRECT));
9987 
9988   /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
9989      aligned on a 16 byte boundary so one flush clears it all.  */
9990   emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9991   if (sparc_cpu != PROCESSOR_ULTRASPARC
9992       && sparc_cpu != PROCESSOR_ULTRASPARC3
9993       && sparc_cpu != PROCESSOR_NIAGARA
9994       && sparc_cpu != PROCESSOR_NIAGARA2
9995       && sparc_cpu != PROCESSOR_NIAGARA3
9996       && sparc_cpu != PROCESSOR_NIAGARA4
9997       && sparc_cpu != PROCESSOR_NIAGARA7
9998       && sparc_cpu != PROCESSOR_M8)
9999     emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
10000 
10001   /* Call __enable_execute_stack after writing onto the stack to make sure
10002      the stack address is accessible.  */
10003 #ifdef HAVE_ENABLE_EXECUTE_STACK
10004   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10005                      LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10006 #endif
10007 
10008 }
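
/* A sketch of the instruction-word arithmetic above for hypothetical
   addresses FN and CXT (illustrative only, kept out of the build):  */
#if 0
static void demo_tramp32_words (unsigned int fn, unsigned int cxt,
				unsigned int w[4])
{
  w[0] = 0x03000000 | (fn >> 10);	/* sethi %hi(fn), %g1 */
  w[1] = 0x05000000 | (cxt >> 10);	/* sethi %hi(static), %g2 */
  w[2] = 0x81c06000 | (fn & 0x3ff);	/* jmp %g1+%lo(fn) */
  w[3] = 0x8410a000 | (cxt & 0x3ff);	/* or %g2, %lo(static), %g2 */
}
#endif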
10009 
10010 /* The 64-bit version is simpler because it makes more sense to load the
10011    values as "immediate" data out of the trampoline.  It's also easier since
10012    we can read the PC without clobbering a register.  */
10013 
10014 static void
10015 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
10016 {
10017   /* SPARC 64-bit trampoline:
10018 
10019 	rd	%pc, %g1
10020 	ldx	[%g1+24], %g5
10021 	jmp	%g5
10022 	ldx	[%g1+16], %g5
10023 	+16 bytes data
10024    */
10025 
10026   emit_move_insn (adjust_address (m_tramp, SImode, 0),
10027 		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
10028   emit_move_insn (adjust_address (m_tramp, SImode, 4),
10029 		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
10030   emit_move_insn (adjust_address (m_tramp, SImode, 8),
10031 		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
10032   emit_move_insn (adjust_address (m_tramp, SImode, 12),
10033 		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
10034   emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
10035   emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
10036   emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
10037 
10038   if (sparc_cpu != PROCESSOR_ULTRASPARC
10039       && sparc_cpu != PROCESSOR_ULTRASPARC3
10040       && sparc_cpu != PROCESSOR_NIAGARA
10041       && sparc_cpu != PROCESSOR_NIAGARA2
10042       && sparc_cpu != PROCESSOR_NIAGARA3
10043       && sparc_cpu != PROCESSOR_NIAGARA4
10044       && sparc_cpu != PROCESSOR_NIAGARA7
10045       && sparc_cpu != PROCESSOR_M8)
10046     emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
10047 
10048   /* Call __enable_execute_stack after writing onto the stack to make sure
10049      the stack address is accessible.  */
10050 #ifdef HAVE_ENABLE_EXECUTE_STACK
10051   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10052                      LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10053 #endif
10054 }
10055 
10056 /* Worker for TARGET_TRAMPOLINE_INIT.  */
10057 
10058 static void
10059 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10060 {
10061   rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10062   cxt = force_reg (Pmode, cxt);
10063   if (TARGET_ARCH64)
10064     sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10065   else
10066     sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10067 }
10068 
10069 /* Adjust the cost of a scheduling dependency.  Return the new cost of
10070    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
10071 
10072 static int
10073 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
10074 			int cost)
10075 {
10076   enum attr_type insn_type;
10077 
10078   if (recog_memoized (insn) < 0)
10079     return cost;
10080 
10081   insn_type = get_attr_type (insn);
10082 
10083   if (dep_type == 0)
10084     {
10085       /* Data dependency; DEP_INSN writes a register that INSN reads some
10086 	 cycles later.  */
10087 
10088       /* If a load, then the dependence must be on the memory address;
10089 	 add an extra "cycle".  Note that the cost could be two cycles
10090 	 if the reg was written late in an instruction group; we cannot tell
10091 	 here.  */
10092       if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10093 	return cost + 3;
10094 
10095       /* Get the delay only if the address of the store is the dependence.  */
10096       if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10097 	{
10098 	  rtx pat = PATTERN(insn);
10099 	  rtx dep_pat = PATTERN (dep_insn);
10100 
10101 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10102 	    return cost;  /* This should not happen!  */
10103 
10104 	  /* The dependency between the two instructions was on the data that
10105 	     is being stored.  Assume that this implies that the address of the
10106 	     store is not dependent.  */
10107 	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10108 	    return cost;
10109 
10110 	  return cost + 3;  /* An approximation.  */
10111 	}
10112 
10113       /* A shift instruction cannot receive its data from an instruction
10114 	 in the same cycle; add a one cycle penalty.  */
10115       if (insn_type == TYPE_SHIFT)
10116 	return cost + 3;   /* Split before cascade into shift.  */
10117     }
10118   else
10119     {
10120       /* Anti- or output-dependency; DEP_INSN reads/writes a register that
10121 	 INSN writes some cycles later.  */
10122 
10123       /* These are only significant for the fpu unit; writing a fp reg before
10124          the fpu has finished with it stalls the processor.  */
10125 
10126       /* Reusing an integer register causes no problems.  */
10127       if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10128 	return 0;
10129     }
10130 
10131   return cost;
10132 }
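
/* To illustrate the store test above (a sketch): for "st %o1, [%o2+8]"
   following an insn that set %o1, SET_DEST (dep_pat) equals SET_SRC (pat),
   so the dependence is on the stored data and incurs no penalty; had the
   earlier insn set %o2 instead, the address would be the dependence and
   the cost would grow by 3.  */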
10133 
10134 static int
10135 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10136 			int cost)
10137 {
10138   enum attr_type insn_type, dep_type;
10139   rtx pat = PATTERN(insn);
10140   rtx dep_pat = PATTERN (dep_insn);
10141 
10142   if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10143     return cost;
10144 
10145   insn_type = get_attr_type (insn);
10146   dep_type = get_attr_type (dep_insn);
10147 
10148   switch (dtype)
10149     {
10150     case 0:
10151       /* Data dependency; DEP_INSN writes a register that INSN reads some
10152 	 cycles later.  */
10153 
10154       switch (insn_type)
10155 	{
10156 	case TYPE_STORE:
10157 	case TYPE_FPSTORE:
10158 	  /* Get the delay iff the address of the store is the dependence.  */
10159 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10160 	    return cost;
10161 
10162 	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10163 	    return cost;
10164 	  return cost + 3;
10165 
10166 	case TYPE_LOAD:
10167 	case TYPE_SLOAD:
10168 	case TYPE_FPLOAD:
10169 	  /* If a load, then the dependence must be on the memory address.  If
10170 	     the addresses aren't equal, then it might be a false dependency.  */
10171 	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10172 	    {
10173 	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10174 		  || GET_CODE (SET_DEST (dep_pat)) != MEM
10175 		  || GET_CODE (SET_SRC (pat)) != MEM
10176 		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10177 				    XEXP (SET_SRC (pat), 0)))
10178 		return cost + 2;
10179 
10180 	      return cost + 8;
10181 	    }
10182 	  break;
10183 
10184 	case TYPE_BRANCH:
10185 	  /* Compare to branch latency is 0.  There is no benefit from
10186 	     separating compare and branch.  */
10187 	  if (dep_type == TYPE_COMPARE)
10188 	    return 0;
10189 	  /* Floating point compare to branch latency is less than
10190 	     compare to conditional move.  */
10191 	  if (dep_type == TYPE_FPCMP)
10192 	    return cost - 1;
10193 	  break;
10194 	default:
10195 	  break;
10196 	}
10197 	break;
10198 
10199     case REG_DEP_ANTI:
10200       /* Anti-dependencies only penalize the fpu unit.  */
10201       if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10202         return 0;
10203       break;
10204 
10205     default:
10206       break;
10207     }
10208 
10209   return cost;
10210 }
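
/* Likewise for the load-after-store case above (hypothetical insns, base
   COST of 1): "st %o2, [%o1]" followed by "ld [%o1], %o3" reads back the
   location just stored and is costed 1 + 8 to keep the two insns apart,
   whereas a load whose address cannot be proved equal to that of the
   store is costed only 1 + 2.  */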
10211 
10212 static int
10213 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10214 		   unsigned int)
10215 {
10216   switch (sparc_cpu)
10217     {
10218     case PROCESSOR_SUPERSPARC:
10219       cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10220       break;
10221     case PROCESSOR_HYPERSPARC:
10222     case PROCESSOR_SPARCLITE86X:
10223       cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10224       break;
10225     default:
10226       break;
10227     }
10228   return cost;
10229 }
10230 
10231 static void
10232 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10233 		  int sched_verbose ATTRIBUTE_UNUSED,
10234 		  int max_ready ATTRIBUTE_UNUSED)
10235 {}
10236 
10237 static int
10238 sparc_use_sched_lookahead (void)
10239 {
10240   switch (sparc_cpu)
10241     {
10242     case PROCESSOR_ULTRASPARC:
10243     case PROCESSOR_ULTRASPARC3:
10244       return 4;
10245     case PROCESSOR_SUPERSPARC:
10246     case PROCESSOR_HYPERSPARC:
10247     case PROCESSOR_SPARCLITE86X:
10248       return 3;
10249     case PROCESSOR_NIAGARA4:
10250     case PROCESSOR_NIAGARA7:
10251     case PROCESSOR_M8:
10252       return 2;
10253     case PROCESSOR_NIAGARA:
10254     case PROCESSOR_NIAGARA2:
10255     case PROCESSOR_NIAGARA3:
10256     default:
10257       return 0;
10258     }
10259 }
10260 
10261 static int
10262 sparc_issue_rate (void)
10263 {
10264   switch (sparc_cpu)
10265     {
10266     case PROCESSOR_ULTRASPARC:
10267     case PROCESSOR_ULTRASPARC3:
10268     case PROCESSOR_M8:
10269       return 4;
10270     case PROCESSOR_SUPERSPARC:
10271       return 3;
10272     case PROCESSOR_HYPERSPARC:
10273     case PROCESSOR_SPARCLITE86X:
10274     case PROCESSOR_V9:
10275       /* Assume V9 processors are capable of at least dual-issue.  */
10276     case PROCESSOR_NIAGARA4:
10277     case PROCESSOR_NIAGARA7:
10278       return 2;
10279     case PROCESSOR_NIAGARA:
10280     case PROCESSOR_NIAGARA2:
10281     case PROCESSOR_NIAGARA3:
10282     default:
10283       return 1;
10284     }
10285 }
10286 
10287 int
10288 sparc_branch_cost (bool speed_p, bool predictable_p)
10289 {
10290   if (!speed_p)
10291     return 2;
10292 
10293   /* For pre-V9 processors we use a single value (usually 3) to take into
10294      account the potential annulling of the delay slot (which ends up being
10295      a bubble in the pipeline) plus a cycle to take into account the
10296      instruction cache effects.
10297 
10298      On V9 and later processors, which have branch prediction facilities,
10299      we take into account whether the branch is (easily) predictable.  */
10300   const int cost = sparc_costs->branch_cost;
10301 
10302   switch (sparc_cpu)
10303     {
10304     case PROCESSOR_V9:
10305     case PROCESSOR_ULTRASPARC:
10306     case PROCESSOR_ULTRASPARC3:
10307     case PROCESSOR_NIAGARA:
10308     case PROCESSOR_NIAGARA2:
10309     case PROCESSOR_NIAGARA3:
10310     case PROCESSOR_NIAGARA4:
10311     case PROCESSOR_NIAGARA7:
10312     case PROCESSOR_M8:
10313       return cost + (predictable_p ? 0 : 2);
10314 
10315     default:
10316       return cost;
10317     }
10318 }
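
/* For instance, with a cost table whose branch_cost is 3 (the usual
   value), the V9-class processors above report 3 for an easily predicted
   branch and 3 + 2 = 5 for an unpredictable one; when optimizing for
   size the answer is always 2, whatever the processor.  */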
10319 
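/* Return 1 if the single-SET pattern of INSN is known to zero the upper
   32 bits of its destination, -1 if it is known to sign-extend them, and
   0 if nothing can be deduced.  Used by sparc_check_64 below.  */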
10320 static int
10321 set_extends (rtx_insn *insn)
10322 {
10323   rtx pat = PATTERN (insn);
10324 
10325   switch (GET_CODE (SET_SRC (pat)))
10326     {
10327       /* Load and some shift instructions zero extend.  */
10328     case MEM:
10329     case ZERO_EXTEND:
10330       /* sethi clears the high bits.  */
10331     case HIGH:
10332       /* LO_SUM is used with sethi.  sethi cleared the high
10333 	 bits and the values used with lo_sum are positive.  */
10334     case LO_SUM:
10335       /* Store flag stores 0 or 1.  */
10336     case LT: case LTU:
10337     case GT: case GTU:
10338     case LE: case LEU:
10339     case GE: case GEU:
10340     case EQ:
10341     case NE:
10342       return 1;
10343     case AND:
10344       {
10345 	rtx op0 = XEXP (SET_SRC (pat), 0);
10346 	rtx op1 = XEXP (SET_SRC (pat), 1);
10347 	if (GET_CODE (op1) == CONST_INT)
10348 	  return INTVAL (op1) >= 0;
10349 	if (GET_CODE (op0) != REG)
10350 	  return 0;
10351 	if (sparc_check_64 (op0, insn) == 1)
10352 	  return 1;
10353 	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10354       }
10355     case IOR:
10356     case XOR:
10357       {
10358 	rtx op0 = XEXP (SET_SRC (pat), 0);
10359 	rtx op1 = XEXP (SET_SRC (pat), 1);
10360 	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10361 	  return 0;
10362 	if (GET_CODE (op1) == CONST_INT)
10363 	  return INTVAL (op1) >= 0;
10364 	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10365       }
10366     case LSHIFTRT:
10367       return GET_MODE (SET_SRC (pat)) == SImode;
10368       /* Positive integers leave the high bits zero.  */
10369     case CONST_INT:
10370       return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10371     case ASHIFTRT:
10372     case SIGN_EXTEND:
10373       return - (GET_MODE (SET_SRC (pat)) == SImode);
10374     case REG:
10375       return sparc_check_64 (SET_SRC (pat), insn);
10376     default:
10377       return 0;
10378     }
10379 }
10380 
10381 /* We _ought_ to have only one kind per function, but...  */
10382 static GTY(()) rtx sparc_addr_diff_list;
10383 static GTY(()) rtx sparc_addr_list;
10384 
10385 void
10386 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10387 {
10388   vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10389   if (diff)
10390     sparc_addr_diff_list
10391       = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10392   else
10393     sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10394 }
10395 
10396 static void
10397 sparc_output_addr_vec (rtx vec)
10398 {
10399   rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10400   int idx, vlen = XVECLEN (body, 0);
10401 
10402 #ifdef ASM_OUTPUT_ADDR_VEC_START
10403   ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10404 #endif
10405 
10406 #ifdef ASM_OUTPUT_CASE_LABEL
10407   ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10408 			 NEXT_INSN (lab));
10409 #else
10410   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10411 #endif
10412 
10413   for (idx = 0; idx < vlen; idx++)
10414     {
10415       ASM_OUTPUT_ADDR_VEC_ELT
10416 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10417     }
10418 
10419 #ifdef ASM_OUTPUT_ADDR_VEC_END
10420   ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10421 #endif
10422 }
10423 
10424 static void
10425 sparc_output_addr_diff_vec (rtx vec)
10426 {
10427   rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10428   rtx base = XEXP (XEXP (body, 0), 0);
10429   int idx, vlen = XVECLEN (body, 1);
10430 
10431 #ifdef ASM_OUTPUT_ADDR_VEC_START
10432   ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10433 #endif
10434 
10435 #ifdef ASM_OUTPUT_CASE_LABEL
10436   ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10437 			 NEXT_INSN (lab));
10438 #else
10439   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10440 #endif
10441 
10442   for (idx = 0; idx < vlen; idx++)
10443     {
10444       ASM_OUTPUT_ADDR_DIFF_ELT
10445         (asm_out_file,
10446          body,
10447          CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10448          CODE_LABEL_NUMBER (base));
10449     }
10450 
10451 #ifdef ASM_OUTPUT_ADDR_VEC_END
10452   ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10453 #endif
10454 }
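
/* Sketch of the output for a three-entry table in 32-bit mode (label
   numbers hypothetical): sparc_output_addr_vec emits

	.L27:
		.word	.L21
		.word	.L23
		.word	.L25

   while sparc_output_addr_diff_vec emits ".word .L21-.L20" style entries
   taken relative to the base label of the table.  */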
10455 
10456 static void
10457 sparc_output_deferred_case_vectors (void)
10458 {
10459   rtx t;
10460   int align;
10461 
10462   if (sparc_addr_list == NULL_RTX
10463       && sparc_addr_diff_list == NULL_RTX)
10464     return;
10465 
10466   /* Align to cache line in the function's code section.  */
10467   switch_to_section (current_function_section ());
10468 
10469   align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10470   if (align > 0)
10471     ASM_OUTPUT_ALIGN (asm_out_file, align);
10472 
10473   for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10474     sparc_output_addr_vec (XEXP (t, 0));
10475   for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10476     sparc_output_addr_diff_vec (XEXP (t, 0));
10477 
10478   sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10479 }
10480 
10481 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10482    unknown.  Return 1 if the high bits are zero, -1 if the register is
10483    sign extended.  */
10484 int
10485 sparc_check_64 (rtx x, rtx_insn *insn)
10486 {
10487   /* If a register is set only once it is safe to ignore insns this
10488      code does not know how to handle.  The loop will either recognize
10489      the single set and return the correct value or fail to recognize
10490      it and return 0.  */
10491   int set_once = 0;
10492   rtx y = x;
10493 
10494   gcc_assert (GET_CODE (x) == REG);
10495 
10496   if (GET_MODE (x) == DImode)
10497     y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10498 
10499   if (flag_expensive_optimizations
10500       && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10501     set_once = 1;
10502 
10503   if (insn == 0)
10504     {
10505       if (set_once)
10506 	insn = get_last_insn_anywhere ();
10507       else
10508 	return 0;
10509     }
10510 
10511   while ((insn = PREV_INSN (insn)))
10512     {
10513       switch (GET_CODE (insn))
10514 	{
10515 	case JUMP_INSN:
10516 	case NOTE:
10517 	  break;
10518 	case CODE_LABEL:
10519 	case CALL_INSN:
10520 	default:
10521 	  if (! set_once)
10522 	    return 0;
10523 	  break;
10524 	case INSN:
10525 	  {
10526 	    rtx pat = PATTERN (insn);
10527 	    if (GET_CODE (pat) != SET)
10528 	      return 0;
10529 	    if (rtx_equal_p (x, SET_DEST (pat)))
10530 	      return set_extends (insn);
10531 	    if (y && rtx_equal_p (y, SET_DEST (pat)))
10532 	      return set_extends (insn);
10533 	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10534 	      return 0;
10535 	  }
10536 	}
10537     }
10538   return 0;
10539 }
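
/* For example (hypothetical insn): after "(set (reg:SI %o0) (lshiftrt:SI
   ...))", querying %o0 returns 1, because set_extends knows an SImode
   logical right shift leaves the upper 32 bits zero; after a plain
   addition it returns 0, since nothing is known about the high word.  */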
10540 
10541 /* Output a wide shift instruction in V8+ mode.  INSN is the instruction,
10542    OPERANDS are its operands and OPCODE is the mnemonic to be used.  */
10543 
10544 const char *
10545 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10546 {
10547   static char asm_code[60];
10548 
10549   /* The scratch register is only required when the destination
10550      register is not a 64-bit global or out register.  */
10551   if (which_alternative != 2)
10552     operands[3] = operands[0];
10553 
10554   /* We can only shift by constants <= 63. */
10555   if (GET_CODE (operands[2]) == CONST_INT)
10556     operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10557 
10558   if (GET_CODE (operands[1]) == CONST_INT)
10559     {
10560       output_asm_insn ("mov\t%1, %3", operands);
10561     }
10562   else
10563     {
10564       output_asm_insn ("sllx\t%H1, 32, %3", operands);
10565       if (sparc_check_64 (operands[1], insn) <= 0)
10566 	output_asm_insn ("srl\t%L1, 0, %L1", operands);
10567       output_asm_insn ("or\t%L1, %3, %3", operands);
10568     }
10569 
10570   strcpy (asm_code, opcode);
10571 
10572   if (which_alternative != 2)
10573     return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10574   else
10575     return
10576       strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10577 }
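
/* Sketch of the sequence for the scratch-register alternative (register
   choices hypothetical), shifting the 64-bit value held in the pair
   %o0/%o1 by %o2 with "sllx" as OPCODE:

	sllx	%o0, 32, %g1	! reassemble the 64-bit value in %g1 ...
	srl	%o1, 0, %o1	! ... clearing the upper half of the low
	or	%o1, %g1, %g1	! word unless it is known to be zero
	sllx	%g1, %o2, %g1	! the shift proper
	srlx	%g1, 32, %o0	! split the result back into the pair
	mov	%g1, %o1
   */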
10578 
10579 /* Output rtl to increment the profiler label LABELNO
10580    for profiling a function entry.  */
10581 
10582 void
10583 sparc_profile_hook (int labelno)
10584 {
10585   char buf[32];
10586   rtx lab, fun;
10587 
10588   fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10589   if (NO_PROFILE_COUNTERS)
10590     {
10591       emit_library_call (fun, LCT_NORMAL, VOIDmode);
10592     }
10593   else
10594     {
10595       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10596       lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10597       emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10598     }
10599 }
10600 
10601 #ifdef TARGET_SOLARIS
10602 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
10603 
10604 static void
10605 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10606 				     tree decl ATTRIBUTE_UNUSED)
10607 {
10608   if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10609     {
10610       solaris_elf_asm_comdat_section (name, flags, decl);
10611       return;
10612     }
10613 
10614   fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10615 
10616   if (!(flags & SECTION_DEBUG))
10617     fputs (",#alloc", asm_out_file);
10618 #if HAVE_GAS_SECTION_EXCLUDE
10619   if (flags & SECTION_EXCLUDE)
10620     fputs (",#exclude", asm_out_file);
10621 #endif
10622   if (flags & SECTION_WRITE)
10623     fputs (",#write", asm_out_file);
10624   if (flags & SECTION_TLS)
10625     fputs (",#tls", asm_out_file);
10626   if (flags & SECTION_CODE)
10627     fputs (",#execinstr", asm_out_file);
10628 
10629   if (flags & SECTION_NOTYPE)
10630     ;
10631   else if (flags & SECTION_BSS)
10632     fputs (",#nobits", asm_out_file);
10633   else
10634     fputs (",#progbits", asm_out_file);
10635 
10636   fputc ('\n', asm_out_file);
10637 }
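
/* So a writable data section named ".mydata" (name hypothetical) comes
   out as:

	.section	".mydata",#alloc,#write,#progbits
   */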
10638 #endif /* TARGET_SOLARIS */
10639 
10640 /* We do not allow indirect calls to be optimized into sibling calls.
10641 
10642    We cannot use sibling calls when delayed branches are disabled
10643    because they will likely require the call delay slot to be filled.
10644 
10645    Also, on SPARC 32-bit we cannot emit a sibling call when the
10646    current function returns a structure.  This is because the "unimp
10647    after call" convention would cause the callee to return to the
10648    wrong place.  The generic code already disallows cases where the
10649    function being called returns a structure.
10650 
10651    It may seem strange how this last case could occur.  Usually there
10652    is code after the call which jumps to epilogue code which dumps the
10653    return value into the struct return area.  That ought to invalidate
10654    the sibling call right?  Well, in the C++ case we can end up passing
10655    the pointer to the struct return area to a constructor (which returns
10656    void) and then nothing else happens.  Such a sibling call would look
10657    valid without the added check here.
10658 
10659    VxWorks PIC PLT entries require the global pointer to be initialized
10660    on entry.  We therefore can't emit sibling calls to them.  */
10661 static bool
10662 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10663 {
10664   return (decl
10665 	  && flag_delayed_branch
10666 	  && (TARGET_ARCH64 || ! cfun->returns_struct)
10667 	  && !(TARGET_VXWORKS_RTP
10668 	       && flag_pic
10669 	       && !targetm.binds_local_p (decl)));
10670 }
10671 
10672 /* libfunc renaming.  */
10673 
10674 static void
10675 sparc_init_libfuncs (void)
10676 {
10677   if (TARGET_ARCH32)
10678     {
10679       /* Use the subroutines that Sun's library provides for integer
10680 	 multiply and divide.  The `*' prevents an underscore from
10681 	 being prepended by the compiler; .umul is a little faster
10682 	 than .mul.  */
10683       set_optab_libfunc (smul_optab, SImode, "*.umul");
10684       set_optab_libfunc (sdiv_optab, SImode, "*.div");
10685       set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10686       set_optab_libfunc (smod_optab, SImode, "*.rem");
10687       set_optab_libfunc (umod_optab, SImode, "*.urem");
10688 
10689       /* TFmode arithmetic.  These names are part of the SPARC 32-bit ABI.  */
10690       set_optab_libfunc (add_optab, TFmode, "_Q_add");
10691       set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10692       set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10693       set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10694       set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10695 
10696       /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
10697 	 is because with soft-float, the SFmode and DFmode sqrt
10698 	 instructions will be absent, and the compiler will notice and
10699 	 try to use the TFmode sqrt instruction for calls to the
10700 	 builtin function sqrt, but this fails.  */
10701       if (TARGET_FPU)
10702 	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10703 
10704       set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10705       set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10706       set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10707       set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10708       set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10709       set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10710 
10711       set_conv_libfunc (sext_optab,   TFmode, SFmode, "_Q_stoq");
10712       set_conv_libfunc (sext_optab,   TFmode, DFmode, "_Q_dtoq");
10713       set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_Q_qtos");
10714       set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_Q_qtod");
10715 
10716       set_conv_libfunc (sfix_optab,   SImode, TFmode, "_Q_qtoi");
10717       set_conv_libfunc (ufix_optab,   SImode, TFmode, "_Q_qtou");
10718       set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10719       set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10720 
10721       if (DITF_CONVERSION_LIBFUNCS)
10722 	{
10723 	  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_Q_qtoll");
10724 	  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_Q_qtoull");
10725 	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10726 	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10727 	}
10728 
10729       if (SUN_CONVERSION_LIBFUNCS)
10730 	{
10731 	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10732 	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10733 	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10734 	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10735 	}
10736     }
10737   if (TARGET_ARCH64)
10738     {
10739       /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10740 	 do not exist in the library.  Make sure the compiler does not
10741 	 emit calls to them by accident.  (It should always use the
10742          hardware instructions.)  */
10743       set_optab_libfunc (smul_optab, SImode, 0);
10744       set_optab_libfunc (sdiv_optab, SImode, 0);
10745       set_optab_libfunc (udiv_optab, SImode, 0);
10746       set_optab_libfunc (smod_optab, SImode, 0);
10747       set_optab_libfunc (umod_optab, SImode, 0);
10748 
10749       if (SUN_INTEGER_MULTIPLY_64)
10750 	{
10751 	  set_optab_libfunc (smul_optab, DImode, "__mul64");
10752 	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
10753 	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10754 	  set_optab_libfunc (smod_optab, DImode, "__rem64");
10755 	  set_optab_libfunc (umod_optab, DImode, "__urem64");
10756 	}
10757 
10758       if (SUN_CONVERSION_LIBFUNCS)
10759 	{
10760 	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10761 	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10762 	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10763 	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10764 	}
10765     }
10766 }
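
/* The upshot (illustrative): on 32-bit targets a SImode multiply that is
   not open-coded becomes a call to ".umul" and a TFmode (long double)
   addition a call to "_Q_add", while on 64-bit targets the SImode entries
   are cleared so that only the hardware instructions can be used.  */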
10767 
10768 /* SPARC builtins.  */
10769 enum sparc_builtins
10770 {
10771   /* FPU builtins.  */
10772   SPARC_BUILTIN_LDFSR,
10773   SPARC_BUILTIN_STFSR,
10774 
10775   /* VIS 1.0 builtins.  */
10776   SPARC_BUILTIN_FPACK16,
10777   SPARC_BUILTIN_FPACK32,
10778   SPARC_BUILTIN_FPACKFIX,
10779   SPARC_BUILTIN_FEXPAND,
10780   SPARC_BUILTIN_FPMERGE,
10781   SPARC_BUILTIN_FMUL8X16,
10782   SPARC_BUILTIN_FMUL8X16AU,
10783   SPARC_BUILTIN_FMUL8X16AL,
10784   SPARC_BUILTIN_FMUL8SUX16,
10785   SPARC_BUILTIN_FMUL8ULX16,
10786   SPARC_BUILTIN_FMULD8SUX16,
10787   SPARC_BUILTIN_FMULD8ULX16,
10788   SPARC_BUILTIN_FALIGNDATAV4HI,
10789   SPARC_BUILTIN_FALIGNDATAV8QI,
10790   SPARC_BUILTIN_FALIGNDATAV2SI,
10791   SPARC_BUILTIN_FALIGNDATADI,
10792   SPARC_BUILTIN_WRGSR,
10793   SPARC_BUILTIN_RDGSR,
10794   SPARC_BUILTIN_ALIGNADDR,
10795   SPARC_BUILTIN_ALIGNADDRL,
10796   SPARC_BUILTIN_PDIST,
10797   SPARC_BUILTIN_EDGE8,
10798   SPARC_BUILTIN_EDGE8L,
10799   SPARC_BUILTIN_EDGE16,
10800   SPARC_BUILTIN_EDGE16L,
10801   SPARC_BUILTIN_EDGE32,
10802   SPARC_BUILTIN_EDGE32L,
10803   SPARC_BUILTIN_FCMPLE16,
10804   SPARC_BUILTIN_FCMPLE32,
10805   SPARC_BUILTIN_FCMPNE16,
10806   SPARC_BUILTIN_FCMPNE32,
10807   SPARC_BUILTIN_FCMPGT16,
10808   SPARC_BUILTIN_FCMPGT32,
10809   SPARC_BUILTIN_FCMPEQ16,
10810   SPARC_BUILTIN_FCMPEQ32,
10811   SPARC_BUILTIN_FPADD16,
10812   SPARC_BUILTIN_FPADD16S,
10813   SPARC_BUILTIN_FPADD32,
10814   SPARC_BUILTIN_FPADD32S,
10815   SPARC_BUILTIN_FPSUB16,
10816   SPARC_BUILTIN_FPSUB16S,
10817   SPARC_BUILTIN_FPSUB32,
10818   SPARC_BUILTIN_FPSUB32S,
10819   SPARC_BUILTIN_ARRAY8,
10820   SPARC_BUILTIN_ARRAY16,
10821   SPARC_BUILTIN_ARRAY32,
10822 
10823   /* VIS 2.0 builtins.  */
10824   SPARC_BUILTIN_EDGE8N,
10825   SPARC_BUILTIN_EDGE8LN,
10826   SPARC_BUILTIN_EDGE16N,
10827   SPARC_BUILTIN_EDGE16LN,
10828   SPARC_BUILTIN_EDGE32N,
10829   SPARC_BUILTIN_EDGE32LN,
10830   SPARC_BUILTIN_BMASK,
10831   SPARC_BUILTIN_BSHUFFLEV4HI,
10832   SPARC_BUILTIN_BSHUFFLEV8QI,
10833   SPARC_BUILTIN_BSHUFFLEV2SI,
10834   SPARC_BUILTIN_BSHUFFLEDI,
10835 
10836   /* VIS 3.0 builtins.  */
10837   SPARC_BUILTIN_CMASK8,
10838   SPARC_BUILTIN_CMASK16,
10839   SPARC_BUILTIN_CMASK32,
10840   SPARC_BUILTIN_FCHKSM16,
10841   SPARC_BUILTIN_FSLL16,
10842   SPARC_BUILTIN_FSLAS16,
10843   SPARC_BUILTIN_FSRL16,
10844   SPARC_BUILTIN_FSRA16,
10845   SPARC_BUILTIN_FSLL32,
10846   SPARC_BUILTIN_FSLAS32,
10847   SPARC_BUILTIN_FSRL32,
10848   SPARC_BUILTIN_FSRA32,
10849   SPARC_BUILTIN_PDISTN,
10850   SPARC_BUILTIN_FMEAN16,
10851   SPARC_BUILTIN_FPADD64,
10852   SPARC_BUILTIN_FPSUB64,
10853   SPARC_BUILTIN_FPADDS16,
10854   SPARC_BUILTIN_FPADDS16S,
10855   SPARC_BUILTIN_FPSUBS16,
10856   SPARC_BUILTIN_FPSUBS16S,
10857   SPARC_BUILTIN_FPADDS32,
10858   SPARC_BUILTIN_FPADDS32S,
10859   SPARC_BUILTIN_FPSUBS32,
10860   SPARC_BUILTIN_FPSUBS32S,
10861   SPARC_BUILTIN_FUCMPLE8,
10862   SPARC_BUILTIN_FUCMPNE8,
10863   SPARC_BUILTIN_FUCMPGT8,
10864   SPARC_BUILTIN_FUCMPEQ8,
10865   SPARC_BUILTIN_FHADDS,
10866   SPARC_BUILTIN_FHADDD,
10867   SPARC_BUILTIN_FHSUBS,
10868   SPARC_BUILTIN_FHSUBD,
10869   SPARC_BUILTIN_FNHADDS,
10870   SPARC_BUILTIN_FNHADDD,
10871   SPARC_BUILTIN_UMULXHI,
10872   SPARC_BUILTIN_XMULX,
10873   SPARC_BUILTIN_XMULXHI,
10874 
10875   /* VIS 4.0 builtins.  */
10876   SPARC_BUILTIN_FPADD8,
10877   SPARC_BUILTIN_FPADDS8,
10878   SPARC_BUILTIN_FPADDUS8,
10879   SPARC_BUILTIN_FPADDUS16,
10880   SPARC_BUILTIN_FPCMPLE8,
10881   SPARC_BUILTIN_FPCMPGT8,
10882   SPARC_BUILTIN_FPCMPULE16,
10883   SPARC_BUILTIN_FPCMPUGT16,
10884   SPARC_BUILTIN_FPCMPULE32,
10885   SPARC_BUILTIN_FPCMPUGT32,
10886   SPARC_BUILTIN_FPMAX8,
10887   SPARC_BUILTIN_FPMAX16,
10888   SPARC_BUILTIN_FPMAX32,
10889   SPARC_BUILTIN_FPMAXU8,
10890   SPARC_BUILTIN_FPMAXU16,
10891   SPARC_BUILTIN_FPMAXU32,
10892   SPARC_BUILTIN_FPMIN8,
10893   SPARC_BUILTIN_FPMIN16,
10894   SPARC_BUILTIN_FPMIN32,
10895   SPARC_BUILTIN_FPMINU8,
10896   SPARC_BUILTIN_FPMINU16,
10897   SPARC_BUILTIN_FPMINU32,
10898   SPARC_BUILTIN_FPSUB8,
10899   SPARC_BUILTIN_FPSUBS8,
10900   SPARC_BUILTIN_FPSUBUS8,
10901   SPARC_BUILTIN_FPSUBUS16,
10902 
10903   /* VIS 4.0B builtins.  */
10904 
10905   /* Note that all the DICTUNPACK* entries should be kept
10906      contiguous.  */
10907   SPARC_BUILTIN_FIRST_DICTUNPACK,
10908   SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10909   SPARC_BUILTIN_DICTUNPACK16,
10910   SPARC_BUILTIN_DICTUNPACK32,
10911   SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10912 
10913   /* Note that all the FPCMP*SHL entries should be kept
10914      contiguous.  */
10915   SPARC_BUILTIN_FIRST_FPCMPSHL,
10916   SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10917   SPARC_BUILTIN_FPCMPGT8SHL,
10918   SPARC_BUILTIN_FPCMPEQ8SHL,
10919   SPARC_BUILTIN_FPCMPNE8SHL,
10920   SPARC_BUILTIN_FPCMPLE16SHL,
10921   SPARC_BUILTIN_FPCMPGT16SHL,
10922   SPARC_BUILTIN_FPCMPEQ16SHL,
10923   SPARC_BUILTIN_FPCMPNE16SHL,
10924   SPARC_BUILTIN_FPCMPLE32SHL,
10925   SPARC_BUILTIN_FPCMPGT32SHL,
10926   SPARC_BUILTIN_FPCMPEQ32SHL,
10927   SPARC_BUILTIN_FPCMPNE32SHL,
10928   SPARC_BUILTIN_FPCMPULE8SHL,
10929   SPARC_BUILTIN_FPCMPUGT8SHL,
10930   SPARC_BUILTIN_FPCMPULE16SHL,
10931   SPARC_BUILTIN_FPCMPUGT16SHL,
10932   SPARC_BUILTIN_FPCMPULE32SHL,
10933   SPARC_BUILTIN_FPCMPUGT32SHL,
10934   SPARC_BUILTIN_FPCMPDE8SHL,
10935   SPARC_BUILTIN_FPCMPDE16SHL,
10936   SPARC_BUILTIN_FPCMPDE32SHL,
10937   SPARC_BUILTIN_FPCMPUR8SHL,
10938   SPARC_BUILTIN_FPCMPUR16SHL,
10939   SPARC_BUILTIN_FPCMPUR32SHL,
10940   SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10941 
10942   SPARC_BUILTIN_MAX
10943 };
10944 
10945 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10946 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
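
/* Once defined, a builtin can be looked up in both directions; e.g.
   sparc_builtins[SPARC_BUILTIN_FPACK16] holds the decl registered for
   __builtin_vis_fpack16 and sparc_builtins_icode[SPARC_BUILTIN_FPACK16]
   the CODE_FOR_fpack16_vis insn code used to expand calls to it.  */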
10947 
10948 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10949    The instruction should require a constant operand of some sort.  The
10950    function prints an error if OPVAL is not valid.  */
10951 
10952 static bool
10953 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10954 {
10955   if (GET_CODE (opval) != CONST_INT)
10956     {
10957       error ("%qs expects a constant argument", insn_data[icode].name);
10958       return false;
10959     }
10960 
10961   if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10962     {
10963       error ("constant argument out of range for %qs", insn_data[icode].name);
10964       return false;
10965     }
10966   return true;
10967 }
10968 
10969 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE.  Return the
10970    function decl or NULL_TREE if the builtin was not added.  */
10971 
10972 static tree
10973 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10974 	     tree type)
10975 {
10976   tree t
10977     = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10978 
10979   if (t)
10980     {
10981       sparc_builtins[code] = t;
10982       sparc_builtins_icode[code] = icode;
10983     }
10984 
10985   return t;
10986 }
10987 
10988 /* Likewise, but also marks the function as "const".  */
10989 
10990 static tree
10991 def_builtin_const (const char *name, enum insn_code icode,
10992 		   enum sparc_builtins code, tree type)
10993 {
10994   tree t = def_builtin (name, icode, code, type);
10995 
10996   if (t)
10997     TREE_READONLY (t) = 1;
10998 
10999   return t;
11000 }
11001 
11002 /* Implement the TARGET_INIT_BUILTINS target hook.
11003    Create builtin functions for special SPARC instructions.  */
11004 
11005 static void
11006 sparc_init_builtins (void)
11007 {
11008   if (TARGET_FPU)
11009     sparc_fpu_init_builtins ();
11010 
11011   if (TARGET_VIS)
11012     sparc_vis_init_builtins ();
11013 }
11014 
11015 /* Create builtin functions for FPU instructions.  */
11016 
11017 static void
11018 sparc_fpu_init_builtins (void)
11019 {
11020   tree ftype
11021     = build_function_type_list (void_type_node,
11022 				build_pointer_type (unsigned_type_node), 0);
11023   def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
11024 	       SPARC_BUILTIN_LDFSR, ftype);
11025   def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
11026 	       SPARC_BUILTIN_STFSR, ftype);
11027 }
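
/* Sample use from user code (variable name hypothetical):

	unsigned int fsr;
	__builtin_store_fsr (&fsr);	// write %fsr out to memory
	fsr &= ~0x1f;			// clear a field (illustrative)
	__builtin_load_fsr (&fsr);	// reload %fsr from memory
   */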
11028 
11029 /* Create builtin functions for VIS instructions.  */
11030 
11031 static void
11032 sparc_vis_init_builtins (void)
11033 {
11034   tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
11035   tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
11036   tree v4hi = build_vector_type (intHI_type_node, 4);
11037   tree v2hi = build_vector_type (intHI_type_node, 2);
11038   tree v2si = build_vector_type (intSI_type_node, 2);
11039   tree v1si = build_vector_type (intSI_type_node, 1);
11040 
11041   tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
11042   tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
11043   tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
11044   tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
11045   tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
11046   tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
11047   tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
11048   tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
11049   tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
11050   tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
11051   tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
11052   tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
11053   tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
11054   tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
11055   tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
11056 							 v8qi, v8qi,
11057 							 intDI_type_node, 0);
11058   tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
11059 						      v8qi, v8qi, 0);
11060   tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
11061 						      v8qi, v8qi, 0);
11062   tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
11063 						    intSI_type_node, 0);
11064   tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
11065 						    intSI_type_node, 0);
11066   tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
11067 						    intSI_type_node, 0);
11068   tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11069 						  intDI_type_node,
11070 						  intDI_type_node, 0);
11071   tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11072 						  intSI_type_node,
11073 						  intSI_type_node, 0);
11074   tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11075 		        			    ptr_type_node,
11076 					            intSI_type_node, 0);
11077   tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11078 		        			    ptr_type_node,
11079 					            intDI_type_node, 0);
11080   tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11081 		        			    ptr_type_node,
11082 					            ptr_type_node, 0);
11083   tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11084 		        			    ptr_type_node,
11085 					            ptr_type_node, 0);
11086   tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11087 						      v4hi, v4hi, 0);
11088   tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11089 						      v2si, v2si, 0);
11090   tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11091 						      v4hi, v4hi, 0);
11092   tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11093 						      v2si, v2si, 0);
11094   tree void_ftype_di = build_function_type_list (void_type_node,
11095 						 intDI_type_node, 0);
11096   tree di_ftype_void = build_function_type_list (intDI_type_node,
11097 						 void_type_node, 0);
11098   tree void_ftype_si = build_function_type_list (void_type_node,
11099 						 intSI_type_node, 0);
11100   tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11101 						  float_type_node,
11102 						  float_type_node, 0);
11103   tree df_ftype_df_df = build_function_type_list (double_type_node,
11104 						  double_type_node,
11105 						  double_type_node, 0);
11106 
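  /* These builtins map essentially one-to-one onto VIS instructions;
     e.g. (typedef and function hypothetical):

	typedef short v4hi __attribute__ ((vector_size (8)));

	v4hi add16 (v4hi a, v4hi b)
	{
	  return __builtin_vis_fpadd16 (a, b);	// one fpadd16 insn
	}
     */
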
11107   /* Packing and expanding vectors.  */
11108   def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11109 	       SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11110   def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11111 	       SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11112   def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11113 	       SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11114   def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11115 		     SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11116   def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11117 		     SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11118 
11119   /* Multiplications.  */
11120   def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11121 		     SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11122   def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11123 		     SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11124   def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11125 		     SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11126   def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11127 		     SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11128   def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11129 		     SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11130   def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11131 		     SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11132   def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11133 		     SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11134 
11135   /* Data aligning.  */
11136   def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11137 	       SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11138   def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11139 	       SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11140   def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11141 	       SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11142   def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11143 	       SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11144 
11145   def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11146 	       SPARC_BUILTIN_WRGSR, void_ftype_di);
11147   def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11148 	       SPARC_BUILTIN_RDGSR, di_ftype_void);
11149 
11150   if (TARGET_ARCH64)
11151     {
11152       def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11153 		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11154       def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11155 		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11156     }
11157   else
11158     {
11159       def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11160 		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11161       def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11162 		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11163     }
11164 
11165   /* Pixel distance.  */
11166   def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11167 		     SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11168 
11169   /* Edge handling.  */
11170   if (TARGET_ARCH64)
11171     {
11172       def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11173 			 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11174       def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11175 			 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11176       def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11177 			 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11178       def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11179 			 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11180       def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11181 			 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11182       def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11183 			 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11184     }
11185   else
11186     {
11187       def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11188 			 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11189       def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11190 			 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11191       def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11192 			 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11193       def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11194 			 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11195       def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11196 			 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11197       def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11198 			 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11199     }
11200 
11201   /* Pixel compare.  */
11202   if (TARGET_ARCH64)
11203     {
11204       def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11205 			 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11206       def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11207 			 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11208       def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11209 			 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11210       def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11211 			 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11212       def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11213 			 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11214       def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11215 			 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11216       def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11217 			 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11218       def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11219 			 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11220     }
11221   else
11222     {
11223       def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11224 			 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11225       def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11226 			 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11227       def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11228 			 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11229       def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11230 			 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11231       def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11232 			 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11233       def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11234 			 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11235       def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11236 			 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11237       def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11238 			 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11239     }
11240 
11241   /* Addition and subtraction.  */
11242   def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11243 		     SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11244   def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11245 		     SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11246   def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11247 		     SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11248   def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11249 		     SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11250   def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11251 		     SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11252   def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11253 		     SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11254   def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11255 		     SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11256   def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11257 		     SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11258 
11259   /* Three-dimensional array addressing.  */
11260   if (TARGET_ARCH64)
11261     {
11262       def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11263 			 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11264       def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11265 			 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11266       def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11267 			 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11268     }
11269   else
11270     {
11271       def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11272 			 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11273       def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11274 			 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11275       def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11276 			 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11277     }
11278 
11279   if (TARGET_VIS2)
11280     {
11281       /* Edge handling.  */
11282       if (TARGET_ARCH64)
11283 	{
11284 	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11285 			     SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11286 	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11287 			     SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11288 	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11289 			     SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11290 	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11291 			     SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11292 	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11293 			     SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11294 	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11295 			     SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11296 	}
11297       else
11298 	{
11299 	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11300 			     SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11301 	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11302 			     SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11303 	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11304 			     SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11305 	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11306 			     SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11307 	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11308 			     SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11309 	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11310 			     SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11311 	}
11312 
11313       /* Byte mask and shuffle.  */
11314       if (TARGET_ARCH64)
11315 	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11316 		     SPARC_BUILTIN_BMASK, di_ftype_di_di);
11317       else
11318 	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11319 		     SPARC_BUILTIN_BMASK, si_ftype_si_si);
11320       def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11321 		   SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11322       def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11323 		   SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11324       def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11325 		   SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11326       def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11327 		   SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11328     }
11329 
11330   if (TARGET_VIS3)
11331     {
11332       if (TARGET_ARCH64)
11333 	{
11334 	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11335 		       SPARC_BUILTIN_CMASK8, void_ftype_di);
11336 	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11337 		       SPARC_BUILTIN_CMASK16, void_ftype_di);
11338 	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11339 		       SPARC_BUILTIN_CMASK32, void_ftype_di);
11340 	}
11341       else
11342 	{
11343 	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11344 		       SPARC_BUILTIN_CMASK8, void_ftype_si);
11345 	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11346 		       SPARC_BUILTIN_CMASK16, void_ftype_si);
11347 	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11348 		       SPARC_BUILTIN_CMASK32, void_ftype_si);
11349 	}
11350 
11351       def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11352 			 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11353 
11354       def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11355 			 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11356       def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11357 			 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11358       def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11359 			 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11360       def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11361 			 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11362       def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11363 			 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11364       def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11365 			 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11366       def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11367 			 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11368       def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11369 			 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11370 
11371       if (TARGET_ARCH64)
11372 	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11373 			   SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11374       else
11375 	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11376 			   SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11377 
11378       def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11379 			 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11380       def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11381 			 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11382       def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11383 			 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11384 
11385       def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11386 			 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11387       def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11388 			 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11389       def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11390 			 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11391       def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11392 			 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11393       def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11394 			 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11395       def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11396 			 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11397       def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11398 			 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11399       def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11400 			 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11401 
11402       if (TARGET_ARCH64)
11403 	{
11404 	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11405 			     SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11406 	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11407 			     SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11408 	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11409 			     SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11410 	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11411 			     SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11412 	}
11413       else
11414 	{
11415 	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11416 			     SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11417 	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11418 			     SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11419 	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11420 			     SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11421 	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11422 			     SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11423 	}
11424 
11425       def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11426 			 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11427       def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11428 			 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11429       def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11430 			 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11431       def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11432 			 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11433       def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11434 			 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11435       def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11436 			 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11437 
11438       def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11439 			 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11440       def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11441 			 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11442       def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11443 			 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11444     }
11445 
11446   if (TARGET_VIS4)
11447     {
11448       def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11449 			 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11450       def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11451 			 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11452       def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11453 			 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11454       def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11455 			 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11456 
11458       if (TARGET_ARCH64)
11459 	{
11460 	  def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11461 			     SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11462 	  def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11463 			     SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11464 	  def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11465 			     SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11466 	  def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11467 			     SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11468 	  def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11469 			     SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11470 	  def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11471 			     SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11472 	}
11473       else
11474 	{
11475 	  def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11476 			     SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11477 	  def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11478 			     SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11479 	  def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11480 			     SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11481 	  def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11482 			     SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11483 	  def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11484 			     SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11485 	  def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11486 			     SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11487 	}
11488 
11489       def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11490 			 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11491       def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11492 			 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11493       def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11494 			 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11495       def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11496 			 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11497       def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11498 			 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11499       def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11500 			 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11501       def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11502 			 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11503       def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11504 			 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11505       def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11506 			 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11507       def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11508 			 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11509       def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11510 			 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11511       def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11512 			 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11513       def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11514 			 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11515       def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11516 			 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11517       def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11518 			 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11519       def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11520 			 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11521     }
11522 
11523   if (TARGET_VIS4B)
11524     {
11525       def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11526 			 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11527       def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11528 			 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11529       def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11530 			 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11531 
11532       if (TARGET_ARCH64)
11533 	{
11534 	  tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11535 								 v8qi, v8qi,
11536 								 intSI_type_node, 0);
11537 	  tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11538 								 v4hi, v4hi,
11539 								 intSI_type_node, 0);
11540 	  tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11541 								 v2si, v2si,
11542 								 intSI_type_node, 0);
11543 
11544 	  def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11545 			     SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11546 	  def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11547 			     SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11548 	  def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11549 			     SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11550 	  def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11551 			     SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11552 
11553 	  def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11554 			     SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11555 	  def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11556 			     SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11557 	  def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11558 			     SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11559 	  def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11560 			     SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11561 
11562 	  def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11563 			     SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11564 	  def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11565 			     SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11566 	  def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11567 			     SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11568 	  def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11569 			     SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11570 
11571 
11572 	  def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11573 			     SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11574 	  def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11575 			     SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11576 
11577 	  def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11578 			     SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11579 	  def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11580 			     SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11581 
11582 	  def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11583 			     SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11584 	  def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11585 			     SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11586 
11587 	  def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11588 			     SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11589 	  def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11590 			     SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11591 	  def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11592 			     SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11593 
11594 	  def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11595 			     SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11596 	  def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11597 			     SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11598 	  def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11599 			     SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11600 
11601 	}
11602       else
11603 	{
11604 	  tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11605 								 v8qi, v8qi,
11606 								 intSI_type_node, 0);
11607 	  tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11608 								 v4hi, v4hi,
11609 								 intSI_type_node, 0);
11610 	  tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11611 								 v2si, v2si,
11612 								 intSI_type_node, 0);
11613 
11614 	  def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11615 			     SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11616 	  def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11617 			     SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11618 	  def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11619 			     SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11620 	  def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11621 			     SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11622 
11623 	  def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11624 			     SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11625 	  def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11626 			     SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11627 	  def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11628 			     SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11629 	  def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11630 			     SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11631 
11632 	  def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11633 			     SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11634 	  def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11635 			     SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11636 	  def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11637 			     SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11638 	  def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11639 			     SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11640 
11641 
11642 	  def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11643 			     SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11644 	  def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11645 			     SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11646 
11647 	  def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11648 			     SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11649 	  def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11650 			     SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11651 
11652 	  def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11653 			     SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11654 	  def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11655 			     SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11656 
11657 	  def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11658 			     SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11659 	  def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11660 			     SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11661 	  def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11662 			     SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11663 
11664 	  def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11665 			     SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11666 	  def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11667 			     SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11668 	  def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11669 			     SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11670 	}
11671     }
11672 }
11673 
11674 /* Implement TARGET_BUILTIN_DECL hook.  */
11675 
11676 static tree
11677 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11678 {
11679   if (code >= SPARC_BUILTIN_MAX)
11680     return error_mark_node;
11681 
11682   return sparc_builtins[code];
11683 }
11684 
11685 /* Implement TARGET_EXPAND_BUILTIN hook.  */
11686 
11687 static rtx
11688 sparc_expand_builtin (tree exp, rtx target,
11689 		      rtx subtarget ATTRIBUTE_UNUSED,
11690 		      machine_mode tmode ATTRIBUTE_UNUSED,
11691 		      int ignore ATTRIBUTE_UNUSED)
11692 {
11693   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11694   enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11695   enum insn_code icode = sparc_builtins_icode[code];
11696   bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11697   call_expr_arg_iterator iter;
11698   int arg_count = 0;
11699   rtx pat, op[4];
11700   tree arg;
11701 
11702   if (nonvoid)
11703     {
11704       machine_mode tmode = insn_data[icode].operand[0].mode;
11705       if (!target
11706 	  || GET_MODE (target) != tmode
11707 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11708 	op[0] = gen_reg_rtx (tmode);
11709       else
11710 	op[0] = target;
11711     }
11712   else
11713     op[0] = NULL_RTX;
11714 
11715   FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11716     {
11717       const struct insn_operand_data *insn_op;
11718       int idx;
11719 
11720       if (arg == error_mark_node)
11721 	return NULL_RTX;
11722 
11723       arg_count++;
11724       idx = arg_count - !nonvoid;
11725       insn_op = &insn_data[icode].operand[idx];
11726       op[arg_count] = expand_normal (arg);
11727 
11728       /* Some of the builtins require constant arguments.  We check
11729 	 for this here.  */
11730       if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11731 	   && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11732 	   && arg_count == 3)
11733 	  || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11734 	      && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11735 	      && arg_count == 2))
11736 	{
11737 	  if (!check_constant_argument (icode, idx, op[arg_count]))
11738 	    return const0_rtx;
11739 	}
11740 
11741       if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11742 	{
11743 	  if (!address_operand (op[arg_count], SImode))
11744 	    {
11745 	      op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11746 	      op[arg_count] = copy_addr_to_reg (op[arg_count]);
11747 	    }
11748 	  op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11749 	}
11750 
11751       else if (insn_op->mode == V1DImode
11752 	       && GET_MODE (op[arg_count]) == DImode)
11753 	op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11754 
11755       else if (insn_op->mode == V1SImode
11756 	       && GET_MODE (op[arg_count]) == SImode)
11757 	op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11758 
11759       if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11760 							insn_op->mode))
11761 	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11762     }
11763 
11764   switch (arg_count)
11765     {
11766     case 0:
11767       pat = GEN_FCN (icode) (op[0]);
11768       break;
11769     case 1:
11770       if (nonvoid)
11771 	pat = GEN_FCN (icode) (op[0], op[1]);
11772       else
11773 	pat = GEN_FCN (icode) (op[1]);
11774       break;
11775     case 2:
11776       pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11777       break;
11778     case 3:
11779       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11780       break;
11781     default:
11782       gcc_unreachable ();
11783     }
11784 
11785   if (!pat)
11786     return NULL_RTX;
11787 
11788   emit_insn (pat);
11789 
11790   return (nonvoid ? op[0] : const0_rtx);
11791 }
11792 
11793 /* Return the upper 16 bits of the 8x16 multiplication.  */
11794 
11795 static int
11796 sparc_vis_mul8x16 (int e8, int e16)
11797 {
11798   return (e8 * e16 + 128) / 256;
11799 }
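
/* For illustration: sparc_vis_mul8x16 (64, 512) = (64 * 512 + 128) / 256
   = 128, i.e. the product scaled down by 256 and rounded to nearest.  */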
11800 
11801 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11802    the result into the array N_ELTS, whose elements are of INNER_TYPE.  */
11803 
11804 static void
11805 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11806 			  tree inner_type, tree cst0, tree cst1)
11807 {
11808   unsigned i, num = VECTOR_CST_NELTS (cst0);
11809   int scale;
11810 
11811   switch (fncode)
11812     {
11813     case SPARC_BUILTIN_FMUL8X16:
11814       for (i = 0; i < num; ++i)
11815 	{
11816 	  int val
11817 	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11818 				 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11819 	  n_elts->quick_push (build_int_cst (inner_type, val));
11820 	}
11821       break;
11822 
11823     case SPARC_BUILTIN_FMUL8X16AU:
11824       scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11825 
11826       for (i = 0; i < num; ++i)
11827 	{
11828 	  int val
11829 	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11830 				 scale);
11831 	  n_elts->quick_push (build_int_cst (inner_type, val));
11832 	}
11833       break;
11834 
11835     case SPARC_BUILTIN_FMUL8X16AL:
11836       scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11837 
11838       for (i = 0; i < num; ++i)
11839 	{
11840 	  int val
11841 	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11842 				 scale);
11843 	  n_elts->quick_push (build_int_cst (inner_type, val));
11844 	}
11845       break;
11846 
11847     default:
11848       gcc_unreachable ();
11849     }
11850 }
11851 
11852 /* Implement TARGET_FOLD_BUILTIN hook.
11853 
11854    Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
11855    result of the function call is ignored.  NULL_TREE is returned if the
11856    function could not be folded.  */
11857 
11858 static tree
11859 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11860 		    tree *args, bool ignore)
11861 {
11862   enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11863   tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11864   tree arg0, arg1, arg2;
11865 
11866   if (ignore)
11867     switch (code)
11868       {
11869       case SPARC_BUILTIN_LDFSR:
11870       case SPARC_BUILTIN_STFSR:
11871       case SPARC_BUILTIN_ALIGNADDR:
11872       case SPARC_BUILTIN_WRGSR:
11873       case SPARC_BUILTIN_BMASK:
11874       case SPARC_BUILTIN_CMASK8:
11875       case SPARC_BUILTIN_CMASK16:
11876       case SPARC_BUILTIN_CMASK32:
11877 	break;
11878 
11879       default:
11880 	return build_zero_cst (rtype);
11881       }
11882 
11883   switch (code)
11884     {
11885     case SPARC_BUILTIN_FEXPAND:
11886       arg0 = args[0];
11887       STRIP_NOPS (arg0);
11888 
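      /* Constant-fold FEXPAND: each 8-bit element is widened to 16-bit
	 fixed point, i.e. shifted left by 4.  */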
11889       if (TREE_CODE (arg0) == VECTOR_CST)
11890 	{
11891 	  tree inner_type = TREE_TYPE (rtype);
11892 	  unsigned i;
11893 
11894 	  tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11895 	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11896 	    {
11897 	      unsigned HOST_WIDE_INT val
11898 		= TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11899 	      n_elts.quick_push (build_int_cst (inner_type, val << 4));
11900 	    }
11901 	  return n_elts.build ();
11902 	}
11903       break;
11904 
11905     case SPARC_BUILTIN_FMUL8X16:
11906     case SPARC_BUILTIN_FMUL8X16AU:
11907     case SPARC_BUILTIN_FMUL8X16AL:
11908       arg0 = args[0];
11909       arg1 = args[1];
11910       STRIP_NOPS (arg0);
11911       STRIP_NOPS (arg1);
11912 
11913       if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11914 	{
11915 	  tree inner_type = TREE_TYPE (rtype);
11916 	  tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11917 	  sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11918 	  return n_elts.build ();
11919 	}
11920       break;
11921 
11922     case SPARC_BUILTIN_FPMERGE:
11923       arg0 = args[0];
11924       arg1 = args[1];
11925       STRIP_NOPS (arg0);
11926       STRIP_NOPS (arg1);
11927 
11928       if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11929 	{
11930 	  tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11931 	  unsigned i;
11932 	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11933 	    {
11934 	      n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11935 	      n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11936 	    }
11937 
11938 	  return n_elts.build ();
11939 	}
11940       break;
11941 
11942     case SPARC_BUILTIN_PDIST:
11943     case SPARC_BUILTIN_PDISTN:
11944       arg0 = args[0];
11945       arg1 = args[1];
11946       STRIP_NOPS (arg0);
11947       STRIP_NOPS (arg1);
11948       if (code == SPARC_BUILTIN_PDIST)
11949 	{
11950 	  arg2 = args[2];
11951 	  STRIP_NOPS (arg2);
11952 	}
11953       else
11954 	arg2 = integer_zero_node;
11955 
11956       if (TREE_CODE (arg0) == VECTOR_CST
11957 	  && TREE_CODE (arg1) == VECTOR_CST
11958 	  && TREE_CODE (arg2) == INTEGER_CST)
11959 	{
11960 	  bool overflow = false;
11961 	  widest_int result = wi::to_widest (arg2);
11962 	  widest_int tmp;
11963 	  unsigned i;
11964 
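	  /* Sum |e0 - e1| over all elements, accumulating on top of ARG2
	     (the PDIST accumulator) and tracking overflow at each step.  */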
11965 	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11966 	    {
11967 	      tree e0 = VECTOR_CST_ELT (arg0, i);
11968 	      tree e1 = VECTOR_CST_ELT (arg1, i);
11969 
11970 	      wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11971 
11972 	      tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11973 	      tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11974 	      if (wi::neg_p (tmp))
11975 		tmp = wi::neg (tmp, &neg2_ovf);
11976 	      else
11977 		neg2_ovf = wi::OVF_NONE;
11978 	      result = wi::add (result, tmp, SIGNED, &add2_ovf);
11979 	      overflow |= ((neg1_ovf != wi::OVF_NONE)
11980 			   | (neg2_ovf != wi::OVF_NONE)
11981 			   | (add1_ovf != wi::OVF_NONE)
11982 			   | (add2_ovf != wi::OVF_NONE));
11983 	    }
11984 
11985 	  gcc_assert (!overflow);
11986 
11987 	  return wide_int_to_tree (rtype, result);
11988 	}
11989 
11990     default:
11991       break;
11992     }
11993 
11994   return NULL_TREE;
11995 }
11996 
11997 /* ??? This duplicates information provided to the compiler by the
11998    ??? scheduler description.  Some day, teach genautomata to output
11999    ??? the latencies and then CSE will just use that.  */
12000 
12001 static bool
12002 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
12003 		 int opno ATTRIBUTE_UNUSED,
12004 		 int *total, bool speed ATTRIBUTE_UNUSED)
12005 {
12006   int code = GET_CODE (x);
12007   bool float_mode_p = FLOAT_MODE_P (mode);
12008 
12009   switch (code)
12010     {
12011     case CONST_INT:
12012       if (SMALL_INT (x))
12013 	*total = 0;
12014       else
12015 	*total = 2;
12016       return true;
12017 
12018     case CONST_WIDE_INT:
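      /* Charge 2 for each 64-bit word of the constant that does not fit
	 in a simm13 and must be synthesized with extra insns.  */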
12019       *total = 0;
12020       if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
12021 	*total += 2;
12022       if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
12023 	*total += 2;
12024       return true;
12025 
12026     case HIGH:
12027       *total = 2;
12028       return true;
12029 
12030     case CONST:
12031     case LABEL_REF:
12032     case SYMBOL_REF:
12033       *total = 4;
12034       return true;
12035 
12036     case CONST_DOUBLE:
12037       *total = 8;
12038       return true;
12039 
12040     case MEM:
12041       /* If outer-code was a sign or zero extension, a cost
12042 	 of COSTS_N_INSNS (1) was already added in.  This is
12043 	 why we are subtracting it back out.  */
12044       if (outer_code == ZERO_EXTEND)
12045 	{
12046 	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
12047 	}
12048       else if (outer_code == SIGN_EXTEND)
12049 	{
12050 	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
12051 	}
12052       else if (float_mode_p)
12053 	{
12054 	  *total = sparc_costs->float_load;
12055 	}
12056       else
12057 	{
12058 	  *total = sparc_costs->int_load;
12059 	}
12060 
12061       return true;
12062 
12063     case PLUS:
12064     case MINUS:
12065       if (float_mode_p)
12066 	*total = sparc_costs->float_plusminus;
12067       else
12068 	*total = COSTS_N_INSNS (1);
12069       return false;
12070 
12071     case FMA:
12072       {
12073 	rtx sub;
12074 
12075 	gcc_assert (float_mode_p);
12076 	*total = sparc_costs->float_mul;
12077 
12078 	sub = XEXP (x, 0);
12079 	if (GET_CODE (sub) == NEG)
12080 	  sub = XEXP (sub, 0);
12081 	*total += rtx_cost (sub, mode, FMA, 0, speed);
12082 
12083 	sub = XEXP (x, 2);
12084 	if (GET_CODE (sub) == NEG)
12085 	  sub = XEXP (sub, 0);
12086 	*total += rtx_cost (sub, mode, FMA, 2, speed);
12087 	return true;
12088       }
12089 
12090     case MULT:
12091       if (float_mode_p)
12092 	*total = sparc_costs->float_mul;
12093       else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
12094 	*total = COSTS_N_INSNS (25);
12095       else
12096 	{
12097 	  int bit_cost;
12098 
12099 	  bit_cost = 0;
12100 	  if (sparc_costs->int_mul_bit_factor)
12101 	    {
12102 	      int nbits;
12103 
12104 	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12105 		{
12106 		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
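		  /* Count the set bits; each iteration clears the
		     lowest set bit of VALUE.  */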
12107 		  for (nbits = 0; value != 0; value &= value - 1)
12108 		    nbits++;
12109 		}
12110 	      else
12111 		nbits = 7;
12112 
12113 	      if (nbits < 3)
12114 		nbits = 3;
12115 	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12116 	      bit_cost = COSTS_N_INSNS (bit_cost);
12117 	    }
12118 
12119 	  if (mode == DImode || !TARGET_HARD_MUL)
12120 	    *total = sparc_costs->int_mulX + bit_cost;
12121 	  else
12122 	    *total = sparc_costs->int_mul + bit_cost;
12123 	}
12124       return false;
12125 
12126     case ASHIFT:
12127     case ASHIFTRT:
12128     case LSHIFTRT:
12129       *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12130       return false;
12131 
12132     case DIV:
12133     case UDIV:
12134     case MOD:
12135     case UMOD:
12136       if (float_mode_p)
12137 	{
12138 	  if (mode == DFmode)
12139 	    *total = sparc_costs->float_div_df;
12140 	  else
12141 	    *total = sparc_costs->float_div_sf;
12142 	}
12143       else
12144 	{
12145 	  if (mode == DImode)
12146 	    *total = sparc_costs->int_divX;
12147 	  else
12148 	    *total = sparc_costs->int_div;
12149 	}
12150       return false;
12151 
12152     case NEG:
12153       if (! float_mode_p)
12154 	{
12155 	  *total = COSTS_N_INSNS (1);
12156 	  return false;
12157 	}
12158       /* FALLTHRU */
12159 
12160     case ABS:
12161     case FLOAT:
12162     case UNSIGNED_FLOAT:
12163     case FIX:
12164     case UNSIGNED_FIX:
12165     case FLOAT_EXTEND:
12166     case FLOAT_TRUNCATE:
12167       *total = sparc_costs->float_move;
12168       return false;
12169 
12170     case SQRT:
12171       if (mode == DFmode)
12172 	*total = sparc_costs->float_sqrt_df;
12173       else
12174 	*total = sparc_costs->float_sqrt_sf;
12175       return false;
12176 
12177     case COMPARE:
12178       if (float_mode_p)
12179 	*total = sparc_costs->float_cmp;
12180       else
12181 	*total = COSTS_N_INSNS (1);
12182       return false;
12183 
12184     case IF_THEN_ELSE:
12185       if (float_mode_p)
12186 	*total = sparc_costs->float_cmove;
12187       else
12188 	*total = sparc_costs->int_cmove;
12189       return false;
12190 
12191     case IOR:
12192       /* Handle the NAND vector patterns.  */
12193       if (sparc_vector_mode_supported_p (mode)
12194 	  && GET_CODE (XEXP (x, 0)) == NOT
12195 	  && GET_CODE (XEXP (x, 1)) == NOT)
12196 	{
12197 	  *total = COSTS_N_INSNS (1);
12198 	  return true;
12199 	}
12200       else
12201         return false;
12202 
12203     default:
12204       return false;
12205     }
12206 }
12207 
12208 /* Return true if RCLASS is either GENERAL_REGS or I64_REGS.  */
12209 
12210 static inline bool
12211 general_or_i64_p (reg_class_t rclass)
12212 {
12213   return (rclass == GENERAL_REGS || rclass == I64_REGS);
12214 }
12215 
12216 /* Implement TARGET_REGISTER_MOVE_COST.  */
12217 
12218 static int
12219 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12220 			  reg_class_t from, reg_class_t to)
12221 {
12222   bool need_memory = false;
12223 
12224   /* This helps postreload CSE to eliminate redundant comparisons.  */
12225   if (from == NO_REGS || to == NO_REGS)
12226     return 100;
12227 
12228   if (from == FPCC_REGS || to == FPCC_REGS)
12229     need_memory = true;
12230   else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12231 	   || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12232     {
12233       if (TARGET_VIS3)
12234 	{
12235 	  int size = GET_MODE_SIZE (mode);
12236 	  if (size == 8 || size == 4)
12237 	    {
12238 	      if (! TARGET_ARCH32 || size == 4)
12239 		return 4;
12240 	      else
12241 		return 6;
12242 	    }
12243 	}
12244       need_memory = true;
12245     }
12246 
12247   if (need_memory)
12248     {
12249       if (sparc_cpu == PROCESSOR_ULTRASPARC
12250 	  || sparc_cpu == PROCESSOR_ULTRASPARC3
12251 	  || sparc_cpu == PROCESSOR_NIAGARA
12252 	  || sparc_cpu == PROCESSOR_NIAGARA2
12253 	  || sparc_cpu == PROCESSOR_NIAGARA3
12254 	  || sparc_cpu == PROCESSOR_NIAGARA4
12255 	  || sparc_cpu == PROCESSOR_NIAGARA7
12256 	  || sparc_cpu == PROCESSOR_M8)
12257 	return 12;
12258 
12259       return 6;
12260     }
12261 
12262   return 2;
12263 }
12264 
12265 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12266    This is achieved by means of a manual dynamic stack space allocation in
12267    the current frame.  We make the assumption that SEQ doesn't contain any
12268    function calls, with the possible exception of calls to the GOT helper.  */
12269 
12270 static void
12271 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12272 {
12273   /* We must preserve the lowest 16 words for the register save area.  */
12274   HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12275   /* We really need only 2 words of fresh stack space.  */
12276   HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12277 
12278   rtx slot
12279     = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12280 					     SPARC_STACK_BIAS + offset));
12281 
12282   emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12283   emit_insn (gen_rtx_SET (slot, reg));
12284   if (reg2)
12285     emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12286 			    reg2));
12287   emit_insn (seq);
12288   if (reg2)
12289     emit_insn (gen_rtx_SET (reg2,
12290 			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
12291   emit_insn (gen_rtx_SET (reg, slot));
12292   emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12293 }
12294 
12295 /* Output the assembler code for a thunk function.  THUNK_DECL is the
12296    declaration for the thunk function itself, FUNCTION is the decl for
12297    the target function.  DELTA is an immediate constant offset to be
12298    added to THIS.  If VCALL_OFFSET is nonzero, the word at address
12299    (*THIS + VCALL_OFFSET) should be additionally added to THIS.  */
12300 
12301 static void
12302 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12303 		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12304 		       tree function)
12305 {
12306   rtx this_rtx, funexp;
12307   rtx_insn *insn;
12308   unsigned int int_arg_first;
12309 
12310   reload_completed = 1;
12311   epilogue_completed = 1;
12312 
12313   emit_note (NOTE_INSN_PROLOGUE_END);
12314 
12315   if (TARGET_FLAT)
12316     {
12317       sparc_leaf_function_p = 1;
12318 
12319       int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12320     }
12321   else if (flag_delayed_branch)
12322     {
12323       /* We will emit a regular sibcall below, so we need to instruct
12324 	 output_sibcall that we are in a leaf function.  */
12325       sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12326 
12327       /* This will cause final.c to invoke leaf_renumber_regs so we
12328 	 must behave as if we were in a not-yet-leafified function.  */
12329       int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12330     }
12331   else
12332     {
12333       /* We will emit the sibcall manually below, so we will need to
12334 	 manually spill non-leaf registers.  */
12335       sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12336 
12337       /* We really are in a leaf function.  */
12338       int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12339     }
12340 
12341   /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
12342      returns a structure, the structure return pointer is there instead.  */
12343   if (TARGET_ARCH64
12344       && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12345     this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12346   else
12347     this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12348 
12349   /* Add DELTA.  When possible use a plain add, otherwise load it into
12350      a register first.  */
12351   if (delta)
12352     {
12353       rtx delta_rtx = GEN_INT (delta);
12354 
12355       if (! SPARC_SIMM13_P (delta))
12356 	{
12357 	  rtx scratch = gen_rtx_REG (Pmode, 1);
12358 	  emit_move_insn (scratch, delta_rtx);
12359 	  delta_rtx = scratch;
12360 	}
12361 
12362       /* THIS_RTX += DELTA.  */
12363       emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12364     }
12365 
12366   /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
12367   if (vcall_offset)
12368     {
12369       rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12370       rtx scratch = gen_rtx_REG (Pmode, 1);
12371 
12372       gcc_assert (vcall_offset < 0);
12373 
12374       /* SCRATCH = *THIS_RTX.  */
12375       emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12376 
12377       /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
12378 	 may not have any available scratch register at this point.  */
12379       if (SPARC_SIMM13_P (vcall_offset))
12380 	;
12381       /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
12382       else if (! fixed_regs[5]
12383 	       /* The below sequence is made up of at least 2 insns,
12384 		  while the default method may need only one.  */
12385 	       && vcall_offset < -8192)
12386 	{
12387 	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
12388 	  emit_move_insn (scratch2, vcall_offset_rtx);
12389 	  vcall_offset_rtx = scratch2;
12390 	}
12391       else
12392 	{
12393 	  rtx increment = GEN_INT (-4096);
12394 
12395 	  /* VCALL_OFFSET is a negative number whose typical range can be
12396 	     estimated as -32768..0 in 32-bit mode.  In almost all cases
12397 	     it is therefore cheaper to emit multiple add insns than
12398 	     spilling and loading the constant into a register (at least
12399 	     6 insns).  */
12400 	  while (! SPARC_SIMM13_P (vcall_offset))
12401 	    {
12402 	      emit_insn (gen_add2_insn (scratch, increment));
12403 	      vcall_offset += 4096;
12404 	    }
12405 	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12406 	}
12407 
12408       /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
12409       emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12410 					    gen_rtx_PLUS (Pmode,
12411 							  scratch,
12412 							  vcall_offset_rtx)));
12413 
12414       /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
12415       emit_insn (gen_add2_insn (this_rtx, scratch));
12416     }
12417 
12418   /* Generate a tail call to the target function.  */
12419   if (! TREE_USED (function))
12420     {
12421       assemble_external (function);
12422       TREE_USED (function) = 1;
12423     }
12424   funexp = XEXP (DECL_RTL (function), 0);
12425 
12426   if (flag_delayed_branch)
12427     {
12428       funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12429       insn = emit_call_insn (gen_sibcall (funexp));
12430       SIBLING_CALL_P (insn) = 1;
12431     }
12432   else
12433     {
12434       /* The hoops we have to jump through in order to generate a sibcall
12435 	 without using delay slots...  */
12436       rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12437 
12438       if (flag_pic)
12439         {
12440 	  spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
12441 	  start_sequence ();
12442 	  load_got_register ();  /* clobbers %o7 */
12443 	  if (!TARGET_VXWORKS_RTP)
12444 	    pic_offset_table_rtx = got_register_rtx;
12445 	  scratch = sparc_legitimize_pic_address (funexp, scratch);
12446 	  seq = get_insns ();
12447 	  end_sequence ();
12448 	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12449 	}
12450       else if (TARGET_ARCH32)
12451 	{
12452 	  emit_insn (gen_rtx_SET (scratch,
12453 				  gen_rtx_HIGH (SImode, funexp)));
12454 	  emit_insn (gen_rtx_SET (scratch,
12455 				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
12456 	}
12457       else  /* TARGET_ARCH64 */
12458         {
12459 	  switch (sparc_code_model)
12460 	    {
12461 	    case CM_MEDLOW:
12462 	    case CM_MEDMID:
12463 	      /* The destination can serve as a temporary.  */
12464 	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12465 	      break;
12466 
12467 	    case CM_MEDANY:
12468 	    case CM_EMBMEDANY:
12469 	      /* The destination cannot serve as a temporary.  */
12470 	      spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
12471 	      start_sequence ();
12472 	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12473 	      seq = get_insns ();
12474 	      end_sequence ();
12475 	      emit_and_preserve (seq, spill_reg, 0);
12476 	      break;
12477 
12478 	    default:
12479 	      gcc_unreachable ();
12480 	    }
12481 	}
12482 
12483       emit_jump_insn (gen_indirect_jump (scratch));
12484     }
12485 
12486   emit_barrier ();
12487 
12488   /* Run just enough of rest_of_compilation to get the insns emitted.
12489      There's not really enough bulk here to make other passes such as
12490      instruction scheduling worthwhile.  Note that use_thunk calls
12491      assemble_start_function and assemble_end_function.  */
12492   insn = get_insns ();
12493   shorten_branches (insn);
12494   final_start_function (insn, file, 1);
12495   final (insn, file, 1);
12496   final_end_function ();
12497 
12498   reload_completed = 0;
12499   epilogue_completed = 0;
12500 }
12501 
12502 /* Return true if sparc_output_mi_thunk would be able to output the
12503    assembler code for the thunk function specified by the arguments
12504    it is passed, and false otherwise.  */
12505 static bool
12506 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12507 			   HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12508 			   HOST_WIDE_INT vcall_offset,
12509 			   const_tree function ATTRIBUTE_UNUSED)
12510 {
12511   /* Bound the loop used in the default method above.  */
12512   return (vcall_offset >= -32768 || ! fixed_regs[5]);
12513 }
12514 
12515 /* How to allocate a 'struct machine_function'.  */
12516 
12517 static struct machine_function *
12518 sparc_init_machine_status (void)
12519 {
12520   return ggc_cleared_alloc<machine_function> ();
12521 }
12522 
12523 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
12524 
12525 static unsigned HOST_WIDE_INT
12526 sparc_asan_shadow_offset (void)
12527 {
12528   return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
12529 }
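
/* AddressSanitizer computes shadow = (address >> ASAN_SHADOW_SHIFT) + offset,
   so the values above place the shadow memory at 1 << 43 for 64-bit code
   and at 1 << 29 for 32-bit code.  */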
12530 
12531 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12532    We need to emit DTP-relative relocations.  */
12533 
12534 static void
12535 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12536 {
12537   switch (size)
12538     {
12539     case 4:
12540       fputs ("\t.word\t%r_tls_dtpoff32(", file);
12541       break;
12542     case 8:
12543       fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12544       break;
12545     default:
12546       gcc_unreachable ();
12547     }
12548   output_addr_const (file, x);
12549   fputs (")", file);
12550 }
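
/* E.g. for SIZE == 4 and a symbol foo, the code above emits
   ".word %r_tls_dtpoff32(foo)".  */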
12551 
12552 /* Do whatever processing is required at the end of a file.  */
12553 
12554 static void
12555 sparc_file_end (void)
12556 {
12557   /* If we need to emit the special GOT helper function, do so now.  */
12558   if (got_helper_needed)
12559     {
12560       const char *name = XSTR (got_helper_rtx, 0);
12561 #ifdef DWARF2_UNWIND_INFO
12562       bool do_cfi;
12563 #endif
12564 
12565       if (USE_HIDDEN_LINKONCE)
12566 	{
12567 	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12568 				  get_identifier (name),
12569 				  build_function_type_list (void_type_node,
12570                                                             NULL_TREE));
12571 	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12572 					   NULL_TREE, void_type_node);
12573 	  TREE_PUBLIC (decl) = 1;
12574 	  TREE_STATIC (decl) = 1;
12575 	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12576 	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12577 	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
12578 	  resolve_unique_section (decl, 0, flag_function_sections);
12579 	  allocate_struct_function (decl, true);
12580 	  cfun->is_thunk = 1;
12581 	  current_function_decl = decl;
12582 	  init_varasm_status ();
12583 	  assemble_start_function (decl, name);
12584 	}
12585       else
12586 	{
12587 	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12588           switch_to_section (text_section);
12589 	  if (align > 0)
12590 	    ASM_OUTPUT_ALIGN (asm_out_file, align);
12591 	  ASM_OUTPUT_LABEL (asm_out_file, name);
12592 	}
12593 
12594 #ifdef DWARF2_UNWIND_INFO
12595       do_cfi = dwarf2out_do_cfi_asm ();
12596       if (do_cfi)
12597 	output_asm_insn (".cfi_startproc", NULL);
12598 #endif
12599       if (flag_delayed_branch)
12600 	{
12601 	  output_asm_insn ("jmp\t%%o7+8", NULL);
12602 	  output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
12603 	}
12604       else
12605 	{
12606 	  output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
12607 	  output_asm_insn ("jmp\t%%o7+8", NULL);
12608 	  output_asm_insn (" nop", NULL);
12609 	}
12610 #ifdef DWARF2_UNWIND_INFO
12611       if (do_cfi)
12612 	output_asm_insn (".cfi_endproc", NULL);
12613 #endif
12614     }
12615 
12616   if (NEED_INDICATE_EXEC_STACK)
12617     file_end_indicate_exec_stack ();
12618 
12619 #ifdef TARGET_SOLARIS
12620   solaris_file_end ();
12621 #endif
12622 }
12623 
12624 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12625 /* Implement TARGET_MANGLE_TYPE.  */
12626 
12627 static const char *
12628 sparc_mangle_type (const_tree type)
12629 {
12630   if (TARGET_ARCH32
12631       && TYPE_MAIN_VARIANT (type) == long_double_type_node
12632       && TARGET_LONG_DOUBLE_128)
12633     return "g";
12634 
12635   /* For all other types, use normal C++ mangling.  */
12636   return NULL;
12637 }
12638 #endif
12639 
12640 /* Expand a membar instruction for various use cases.  Both the LOAD_STORE
12641    and BEFORE_AFTER arguments are of the form X_Y.  They are two-bit masks where
12642    bit 0 indicates that X is true, and bit 1 indicates that Y is true.  */
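
/* For example, sparc_emit_membar_for_model (model, 3, 1) requests the
   barrier emitted before an operation having both load and store semantics;
   the compare-and-swap expander below uses exactly this pattern.  */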
12643 
12644 void
12645 sparc_emit_membar_for_model (enum memmodel model,
12646 			     int load_store, int before_after)
12647 {
12648   /* Bits for the MEMBAR mmask field.  */
12649   const int LoadLoad = 1;
12650   const int StoreLoad = 2;
12651   const int LoadStore = 4;
12652   const int StoreStore = 8;
12653 
12654   int mm = 0, implied = 0;
12655 
12656   switch (sparc_memory_model)
12657     {
12658     case SMM_SC:
12659       /* Sequential Consistency.  All memory transactions are immediately
12660 	 visible in sequential execution order.  No barriers needed.  */
12661       implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12662       break;
12663 
12664     case SMM_TSO:
12665       /* Total Store Ordering: all memory transactions with store semantics
12666 	 are followed by an implied StoreStore.  */
12667       implied |= StoreStore;
12668 
12669       /* If we're not looking for a raw barrier (before+after), then atomic
12670 	 operations get the benefit of being both load and store.  */
12671       if (load_store == 3 && before_after == 1)
12672 	implied |= StoreLoad;
12673       /* FALLTHRU */
12674 
12675     case SMM_PSO:
12676       /* Partial Store Ordering: all memory transactions with load semantics
12677 	 are followed by an implied LoadLoad | LoadStore.  */
12678       implied |= LoadLoad | LoadStore;
12679 
12680       /* If we're not looking for a raw barrier (before+after), then atomic
12681 	 operations get the benefit of being both load and store.  */
12682       if (load_store == 3 && before_after == 2)
12683 	implied |= StoreLoad | StoreStore;
12684       /* FALLTHRU */
12685 
12686     case SMM_RMO:
12687       /* Relaxed Memory Ordering: no implicit bits.  */
12688       break;
12689 
12690     default:
12691       gcc_unreachable ();
12692     }
12693 
12694   if (before_after & 1)
12695     {
12696       if (is_mm_release (model) || is_mm_acq_rel (model)
12697 	  || is_mm_seq_cst (model))
12698 	{
12699 	  if (load_store & 1)
12700 	    mm |= LoadLoad | StoreLoad;
12701 	  if (load_store & 2)
12702 	    mm |= LoadStore | StoreStore;
12703 	}
12704     }
12705   if (before_after & 2)
12706     {
12707       if (is_mm_acquire (model) || is_mm_acq_rel (model)
12708 	  || is_mm_seq_cst (model))
12709 	{
12710 	  if (load_store & 1)
12711 	    mm |= LoadLoad | LoadStore;
12712 	  if (load_store & 2)
12713 	    mm |= StoreLoad | StoreStore;
12714 	}
12715     }
12716 
12717   /* Remove the bits implied by the system memory model.  */
12718   mm &= ~implied;
12719 
12720   /* For raw barriers (before+after), always emit a barrier.
12721      This will become a compile-time barrier if needed.  */
12722   if (mm || before_after == 3)
12723     emit_insn (gen_membar (GEN_INT (mm)));
12724 }
12725 
12726 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a
12727    32-bit compare and swap on the word containing the byte or half-word.  */
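
/* Outline of the sequence emitted below: align the address down to a word,
   compute the sub-word's shift amount and mask, then loop: splice the
   shifted OLDVAL/NEWVAL into the unchanged bytes of the word, attempt a
   32-bit CAS, and retry if only the unrelated bytes changed underneath.  */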
12728 
12729 static void
12730 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12731 				  rtx oldval, rtx newval)
12732 {
12733   rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12734   rtx addr = gen_reg_rtx (Pmode);
12735   rtx off = gen_reg_rtx (SImode);
12736   rtx oldv = gen_reg_rtx (SImode);
12737   rtx newv = gen_reg_rtx (SImode);
12738   rtx oldvalue = gen_reg_rtx (SImode);
12739   rtx newvalue = gen_reg_rtx (SImode);
12740   rtx res = gen_reg_rtx (SImode);
12741   rtx resv = gen_reg_rtx (SImode);
12742   rtx memsi, val, mask, cc;
12743 
12744   emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12745 
12746   if (Pmode != SImode)
12747     addr1 = gen_lowpart (SImode, addr1);
12748   emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12749 
12750   memsi = gen_rtx_MEM (SImode, addr);
12751   set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12752   MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12753 
12754   val = copy_to_reg (memsi);
12755 
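  /* SPARC is big-endian: turn the byte offset within the word into the
     sub-word's shift amount by XORing with 3 (QImode) or 2 (HImode); the
     shift by 3 below then scales it to a bit count.  */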
12756   emit_insn (gen_rtx_SET (off,
12757 			  gen_rtx_XOR (SImode, off,
12758 				       GEN_INT (GET_MODE (mem) == QImode
12759 						? 3 : 2))));
12760 
12761   emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12762 
12763   if (GET_MODE (mem) == QImode)
12764     mask = force_reg (SImode, GEN_INT (0xff));
12765   else
12766     mask = force_reg (SImode, GEN_INT (0xffff));
12767 
12768   emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12769 
12770   emit_insn (gen_rtx_SET (val,
12771 			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12772 				       val)));
12773 
12774   oldval = gen_lowpart (SImode, oldval);
12775   emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12776 
12777   newval = gen_lowpart_common (SImode, newval);
12778   emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12779 
12780   emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12781 
12782   emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12783 
12784   rtx_code_label *end_label = gen_label_rtx ();
12785   rtx_code_label *loop_label = gen_label_rtx ();
12786   emit_label (loop_label);
12787 
12788   emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12789 
12790   emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12791 
12792   emit_move_insn (bool_result, const1_rtx);
12793 
12794   emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12795 
12796   emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12797 
12798   emit_insn (gen_rtx_SET (resv,
12799 			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12800 				       res)));
12801 
12802   emit_move_insn (bool_result, const0_rtx);
12803 
12804   cc = gen_compare_reg_1 (NE, resv, val);
12805   emit_insn (gen_rtx_SET (val, resv));
12806 
12807   /* Use cbranchcc4 to separate the compare and branch!  */
12808   emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12809 				  cc, const0_rtx, loop_label));
12810 
12811   emit_label (end_label);
12812 
12813   emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12814 
12815   emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12816 
12817   emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12818 }
12819 
12820 /* Expand code to perform a compare-and-swap.  */
12821 
12822 void
12823 sparc_expand_compare_and_swap (rtx operands[])
12824 {
12825   rtx bval, retval, mem, oldval, newval;
12826   machine_mode mode;
12827   enum memmodel model;
12828 
12829   bval = operands[0];
12830   retval = operands[1];
12831   mem = operands[2];
12832   oldval = operands[3];
12833   newval = operands[4];
12834   model = (enum memmodel) INTVAL (operands[6]);
12835   mode = GET_MODE (mem);
12836 
12837   sparc_emit_membar_for_model (model, 3, 1);
12838 
12839   if (reg_overlap_mentioned_p (retval, oldval))
12840     oldval = copy_to_reg (oldval);
12841 
12842   if (mode == QImode || mode == HImode)
12843     sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12844   else
12845     {
12846       rtx (*gen) (rtx, rtx, rtx, rtx);
12847       rtx x;
12848 
12849       if (mode == SImode)
12850 	gen = gen_atomic_compare_and_swapsi_1;
12851       else
12852 	gen = gen_atomic_compare_and_swapdi_1;
12853       emit_insn (gen (retval, mem, oldval, newval));
12854 
12855       x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12856       if (x != bval)
12857 	convert_move (bval, x, 1);
12858     }
12859 
12860   sparc_emit_membar_for_model (model, 3, 2);
12861 }
12862 
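/* Expand the constant permutation selector SEL, one element index per lane
   of VMODE, into the byte-index mask consumed by a subsequent bshuffle, and
   load it into the GSR with the bmask instruction.  */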
12863 void
12864 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12865 {
12866   rtx t_1, t_2, t_3;
12867 
12868   sel = gen_lowpart (DImode, sel);
12869   switch (vmode)
12870     {
12871     case E_V2SImode:
12872       /* inp = xxxxxxxAxxxxxxxB */
12873       t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12874 				 NULL_RTX, 1, OPTAB_DIRECT);
12875       /* t_1 = ....xxxxxxxAxxx. */
12876       sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12877 				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12878       t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12879 				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12880       /* sel = .......B */
12881       /* t_1 = ...A.... */
12882       sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12883       /* sel = ...A...B */
12884       sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12885       /* sel = AAAABBBB * 4 */
12886       t_1 = force_reg (SImode, GEN_INT (0x01230123));
12887       /* sel = { A*4, A*4+1, A*4+2, ... } */
12888       break;
12889 
12890     case E_V4HImode:
12891       /* inp = xxxAxxxBxxxCxxxD */
12892       t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12893 				 NULL_RTX, 1, OPTAB_DIRECT);
12894       t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12895 				 NULL_RTX, 1, OPTAB_DIRECT);
12896       t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12897 				 NULL_RTX, 1, OPTAB_DIRECT);
12898       /* t_1 = ..xxxAxxxBxxxCxx */
12899       /* t_2 = ....xxxAxxxBxxxC */
12900       /* t_3 = ......xxxAxxxBxx */
12901       sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12902 				 GEN_INT (0x07),
12903 				 NULL_RTX, 1, OPTAB_DIRECT);
12904       t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12905 				 GEN_INT (0x0700),
12906 				 NULL_RTX, 1, OPTAB_DIRECT);
12907       t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12908 				 GEN_INT (0x070000),
12909 				 NULL_RTX, 1, OPTAB_DIRECT);
12910       t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12911 				 GEN_INT (0x07000000),
12912 				 NULL_RTX, 1, OPTAB_DIRECT);
12913       /* sel = .......D */
12914       /* t_1 = .....C.. */
12915       /* t_2 = ...B.... */
12916       /* t_3 = .A...... */
12917       sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12918       t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12919       sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12920       /* sel = .A.B.C.D */
12921       sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12922       /* sel = AABBCCDD * 2 */
12923       t_1 = force_reg (SImode, GEN_INT (0x01010101));
12924       /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12925       break;
12926 
12927     case E_V8QImode:
12928       /* input = xAxBxCxDxExFxGxH */
12929       sel = expand_simple_binop (DImode, AND, sel,
12930 				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12931 					  | 0x0f0f0f0f),
12932 				 NULL_RTX, 1, OPTAB_DIRECT);
12933       /* sel = .A.B.C.D.E.F.G.H */
12934       t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12935 				 NULL_RTX, 1, OPTAB_DIRECT);
12936       /* t_1 = ..A.B.C.D.E.F.G. */
12937       sel = expand_simple_binop (DImode, IOR, sel, t_1,
12938 				 NULL_RTX, 1, OPTAB_DIRECT);
12939       /* sel = .AABBCCDDEEFFGGH */
12940       sel = expand_simple_binop (DImode, AND, sel,
12941 				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12942 					  | 0xff00ff),
12943 				 NULL_RTX, 1, OPTAB_DIRECT);
12944       /* sel = ..AB..CD..EF..GH */
12945       t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12946 				 NULL_RTX, 1, OPTAB_DIRECT);
12947       /* t_1 = ....AB..CD..EF.. */
12948       sel = expand_simple_binop (DImode, IOR, sel, t_1,
12949 				 NULL_RTX, 1, OPTAB_DIRECT);
12950       /* sel = ..ABABCDCDEFEFGH */
12951       sel = expand_simple_binop (DImode, AND, sel,
12952 				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12953 				 NULL_RTX, 1, OPTAB_DIRECT);
12954       /* sel = ....ABCD....EFGH */
12955       t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12956 				 NULL_RTX, 1, OPTAB_DIRECT);
12957       /* t_1 = ........ABCD.... */
12958       sel = gen_lowpart (SImode, sel);
12959       t_1 = gen_lowpart (SImode, t_1);
12960       break;
12961 
12962     default:
12963       gcc_unreachable ();
12964     }
12965 
12966   /* Always perform the final addition/merge within the bmask insn.  */
12967   emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12968 }
12969 
12970 /* Implement TARGET_VEC_PERM_CONST.  */
12971 
12972 static bool
12973 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
12974 				rtx op1, const vec_perm_indices &sel)
12975 {
12976   if (!TARGET_VIS2)
12977     return false;
12978 
12979   /* All permutes are supported.  */
12980   if (!target)
12981     return true;
12982 
12983   /* Force target-independent code to convert constant permutations on other
12984      modes down to V8QI.  Rely on this to avoid the complexity of the byte
12985      order of the permutation.  */
12986   if (vmode != V8QImode)
12987     return false;
12988 
12989   unsigned int i, mask;
12990   for (i = mask = 0; i < 8; ++i)
12991     mask |= (sel[i] & 0xf) << (28 - i*4);
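  /* E.g. the identity selector { 0, 1, 2, 3, 4, 5, 6, 7 } packs into mask
     0x01234567, one nibble per destination byte, element 0 in the most
     significant nibble.  */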
12992   rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
12993 
12994   emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
12995   emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
12996   return true;
12997 }
12998 
12999 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
13000 
13001 static bool
13002 sparc_frame_pointer_required (void)
13003 {
13004   /* If the stack pointer is dynamically modified in the function, it cannot
13005      serve as the frame pointer.  */
13006   if (cfun->calls_alloca)
13007     return true;
13008 
13009   /* If the function receives nonlocal gotos, it needs to save the frame
13010      pointer in the nonlocal_goto_save_area object.  */
13011   if (cfun->has_nonlocal_label)
13012     return true;
13013 
13014   /* In flat mode, that's it.  */
13015   if (TARGET_FLAT)
13016     return false;
13017 
13018   /* Otherwise, the frame pointer is required if the function isn't leaf, but
13019      we cannot use sparc_leaf_function_p since it hasn't been computed yet.  */
13020   return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
13021 }
13022 
13023 /* The way this is structured, we can't eliminate SFP in favor of SP
13024    if the frame pointer is required: we want to use the SFP->HFP elimination
13025    in that case.  But the test in update_eliminables doesn't know we are
13026    assuming below that we only do the former elimination.  */
13027 
13028 static bool
13029 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
13030 {
13031   return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
13032 }
13033 
13034 /* Return the hard frame pointer directly to bypass the stack bias.  */
13035 
13036 static rtx
13037 sparc_builtin_setjmp_frame_value (void)
13038 {
13039   return hard_frame_pointer_rtx;
13040 }
13041 
13042 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
13043    they won't be allocated.  */
13044 
13045 static void
13046 sparc_conditional_register_usage (void)
13047 {
13048   if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
13049     {
13050       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13051       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13052     }
13053   /* If the user has passed -f{fixed,call-{used,saved}}-g5,
13054      then honor it.  */
13055   if (TARGET_ARCH32 && fixed_regs[5])
13056     fixed_regs[5] = 1;
13057   else if (TARGET_ARCH64 && fixed_regs[5] == 2)
13058     fixed_regs[5] = 0;
13059   if (! TARGET_V9)
13060     {
13061       int regno;
13062       for (regno = SPARC_FIRST_V9_FP_REG;
13063 	   regno <= SPARC_LAST_V9_FP_REG;
13064 	   regno++)
13065 	fixed_regs[regno] = 1;
13066       /* %fcc0 is used by v8 and v9.  */
13067       for (regno = SPARC_FIRST_V9_FCC_REG + 1;
13068 	   regno <= SPARC_LAST_V9_FCC_REG;
13069 	   regno++)
13070 	fixed_regs[regno] = 1;
13071     }
13072   if (! TARGET_FPU)
13073     {
13074       int regno;
13075       for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
13076 	fixed_regs[regno] = 1;
13077     }
13078   /* If the user has passed -f{fixed,call-{used,saved}}-g2,
13079      then honor it.  Likewise with g3 and g4.  */
13080   if (fixed_regs[2] == 2)
13081     fixed_regs[2] = ! TARGET_APP_REGS;
13082   if (fixed_regs[3] == 2)
13083     fixed_regs[3] = ! TARGET_APP_REGS;
13084   if (TARGET_ARCH32 && fixed_regs[4] == 2)
13085     fixed_regs[4] = ! TARGET_APP_REGS;
13086   else if (TARGET_CM_EMBMEDANY)
13087     fixed_regs[4] = 1;
13088   else if (fixed_regs[4] == 2)
13089     fixed_regs[4] = 0;
13090   if (TARGET_FLAT)
13091     {
13092       int regno;
13093       /* Disable leaf functions.  */
13094       memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
13095       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13096 	leaf_reg_remap [regno] = regno;
13097     }
13098   if (TARGET_VIS)
13099     global_regs[SPARC_GSR_REG] = 1;
13100 }
13101 
13102 /* Implement TARGET_USE_PSEUDO_PIC_REG.  */
13103 
13104 static bool
13105 sparc_use_pseudo_pic_reg (void)
13106 {
13107   return !TARGET_VXWORKS_RTP && flag_pic;
13108 }
13109 
13110 /* Implement TARGET_INIT_PIC_REG.  */
13111 
13112 static void
13113 sparc_init_pic_reg (void)
13114 {
13115   edge entry_edge;
13116   rtx_insn *seq;
13117 
13118   /* In PIC mode, we need to always initialize the PIC register if optimization
13119      is enabled, because we are called from IRA and LRA may later force things
13120      to the constant pool for optimization purposes.  */
13121   if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13122     return;
13123 
13124   start_sequence ();
13125   load_got_register ();
13126   if (!TARGET_VXWORKS_RTP)
13127     emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13128   seq = get_insns ();
13129   end_sequence ();
13130 
13131   entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13132   insert_insn_on_edge (seq, entry_edge);
13133   commit_one_edge_insertion (entry_edge);
13134 }
13135 
13136 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13137 
13138    - We can't load constants into FP registers.
13139    - We can't load FP constants into integer registers when soft-float,
13140      because there is no soft-float pattern with a r/F constraint.
13141    - We can't load FP constants into integer registers for TFmode unless
13142      it is 0.0L, because there is no movtf pattern with a r/F constraint.
13143    - Try to reload integer constants (symbolic or otherwise) back into
13144      registers directly, rather than having them dumped to memory.  */
13145 
13146 static reg_class_t
13147 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13148 {
13149   machine_mode mode = GET_MODE (x);
13150   if (CONSTANT_P (x))
13151     {
13152       if (FP_REG_CLASS_P (rclass)
13153 	  || rclass == GENERAL_OR_FP_REGS
13154 	  || rclass == GENERAL_OR_EXTRA_FP_REGS
13155 	  || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13156 	  || (mode == TFmode && ! const_zero_operand (x, mode)))
13157 	return NO_REGS;
13158 
13159       if (GET_MODE_CLASS (mode) == MODE_INT)
13160 	return GENERAL_REGS;
13161 
13162       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13163 	{
13164 	  if (! FP_REG_CLASS_P (rclass)
13165 	      || !(const_zero_operand (x, mode)
13166 		   || const_all_ones_operand (x, mode)))
13167 	    return NO_REGS;
13168 	}
13169     }
13170 
13171   if (TARGET_VIS3
13172       && ! TARGET_ARCH64
13173       && (rclass == EXTRA_FP_REGS
13174 	  || rclass == GENERAL_OR_EXTRA_FP_REGS))
13175     {
13176       int regno = true_regnum (x);
13177 
13178       if (SPARC_INT_REG_P (regno))
13179 	return (rclass == EXTRA_FP_REGS
13180 		? FP_REGS : GENERAL_OR_FP_REGS);
13181     }
13182 
13183   return rclass;
13184 }
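
/* Worked examples of the rules above (illustrative):

     (const_int 42)          for FP_REGS       ->  NO_REGS
     (symbol_ref) in SImode  for GENERAL_REGS  ->  GENERAL_REGS
     1.0L in TFmode          for GENERAL_REGS  ->  NO_REGS
     all-zero V8QI vector    for FP_REGS       ->  FP_REGS

   since only 0.0L has a movtf alternative with an r/F constraint and
   constant vectors of zeros or all-ones can be synthesized in FP regs.  */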
13185 
13186 /* Return true if we use LRA instead of the reload pass.  */
13187 
13188 static bool
13189 sparc_lra_p (void)
13190 {
13191   return TARGET_LRA;
13192 }
13193 
13194 /* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
13195    OPERANDS are its operands and OPCODE is the mnemonic to be used.  */
13196 
13197 const char *
13198 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13199 {
13200   char mulstr[32];
13201 
13202   gcc_assert (! TARGET_ARCH64);
13203 
13204   if (sparc_check_64 (operands[1], insn) <= 0)
13205     output_asm_insn ("srl\t%L1, 0, %L1", operands);
13206   if (which_alternative == 1)
13207     output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13208   if (GET_CODE (operands[2]) == CONST_INT)
13209     {
13210       if (which_alternative == 1)
13211 	{
13212 	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
13213 	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13214 	  output_asm_insn (mulstr, operands);
13215 	  return "srlx\t%L0, 32, %H0";
13216 	}
13217       else
13218 	{
13219 	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
13220 	  output_asm_insn ("or\t%L1, %3, %3", operands);
13221 	  sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13222 	  output_asm_insn (mulstr, operands);
13223 	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
13224 	  return "mov\t%3, %L0";
13225 	}
13226     }
13227   else if (rtx_equal_p (operands[1], operands[2]))
13228     {
13229       if (which_alternative == 1)
13230 	{
13231 	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
13232 	  sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13233 	  output_asm_insn (mulstr, operands);
13234 	  return "srlx\t%L0, 32, %H0";
13235 	}
13236       else
13237 	{
13238 	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
13239 	  output_asm_insn ("or\t%L1, %3, %3", operands);
13240 	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13241 	  output_asm_insn (mulstr, operands);
13242 	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
13243 	  return "mov\t%3, %L0";
13244 	}
13245     }
13246   if (sparc_check_64 (operands[2], insn) <= 0)
13247     output_asm_insn ("srl\t%L2, 0, %L2", operands);
13248   if (which_alternative == 1)
13249     {
13250       output_asm_insn ("or\t%L1, %H1, %H1", operands);
13251       output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13252       output_asm_insn ("or\t%L2, %L1, %L1", operands);
13253       sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13254       output_asm_insn (mulstr, operands);
13255       return "srlx\t%L0, 32, %H0";
13256     }
13257   else
13258     {
13259       output_asm_insn ("sllx\t%H1, 32, %3", operands);
13260       output_asm_insn ("sllx\t%H2, 32, %4", operands);
13261       output_asm_insn ("or\t%L1, %3, %3", operands);
13262       output_asm_insn ("or\t%L2, %4, %4", operands);
13263       sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13264       output_asm_insn (mulstr, operands);
13265       output_asm_insn ("srlx\t%3, 32, %H0", operands);
13266       return "mov\t%3, %L0";
13267     }
13268 }
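
/* For illustration only: with a mulx OPCODE, distinct non-constant
   operands and which_alternative == 0, the code above emits roughly
   (operand syntax as in the templates, %3 and %4 being scratch regs):

     srl   %L1, 0, %L1   ! zero-extend the low words when the upper
     srl   %L2, 0, %L2   ! halves are not known to be clear
     sllx  %H1, 32, %3
     sllx  %H2, 32, %4
     or    %L1, %3, %3   ! reassemble the full 64-bit operands
     or    %L2, %4, %4
     mulx  %3, %4, %3
     srlx  %3, 32, %H0   ! high word of the result
     mov   %3, %L0       ! low word of the result  */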
13269 
13270 /* Subroutine of sparc_expand_vector_init.  Emit code to initialize
13271    all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn.  MODE
13272    and INNER_MODE are the modes describing TARGET.  */
13273 
13274 static void
13275 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13276 		      machine_mode inner_mode)
13277 {
13278   rtx t1, final_insn, sel;
13279   int bmask;
13280 
13281   t1 = gen_reg_rtx (mode);
13282 
13283   elt = convert_modes (SImode, inner_mode, elt, true);
13284   emit_move_insn (gen_lowpart(SImode, t1), elt);
13285 
13286   switch (mode)
13287     {
13288     case E_V2SImode:
13289       final_insn = gen_bshufflev2si_vis (target, t1, t1);
13290       bmask = 0x45674567;
13291       break;
13292     case E_V4HImode:
13293       final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13294       bmask = 0x67676767;
13295       break;
13296     case E_V8QImode:
13297       final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13298       bmask = 0x77777777;
13299       break;
13300     default:
13301       gcc_unreachable ();
13302     }
13303 
13304   sel = force_reg (SImode, GEN_INT (bmask));
13305   emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13306   emit_insn (final_insn);
13307 }
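
/* For illustration only: BSHUFFLE selects each byte of the result from
   the 16-byte concatenation of its two sources, one nibble of GSR.bmask
   per byte.  ELT has been placed in the low 32 bits of T1, i.e. bytes
   4-7 of the big-endian register, so the masks above read:

     V2SI: 0x45674567  bytes {4,5,6,7} twice    -> word duplicated
     V4HI: 0x67676767  bytes {6,7} four times   -> halfword duplicated
     V8QI: 0x77777777  byte 7 eight times       -> byte duplicated  */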
13308 
13309 /* Subroutine of sparc_expand_vector_init.  Emit code to initialize
13310    all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn.  */
13311 
13312 static void
13313 vector_init_fpmerge (rtx target, rtx elt)
13314 {
13315   rtx t1, t2, t2_low, t3, t3_low;
13316 
13317   t1 = gen_reg_rtx (V4QImode);
13318   elt = convert_modes (SImode, QImode, elt, true);
13319   emit_move_insn (gen_lowpart (SImode, t1), elt);
13320 
13321   t2 = gen_reg_rtx (V8QImode);
13322   t2_low = gen_lowpart (V4QImode, t2);
13323   emit_insn (gen_fpmerge_vis (t2, t1, t1));
13324 
13325   t3 = gen_reg_rtx (V8QImode);
13326   t3_low = gen_lowpart (V4QImode, t3);
13327   emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13328 
13329   emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13330 }
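
/* For illustration only: FPMERGE interleaves the bytes of its two 4-byte
   sources.  Writing E for the ELT byte and ? for don't-care bytes,
   T1 = {?,?,?,E}, so each merge above doubles the number of copies:

     t2     = fpmerge (t1, t1)         = {?,?,?,?,?,?,E,E}
     t3     = fpmerge (t2_low, t2_low) = {?,?,?,?,E,E,E,E}
     target = fpmerge (t3_low, t3_low) = {E,E,E,E,E,E,E,E}  */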
13331 
13332 /* Subroutine of sparc_expand_vector_init.  Emit code to initialize
13333    all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn.  */
13334 
13335 static void
13336 vector_init_faligndata (rtx target, rtx elt)
13337 {
13338   rtx t1 = gen_reg_rtx (V4HImode);
13339   int i;
13340 
13341   elt = convert_modes (SImode, HImode, elt, true);
13342   emit_move_insn (gen_lowpart (SImode, t1), elt);
13343 
13344   emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13345 				  force_reg (SImode, GEN_INT (6)),
13346 				  const0_rtx));
13347 
13348   for (i = 0; i < 4; i++)
13349     emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13350 }
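
/* For illustration only: the alignaddr above sets GSR.align to 6, so each
   FALIGNDATA extracts bytes 6..13 of the 16-byte concatenation T1:TARGET.
   ELT sits in bytes 6-7 of T1, hence every iteration prepends one copy of
   the halfword and shifts the previous contents (xN) right:

     pass 1: {E, x0, x1, x2}
     pass 2: {E, E, x0, x1}
     pass 3: {E, E, E, x0}
     pass 4: {E, E, E, E}  */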
13351 
13352 /* Emit code to initialize TARGET to values for individual fields VALS.  */
13353 
13354 void
13355 sparc_expand_vector_init (rtx target, rtx vals)
13356 {
13357   const machine_mode mode = GET_MODE (target);
13358   const machine_mode inner_mode = GET_MODE_INNER (mode);
13359   const int n_elts = GET_MODE_NUNITS (mode);
13360   int i, n_var = 0;
13361   bool all_same = true;
13362   rtx mem;
13363 
13364   for (i = 0; i < n_elts; i++)
13365     {
13366       rtx x = XVECEXP (vals, 0, i);
13367       if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13368 	n_var++;
13369 
13370       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13371 	all_same = false;
13372     }
13373 
13374   if (n_var == 0)
13375     {
13376       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13377       return;
13378     }
13379 
13380   if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13381     {
13382       if (GET_MODE_SIZE (inner_mode) == 4)
13383 	{
13384 	  emit_move_insn (gen_lowpart (SImode, target),
13385 			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13386 	  return;
13387 	}
13388       else if (GET_MODE_SIZE (inner_mode) == 8)
13389 	{
13390 	  emit_move_insn (gen_lowpart (DImode, target),
13391 			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13392 	  return;
13393 	}
13394     }
13395   else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13396 	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13397     {
13398       emit_move_insn (gen_highpart (word_mode, target),
13399 		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13400       emit_move_insn (gen_lowpart (word_mode, target),
13401 		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13402       return;
13403     }
13404 
13405   if (all_same && GET_MODE_SIZE (mode) == 8)
13406     {
13407       if (TARGET_VIS2)
13408 	{
13409 	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13410 	  return;
13411 	}
13412       if (mode == V8QImode)
13413 	{
13414 	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13415 	  return;
13416 	}
13417       if (mode == V4HImode)
13418 	{
13419 	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13420 	  return;
13421 	}
13422     }
13423 
13424   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13425   for (i = 0; i < n_elts; i++)
13426     emit_move_insn (adjust_address_nv (mem, inner_mode,
13427 				       i * GET_MODE_SIZE (inner_mode)),
13428 		    XVECEXP (vals, 0, i));
13429   emit_move_insn (target, mem);
13430 }
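
/* For illustration only (hypothetical user code): with the GNU vector
   extension,

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi dup (short x) { return (v4hi) { x, x, x, x }; }
     v4hi mix (short x) { return (v4hi) { x, 1, 2, 3 }; }

   dup takes the all_same paths above (BSHUFFLE with VIS2, FALIGNDATA
   otherwise), while mix falls through to the stack-temporary loop.  */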
13431 
13432 /* Implement TARGET_SECONDARY_RELOAD.  */
13433 
13434 static reg_class_t
13435 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13436 			machine_mode mode, secondary_reload_info *sri)
13437 {
13438   enum reg_class rclass = (enum reg_class) rclass_i;
13439 
13440   sri->icode = CODE_FOR_nothing;
13441   sri->extra_cost = 0;
13442 
13443   /* We need a temporary when loading/storing a HImode/QImode value
13444      between memory and the FPU registers.  This can happen when combine puts
13445      a paradoxical subreg in a float/fix conversion insn.  */
13446   if (FP_REG_CLASS_P (rclass)
13447       && (mode == HImode || mode == QImode)
13448       && (GET_CODE (x) == MEM
13449 	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13450 	      && true_regnum (x) == -1)))
13451     return GENERAL_REGS;
13452 
13453   /* On 32-bit we need a temporary when loading/storing a DFmode value
13454      between unaligned memory and the upper FPU registers.  */
13455   if (TARGET_ARCH32
13456       && rclass == EXTRA_FP_REGS
13457       && mode == DFmode
13458       && GET_CODE (x) == MEM
13459       && ! mem_min_alignment (x, 8))
13460     return FP_REGS;
13461 
13462   if (((TARGET_CM_MEDANY
13463 	&& symbolic_operand (x, mode))
13464        || (TARGET_CM_EMBMEDANY
13465 	   && text_segment_operand (x, mode)))
13466       && ! flag_pic)
13467     {
13468       if (in_p)
13469 	sri->icode = direct_optab_handler (reload_in_optab, mode);
13470       else
13471 	sri->icode = direct_optab_handler (reload_out_optab, mode);
13472       return NO_REGS;
13473     }
13474 
13475   if (TARGET_VIS3 && TARGET_ARCH32)
13476     {
13477       int regno = true_regnum (x);
13478 
13479       /* When using VIS3 fp<-->int register moves, on 32-bit we have
13480 	 to move 8-byte values in 4-byte pieces.  This only works via
13481 	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
13482 	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13483 	 an FP_REGS intermediate move.  */
13484       if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13485 	  || ((general_or_i64_p (rclass)
13486 	       || rclass == GENERAL_OR_FP_REGS)
13487 	      && SPARC_FP_REG_P (regno)))
13488 	{
13489 	  sri->extra_cost = 2;
13490 	  return FP_REGS;
13491 	}
13492     }
13493 
13494   return NO_REGS;
13495 }
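
/* For illustration only: on 32-bit VIS3, reloading a DImode value from an
   integer register pair into an upper FP register such as %f32 has to go
   through a lower FP register first, conceptually

     movwtos %o0, %f0    ! 4-byte moves reach only %f0-%f31
     movwtos %o1, %f1
     fmovd   %f0, %f32

   (register choices hypothetical), which is why FP_REGS is returned above
   with a small extra cost.  */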
13496 
13497 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13498 
13499    On SPARC, without VIS3, it is not possible to move data directly
13500    between GENERAL_REGS and FP_REGS.  */
13501 
13502 static bool
13503 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13504 			       reg_class_t class2)
13505 {
13506   return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13507 	  && (! TARGET_VIS3
13508 	      || GET_MODE_SIZE (mode) > 8
13509 	      || GET_MODE_SIZE (mode) < 4));
13510 }
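
/* For illustration only: VIS3 provides direct moves between the integer
   and FP files (movwtos/movstouw for 4 bytes, movxtod/movdtox for 8), so
   for moves between GENERAL_REGS and FP_REGS the rule above yields:

     SImode, no VIS3  ->  true   (spill through the stack)
     SImode, VIS3     ->  false  (direct move)
     TFmode, VIS3     ->  true   (16 bytes, no direct move)  */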
13511 
13512 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13513 
13514    get_secondary_mem widens its argument to BITS_PER_WORD, which loses on v9
13515    because the movsi and movsf patterns don't handle r/f moves.
13516    For v8 we copy the default definition.  */
13517 
13518 static machine_mode
13519 sparc_secondary_memory_needed_mode (machine_mode mode)
13520 {
13521   if (TARGET_ARCH64)
13522     {
13523       if (GET_MODE_BITSIZE (mode) < 32)
13524 	return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13525       return mode;
13526     }
13527   else
13528     {
13529       if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13530 	return mode_for_size (BITS_PER_WORD,
13531 			      GET_MODE_CLASS (mode), 0).require ();
13532       return mode;
13533     }
13534 }
13535 
13536 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13537    OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */
13538 
13539 bool
13540 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13541 {
13542   enum rtx_code rc = GET_CODE (operands[1]);
13543   machine_mode cmp_mode;
13544   rtx cc_reg, dst, cmp;
13545 
13546   cmp = operands[1];
13547   if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13548     return false;
13549 
13550   if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13551     cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13552 
13553   cmp_mode = GET_MODE (XEXP (cmp, 0));
13554   rc = GET_CODE (cmp);
13555 
13556   dst = operands[0];
13557   if (! rtx_equal_p (operands[2], dst)
13558       && ! rtx_equal_p (operands[3], dst))
13559     {
13560       if (reg_overlap_mentioned_p (dst, cmp))
13561 	dst = gen_reg_rtx (mode);
13562 
13563       emit_move_insn (dst, operands[3]);
13564     }
13565   else if (operands[2] == dst)
13566     {
13567       operands[2] = operands[3];
13568 
13569       if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13570         rc = reverse_condition_maybe_unordered (rc);
13571       else
13572         rc = reverse_condition (rc);
13573     }
13574 
13575   if (XEXP (cmp, 1) == const0_rtx
13576       && GET_CODE (XEXP (cmp, 0)) == REG
13577       && cmp_mode == DImode
13578       && v9_regcmp_p (rc))
13579     cc_reg = XEXP (cmp, 0);
13580   else
13581     cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13582 
13583   cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13584 
13585   emit_insn (gen_rtx_SET (dst,
13586 			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13587 
13588   if (dst != operands[0])
13589     emit_move_insn (operands[0], dst);
13590 
13591   return true;
13592 }
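
/* For illustration only (hypothetical user code): on V9, a conditional
   expression such as

     long pick (long a, long b, long c, long d)
     {
       return a < b ? c : d;
     }

   goes through this function and becomes a compare plus a single movcc
   (fmovcc for FP modes) rather than a conditional branch.  */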
13593 
13594 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13595    into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13596    OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
13597    FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13598    code to be used for the condition mask.  */
13599 
13600 void
13601 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13602 {
13603   rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13604   enum rtx_code code = GET_CODE (operands[3]);
13605 
13606   mask = gen_reg_rtx (Pmode);
13607   cop0 = operands[4];
13608   cop1 = operands[5];
13609   if (code == LT || code == GE)
13610     {
13611       rtx t;
13612 
13613       code = swap_condition (code);
13614       t = cop0; cop0 = cop1; cop1 = t;
13615     }
13616 
13617   gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13618 
13619   fcmp = gen_rtx_UNSPEC (Pmode,
13620 			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13621 			 fcode);
13622 
13623   cmask = gen_rtx_UNSPEC (DImode,
13624 			  gen_rtvec (2, mask, gsr),
13625 			  ccode);
13626 
13627   bshuf = gen_rtx_UNSPEC (mode,
13628 			  gen_rtvec (3, operands[1], operands[2], gsr),
13629 			  UNSPEC_BSHUFFLE);
13630 
13631   emit_insn (gen_rtx_SET (mask, fcmp));
13632   emit_insn (gen_rtx_SET (gsr, cmask));
13633 
13634   emit_insn (gen_rtx_SET (operands[0], bshuf));
13635 }
13636 
13637 /* On SPARC, any mode that naturally allocates into the float
13638    registers should return 4 here.  */
13639 
13640 unsigned int
13641 sparc_regmode_natural_size (machine_mode mode)
13642 {
13643   int size = UNITS_PER_WORD;
13644 
13645   if (TARGET_ARCH64)
13646     {
13647       enum mode_class mclass = GET_MODE_CLASS (mode);
13648 
13649       if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13650 	size = 4;
13651     }
13652 
13653   return size;
13654 }
13655 
13656 /* Implement TARGET_HARD_REGNO_NREGS.
13657 
13658    On SPARC, ordinary registers hold 32 bits worth; this means both
13659    integer and floating point registers.  On v9, integer regs hold 64
13660    bits worth; floating point regs hold 32 bits worth (this includes the
13661    new fp regs as even the odd ones are included in the hard register
13662    count).  */
13663 
13664 static unsigned int
13665 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13666 {
13667   if (regno == SPARC_GSR_REG)
13668     return 1;
13669   if (TARGET_ARCH64)
13670     {
13671       if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13672 	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13673       return CEIL (GET_MODE_SIZE (mode), 4);
13674     }
13675   return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13676 }
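
/* For illustration only, some values of the rule above on V9
   (TARGET_ARCH64, UNITS_PER_WORD == 8):

     DImode in %o0  ->  1 register   (integer regs hold 64 bits)
     DFmode in %f0  ->  2 registers  (FP regs count 32 bits apiece)
     TFmode in %f0  ->  4 registers  */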
13677 
13678 /* Implement TARGET_HARD_REGNO_MODE_OK.
13679 
13680    ??? Because of the funny way we pass parameters we should allow certain
13681    ??? types of float/complex values to be in integer registers during
13682    ??? RTL generation.  This only matters on arch32.  */
13683 
13684 static bool
13685 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13686 {
13687   return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13688 }
13689 
13690 /* Implement TARGET_MODES_TIEABLE_P.
13691 
13692    For V9 we have to deal with the fact that only the lower 32 floating
13693    point registers are 32-bit addressable.  */
13694 
13695 static bool
13696 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13697 {
13698   enum mode_class mclass1, mclass2;
13699   unsigned short size1, size2;
13700 
13701   if (mode1 == mode2)
13702     return true;
13703 
13704   mclass1 = GET_MODE_CLASS (mode1);
13705   mclass2 = GET_MODE_CLASS (mode2);
13706   if (mclass1 != mclass2)
13707     return false;
13708 
13709   if (! TARGET_V9)
13710     return true;
13711 
13712   /* Classes are the same and we are V9 so we have to deal with upper
13713      vs. lower floating point registers.  If one of the modes is a
13714      4-byte mode, and the other is not, we have to mark them as not
13715      tieable because only the lower 32 floating point registers are
13716      addressable 32 bits at a time.
13717 
13718      We can't just test explicitly for SFmode, otherwise we won't
13719      cover the vector mode cases properly.  */
13720 
13721   if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13722     return true;
13723 
13724   size1 = GET_MODE_SIZE (mode1);
13725   size2 = GET_MODE_SIZE (mode2);
13726   if ((size1 > 4 && size2 == 4)
13727       || (size2 > 4 && size1 == 4))
13728     return false;
13729 
13730   return true;
13731 }
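
/* For illustration only, on V9 the rule above gives:

     SImode / DImode  ->  tieable      (MODE_INT is not restricted)
     SFmode / DFmode  ->  not tieable  (4-byte vs. 8-byte float)
     DFmode / TFmode  ->  tieable      (neither mode is 4 bytes)  */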
13732 
13733 /* Implement TARGET_CSTORE_MODE.  */
13734 
13735 static scalar_int_mode
13736 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13737 {
13738   return (TARGET_ARCH64 ? DImode : SImode);
13739 }
13740 
13741 /* Return the compound expression made of T1 and T2.  */
13742 
13743 static inline tree
13744 compound_expr (tree t1, tree t2)
13745 {
13746   return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13747 }
13748 
13749 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
13750 
13751 static void
13752 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13753 {
13754   if (!TARGET_FPU)
13755     return;
13756 
13757   const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13758   const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13759 
13760   /* We generate the equivalent of feholdexcept (&fenv_var):
13761 
13762        unsigned int fenv_var;
13763        __builtin_store_fsr (&fenv_var);
13764 
13765        unsigned int tmp1_var;
13766        tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13767 
13768        __builtin_load_fsr (&tmp1_var);  */
13769 
13770   tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13771   TREE_ADDRESSABLE (fenv_var) = 1;
13772   tree fenv_addr = build_fold_addr_expr (fenv_var);
13773   tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13774   tree hold_stfsr
13775     = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13776 	      build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13777 
13778   tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13779   TREE_ADDRESSABLE (tmp1_var) = 1;
13780   tree masked_fenv_var
13781     = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13782 	      build_int_cst (unsigned_type_node,
13783 			     ~(accrued_exception_mask | trap_enable_mask)));
13784   tree hold_mask
13785     = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13786 	      NULL_TREE, NULL_TREE);
13787 
13788   tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13789   tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13790   tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13791 
13792   *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13793 
13794   /* We reload the value of tmp1_var to clear the exceptions:
13795 
13796        __builtin_load_fsr (&tmp1_var);  */
13797 
13798   *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13799 
13800   /* We generate the equivalent of feupdateenv (&fenv_var):
13801 
13802        unsigned int tmp2_var;
13803        __builtin_store_fsr (&tmp2_var);
13804 
13805        __builtin_load_fsr (&fenv_var);
13806 
13807        if (SPARC_LOW_FE_EXCEPT_VALUES)
13808          tmp2_var >>= 5;
13809        __atomic_feraiseexcept ((int) tmp2_var);  */
13810 
13811   tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13812   TREE_ADDRESSABLE (tmp2_var) = 1;
13813   tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13814   tree update_stfsr
13815     = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13816 	      build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13817 
13818   tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13819 
13820   tree atomic_feraiseexcept
13821     = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13822   tree update_call
13823     = build_call_expr (atomic_feraiseexcept, 1,
13824 		       fold_convert (integer_type_node, tmp2_var));
13825 
13826   if (SPARC_LOW_FE_EXCEPT_VALUES)
13827     {
13828       tree shifted_tmp2_var
13829 	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13830 		  build_int_cst (unsigned_type_node, 5));
13831       tree update_shift
13832 	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13833       update_call = compound_expr (update_shift, update_call);
13834     }
13835 
13836   *update
13837     = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13838 }
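
/* For illustration only: in the V9 FSR layout, aexc (accrued exceptions)
   occupies bits 9:5 and TEM (trap enable mask) bits 27:23, so the masks
   above work out to

     accrued_exception_mask = 0x1f << 5  = 0x000003e0
     trap_enable_mask       = 0x1f << 23 = 0x0f800000

   and the hold sequence clears both fields before reloading the FSR.  */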
13839 
13840 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  Borrowed from the PA port.
13841 
13842    SImode loads to floating-point registers are not zero-extended.
13843    The definition for LOAD_EXTEND_OP specifies that integer loads
13844    narrower than BITS_PER_WORD will be zero-extended.  As a result,
13845    we inhibit changes from SImode unless they are to a mode that is
13846    identical in size.
13847 
13848    Likewise for SFmode, since word-mode paradoxical subregs are
13849    problematic on big-endian architectures.  */
13850 
13851 static bool
13852 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13853 			     reg_class_t rclass)
13854 {
13855   if (TARGET_ARCH64
13856       && GET_MODE_SIZE (from) == 4
13857       && GET_MODE_SIZE (to) != 4)
13858     return !reg_classes_intersect_p (rclass, FP_REGS);
13859   return true;
13860 }
13861 
13862 /* Implement TARGET_CONSTANT_ALIGNMENT.  */
13863 
13864 static HOST_WIDE_INT
13865 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13866 {
13867   if (TREE_CODE (exp) == STRING_CST)
13868     return MAX (align, FASTEST_ALIGNMENT);
13869   return align;
13870 }
13871 
13872 #include "gt-sparc.h"
13873