/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "langhooks.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
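
/* (adds takes a 14-bit immediate, addl a 22-bit immediate, and movl a
   full 64-bit one, which is why -mtls-size accepts 14, 22, and 64.)  */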

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int reg_save_gp;		/* save register for gp.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

static rtx gen_tls_get_addr PARAMS ((void));
static rtx gen_thread_pointer PARAMS ((void));
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static struct machine_function * ia64_init_machine_status PARAMS ((void));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void ia64_encode_section_info PARAMS ((tree, int));
static const char *ia64_strip_name_encoding PARAMS ((const char *));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode,
						 enum machine_mode,
						 int, tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
						  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
						int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					  HOST_WIDE_INT, tree));

static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT));
static void ia64_rwreloc_select_section PARAMS ((tree, int,
						 unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section PARAMS ((tree, int))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
						     unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags
     PARAMS ((tree, const char *, int))
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
     ATTRIBUTE_UNUSED;

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { NULL,              0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	{
	  const char *str = XSTR (op, 0);
	  return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
	}

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      return 1;
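
      /* ??? The return just above makes the remaining checks unreachable.  */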

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *str;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  str = XSTR (op, 0);
  if (str[0] != ENCODE_SECTION_INFO_CHAR)
    return 0;
  switch (str[1])
    {
    case 'G':
      return TLS_MODEL_GLOBAL_DYNAMIC;
    case 'L':
      return TLS_MODEL_LOCAL_DYNAMIC;
    case 'i':
      return TLS_MODEL_INITIAL_EXEC;
    case 'l':
      return TLS_MODEL_LOCAL_EXEC;
    }
  return 0;
}


/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand; when generating PIC code, exclude
   symbolic operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
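
/* The CONST_OK_FOR_* macros used below test ia64 constraint letters
   (defined in ia64.h): I is a 14-bit signed immediate, J a 22-bit
   signed immediate, K an 8-bit signed immediate, L an 8-bit "adjusted"
   immediate, and M a 6-bit shift count.  */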

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need a constant acceptable to both the GT and LT forms: the
   intersection of CONST_OK_FOR_K (-128..127) and CONST_OK_FOR_L
   (-127..128), i.e. -127..127.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */
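/* These correspond to the shift counts 1 through 4 accepted by the
   shladd instruction.  */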

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */
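/* These are exactly the increments that the fetchadd4 and fetchadd8
   instructions accept.  */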

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4  || INTVAL (op) == -1 ||
	      INTVAL (op) == 1   || INTVAL (op) == 4  ||
	      INTVAL (op) == 8   || INTVAL (op) == 16));
}

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
    register rtx op;
    enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
    register rtx op;
    enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
    register rtx op;
    enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
    register rtx op;
    enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
    register rtx op;
    enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_PFS_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     in test runs.  */

  return (register_operand (op, mode) &&
	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and it must be 0, 0.0, or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Return 0 if we are doing C++ code.  This optimization fails with
   C++ because of GNATS c++/6685.  */

int
addp4_optimize_ok (op1, op2)
     rtx op1, op2;
{
  if (!strcmp (lang_hooks.name, "GNU C++"))
    return 0;

  return (basereg_operand (op1, GET_MODE (op1)) !=
	  basereg_operand (op2, GET_MODE (op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */
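/* For example: a mask of 0x0ff0 with a shift count of 4 leaves 0xff
   after the shift; 0xff + 1 == 0x100 and exact_log2 (0x100) == 8, the
   field width.  A non-contiguous mask such as 0x0f0f shifted by 4
   leaves 0xf0, and 0xf0 + 1 == 0xf1 is not a power of two, so
   exact_log2 returns -1.  */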

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    {
      if (! scratch || ! register_operand (scratch, DImode))
	temp = gen_reg_rtx (DImode);
      else
	temp = scratch;
    }
  else
    temp = dest;

  if (tls_symbolic_operand (src, Pmode))
    abort ();

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if ((GET_MODE (src) == Pmode || GET_MODE (src) == ptr_mode)
	   && sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
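      /* For example: ofs == 0x12345 gives lo == -0x1cbb and hi == 0x14000;
	 hi has its low 14 bits clear, and hi + lo recovers the original
	 offset.  */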

      if (! scratch)
	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      ia64_expand_load_address (subtarget, plus_constant (sym, hi), scratch);
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
#ifdef POINTERS_EXTEND_UNSIGNED
      if (GET_MODE (temp) != GET_MODE (src))
	src = convert_memory_address (GET_MODE (temp), src);
#endif
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    {
      if (GET_MODE (dest) != GET_MODE (temp))
	temp = convert_to_mode (GET_MODE (dest), temp, 0);
      emit_move_insn (dest, temp);
    }
}

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr ()
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

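/* Return the thread pointer, which the ia64 software conventions keep
   in r13 (tp).  */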
static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer ()
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}

rtx
ia64_expand_move (op0, op1)
     rtx op0, op1;
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if (mode == Pmode || mode == ptr_mode)
    {
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
	  rtx orig_op0 = op0;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
	      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
	      RTX_UNCHANGING_P (tga_op2) = 1;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (op0) != Pmode)
		op0 = tga_ret;
	      emit_libcall_block (insns, op0, tga_ret, op1);
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      /* ??? This isn't the completely proper way to do local-dynamic.
		 If the call to __tls_get_addr is used only by a single symbol,
		 then we should (somehow) move the dtprel to the second arg
		 to avoid the extra add.  */
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = const0_rtx;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					UNSPEC_LD_BASE);
	      tmp = gen_reg_rtx (Pmode);
	      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

	      if (!register_operand (op0, Pmode))
		op0 = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_dtprel (op0, op1));
		  emit_insn (gen_adddi3 (op0, tmp, op0));
		}
	      else
		emit_insn (gen_add_dtprel (op0, tmp, op1));
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_tprel (tmp, op1));
	      tmp = gen_rtx_MEM (Pmode, tmp);
	      RTX_UNCHANGING_P (tmp) = 1;
	      tmp = force_reg (Pmode, tmp);

	      if (!register_operand (op0, Pmode))
		op0 = gen_reg_rtx (Pmode);
	      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      if (!register_operand (op0, Pmode))
		op0 = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_tprel (op0, op1));
		  emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
		}
	      else
		emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
	      break;

	    default:
	      abort ();
	    }

	  if (orig_op0 == op0)
	    return NULL_RTX;
	  if (GET_MODE (orig_op0) == Pmode)
	    return op0;
	  return gen_lowpart (GET_MODE (orig_op0), op0);
	}
      else if (!TARGET_NO_PIC &&
	       (symbolic_operand (op1, Pmode) ||
		symbolic_operand (op1, ptr_mode)))
	{
	  /* Before optimization starts, delay committing to any particular
	     type of PIC address load.  If this function gets deferred, we
	     may acquire information that changes the value of the
	     sdata_symbolic_operand predicate.

	     But don't delay for function pointers.  Loading a function address
	     actually loads the address of the descriptor, not the function.
	     If we represent these as SYMBOL_REFs, then they get cse'd with
	     calls, and we end up with calls to the descriptor address instead
	     of calls to the function address.  Functions are not candidates
	     for sdata anyway.

	     Don't delay for LABEL_REF because the splitter loses REG_LABEL
	     notes.  Don't delay for pool addresses on general principles;
	     they'll never become non-local behind our back.  */

	  if (rtx_equal_function_value_matters
	      && GET_CODE (op1) != LABEL_REF
	      && ! (GET_CODE (op1) == SYMBOL_REF
		    && (SYMBOL_REF_FLAG (op1)
			|| CONSTANT_POOL_ADDRESS_P (op1)
			|| STRING_POOL_ADDRESS_P (op1))))
	    {
	      if (GET_MODE (op1) == DImode)
		emit_insn (gen_movdi_symbolic (op0, op1));
	      else
		emit_insn (gen_movsi_symbolic (op0, op1));
	    }
	  else
	    ia64_expand_load_address (op0, op1, NULL_RTX);
	  return NULL_RTX;
	}
    }

  return op1;
}

/* Split a post-reload TImode reference into two DImode components.  */
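/* For instance, a (reg:TI 14) splits into (reg:DI 14) and (reg:DI 15),
   while a post-increment memory reference must be rewritten as a
   POST_MODIFY so that the full 16-byte increment is preserved.  */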

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = adjust_address (in, DImode, 0);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = adjust_address (in, DImode, 0);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE, true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */
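/* For a comparison such as (lt:DI a b) this emits
   (set (reg:BI p) (lt a b)) and returns (ne (reg:BI p) (const_int 0)),
   roughly speaking.  */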

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg ATTRIBUTE_UNUSED;
     int sibcall_p;
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nogp (addr);
      else if (! retval)
	insn = gen_call_nogp (addr, b0);
      else
	insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	insn = gen_sibcall_gp (addr);
      else if (! retval)
	insn = gen_call_gp (addr, b0);
      else
	insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
}
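
/* Restore the GP (r1) from wherever the prologue saved it: from a save
   register if one was allocated, otherwise from its memory save slot.  */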
void
ia64_reload_gp ()
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
		+ current_frame_info.spill_size);
      if (frame_pointer_needed)
	{
	  tmp = hard_frame_pointer_rtx;
	  offset = -offset;
	}
      else
	{
	  tmp = stack_pointer_rtx;
	  offset = current_frame_info.total_size - offset;
	}

      if (CONST_OK_FOR_I (offset))
	emit_insn (gen_adddi3 (pic_offset_table_rtx,
			       tmp, GEN_INT (offset)));
      else
	{
	  emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
	  emit_insn (gen_adddi3 (pic_offset_table_rtx,
				 pic_offset_table_rtx, tmp));
	}

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}

void
ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
		 noreturn_p, sibcall_p)
     rtx retval, addr, retaddr, scratch_r, scratch_b;
     int noreturn_p, sibcall_p;
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
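  /* (An ia64 function descriptor is a pair of doublewords: the entry
     point address followed by the gp value for the callee.)  */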
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					    REGNO (addr)))
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
	tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}

/* Begin the assembly file.  */
void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  The bits in GR0 to GR31
   that this routine has already allocated are recorded in
   current_frame_info.gr_used_mask.  TRY_LOCALS is true if we should
   attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

1697 /* Compute the offset in bytes between the frame pointer and the stack
1698    pointer for the current function, recording it in current_frame_info.
1699    SIZE is the number of bytes of space needed for local variables.  */
1700 
1701 static void
1702 ia64_compute_frame_size (size)
1703      HOST_WIDE_INT size;
1704 {
1705   HOST_WIDE_INT total_size;
1706   HOST_WIDE_INT spill_size = 0;
1707   HOST_WIDE_INT extra_spill_size = 0;
1708   HOST_WIDE_INT pretend_args_size;
1709   HARD_REG_SET mask;
1710   int n_spilled = 0;
1711   int spilled_gr_p = 0;
1712   int spilled_fr_p = 0;
1713   unsigned int regno;
1714   int i;
1715 
1716   if (current_frame_info.initialized)
1717     return;
1718 
1719   memset (&current_frame_info, 0, sizeof current_frame_info);
1720   CLEAR_HARD_REG_SET (mask);
1721 
1722   /* Don't allocate scratches to the return register.  */
1723   diddle_return_value (mark_reg_gr_used_mask, NULL);
1724 
1725   /* Don't allocate scratches to the EH scratch registers.  */
1726   if (cfun->machine->ia64_eh_epilogue_sp)
1727     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1728   if (cfun->machine->ia64_eh_epilogue_bsp)
1729     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1730 
1731   /* Find the size of the register stack frame.  We have only 80 local
1732      registers, because we reserve 8 for the inputs and 8 for the
1733      outputs.  */
1734 
1735   /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1736      since we'll be adjusting that down later.  */
1737   regno = LOC_REG (78) + ! frame_pointer_needed;
1738   for (; regno >= LOC_REG (0); regno--)
1739     if (regs_ever_live[regno])
1740       break;
1741   current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1742 
1743   /* For functions marked with the syscall_linkage attribute, we must mark
1744      all eight input registers as in use, so that locals aren't visible to
1745      the caller.  */
1746 
1747   if (cfun->machine->n_varargs > 0
1748       || lookup_attribute ("syscall_linkage",
1749 			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1750     current_frame_info.n_input_regs = 8;
1751   else
1752     {
1753       for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1754 	if (regs_ever_live[regno])
1755 	  break;
1756       current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1757     }
1758 
1759   for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1760     if (regs_ever_live[regno])
1761       break;
1762   i = regno - OUT_REG (0) + 1;
1763 
1764   /* When -p profiling, we need one output register for the mcount argument.
1765      Likewise for -a profiling for the bb_init_func argument.  For -ax
1766      profiling, we need two output registers for the two bb_init_trace_func
1767      arguments.  */
1768   if (current_function_profile)
1769     i = MAX (i, 1);
1770   current_frame_info.n_output_regs = i;
1771 
1772   /* ??? No rotating register support yet.  */
1773   current_frame_info.n_rotate_regs = 0;
1774 
1775   /* Discover which registers need spilling, and how much room that
1776      will take.  Begin with floating point and general registers,
1777      which will always wind up on the stack.  */
1778 
1779   for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1780     if (regs_ever_live[regno] && ! call_used_regs[regno])
1781       {
1782 	SET_HARD_REG_BIT (mask, regno);
1783 	spill_size += 16;
1784 	n_spilled += 1;
1785 	spilled_fr_p = 1;
1786       }
1787 
1788   for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1789     if (regs_ever_live[regno] && ! call_used_regs[regno])
1790       {
1791 	SET_HARD_REG_BIT (mask, regno);
1792 	spill_size += 8;
1793 	n_spilled += 1;
1794 	spilled_gr_p = 1;
1795       }
1796 
1797   for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1798     if (regs_ever_live[regno] && ! call_used_regs[regno])
1799       {
1800 	SET_HARD_REG_BIT (mask, regno);
1801 	spill_size += 8;
1802 	n_spilled += 1;
1803       }
1804 
1805   /* Now come all special registers that might get saved in other
1806      general registers.  */
1807 
1808   if (frame_pointer_needed)
1809     {
1810       current_frame_info.reg_fp = find_gr_spill (1);
1811       /* If we did not get a register, then we take LOC79.  This is guaranteed
1812 	 to be free, even if regs_ever_live is already set, because this is
1813 	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
1814 	 as we don't count loc79 above.  */
1815       if (current_frame_info.reg_fp == 0)
1816 	{
1817 	  current_frame_info.reg_fp = LOC_REG (79);
1818 	  current_frame_info.n_local_regs++;
1819 	}
1820     }
1821 
1822   if (! current_function_is_leaf)
1823     {
1824       /* Emit a save of BR0 if we call other functions.  Do this even
1825 	 if this function doesn't return, as EH depends on this to be
1826 	 able to unwind the stack.  */
1827       SET_HARD_REG_BIT (mask, BR_REG (0));
1828 
1829       current_frame_info.reg_save_b0 = find_gr_spill (1);
1830       if (current_frame_info.reg_save_b0 == 0)
1831 	{
1832 	  spill_size += 8;
1833 	  n_spilled += 1;
1834 	}
1835 
1836       /* Similarly for ar.pfs.  */
1837       SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1838       current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1839       if (current_frame_info.reg_save_ar_pfs == 0)
1840 	{
1841 	  extra_spill_size += 8;
1842 	  n_spilled += 1;
1843 	}
1844 
1845       /* Similarly for gp.  Note that if we're calling setjmp, the stacked
1846 	 registers are clobbered, so we fall back to the stack.  */
1847       current_frame_info.reg_save_gp
1848 	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
1849       if (current_frame_info.reg_save_gp == 0)
1850 	{
1851 	  SET_HARD_REG_BIT (mask, GR_REG (1));
1852 	  spill_size += 8;
1853 	  n_spilled += 1;
1854 	}
1855     }
1856   else
1857     {
1858       if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1859 	{
1860 	  SET_HARD_REG_BIT (mask, BR_REG (0));
1861 	  spill_size += 8;
1862 	  n_spilled += 1;
1863 	}
1864 
1865       if (regs_ever_live[AR_PFS_REGNUM])
1866 	{
1867 	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1868 	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1869 	  if (current_frame_info.reg_save_ar_pfs == 0)
1870 	    {
1871 	      extra_spill_size += 8;
1872 	      n_spilled += 1;
1873 	    }
1874 	}
1875     }
1876 
1877   /* Unwind descriptor hackery: things are most efficient if we allocate
1878      consecutive GR save registers for RP, PFS, FP in that order. However,
1879      it is absolutely critical that FP get the only hard register that's
1880      guaranteed to be free, so we allocate it first.  If all three did
1881      happen to be allocated hard regs, and are consecutive, rearrange them
1882      into the preferred order now.  */
1883   if (current_frame_info.reg_fp != 0
1884       && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1885       && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1886     {
1887       current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1888       current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1889       current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1890     }
1891 
1892   /* See if we need to store the predicate register block.  */
1893   for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1894     if (regs_ever_live[regno] && ! call_used_regs[regno])
1895       break;
1896   if (regno <= PR_REG (63))
1897     {
1898       SET_HARD_REG_BIT (mask, PR_REG (0));
1899       current_frame_info.reg_save_pr = find_gr_spill (1);
1900       if (current_frame_info.reg_save_pr == 0)
1901 	{
1902 	  extra_spill_size += 8;
1903 	  n_spilled += 1;
1904 	}
1905 
1906       /* ??? Mark them all as used so that register renaming and such
1907 	 are free to use them.  */
1908       for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1909 	regs_ever_live[regno] = 1;
1910     }
1911 
1912   /* If we're forced to use st8.spill, we're forced to save and restore
1913      ar.unat as well.  The check for existing liveness allows inline asm
1914      to touch ar.unat.  */
1915   if (spilled_gr_p || cfun->machine->n_varargs
1916       || regs_ever_live[AR_UNAT_REGNUM])
1917     {
1918       regs_ever_live[AR_UNAT_REGNUM] = 1;
1919       SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1920       current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1921       if (current_frame_info.reg_save_ar_unat == 0)
1922 	{
1923 	  extra_spill_size += 8;
1924 	  n_spilled += 1;
1925 	}
1926     }
1927 
1928   if (regs_ever_live[AR_LC_REGNUM])
1929     {
1930       SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1931       current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1932       if (current_frame_info.reg_save_ar_lc == 0)
1933 	{
1934 	  extra_spill_size += 8;
1935 	  n_spilled += 1;
1936 	}
1937     }
1938 
1939   /* If we have an odd number of words of pretend arguments written to
1940      the stack, then the FR save area will be unaligned.  We round the
1941      size of this area up to keep things 16 byte aligned.  */
1942   if (spilled_fr_p)
1943     pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1944   else
1945     pretend_args_size = current_function_pretend_args_size;
1946 
1947   total_size = (spill_size + extra_spill_size + size + pretend_args_size
1948 		+ current_function_outgoing_args_size);
1949   total_size = IA64_STACK_ALIGN (total_size);
1950 
1951   /* We always use the 16-byte scratch area provided by the caller, but
1952      if we are a leaf function, there's no one to which we need to provide
1953      a scratch area.  */
1954   if (current_function_is_leaf)
1955     total_size = MAX (0, total_size - 16);
1956 
1957   current_frame_info.total_size = total_size;
1958   current_frame_info.spill_cfa_off = pretend_args_size - 16;
1959   current_frame_info.spill_size = spill_size;
1960   current_frame_info.extra_spill_size = extra_spill_size;
1961   COPY_HARD_REG_SET (current_frame_info.mask, mask);
1962   current_frame_info.n_spilled = n_spilled;
1963   current_frame_info.initialized = reload_completed;
1964 }
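
/* Worked example (an illustration, not part of the original source):
   a leaf function with 8 bytes of locals that spills three general
   registers has spill_size == 3 * 8 == 24, so total_size starts as
   IA64_STACK_ALIGN (24 + 8) == 32.  Being a leaf, it then reuses the
   caller's 16-byte scratch area: total_size = MAX (0, 32 - 16) == 16.  */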
1965 
1966 /* Compute the initial difference between the specified pair of registers.  */
1967 
1968 HOST_WIDE_INT
1969 ia64_initial_elimination_offset (from, to)
1970      int from, to;
1971 {
1972   HOST_WIDE_INT offset;
1973 
1974   ia64_compute_frame_size (get_frame_size ());
1975   switch (from)
1976     {
1977     case FRAME_POINTER_REGNUM:
1978       if (to == HARD_FRAME_POINTER_REGNUM)
1979 	{
1980 	  if (current_function_is_leaf)
1981 	    offset = -current_frame_info.total_size;
1982 	  else
1983 	    offset = -(current_frame_info.total_size
1984 		       - current_function_outgoing_args_size - 16);
1985 	}
1986       else if (to == STACK_POINTER_REGNUM)
1987 	{
1988 	  if (current_function_is_leaf)
1989 	    offset = 0;
1990 	  else
1991 	    offset = 16 + current_function_outgoing_args_size;
1992 	}
1993       else
1994 	abort ();
1995       break;
1996 
1997     case ARG_POINTER_REGNUM:
1998       /* Arguments start above the 16 byte save area, unless stdarg,
1999 	 in which case we store through the 16 byte save area.  */
2000       if (to == HARD_FRAME_POINTER_REGNUM)
2001 	offset = 16 - current_function_pretend_args_size;
2002       else if (to == STACK_POINTER_REGNUM)
2003 	offset = (current_frame_info.total_size
2004 		  + 16 - current_function_pretend_args_size);
2005       else
2006 	abort ();
2007       break;
2008 
2009     default:
2010       abort ();
2011     }
2012 
2013   return offset;
2014 }
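
/* Worked example (an illustration, not part of the original source):
   for a non-leaf function with total_size == 64 and 16 bytes of
   outgoing arguments, eliminating FRAME_POINTER_REGNUM to
   STACK_POINTER_REGNUM yields 16 + 16 == 32, while eliminating it to
   HARD_FRAME_POINTER_REGNUM yields -(64 - 16 - 16) == -32.  */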
2015 
2016 /* If there are more than a trivial number of register spills, we use
2017    two interleaved iterators so that we can get two memory references
2018    per insn group.
2019 
2020    In order to simplify things in the prologue and epilogue expanders,
2021    we use helper functions to fix up the memory references after the
2022    fact with the appropriate offsets to a POST_MODIFY memory mode.
2023    The following data structure tracks the state of the two iterators
2024    while insns are being emitted.  */
2025 
2026 struct spill_fill_data
2027 {
2028   rtx init_after;		/* point at which to emit initializations */
2029   rtx init_reg[2];		/* initial base register */
2030   rtx iter_reg[2];		/* the iterator registers */
2031   rtx *prev_addr[2];		/* address of last memory use */
2032   rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2033   HOST_WIDE_INT prev_off[2];	/* last offset */
2034   int n_iter;			/* number of iterators in use */
2035   int next_iter;		/* next iterator to use */
2036   unsigned int save_gr_used_mask;
2037 };
2038 
2039 static struct spill_fill_data spill_fill_data;
2040 
2041 static void
2042 setup_spill_pointers (n_spills, init_reg, cfa_off)
2043      int n_spills;
2044      rtx init_reg;
2045      HOST_WIDE_INT cfa_off;
2046 {
2047   int i;
2048 
2049   spill_fill_data.init_after = get_last_insn ();
2050   spill_fill_data.init_reg[0] = init_reg;
2051   spill_fill_data.init_reg[1] = init_reg;
2052   spill_fill_data.prev_addr[0] = NULL;
2053   spill_fill_data.prev_addr[1] = NULL;
2054   spill_fill_data.prev_insn[0] = NULL;
2055   spill_fill_data.prev_insn[1] = NULL;
2056   spill_fill_data.prev_off[0] = cfa_off;
2057   spill_fill_data.prev_off[1] = cfa_off;
2058   spill_fill_data.next_iter = 0;
2059   spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2060 
2061   spill_fill_data.n_iter = 1 + (n_spills > 2);
2062   for (i = 0; i < spill_fill_data.n_iter; ++i)
2063     {
2064       int regno = next_scratch_gr_reg ();
2065       spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2066       current_frame_info.gr_used_mask |= 1 << regno;
2067     }
2068 }
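
/* Illustrative note (not in the original source): n_iter is
   1 + (n_spills > 2), so a function with, say, five spills gets two
   iterator registers.  spill_restore_mem below then alternates between
   them via next_iter, allowing two memory references per insn group.  */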
2069 
2070 static void
2071 finish_spill_pointers ()
2072 {
2073   current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2074 }
2075 
2076 static rtx
2077 spill_restore_mem (reg, cfa_off)
2078      rtx reg;
2079      HOST_WIDE_INT cfa_off;
2080 {
2081   int iter = spill_fill_data.next_iter;
2082   HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2083   rtx disp_rtx = GEN_INT (disp);
2084   rtx mem;
2085 
2086   if (spill_fill_data.prev_addr[iter])
2087     {
2088       if (CONST_OK_FOR_N (disp))
2089 	{
2090 	  *spill_fill_data.prev_addr[iter]
2091 	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2092 				   gen_rtx_PLUS (DImode,
2093 						 spill_fill_data.iter_reg[iter],
2094 						 disp_rtx));
2095 	  REG_NOTES (spill_fill_data.prev_insn[iter])
2096 	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2097 				 REG_NOTES (spill_fill_data.prev_insn[iter]));
2098 	}
2099       else
2100 	{
2101 	  /* ??? Could use register post_modify for loads.  */
2102 	  if (! CONST_OK_FOR_I (disp))
2103 	    {
2104 	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2105 	      emit_move_insn (tmp, disp_rtx);
2106 	      disp_rtx = tmp;
2107 	    }
2108 	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2109 				 spill_fill_data.iter_reg[iter], disp_rtx));
2110 	}
2111     }
2112   /* Micro-optimization: if we've created a frame pointer, it's at
2113      CFA 0, which may allow the real iterator to be initialized lower,
2114      slightly increasing parallelism.  Also, if there are few saves
2115      it may eliminate the iterator entirely.  */
2116   else if (disp == 0
2117 	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2118 	   && frame_pointer_needed)
2119     {
2120       mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2121       set_mem_alias_set (mem, get_varargs_alias_set ());
2122       return mem;
2123     }
2124   else
2125     {
2126       rtx seq, insn;
2127 
2128       if (disp == 0)
2129 	seq = gen_movdi (spill_fill_data.iter_reg[iter],
2130 			 spill_fill_data.init_reg[iter]);
2131       else
2132 	{
2133 	  start_sequence ();
2134 
2135 	  if (! CONST_OK_FOR_I (disp))
2136 	    {
2137 	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2138 	      emit_move_insn (tmp, disp_rtx);
2139 	      disp_rtx = tmp;
2140 	    }
2141 
2142 	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2143 				 spill_fill_data.init_reg[iter],
2144 				 disp_rtx));
2145 
2146 	  seq = get_insns ();
2147 	  end_sequence ();
2148 	}
2149 
2150       /* Be careful about being the first insn in a sequence.  */
2151       if (spill_fill_data.init_after)
2152 	insn = emit_insn_after (seq, spill_fill_data.init_after);
2153       else
2154 	{
2155 	  rtx first = get_insns ();
2156 	  if (first)
2157 	    insn = emit_insn_before (seq, first);
2158 	  else
2159 	    insn = emit_insn (seq);
2160 	}
2161       spill_fill_data.init_after = insn;
2162 
2163       /* If DISP is 0, we may or may not have a further adjustment
2164 	 afterward.  If we do, then the load/store insn may be modified
2165 	 to be a post-modify.  If we don't, then this copy may be
2166 	 eliminated by copyprop_hardreg_forward, which makes this
2167 	 insn garbage, which runs afoul of the sanity check in
2168 	 propagate_one_insn.  So mark this insn as legal to delete.  */
2169       if (disp == 0)
2170 	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2171 					     REG_NOTES (insn));
2172     }
2173 
2174   mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2175 
2176   /* ??? Not all of the spills are for varargs, but some of them are.
2177      The rest of the spills belong in an alias set of their own.  But
2178      it doesn't actually hurt to include them here.  */
2179   set_mem_alias_set (mem, get_varargs_alias_set ());
2180 
2181   spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2182   spill_fill_data.prev_off[iter] = cfa_off;
2183 
2184   if (++iter >= spill_fill_data.n_iter)
2185     iter = 0;
2186   spill_fill_data.next_iter = iter;
2187 
2188   return mem;
2189 }
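
/* For illustration (not part of the original source): when the distance
   to the next slot fits CONST_OK_FOR_N, the previous memory reference
   is rewritten as a POST_MODIFY, so consecutive 8-byte spills through
   one iterator can come out as post-incrementing stores along the
   lines of

	st8.spill [r16] = r4, 8

   rather than needing a separate add to the iterator register.  */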
2190 
2191 static void
2192 do_spill (move_fn, reg, cfa_off, frame_reg)
2193      rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2194      rtx reg, frame_reg;
2195      HOST_WIDE_INT cfa_off;
2196 {
2197   int iter = spill_fill_data.next_iter;
2198   rtx mem, insn;
2199 
2200   mem = spill_restore_mem (reg, cfa_off);
2201   insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2202   spill_fill_data.prev_insn[iter] = insn;
2203 
2204   if (frame_reg)
2205     {
2206       rtx base;
2207       HOST_WIDE_INT off;
2208 
2209       RTX_FRAME_RELATED_P (insn) = 1;
2210 
2211       /* Don't even pretend that the unwind code can intuit its way
2212 	 through a pair of interleaved post_modify iterators.  Just
2213 	 provide the correct answer.  */
2214 
2215       if (frame_pointer_needed)
2216 	{
2217 	  base = hard_frame_pointer_rtx;
2218 	  off = - cfa_off;
2219 	}
2220       else
2221 	{
2222 	  base = stack_pointer_rtx;
2223 	  off = current_frame_info.total_size - cfa_off;
2224 	}
2225 
2226       REG_NOTES (insn)
2227 	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2228 		gen_rtx_SET (VOIDmode,
2229 			     gen_rtx_MEM (GET_MODE (reg),
2230 					  plus_constant (base, off)),
2231 			     frame_reg),
2232 		REG_NOTES (insn));
2233     }
2234 }
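
/* Illustrative note (not in the original source): the
   REG_FRAME_RELATED_EXPR note above describes each save at its fixed
   CFA-relative address.  E.g., without a frame pointer, a save with
   cfa_off == 24 in a frame of total_size == 64 is reported to the
   unwinder as a store at sp+40, regardless of which post-modified
   iterator actually performed it.  */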
2235 
2236 static void
2237 do_restore (move_fn, reg, cfa_off)
2238      rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2239      rtx reg;
2240      HOST_WIDE_INT cfa_off;
2241 {
2242   int iter = spill_fill_data.next_iter;
2243   rtx insn;
2244 
2245   insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2246 				GEN_INT (cfa_off)));
2247   spill_fill_data.prev_insn[iter] = insn;
2248 }
2249 
2250 /* Wrapper functions that discard the CONST_INT spill offset.  These
2251    exist so that we can give gr_spill/gr_fill the offset they need and
2252    use a consistent function interface.  */
2253 
2254 static rtx
2255 gen_movdi_x (dest, src, offset)
2256      rtx dest, src;
2257      rtx offset ATTRIBUTE_UNUSED;
2258 {
2259   return gen_movdi (dest, src);
2260 }
2261 
2262 static rtx
2263 gen_fr_spill_x (dest, src, offset)
2264      rtx dest, src;
2265      rtx offset ATTRIBUTE_UNUSED;
2266 {
2267   return gen_fr_spill (dest, src);
2268 }
2269 
2270 static rtx
2271 gen_fr_restore_x (dest, src, offset)
2272      rtx dest, src;
2273      rtx offset ATTRIBUTE_UNUSED;
2274 {
2275   return gen_fr_restore (dest, src);
2276 }
2277 
2278 /* Called after register allocation to add any instructions needed for the
2279    prologue.  Using a prologue insn is favored compared to putting all of the
2280    instructions in output_function_prologue(), since it allows the scheduler
2281    to intermix instructions with the saves of the caller saved registers.  In
2282    some cases, it might be necessary to emit a barrier instruction as the last
2283    insn to prevent such scheduling.
2284 
2285    Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2286    so that the debug info generation code can handle them properly.
2287 
2288    The register save area is laid out like so:
2289    cfa+16
2290 	[ varargs spill area ]
2291 	[ fr register spill area ]
2292 	[ br register spill area ]
2293 	[ ar register spill area ]
2294 	[ pr register spill area ]
2295 	[ gr register spill area ] */
2296 
2297 /* ??? Get inefficient code when the frame size is larger than can fit in an
2298    adds instruction.  */
2299 
2300 void
2301 ia64_expand_prologue ()
2302 {
2303   rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2304   int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2305   rtx reg, alt_reg;
2306 
2307   ia64_compute_frame_size (get_frame_size ());
2308   last_scratch_gr_reg = 15;
2309 
2310   /* If there is no epilogue, then we don't need some prologue insns.
2311      We need to avoid emitting the dead prologue insns, because flow
2312      will complain about them.  */
2313   if (optimize)
2314     {
2315       edge e;
2316 
2317       for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2318 	if ((e->flags & EDGE_FAKE) == 0
2319 	    && (e->flags & EDGE_FALLTHRU) != 0)
2320 	  break;
2321       epilogue_p = (e != NULL);
2322     }
2323   else
2324     epilogue_p = 1;
2325 
2326   /* Set the local, input, and output register names.  We need to do this
2327      for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2328      half.  If we use in/loc/out register names, then we get assembler errors
2329      in crtn.S because there is no alloc insn or regstk directive in there.  */
2330   if (! TARGET_REG_NAMES)
2331     {
2332       int inputs = current_frame_info.n_input_regs;
2333       int locals = current_frame_info.n_local_regs;
2334       int outputs = current_frame_info.n_output_regs;
2335 
2336       for (i = 0; i < inputs; i++)
2337 	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2338       for (i = 0; i < locals; i++)
2339 	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2340       for (i = 0; i < outputs; i++)
2341 	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2342     }
2343 
2344   /* Set the frame pointer register name.  The regnum is logically loc79,
2345      but of course we'll not have allocated that many locals.  Rather than
2346      worrying about renumbering the existing rtxs, we adjust the name.  */
2347   /* ??? This code means that we can never use one local register when
2348      there is a frame pointer.  loc79 gets wasted in this case, as it is
2349      renamed to a register that will never be used.  See also the try_locals
2350      code in find_gr_spill.  */
2351   if (current_frame_info.reg_fp)
2352     {
2353       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2354       reg_names[HARD_FRAME_POINTER_REGNUM]
2355 	= reg_names[current_frame_info.reg_fp];
2356       reg_names[current_frame_info.reg_fp] = tmp;
2357     }
2358 
2359   /* We don't need an alloc instruction if we've used no outputs or locals.  */
2360   if (current_frame_info.n_local_regs == 0
2361       && current_frame_info.n_output_regs == 0
2362       && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2363       && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2364     {
2365       /* If there is no alloc, but there are input registers used, then we
2366 	 need a .regstk directive.  */
2367       current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2368       ar_pfs_save_reg = NULL_RTX;
2369     }
2370   else
2371     {
2372       current_frame_info.need_regstk = 0;
2373 
2374       if (current_frame_info.reg_save_ar_pfs)
2375 	regno = current_frame_info.reg_save_ar_pfs;
2376       else
2377 	regno = next_scratch_gr_reg ();
2378       ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2379 
2380       insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2381 				   GEN_INT (current_frame_info.n_input_regs),
2382 				   GEN_INT (current_frame_info.n_local_regs),
2383 				   GEN_INT (current_frame_info.n_output_regs),
2384 				   GEN_INT (current_frame_info.n_rotate_regs)));
2385       RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2386     }
2387 
2388   /* Set up frame pointer, stack pointer, and spill iterators.  */
2389 
2390   n_varargs = cfun->machine->n_varargs;
2391   setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2392 			stack_pointer_rtx, 0);
2393 
2394   if (frame_pointer_needed)
2395     {
2396       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2397       RTX_FRAME_RELATED_P (insn) = 1;
2398     }
2399 
2400   if (current_frame_info.total_size != 0)
2401     {
2402       rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2403       rtx offset;
2404 
2405       if (CONST_OK_FOR_I (- current_frame_info.total_size))
2406 	offset = frame_size_rtx;
2407       else
2408 	{
2409 	  regno = next_scratch_gr_reg ();
2410  	  offset = gen_rtx_REG (DImode, regno);
2411 	  emit_move_insn (offset, frame_size_rtx);
2412 	}
2413 
2414       insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2415 				    stack_pointer_rtx, offset));
2416 
2417       if (! frame_pointer_needed)
2418 	{
2419 	  RTX_FRAME_RELATED_P (insn) = 1;
2420 	  if (GET_CODE (offset) != CONST_INT)
2421 	    {
2422 	      REG_NOTES (insn)
2423 		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2424 			gen_rtx_SET (VOIDmode,
2425 				     stack_pointer_rtx,
2426 				     gen_rtx_PLUS (DImode,
2427 						   stack_pointer_rtx,
2428 						   frame_size_rtx)),
2429 			REG_NOTES (insn));
2430 	    }
2431 	}
2432 
2433       /* ??? At this point we must generate a magic insn that appears to
2434 	 modify the stack pointer, the frame pointer, and all spill
2435 	 iterators.  This would allow the most scheduling freedom.  For
2436 	 now, just hard stop.  */
2437       emit_insn (gen_blockage ());
2438     }
2439 
2440   /* Must copy out ar.unat before doing any integer spills.  */
2441   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2442     {
2443       if (current_frame_info.reg_save_ar_unat)
2444 	ar_unat_save_reg
2445 	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2446       else
2447 	{
2448 	  alt_regno = next_scratch_gr_reg ();
2449 	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2450 	  current_frame_info.gr_used_mask |= 1 << alt_regno;
2451 	}
2452 
2453       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2454       insn = emit_move_insn (ar_unat_save_reg, reg);
2455       RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2456 
2457       /* Even if we're not going to generate an epilogue, we still
2458 	 need to save the register so that EH works.  */
2459       if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2460 	emit_insn (gen_prologue_use (ar_unat_save_reg));
2461     }
2462   else
2463     ar_unat_save_reg = NULL_RTX;
2464 
2465   /* Spill all varargs registers.  Do this before spilling any GR registers,
2466      since we want the UNAT bits for the GR registers to override the UNAT
2467      bits from varargs, which we don't care about.  */
2468 
2469   cfa_off = -16;
2470   for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2471     {
2472       reg = gen_rtx_REG (DImode, regno);
2473       do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2474     }
2475 
2476   /* Locate the bottom of the register save area.  */
2477   cfa_off = (current_frame_info.spill_cfa_off
2478 	     + current_frame_info.spill_size
2479 	     + current_frame_info.extra_spill_size);
2480 
2481   /* Save the predicate register block either in a register or in memory.  */
2482   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2483     {
2484       reg = gen_rtx_REG (DImode, PR_REG (0));
2485       if (current_frame_info.reg_save_pr != 0)
2486 	{
2487 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2488 	  insn = emit_move_insn (alt_reg, reg);
2489 
2490 	  /* ??? Denote pr spill/fill by a DImode move that modifies all
2491 	     64 hard registers.  */
2492 	  RTX_FRAME_RELATED_P (insn) = 1;
2493 	  REG_NOTES (insn)
2494 	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2495 			gen_rtx_SET (VOIDmode, alt_reg, reg),
2496 			REG_NOTES (insn));
2497 
2498 	  /* Even if we're not going to generate an epilogue, we still
2499 	     need to save the register so that EH works.  */
2500 	  if (! epilogue_p)
2501 	    emit_insn (gen_prologue_use (alt_reg));
2502 	}
2503       else
2504 	{
2505 	  alt_regno = next_scratch_gr_reg ();
2506 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2507 	  insn = emit_move_insn (alt_reg, reg);
2508 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2509 	  cfa_off -= 8;
2510 	}
2511     }
2512 
2513   /* Handle AR regs in numerical order.  All of them get special handling.  */
2514   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2515       && current_frame_info.reg_save_ar_unat == 0)
2516     {
2517       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2518       do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2519       cfa_off -= 8;
2520     }
2521 
2522   /* The alloc insn already copied ar.pfs into a general register.  The
2523      only thing we have to do now is copy that register to a stack slot
2524      if we'd not allocated a local register for the job.  */
2525   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2526       && current_frame_info.reg_save_ar_pfs == 0)
2527     {
2528       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2529       do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2530       cfa_off -= 8;
2531     }
2532 
2533   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2534     {
2535       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2536       if (current_frame_info.reg_save_ar_lc != 0)
2537 	{
2538 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2539 	  insn = emit_move_insn (alt_reg, reg);
2540 	  RTX_FRAME_RELATED_P (insn) = 1;
2541 
2542 	  /* Even if we're not going to generate an epilogue, we still
2543 	     need to save the register so that EH works.  */
2544 	  if (! epilogue_p)
2545 	    emit_insn (gen_prologue_use (alt_reg));
2546 	}
2547       else
2548 	{
2549 	  alt_regno = next_scratch_gr_reg ();
2550 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2551 	  emit_move_insn (alt_reg, reg);
2552 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2553 	  cfa_off -= 8;
2554 	}
2555     }
2556 
2557   if (current_frame_info.reg_save_gp)
2558     {
2559       insn = emit_move_insn (gen_rtx_REG (DImode,
2560 					  current_frame_info.reg_save_gp),
2561 			     pic_offset_table_rtx);
2562       /* We don't know for sure yet if this is actually needed, since
2563 	 we've not split the PIC call patterns.  If all of the calls
2564 	 are indirect, and not followed by any uses of the gp, then
2565 	 this save is dead.  Allow it to go away.  */
2566       REG_NOTES (insn)
2567 	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2568     }
2569 
2570   /* We should now be at the base of the gr/br/fr spill area.  */
2571   if (cfa_off != (current_frame_info.spill_cfa_off
2572 		  + current_frame_info.spill_size))
2573     abort ();
2574 
2575   /* Spill all general registers.  */
2576   for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2577     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2578       {
2579 	reg = gen_rtx_REG (DImode, regno);
2580 	do_spill (gen_gr_spill, reg, cfa_off, reg);
2581 	cfa_off -= 8;
2582       }
2583 
2584   /* Handle BR0 specially -- it may be getting stored permanently in
2585      some GR register.  */
2586   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2587     {
2588       reg = gen_rtx_REG (DImode, BR_REG (0));
2589       if (current_frame_info.reg_save_b0 != 0)
2590 	{
2591 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2592 	  insn = emit_move_insn (alt_reg, reg);
2593 	  RTX_FRAME_RELATED_P (insn) = 1;
2594 
2595 	  /* Even if we're not going to generate an epilogue, we still
2596 	     need to save the register so that EH works.  */
2597 	  if (! epilogue_p)
2598 	    emit_insn (gen_prologue_use (alt_reg));
2599 	}
2600       else
2601 	{
2602 	  alt_regno = next_scratch_gr_reg ();
2603 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2604 	  emit_move_insn (alt_reg, reg);
2605 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2606 	  cfa_off -= 8;
2607 	}
2608     }
2609 
2610   /* Spill the rest of the BR registers.  */
2611   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2612     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2613       {
2614 	alt_regno = next_scratch_gr_reg ();
2615 	alt_reg = gen_rtx_REG (DImode, alt_regno);
2616 	reg = gen_rtx_REG (DImode, regno);
2617 	emit_move_insn (alt_reg, reg);
2618 	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2619 	cfa_off -= 8;
2620       }
2621 
2622   /* Align the frame and spill all FR registers.  */
2623   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2624     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2625       {
2626         if (cfa_off & 15)
2627 	  abort ();
2628 	reg = gen_rtx_REG (TFmode, regno);
2629 	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2630 	cfa_off -= 16;
2631       }
2632 
2633   if (cfa_off != current_frame_info.spill_cfa_off)
2634     abort ();
2635 
2636   finish_spill_pointers ();
2637 }
2638 
2639 /* Called after register allocation to add any instructions needed for the
2640    epilogue.  Using an epilogue insn is favored compared to putting all of the
2641    instructions in output_function_epilogue(), since it allows the scheduler
2642    to intermix instructions with the saves of the caller saved registers.  In
2643    some cases, it might be necessary to emit a barrier instruction as the last
2644    insn to prevent such scheduling.  */
2645 
2646 void
2647 ia64_expand_epilogue (sibcall_p)
2648      int sibcall_p;
2649 {
2650   rtx insn, reg, alt_reg, ar_unat_save_reg;
2651   int regno, alt_regno, cfa_off;
2652 
2653   ia64_compute_frame_size (get_frame_size ());
2654 
2655   /* If there is a frame pointer, then we use it instead of the stack
2656      pointer, so that the stack pointer does not need to be valid when
2657      the epilogue starts.  See EXIT_IGNORE_STACK.  */
2658   if (frame_pointer_needed)
2659     setup_spill_pointers (current_frame_info.n_spilled,
2660 			  hard_frame_pointer_rtx, 0);
2661   else
2662     setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2663 			  current_frame_info.total_size);
2664 
2665   if (current_frame_info.total_size != 0)
2666     {
2667       /* ??? At this point we must generate a magic insn that appears to
2668          modify the spill iterators and the frame pointer.  This would
2669 	 allow the most scheduling freedom.  For now, just hard stop.  */
2670       emit_insn (gen_blockage ());
2671     }
2672 
2673   /* Locate the bottom of the register save area.  */
2674   cfa_off = (current_frame_info.spill_cfa_off
2675 	     + current_frame_info.spill_size
2676 	     + current_frame_info.extra_spill_size);
2677 
2678   /* Restore the predicate registers.  */
2679   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2680     {
2681       if (current_frame_info.reg_save_pr != 0)
2682 	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2683       else
2684 	{
2685 	  alt_regno = next_scratch_gr_reg ();
2686 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2687 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2688 	  cfa_off -= 8;
2689 	}
2690       reg = gen_rtx_REG (DImode, PR_REG (0));
2691       emit_move_insn (reg, alt_reg);
2692     }
2693 
2694   /* Restore the application registers.  */
2695 
2696   /* Load the saved unat from the stack, but do not restore it until
2697      after the GRs have been restored.  */
2698   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2699     {
2700       if (current_frame_info.reg_save_ar_unat != 0)
2701         ar_unat_save_reg
2702 	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2703       else
2704 	{
2705 	  alt_regno = next_scratch_gr_reg ();
2706 	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2707 	  current_frame_info.gr_used_mask |= 1 << alt_regno;
2708 	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2709 	  cfa_off -= 8;
2710 	}
2711     }
2712   else
2713     ar_unat_save_reg = NULL_RTX;
2714 
2715   if (current_frame_info.reg_save_ar_pfs != 0)
2716     {
2717       alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2718       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2719       emit_move_insn (reg, alt_reg);
2720     }
2721   else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2722     {
2723       alt_regno = next_scratch_gr_reg ();
2724       alt_reg = gen_rtx_REG (DImode, alt_regno);
2725       do_restore (gen_movdi_x, alt_reg, cfa_off);
2726       cfa_off -= 8;
2727       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2728       emit_move_insn (reg, alt_reg);
2729     }
2730 
2731   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2732     {
2733       if (current_frame_info.reg_save_ar_lc != 0)
2734 	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2735       else
2736 	{
2737 	  alt_regno = next_scratch_gr_reg ();
2738 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2739 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2740 	  cfa_off -= 8;
2741 	}
2742       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2743       emit_move_insn (reg, alt_reg);
2744     }
2745 
2746   /* We should now be at the base of the gr/br/fr spill area.  */
2747   if (cfa_off != (current_frame_info.spill_cfa_off
2748 		  + current_frame_info.spill_size))
2749     abort ();
2750 
2751   /* The GP may be stored on the stack in the prologue, but it's
2752      never restored in the epilogue.  Skip the stack slot.  */
2753   if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2754     cfa_off -= 8;
2755 
2756   /* Restore all general registers.  */
2757   for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2758     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2759       {
2760 	reg = gen_rtx_REG (DImode, regno);
2761 	do_restore (gen_gr_restore, reg, cfa_off);
2762 	cfa_off -= 8;
2763       }
2764 
2765   /* Restore the branch registers.  Handle B0 specially, as it may
2766      have gotten stored in some GR register.  */
2767   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2768     {
2769       if (current_frame_info.reg_save_b0 != 0)
2770 	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2771       else
2772 	{
2773 	  alt_regno = next_scratch_gr_reg ();
2774 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2775 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2776 	  cfa_off -= 8;
2777 	}
2778       reg = gen_rtx_REG (DImode, BR_REG (0));
2779       emit_move_insn (reg, alt_reg);
2780     }
2781 
2782   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2783     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2784       {
2785 	alt_regno = next_scratch_gr_reg ();
2786 	alt_reg = gen_rtx_REG (DImode, alt_regno);
2787 	do_restore (gen_movdi_x, alt_reg, cfa_off);
2788 	cfa_off -= 8;
2789 	reg = gen_rtx_REG (DImode, regno);
2790 	emit_move_insn (reg, alt_reg);
2791       }
2792 
2793   /* Restore floating point registers.  */
2794   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2795     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2796       {
2797         if (cfa_off & 15)
2798 	  abort ();
2799 	reg = gen_rtx_REG (TFmode, regno);
2800 	do_restore (gen_fr_restore_x, reg, cfa_off);
2801 	cfa_off -= 16;
2802       }
2803 
2804   /* Restore ar.unat for real.  */
2805   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2806     {
2807       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2808       emit_move_insn (reg, ar_unat_save_reg);
2809     }
2810 
2811   if (cfa_off != current_frame_info.spill_cfa_off)
2812     abort ();
2813 
2814   finish_spill_pointers ();
2815 
2816   if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2817     {
2818       /* ??? At this point we must generate a magic insn that appears to
2819          modify the spill iterators, the stack pointer, and the frame
2820 	 pointer.  This would allow the most scheduling freedom.  For now,
2821 	 just hard stop.  */
2822       emit_insn (gen_blockage ());
2823     }
2824 
2825   if (cfun->machine->ia64_eh_epilogue_sp)
2826     emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2827   else if (frame_pointer_needed)
2828     {
2829       insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2830       RTX_FRAME_RELATED_P (insn) = 1;
2831     }
2832   else if (current_frame_info.total_size)
2833     {
2834       rtx offset, frame_size_rtx;
2835 
2836       frame_size_rtx = GEN_INT (current_frame_info.total_size);
2837       if (CONST_OK_FOR_I (current_frame_info.total_size))
2838 	offset = frame_size_rtx;
2839       else
2840 	{
2841 	  regno = next_scratch_gr_reg ();
2842 	  offset = gen_rtx_REG (DImode, regno);
2843 	  emit_move_insn (offset, frame_size_rtx);
2844 	}
2845 
2846       insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2847 				    offset));
2848 
2849       RTX_FRAME_RELATED_P (insn) = 1;
2850       if (GET_CODE (offset) != CONST_INT)
2851 	{
2852 	  REG_NOTES (insn)
2853 	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2854 			gen_rtx_SET (VOIDmode,
2855 				     stack_pointer_rtx,
2856 				     gen_rtx_PLUS (DImode,
2857 						   stack_pointer_rtx,
2858 						   frame_size_rtx)),
2859 			REG_NOTES (insn));
2860 	}
2861     }
2862 
2863   if (cfun->machine->ia64_eh_epilogue_bsp)
2864     emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2865 
2866   if (! sibcall_p)
2867     emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2868   else
2869     {
2870       int fp = GR_REG (2);
2871       /* We need a throw-away register here; r0 and r1 are reserved, so r2
2872 	 is the first available call-clobbered register.  If there was a
2873 	 frame pointer register, we may have swapped the names of r2 and
2874 	 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
2875 	 string "r2" when emitting the register name for the assembler.  */
2876       if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2877 	fp = HARD_FRAME_POINTER_REGNUM;
2878 
2879       /* We must emit an alloc to force the input registers to become output
2880 	 registers.  Otherwise, if the callee tries to pass its parameters
2881 	 through to another call without an intervening alloc, then these
2882 	 values get lost.  */
2883       /* ??? We don't need to preserve all input registers.  We only need to
2884 	 preserve those input registers used as arguments to the sibling call.
2885 	 It is unclear how to compute that number here.  */
2886       if (current_frame_info.n_input_regs != 0)
2887 	{
2888 	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
2889 	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2890 				const0_rtx, const0_rtx,
2891 				n_inputs, const0_rtx));
2892 	  RTX_FRAME_RELATED_P (insn) = 1;
2893 	}
2894     }
2895 }
2896 
2897 /* Return 1 if br.ret can do all the work required to return from a
2898    function.  */
2899 
2900 int
2901 ia64_direct_return ()
2902 {
2903   if (reload_completed && ! frame_pointer_needed)
2904     {
2905       ia64_compute_frame_size (get_frame_size ());
2906 
2907       return (current_frame_info.total_size == 0
2908 	      && current_frame_info.n_spilled == 0
2909 	      && current_frame_info.reg_save_b0 == 0
2910 	      && current_frame_info.reg_save_pr == 0
2911 	      && current_frame_info.reg_save_ar_pfs == 0
2912 	      && current_frame_info.reg_save_ar_unat == 0
2913 	      && current_frame_info.reg_save_ar_lc == 0);
2914     }
2915   return 0;
2916 }
2917 
2918 /* Return the magic cookie that we use to hold the return address
2919    during early compilation.  */
2920 
2921 rtx
2922 ia64_return_addr_rtx (count, frame)
2923      HOST_WIDE_INT count;
2924      rtx frame ATTRIBUTE_UNUSED;
2925 {
2926   if (count != 0)
2927     return NULL;
2928   return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
2929 }
2930 
2931 /* Split this value after reload, now that we know where the return
2932    address is saved.  */
2933 
2934 void
2935 ia64_split_return_addr_rtx (dest)
2936      rtx dest;
2937 {
2938   rtx src;
2939 
2940   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2941     {
2942       if (current_frame_info.reg_save_b0 != 0)
2943 	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2944       else
2945 	{
2946 	  HOST_WIDE_INT off;
2947 	  unsigned int regno;
2948 
2949 	  /* Compute offset from CFA for BR0.  */
2950 	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
2951 	  off = (current_frame_info.spill_cfa_off
2952 		 + current_frame_info.spill_size);
2953 	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2954 	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2955 	      off -= 8;
2956 
2957 	  /* Convert CFA offset to a register based offset.  */
2958 	  if (frame_pointer_needed)
2959 	    src = hard_frame_pointer_rtx;
2960 	  else
2961 	    {
2962 	      src = stack_pointer_rtx;
2963 	      off += current_frame_info.total_size;
2964 	    }
2965 
2966 	  /* Load address into scratch register.  */
2967 	  if (CONST_OK_FOR_I (off))
2968 	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
2969 	  else
2970 	    {
2971 	      emit_move_insn (dest, GEN_INT (off));
2972 	      emit_insn (gen_adddi3 (dest, src, dest));
2973 	    }
2974 
2975 	  src = gen_rtx_MEM (Pmode, dest);
2976 	}
2977     }
2978   else
2979     src = gen_rtx_REG (DImode, BR_REG (0));
2980 
2981   emit_move_insn (dest, src);
2982 }
2983 
2984 int
2985 ia64_hard_regno_rename_ok (from, to)
2986      int from;
2987      int to;
2988 {
2989   /* Don't clobber any of the registers we reserved for the prologue.  */
2990   if (to == current_frame_info.reg_fp
2991       || to == current_frame_info.reg_save_b0
2992       || to == current_frame_info.reg_save_pr
2993       || to == current_frame_info.reg_save_ar_pfs
2994       || to == current_frame_info.reg_save_ar_unat
2995       || to == current_frame_info.reg_save_ar_lc)
2996     return 0;
2997 
2998   if (from == current_frame_info.reg_fp
2999       || from == current_frame_info.reg_save_b0
3000       || from == current_frame_info.reg_save_pr
3001       || from == current_frame_info.reg_save_ar_pfs
3002       || from == current_frame_info.reg_save_ar_unat
3003       || from == current_frame_info.reg_save_ar_lc)
3004     return 0;
3005 
3006   /* Don't use output registers outside the register frame.  */
3007   if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3008     return 0;
3009 
3010   /* Retain even/oddness on predicate register pairs.  */
3011   if (PR_REGNO_P (from) && PR_REGNO_P (to))
3012     return (from & 1) == (to & 1);
3013 
3014   return 1;
3015 }
3016 
3017 /* Target hook for assembling integer objects.  Handle word-sized
3018    aligned objects and detect the cases when @fptr is needed.  */
3019 
3020 static bool
3021 ia64_assemble_integer (x, size, aligned_p)
3022      rtx x;
3023      unsigned int size;
3024      int aligned_p;
3025 {
3026   if (size == (TARGET_ILP32 ? 4 : 8)
3027       && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3028       && GET_CODE (x) == SYMBOL_REF
3029       && SYMBOL_REF_FLAG (x))
3030     {
3031       static const char * const directive[2][2] = {
3032 	  /* 64-bit pointer */  /* 32-bit pointer */
3033 	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
3034 	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
3035       };
3036       fputs (directive[aligned_p != 0][TARGET_ILP32 != 0], asm_out_file);
3037       output_addr_const (asm_out_file, x);
3038       fputs (")\n", asm_out_file);
3039       return true;
3040     }
3041   return default_assemble_integer (x, size, aligned_p);
3042 }
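
/* For illustration (not part of the original source): for a function
   symbol `foo' in LP64 PIC code, an aligned word is emitted as

	data8	@fptr(foo)

   while the ILP32 and unaligned cases select the data4 and .ua forms
   from the directive table above.  */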
3043 
3044 /* Emit the function prologue.  */
3045 
3046 static void
3047 ia64_output_function_prologue (file, size)
3048      FILE *file;
3049      HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3050 {
3051   int mask, grsave, grsave_prev;
3052 
3053   if (current_frame_info.need_regstk)
3054     fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3055 	     current_frame_info.n_input_regs,
3056 	     current_frame_info.n_local_regs,
3057 	     current_frame_info.n_output_regs,
3058 	     current_frame_info.n_rotate_regs);
3059 
3060   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3061     return;
3062 
3063   /* Emit the .prologue directive.  */
3064 
3065   mask = 0;
3066   grsave = grsave_prev = 0;
3067   if (current_frame_info.reg_save_b0 != 0)
3068     {
3069       mask |= 8;
3070       grsave = grsave_prev = current_frame_info.reg_save_b0;
3071     }
3072   if (current_frame_info.reg_save_ar_pfs != 0
3073       && (grsave_prev == 0
3074 	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3075     {
3076       mask |= 4;
3077       if (grsave_prev == 0)
3078 	grsave = current_frame_info.reg_save_ar_pfs;
3079       grsave_prev = current_frame_info.reg_save_ar_pfs;
3080     }
3081   if (current_frame_info.reg_fp != 0
3082       && (grsave_prev == 0
3083 	  || current_frame_info.reg_fp == grsave_prev + 1))
3084     {
3085       mask |= 2;
3086       if (grsave_prev == 0)
3087 	grsave = HARD_FRAME_POINTER_REGNUM;
3088       grsave_prev = current_frame_info.reg_fp;
3089     }
3090   if (current_frame_info.reg_save_pr != 0
3091       && (grsave_prev == 0
3092 	  || current_frame_info.reg_save_pr == grsave_prev + 1))
3093     {
3094       mask |= 1;
3095       if (grsave_prev == 0)
3096 	grsave = current_frame_info.reg_save_pr;
3097     }
3098 
3099   if (mask)
3100     fprintf (file, "\t.prologue %d, %d\n", mask,
3101 	     ia64_dbx_register_number (grsave));
3102   else
3103     fputs ("\t.prologue\n", file);
3104 
3105   /* Emit a .spill directive, if necessary, to relocate the base of
3106      the register spill area.  */
3107   if (current_frame_info.spill_cfa_off != -16)
3108     fprintf (file, "\t.spill %ld\n",
3109 	     (long) (current_frame_info.spill_cfa_off
3110 		     + current_frame_info.spill_size));
3111 }
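
/* For illustration (not part of the original source): if b0 is saved
   in the first GR save register and ar.pfs in the next consecutive
   one, mask is 8 | 4 == 12 and the emitted directive might look like

	.prologue 12, 32

   where the second operand is the debugger number of the first save
   register as computed by ia64_dbx_register_number.  */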
3112 
3113 /* Emit the .body directive at the scheduled end of the prologue.  */
3114 
3115 static void
3116 ia64_output_function_end_prologue (file)
3117      FILE *file;
3118 {
3119   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3120     return;
3121 
3122   fputs ("\t.body\n", file);
3123 }
3124 
3125 /* Emit the function epilogue.  */
3126 
3127 static void
3128 ia64_output_function_epilogue (file, size)
3129      FILE *file ATTRIBUTE_UNUSED;
3130      HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3131 {
3132   int i;
3133 
3134   if (current_frame_info.reg_fp)
3135     {
3136       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3137       reg_names[HARD_FRAME_POINTER_REGNUM]
3138 	= reg_names[current_frame_info.reg_fp];
3139       reg_names[current_frame_info.reg_fp] = tmp;
3140     }
3141   if (! TARGET_REG_NAMES)
3142     {
3143       for (i = 0; i < current_frame_info.n_input_regs; i++)
3144 	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3145       for (i = 0; i < current_frame_info.n_local_regs; i++)
3146 	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3147       for (i = 0; i < current_frame_info.n_output_regs; i++)
3148 	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3149     }
3150 
3151   current_frame_info.initialized = 0;
3152 }
3153 
3154 int
3155 ia64_dbx_register_number (regno)
3156      int regno;
3157 {
3158   /* In ia64_expand_prologue we quite literally renamed the frame pointer
3159      from its home at loc79 to something inside the register frame.  We
3160      must perform the same renumbering here for the debug info.  */
3161   if (current_frame_info.reg_fp)
3162     {
3163       if (regno == HARD_FRAME_POINTER_REGNUM)
3164 	regno = current_frame_info.reg_fp;
3165       else if (regno == current_frame_info.reg_fp)
3166 	regno = HARD_FRAME_POINTER_REGNUM;
3167     }
3168 
3169   if (IN_REGNO_P (regno))
3170     return 32 + regno - IN_REG (0);
3171   else if (LOC_REGNO_P (regno))
3172     return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3173   else if (OUT_REGNO_P (regno))
3174     return (32 + current_frame_info.n_input_regs
3175 	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
3176   else
3177     return regno;
3178 }
3179 
3180 void
3181 ia64_initialize_trampoline (addr, fnaddr, static_chain)
3182      rtx addr, fnaddr, static_chain;
3183 {
3184   rtx addr_reg, eight = GEN_INT (8);
3185 
3186   /* Load up our iterator.  */
3187   addr_reg = gen_reg_rtx (Pmode);
3188   emit_move_insn (addr_reg, addr);
3189 
3190   /* The first two words are the fake descriptor:
3191      __ia64_trampoline, ADDR+16.  */
3192   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3193 		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3194   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3195 
3196   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3197 		  copy_to_reg (plus_constant (addr, 16)));
3198   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3199 
3200   /* The third word is the target descriptor.  */
3201   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3202   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3203 
3204   /* The fourth word is the static chain.  */
3205   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3206 }
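
/* Resulting trampoline layout (an illustrative summary, not in the
   original source):

	ADDR+ 0: __ia64_trampoline	fake descriptor: entry point
	ADDR+ 8: ADDR+16		fake descriptor: gp
	ADDR+16: FNADDR			target descriptor word
	ADDR+24: STATIC_CHAIN

   An indirect call through ADDR thus lands in __ia64_trampoline with
   gp pointing at the two real words that follow.  */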
3207 
3208 /* Do any needed setup for a variadic function.  CUM has not been updated
3209    for the last named argument which has type TYPE and mode MODE.
3210 
3211    We generate the actual spill instructions during prologue generation.  */
3212 
3213 void
3214 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3215      CUMULATIVE_ARGS cum;
3216      int             int_mode;
3217      tree            type;
3218      int *           pretend_size;
3219      int	     second_time ATTRIBUTE_UNUSED;
3220 {
3221   /* Skip the current argument.  */
3222   ia64_function_arg_advance (&cum, int_mode, type, 1);
3223 
3224   if (cum.words < MAX_ARGUMENT_SLOTS)
3225     {
3226       int n = MAX_ARGUMENT_SLOTS - cum.words;
3227       *pretend_size = n * UNITS_PER_WORD;
3228       cfun->machine->n_varargs = n;
3229     }
3230 }
3231 
3232 /* Check whether TYPE is a homogeneous floating point aggregate.  If
3233    it is, return the mode of the floating point type that appears
3234    in all leaves.  If it is not, return VOIDmode.
3235 
3236    An aggregate is a homogeneous floating point aggregate if all
3237    fields/elements in it have the same floating point type (e.g.,
3238    SFmode).  128-bit quad-precision floats are excluded.  */
3239 
3240 static enum machine_mode
3241 hfa_element_mode (type, nested)
3242      tree type;
3243      int nested;
3244 {
3245   enum machine_mode element_mode = VOIDmode;
3246   enum machine_mode mode;
3247   enum tree_code code = TREE_CODE (type);
3248   int know_element_mode = 0;
3249   tree t;
3250 
3251   switch (code)
3252     {
3253     case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
3254     case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
3255     case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
3256     case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
3257     case FUNCTION_TYPE:
3258       return VOIDmode;
3259 
3260       /* Fortran complex types are supposed to be HFAs, so we need to handle
3261 	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
3262 	 types though.  */
3263     case COMPLEX_TYPE:
3264       if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3265 	  && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
3266 	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
3267 			      * BITS_PER_UNIT, MODE_FLOAT, 0);
3268       else
3269 	return VOIDmode;
3270 
3271     case REAL_TYPE:
3272       /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3273 	 mode if this is contained within an aggregate.  */
3274       if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
3275 	return TYPE_MODE (type);
3276       else
3277 	return VOIDmode;
3278 
3279     case ARRAY_TYPE:
3280       return hfa_element_mode (TREE_TYPE (type), 1);
3281 
3282     case RECORD_TYPE:
3283     case UNION_TYPE:
3284     case QUAL_UNION_TYPE:
3285       for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3286 	{
3287 	  if (TREE_CODE (t) != FIELD_DECL)
3288 	    continue;
3289 
3290 	  mode = hfa_element_mode (TREE_TYPE (t), 1);
3291 	  if (know_element_mode)
3292 	    {
3293 	      if (mode != element_mode)
3294 		return VOIDmode;
3295 	    }
3296 	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3297 	    return VOIDmode;
3298 	  else
3299 	    {
3300 	      know_element_mode = 1;
3301 	      element_mode = mode;
3302 	    }
3303 	}
3304       return element_mode;
3305 
3306     default:
3307       /* If we reach here, we probably have some front-end specific type
3308 	 that the backend doesn't know about.  This can happen via the
3309 	 aggregate_value_p call in init_function_start.  All we can do is
3310 	 ignore unknown tree types.  */
3311       return VOIDmode;
3312     }
3313 
3314   return VOIDmode;
3315 }
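
/* Examples (illustrative, not from the original source):

     struct { float x, y, z; }		-> SFmode  (HFA of three floats)
     struct { double r; double v[2]; }	-> DFmode  (array leaves count too)
     struct { float x; double y; }	-> VOIDmode (mixed element modes)
     struct { float x; int n; }		-> VOIDmode (non-FP leaf)  */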
3316 
3317 /* Return rtx for register where argument is passed, or zero if it is passed
3318    on the stack.  */
3319 
3320 /* ??? 128-bit quad-precision floats are always passed in general
3321    registers.  */
3322 
3323 rtx
3324 ia64_function_arg (cum, mode, type, named, incoming)
3325      CUMULATIVE_ARGS *cum;
3326      enum machine_mode mode;
3327      tree type;
3328      int named;
3329      int incoming;
3330 {
3331   int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3332   int words = (((mode == BLKmode ? int_size_in_bytes (type)
3333 		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3334 	       / UNITS_PER_WORD);
3335   int offset = 0;
3336   enum machine_mode hfa_mode = VOIDmode;
3337 
3338   /* Integer and float arguments larger than 8 bytes start at the next even
3339      boundary.  Aggregates larger than 8 bytes start at the next even boundary
3340      if the aggregate has 16 byte alignment.  Net effect is that types with
3341      alignment greater than 8 start at the next even boundary.  */
3342   /* ??? The ABI does not specify how to handle aggregates with alignment from
3343      9 to 15 bytes, or greater than 16.  We handle them all as if they had
3344      16 byte alignment.  Such aggregates can occur only if gcc extensions are
3345      used.  */
3346   if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3347        : (words > 1))
3348       && (cum->words & 1))
3349     offset = 1;
3350 
3351   /* If all argument slots are used, then it must go on the stack.  */
3352   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3353     return 0;
3354 
3355   /* Check for and handle homogeneous FP aggregates.  */
3356   if (type)
3357     hfa_mode = hfa_element_mode (type, 0);
3358 
3359   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3360      and unprototyped hfas are passed specially.  */
3361   if (hfa_mode != VOIDmode && (! cum->prototype || named))
3362     {
3363       rtx loc[16];
3364       int i = 0;
3365       int fp_regs = cum->fp_regs;
3366       int int_regs = cum->words + offset;
3367       int hfa_size = GET_MODE_SIZE (hfa_mode);
3368       int byte_size;
3369       int args_byte_size;
3370 
3371       /* If prototyped, pass it in FR regs then GR regs.
3372 	 If not prototyped, pass it in both FR and GR regs.
3373 
3374 	 If this is an SFmode aggregate, then it is possible to run out of
3375 	 FR regs while GR regs are still left.  In that case, we pass the
3376 	 remaining part in the GR regs.  */
3377 
3378       /* Fill the FP regs.  We do this always.  We stop if we reach the end
3379 	 of the argument, the last FP register, or the last argument slot.  */
3380 
3381       byte_size = ((mode == BLKmode)
3382 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3383       args_byte_size = int_regs * UNITS_PER_WORD;
3384       offset = 0;
3385       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3386 	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3387 	{
3388 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3389 				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3390 							      + fp_regs)),
3391 				      GEN_INT (offset));
3392 	  offset += hfa_size;
3393 	  args_byte_size += hfa_size;
3394 	  fp_regs++;
3395 	}
3396 
3397       /* If no prototype, then the whole thing must go in GR regs.  */
3398       if (! cum->prototype)
3399 	offset = 0;
3400       /* If this is an SFmode aggregate, then we might have some left over
3401 	 that needs to go in GR regs.  */
3402       else if (byte_size != offset)
3403 	int_regs += offset / UNITS_PER_WORD;
3404 
3405       /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
3406 
3407       for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3408 	{
3409 	  enum machine_mode gr_mode = DImode;
3410 
3411 	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
3412 	     then this goes in a GR reg left adjusted/little endian, right
3413 	     adjusted/big endian.  */
3414 	  /* ??? Currently this is handled wrong, because 4-byte hunks are
3415 	     always right adjusted/little endian.  */
3416 	  if (offset & 0x4)
3417 	    gr_mode = SImode;
3418 	  /* If we have an even 4 byte hunk because the aggregate is a
3419 	     multiple of 4 bytes in size, then this goes in a GR reg right
3420 	     adjusted/little endian.  */
3421 	  else if (byte_size - offset == 4)
3422 	    gr_mode = SImode;
3423 	  /* Complex floats need to have float mode.  */
3424 	  if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3425 	    gr_mode = hfa_mode;
3426 
3427 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3428 				      gen_rtx_REG (gr_mode, (basereg
3429 							     + int_regs)),
3430 				      GEN_INT (offset));
3431 	  offset += GET_MODE_SIZE (gr_mode);
3432 	  int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3433 		      ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3434 	}
3435 
3436       /* If we ended up using just one location, just return that one loc, but
3437 	 change the mode back to the argument mode.  */
3438       if (i == 1)
3439 	return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
3440       else
3441 	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3442     }
3443 
3444   /* Integral and aggregates go in general registers.  If we have run out of
3445      FR registers, then FP values must also go in general registers.  This can
3446      happen when we have an SFmode HFA.  */
3447   else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3448           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3449     {
3450       int byte_size = ((mode == BLKmode)
3451                        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3452       if (BYTES_BIG_ENDIAN
3453 	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3454 	&& byte_size < UNITS_PER_WORD
3455 	&& byte_size > 0)
3456 	{
3457 	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3458 					  gen_rtx_REG (DImode,
3459 						       (basereg + cum->words
3460 							+ offset)),
3461 					  const0_rtx);
3462 	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3463 	}
3464       else
3465 	return gen_rtx_REG (mode, basereg + cum->words + offset);
3466 
3467     }
3468 
3469   /* If there is a prototype, then FP values go in a FR register when
3470      named, and in a GR register when unnamed.  */
3471   else if (cum->prototype)
3472     {
3473       if (! named)
3474 	return gen_rtx_REG (mode, basereg + cum->words + offset);
3475       else
3476 	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3477     }
3478   /* If there is no prototype, then FP values go in both FR and GR
3479      registers.  */
3480   else
3481     {
3482       rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3483 				      gen_rtx_REG (mode, (FR_ARG_FIRST
3484 							  + cum->fp_regs)),
3485 				      const0_rtx);
3486       rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3487 				      gen_rtx_REG (mode,
3488 						   (basereg + cum->words
3489 						    + offset)),
3490 				      const0_rtx);
3491 
3492       return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3493     }
3494 }
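
/* Illustrative sketch of the HFA path above, assuming a prototyped call
   with no argument slots consumed yet: a named struct { double a, b, c; }
   argument becomes a PARALLEL of three DFmode pieces, roughly

     (parallel [(f8 at offset 0) (f9 at 8) (f10 at 16)])

   while for an unprototyped callee the same value is also replicated
   into the general argument registers starting at the first free slot.  */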
3495 
3496 /* Return number of words, at the beginning of the argument, that must be
3497    put in registers.  0 if the argument is entirely in registers or entirely
3498    in memory.  */
3499 
3500 int
3501 ia64_function_arg_partial_nregs (cum, mode, type, named)
3502      CUMULATIVE_ARGS *cum;
3503      enum machine_mode mode;
3504      tree type;
3505      int named ATTRIBUTE_UNUSED;
3506 {
3507   int words = (((mode == BLKmode ? int_size_in_bytes (type)
3508 		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3509 	       / UNITS_PER_WORD);
3510   int offset = 0;
3511 
3512   /* Arguments with alignment larger than 8 bytes start at the next even
3513      boundary.  */
3514   if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3515        : (words > 1))
3516       && (cum->words & 1))
3517     offset = 1;
3518 
3519   /* If all argument slots are used, then it must go on the stack.  */
3520   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3521     return 0;
3522 
3523   /* It doesn't matter whether the argument goes in FR or GR regs.  If
3524      it fits within the 8 argument slots, then it goes entirely in
3525      registers.  If it extends past the last argument slot, then the rest
3526      goes on the stack.  */
3527 
3528   if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3529     return 0;
3530 
3531   return MAX_ARGUMENT_SLOTS - cum->words - offset;
3532 }
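
/* Worked example (illustrative only): with cum->words == 6 and a 4-word
   argument, the value straddles the slot limit, so 8 - 6 = 2 words are
   passed in registers and the remaining 2 words go on the stack.  */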
3533 
3534 /* Update CUM to point after this argument.  This is patterned after
3535    ia64_function_arg.  */
3536 
3537 void
3538 ia64_function_arg_advance (cum, mode, type, named)
3539      CUMULATIVE_ARGS *cum;
3540      enum machine_mode mode;
3541      tree type;
3542      int named;
3543 {
3544   int words = (((mode == BLKmode ? int_size_in_bytes (type)
3545 		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3546 	       / UNITS_PER_WORD);
3547   int offset = 0;
3548   enum machine_mode hfa_mode = VOIDmode;
3549 
3550   /* If all arg slots are already full, then there is nothing to do.  */
3551   if (cum->words >= MAX_ARGUMENT_SLOTS)
3552     return;
3553 
3554   /* Arguments with alignment larger than 8 bytes start at the next even
3555      boundary.  */
3556   if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3557        : (words > 1))
3558       && (cum->words & 1))
3559     offset = 1;
3560 
3561   cum->words += words + offset;
3562 
3563   /* Check for and handle homogeneous FP aggregates.  */
3564   if (type)
3565     hfa_mode = hfa_element_mode (type, 0);
3566 
3567   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3568      and unprototyped hfas are passed specially.  */
3569   if (hfa_mode != VOIDmode && (! cum->prototype || named))
3570     {
3571       int fp_regs = cum->fp_regs;
3572       /* This is the original value of cum->words + offset.  */
3573       int int_regs = cum->words - words;
3574       int hfa_size = GET_MODE_SIZE (hfa_mode);
3575       int byte_size;
3576       int args_byte_size;
3577 
3578       /* If prototyped, pass it in FR regs then GR regs.
3579 	 If not prototyped, pass it in both FR and GR regs.
3580 
3581 	 If this is an SFmode aggregate, then it is possible to run out of
3582 	 FR regs while GR regs are still left.  In that case, we pass the
3583 	 remaining part in the GR regs.  */
3584 
3585       /* Fill the FP regs.  We do this always.  We stop if we reach the end
3586 	 of the argument, the last FP register, or the last argument slot.  */
3587 
3588       byte_size = ((mode == BLKmode)
3589 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3590       args_byte_size = int_regs * UNITS_PER_WORD;
3591       offset = 0;
3592       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3593 	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3594 	{
3595 	  offset += hfa_size;
3596 	  args_byte_size += hfa_size;
3597 	  fp_regs++;
3598 	}
3599 
3600       cum->fp_regs = fp_regs;
3601     }
3602 
3603   /* Integral and aggregates go in general registers.  If we have run out of
3604      FR registers, then FP values must also go in general registers.  This can
3605      happen when we have an SFmode HFA.  */
3606   else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3607     cum->int_regs = cum->words;
3608 
3609   /* If there is a prototype, then FP values go in a FR register when
3610      named, and in a GR register when unnamed.  */
3611   else if (cum->prototype)
3612     {
3613       if (! named)
3614 	cum->int_regs = cum->words;
3615       else
3616 	/* ??? Complex types should not reach here.  */
3617 	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3618     }
3619   /* If there is no prototype, then FP values go in both FR and GR
3620      registers.  */
3621   else
3622     {
3623       /* ??? Complex types should not reach here.  */
3624       cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3625       cum->int_regs = cum->words;
3626     }
3627 }
3628 
3629 /* Variable sized types are passed by reference.  */
3630 /* ??? At present this is a GCC extension to the IA-64 ABI.  */
3631 
3632 int
3633 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3634      CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3635      enum machine_mode mode ATTRIBUTE_UNUSED;
3636      tree type;
3637      int named ATTRIBUTE_UNUSED;
3638 {
3639   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3640 }
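
/* Example (illustrative): a value of GNU C variable-length array type,
   say "typeof (int [n]) v" with non-constant n, has a TYPE_SIZE that is
   not an INTEGER_CST, so it is passed by reference here.  */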
3641 
3642 
3643 /* Implement va_arg.  */
3644 
3645 rtx
3646 ia64_va_arg (valist, type)
3647      tree valist, type;
3648 {
3649   tree t;
3650 
3651   /* Variable sized types are passed by reference.  */
3652   if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3653     {
3654       rtx addr = force_reg (ptr_mode,
3655 	    std_expand_builtin_va_arg (valist, build_pointer_type (type)));
3656 #ifdef POINTERS_EXTEND_UNSIGNED
3657       addr = convert_memory_address (Pmode, addr);
3658 #endif
3659       return gen_rtx_MEM (ptr_mode, addr);
3660     }
3661 
3662   /* Arguments with alignment larger than 8 bytes start at the next even
3663      boundary.  */
3664   if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3665     {
3666       t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3667 		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3668       t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3669 		 build_int_2 (-2 * UNITS_PER_WORD, -1));
3670       t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3671       TREE_SIDE_EFFECTS (t) = 1;
3672       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3673     }
3674 
3675   return std_expand_builtin_va_arg (valist, type);
3676 }
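
/* Illustrative sketch of the alignment fixup above: for a 16-byte
   aligned TYPE the va_list pointer is advanced as

     valist = (valist + 15) & -16;

   i.e. rounded up to the next even argument slot before the standard
   va_arg expansion runs.  */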
3677 
3678 /* Return 1 if the function return value is returned in memory.  Return 0 if it is
3679    in a register.  */
3680 
3681 int
3682 ia64_return_in_memory (valtype)
3683      tree valtype;
3684 {
3685   enum machine_mode mode;
3686   enum machine_mode hfa_mode;
3687   HOST_WIDE_INT byte_size;
3688 
3689   mode = TYPE_MODE (valtype);
3690   byte_size = GET_MODE_SIZE (mode);
3691   if (mode == BLKmode)
3692     {
3693       byte_size = int_size_in_bytes (valtype);
3694       if (byte_size < 0)
3695 	return 1;
3696     }
3697 
3698   /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
3699 
3700   hfa_mode = hfa_element_mode (valtype, 0);
3701   if (hfa_mode != VOIDmode)
3702     {
3703       int hfa_size = GET_MODE_SIZE (hfa_mode);
3704 
3705       if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3706 	return 1;
3707       else
3708 	return 0;
3709     }
3710   else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3711     return 1;
3712   else
3713     return 0;
3714 }
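
/* Worked examples (illustrative; MAX_INT_RETURN_SLOTS is assumed to be
   4 here): an HFA of eight doubles (64 bytes, 64/8 = 8 <= 8 slots) is
   returned in FP registers, while a 40-byte non-HFA aggregate exceeds
   4 * UNITS_PER_WORD = 32 bytes and is returned in memory.  */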
3715 
3716 /* Return rtx for register that holds the function return value.  */
3717 
3718 rtx
3719 ia64_function_value (valtype, func)
3720      tree valtype;
3721      tree func ATTRIBUTE_UNUSED;
3722 {
3723   enum machine_mode mode;
3724   enum machine_mode hfa_mode;
3725 
3726   mode = TYPE_MODE (valtype);
3727   hfa_mode = hfa_element_mode (valtype, 0);
3728 
3729   if (hfa_mode != VOIDmode)
3730     {
3731       rtx loc[8];
3732       int i;
3733       int hfa_size;
3734       int byte_size;
3735       int offset;
3736 
3737       hfa_size = GET_MODE_SIZE (hfa_mode);
3738       byte_size = ((mode == BLKmode)
3739 		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3740       offset = 0;
3741       for (i = 0; offset < byte_size; i++)
3742 	{
3743 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3744 				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3745 				      GEN_INT (offset));
3746 	  offset += hfa_size;
3747 	}
3748 
3749       if (i == 1)
3750 	return XEXP (loc[0], 0);
3751       else
3752 	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3753     }
3754   else if (FLOAT_TYPE_P (valtype) &&
3755            ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3756     return gen_rtx_REG (mode, FR_ARG_FIRST);
3757   else
3758     {
3759       if (BYTES_BIG_ENDIAN
3760 	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3761 	{
3762 	  rtx loc[8];
3763 	  int offset;
3764 	  int bytesize;
3765 	  int i;
3766 
3767 	  offset = 0;
3768 	  bytesize = int_size_in_bytes (valtype);
3769 	  for (i = 0; offset < bytesize; i++)
3770 	    {
3771 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3772 					  gen_rtx_REG (DImode,
3773 						       GR_RET_FIRST + i),
3774 					  GEN_INT (offset));
3775 	      offset += UNITS_PER_WORD;
3776 	    }
3777 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3778 	}
3779       else
3780 	return gen_rtx_REG (mode, GR_RET_FIRST);
3781     }
3782 }
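
/* Example (illustrative): returning struct { float x, y; } yields a
   PARALLEL of two SFmode pieces, roughly (parallel [(f8 at 0) (f9 at 4)]);
   a plain double comes back directly in f8, and a long in r8.  */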
3783 
3784 /* Print a memory address as an operand to reference that memory location.  */
3785 
3786 /* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
3787    also call this from ia64_print_operand for memory addresses.  */
3788 
3789 void
3790 ia64_print_operand_address (stream, address)
3791      FILE * stream ATTRIBUTE_UNUSED;
3792      rtx    address ATTRIBUTE_UNUSED;
3793 {
3794 }
3795 
3796 /* Print an operand to an assembler instruction.
3797    C	Swap and print a comparison operator.
3798    D	Print an FP comparison operator.
3799    E    Print 32 - constant, for SImode shifts as extract.
3800    e    Print 64 - constant, for DImode rotates.
3801    F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3802         a floating point register emitted normally.
3803    I	Invert a predicate register by adding 1.
3804    J    Select the proper predicate register for a condition.
3805    j    Select the inverse predicate register for a condition.
3806    O	Append .acq for volatile load.
3807    P	Postincrement of a MEM.
3808    Q	Append .rel for volatile store.
3809    S	Shift amount for shladd instruction.
3810    T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3811 	for Intel assembler.
3812    U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3813 	for Intel assembler.
3814    r	Print register name, or constant 0 as r0.  HP compatibility for
3815 	Linux kernel.  */
3816 void
3817 ia64_print_operand (file, x, code)
3818      FILE * file;
3819      rtx    x;
3820      int    code;
3821 {
3822   const char *str;
3823 
3824   switch (code)
3825     {
3826     case 0:
3827       /* Handled below.  */
3828       break;
3829 
3830     case 'C':
3831       {
3832 	enum rtx_code c = swap_condition (GET_CODE (x));
3833 	fputs (GET_RTX_NAME (c), file);
3834 	return;
3835       }
3836 
3837     case 'D':
3838       switch (GET_CODE (x))
3839 	{
3840 	case NE:
3841 	  str = "neq";
3842 	  break;
3843 	case UNORDERED:
3844 	  str = "unord";
3845 	  break;
3846 	case ORDERED:
3847 	  str = "ord";
3848 	  break;
3849 	default:
3850 	  str = GET_RTX_NAME (GET_CODE (x));
3851 	  break;
3852 	}
3853       fputs (str, file);
3854       return;
3855 
3856     case 'E':
3857       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3858       return;
3859 
3860     case 'e':
3861       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3862       return;
3863 
3864     case 'F':
3865       if (x == CONST0_RTX (GET_MODE (x)))
3866 	str = reg_names [FR_REG (0)];
3867       else if (x == CONST1_RTX (GET_MODE (x)))
3868 	str = reg_names [FR_REG (1)];
3869       else if (GET_CODE (x) == REG)
3870 	str = reg_names [REGNO (x)];
3871       else
3872 	abort ();
3873       fputs (str, file);
3874       return;
3875 
3876     case 'I':
3877       fputs (reg_names [REGNO (x) + 1], file);
3878       return;
3879 
3880     case 'J':
3881     case 'j':
3882       {
3883 	unsigned int regno = REGNO (XEXP (x, 0));
3884 	if (GET_CODE (x) == EQ)
3885 	  regno += 1;
3886 	if (code == 'j')
3887 	  regno ^= 1;
3888         fputs (reg_names [regno], file);
3889       }
3890       return;
3891 
3892     case 'O':
3893       if (MEM_VOLATILE_P (x))
3894 	fputs(".acq", file);
3895       return;
3896 
3897     case 'P':
3898       {
3899 	HOST_WIDE_INT value;
3900 
3901 	switch (GET_CODE (XEXP (x, 0)))
3902 	  {
3903 	  default:
3904 	    return;
3905 
3906 	  case POST_MODIFY:
3907 	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3908 	    if (GET_CODE (x) == CONST_INT)
3909 	      value = INTVAL (x);
3910 	    else if (GET_CODE (x) == REG)
3911 	      {
3912 		fprintf (file, ", %s", reg_names[REGNO (x)]);
3913 		return;
3914 	      }
3915 	    else
3916 	      abort ();
3917 	    break;
3918 
3919 	  case POST_INC:
3920 	    value = GET_MODE_SIZE (GET_MODE (x));
3921 	    break;
3922 
3923 	  case POST_DEC:
3924 	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3925 	    break;
3926 	  }
3927 
3928 	putc (',', file);
3929 	putc (' ', file);
3930 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3931 	return;
3932       }
3933 
3934     case 'Q':
3935       if (MEM_VOLATILE_P (x))
3936 	fputs(".rel", file);
3937       return;
3938 
3939     case 'S':
3940       fprintf (file, "%d", exact_log2 (INTVAL (x)));
3941       return;
3942 
3943     case 'T':
3944       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3945 	{
3946 	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3947 	  return;
3948 	}
3949       break;
3950 
3951     case 'U':
3952       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3953 	{
3954 	  const char *prefix = "0x";
3955 	  if (INTVAL (x) & 0x80000000)
3956 	    {
3957 	      fprintf (file, "0xffffffff");
3958 	      prefix = "";
3959 	    }
3960 	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3961 	  return;
3962 	}
3963       break;
3964 
3965     case 'r':
3966       /* If this operand is the constant zero, write it as register zero.
3967 	 Any register, zero, or CONST_INT value is OK here.  */
3968       if (GET_CODE (x) == REG)
3969 	fputs (reg_names[REGNO (x)], file);
3970       else if (x == CONST0_RTX (GET_MODE (x)))
3971 	fputs ("r0", file);
3972       else if (GET_CODE (x) == CONST_INT)
3973 	output_addr_const (file, x);
3974       else
3975 	output_operand_lossage ("invalid %%r value");
3976       return;
3977 
3978     case '+':
3979       {
3980 	const char *which;
3981 
3982 	/* For conditional branches, returns or calls, substitute
3983 	   sptk, dptk, dpnt, or spnt for %s.  */
3984 	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3985 	if (x)
3986 	  {
3987 	    int pred_val = INTVAL (XEXP (x, 0));
3988 
3989 	    /* Guess top and bottom 10% statically predicted.  */
3990 	    if (pred_val < REG_BR_PROB_BASE / 50)
3991 	      which = ".spnt";
3992 	    else if (pred_val < REG_BR_PROB_BASE / 2)
3993 	      which = ".dpnt";
3994 	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3995 	      which = ".dptk";
3996 	    else
3997 	      which = ".sptk";
3998 	  }
3999 	else if (GET_CODE (current_output_insn) == CALL_INSN)
4000 	  which = ".sptk";
4001 	else
4002 	  which = ".dptk";
4003 
4004 	fputs (which, file);
4005 	return;
4006       }
4007 
4008     case ',':
4009       x = current_insn_predicate;
4010       if (x)
4011 	{
4012 	  unsigned int regno = REGNO (XEXP (x, 0));
4013 	  if (GET_CODE (x) == EQ)
4014 	    regno += 1;
4015           fprintf (file, "(%s) ", reg_names [regno]);
4016 	}
4017       return;
4018 
4019     default:
4020       output_operand_lossage ("ia64_print_operand: unknown code");
4021       return;
4022     }
4023 
4024   switch (GET_CODE (x))
4025     {
4026       /* This happens for the spill/restore instructions.  */
4027     case POST_INC:
4028     case POST_DEC:
4029     case POST_MODIFY:
4030       x = XEXP (x, 0);
4031       /* ... fall through ...  */
4032 
4033     case REG:
4034       fputs (reg_names [REGNO (x)], file);
4035       break;
4036 
4037     case MEM:
4038       {
4039 	rtx addr = XEXP (x, 0);
4040 	if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
4041 	  addr = XEXP (addr, 0);
4042 	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4043 	break;
4044       }
4045 
4046     default:
4047       output_addr_const (file, x);
4048       break;
4049     }
4050 
4051   return;
4052 }
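
/* Usage sketch (illustrative, not from the original source): a pattern
   template along the lines of

     "(%J0) br.cond%+ %l1"

   prints the predicate register selected for operand 0 by the 'J' case
   and one of .sptk/.dptk/.dpnt/.spnt chosen from the REG_BR_PROB note
   by the '+' case above.  */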
4053 
4054 /* Calculate the cost of moving data from a register in class FROM to
4055    one in class TO, using MODE.  */
4056 
4057 int
4058 ia64_register_move_cost (mode, from, to)
4059      enum machine_mode mode;
4060      enum reg_class from, to;
4061 {
4062   /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
4063   if (to == ADDL_REGS)
4064     to = GR_REGS;
4065   if (from == ADDL_REGS)
4066     from = GR_REGS;
4067 
4068   /* All costs are symmetric, so reduce cases by putting the
4069      lower number class as the destination.  */
4070   if (from < to)
4071     {
4072       enum reg_class tmp = to;
4073       to = from, from = tmp;
4074     }
4075 
4076   /* Moving from FR<->GR in TFmode must be more expensive than 2,
4077      so that we get secondary memory reloads.  Between FR_REGS,
4078      we have to make this at least as expensive as MEMORY_MOVE_COST
4079      to avoid spectacularly poor register class preferencing.  */
4080   if (mode == TFmode)
4081     {
4082       if (to != GR_REGS || from != GR_REGS)
4083         return MEMORY_MOVE_COST (mode, to, 0);
4084       else
4085 	return 3;
4086     }
4087 
4088   switch (to)
4089     {
4090     case PR_REGS:
4091       /* Moving between PR registers takes two insns.  */
4092       if (from == PR_REGS)
4093 	return 3;
4094       /* Moving between PR and anything but GR is impossible.  */
4095       if (from != GR_REGS)
4096 	return MEMORY_MOVE_COST (mode, to, 0);
4097       break;
4098 
4099     case BR_REGS:
4100       /* Moving between BR and anything but GR is impossible.  */
4101       if (from != GR_REGS && from != GR_AND_BR_REGS)
4102 	return MEMORY_MOVE_COST (mode, to, 0);
4103       break;
4104 
4105     case AR_I_REGS:
4106     case AR_M_REGS:
4107       /* Moving between AR and anything but GR is impossible.  */
4108       if (from != GR_REGS)
4109 	return MEMORY_MOVE_COST (mode, to, 0);
4110       break;
4111 
4112     case GR_REGS:
4113     case FR_REGS:
4114     case GR_AND_FR_REGS:
4115     case GR_AND_BR_REGS:
4116     case ALL_REGS:
4117       break;
4118 
4119     default:
4120       abort ();
4121     }
4122 
4123   return 2;
4124 }
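
/* Summary of the costs computed above (illustrative):

	GR <-> GR, and most other direct moves		2
	PR <-> PR					3
	GR <-> GR in TFmode				3
	FR or GR <-> FR in TFmode			memory cost
	PR, BR or AR <-> anything but GR		memory cost  */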
4125 
4126 /* This function returns the register class required for a secondary
4127    register when copying between one of the registers in CLASS, and X,
4128    using MODE.  A return value of NO_REGS means that no secondary register
4129    is required.  */
4130 
4131 enum reg_class
4132 ia64_secondary_reload_class (class, mode, x)
4133      enum reg_class class;
4134      enum machine_mode mode ATTRIBUTE_UNUSED;
4135      rtx x;
4136 {
4137   int regno = -1;
4138 
4139   if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4140     regno = true_regnum (x);
4141 
4142   switch (class)
4143     {
4144     case BR_REGS:
4145     case AR_M_REGS:
4146     case AR_I_REGS:
4147       /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4148 	 interaction.  We end up with two pseudos with overlapping lifetimes
4149 	 both of which are equiv to the same constant, and both which need
4150 	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
4151 	 changes depending on the path length, which means the qty_first_reg
4152 	 check in make_regs_eqv can give different answers at different times.
4153 	 At some point I'll probably need a reload_indi pattern to handle
4154 	 this.
4155 
4156 	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4157 	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
4158 	 non-general registers for good measure.  */
4159       if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4160 	return GR_REGS;
4161 
4162       /* This is needed if a pseudo used as a call_operand gets spilled to a
4163 	 stack slot.  */
4164       if (GET_CODE (x) == MEM)
4165 	return GR_REGS;
4166       break;
4167 
4168     case FR_REGS:
4169       /* Need to go through general registers to get to other class regs.  */
4170       if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4171 	return GR_REGS;
4172 
4173       /* This can happen when a paradoxical subreg is an operand to the
4174 	 muldi3 pattern.  */
4175       /* ??? This shouldn't be necessary after instruction scheduling is
4176 	 enabled, because paradoxical subregs are not accepted by
4177 	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
4178 	 stop the paradoxical subreg stupidity in the *_operand functions
4179 	 in recog.c.  */
4180       if (GET_CODE (x) == MEM
4181 	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4182 	      || GET_MODE (x) == QImode))
4183 	return GR_REGS;
4184 
4185       /* This can happen because of the ior/and/etc patterns that accept FP
4186 	 registers as operands.  If the third operand is a constant, then it
4187 	 needs to be reloaded into a FP register.  */
4188       if (GET_CODE (x) == CONST_INT)
4189 	return GR_REGS;
4190 
4191       /* This can happen because of register elimination in a muldi3 insn.
4192 	 E.g. `26107 * (unsigned long)&u'.  */
4193       if (GET_CODE (x) == PLUS)
4194 	return GR_REGS;
4195       break;
4196 
4197     case PR_REGS:
4198       /* ??? This happens if we cse/gcse a BImode value across a call,
4199 	 and the function has a nonlocal goto.  This is because global
4200 	 does not allocate call crossing pseudos to hard registers when
4201 	 current_function_has_nonlocal_goto is true.  This is relatively
4202 	 common for C++ programs that use exceptions.  To reproduce,
4203 	 return NO_REGS and compile libstdc++.  */
4204       if (GET_CODE (x) == MEM)
4205 	return GR_REGS;
4206 
4207       /* This can happen when we take a BImode subreg of a DImode value,
4208 	 and that DImode value winds up in some non-GR register.  */
4209       if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4210 	return GR_REGS;
4211       break;
4212 
4213     case GR_REGS:
4214       /* Since we have no offsettable memory addresses, we need a temporary
4215 	 to hold the address of the second word.  */
4216       if (mode == TImode)
4217 	return GR_REGS;
4218       break;
4219 
4220     default:
4221       break;
4222     }
4223 
4224   return NO_REGS;
4225 }
4226 
4227 /* Emit text to declare externally defined variables and functions, because
4228    the Intel assembler does not support undefined externals.  */
4229 
4230 void
4231 ia64_asm_output_external (file, decl, name)
4232      FILE *file;
4233      tree decl;
4234      const char *name;
4235 {
4236   int save_referenced;
4237 
4238   /* GNU as does not need anything here, but the HP linker does need
4239      something for external functions.  */
4240 
4241   if (TARGET_GNU_AS
4242       && (!TARGET_HPUX_LD
4243 	  || TREE_CODE (decl) != FUNCTION_DECL
4244 	  || strstr(name, "__builtin_") == name))
4245     return;
4246 
4247   /* ??? The Intel assembler creates a reference that needs to be satisfied by
4248      the linker when we do this, so we need to be careful not to do this for
4249      builtin functions which have no library equivalent.  Unfortunately, we
4250      can't tell here whether or not a function will actually be called by
4251      expand_expr, so we pull in library functions even if we may not need
4252      them later.  */
4253   if (! strcmp (name, "__builtin_next_arg")
4254       || ! strcmp (name, "alloca")
4255       || ! strcmp (name, "__builtin_constant_p")
4256       || ! strcmp (name, "__builtin_args_info"))
4257     return;
4258 
4259   if (TARGET_HPUX_LD)
4260     ia64_hpux_add_extern_decl (name);
4261   else
4262     {
4263       /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4264          restore it.  */
4265       save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4266       if (TREE_CODE (decl) == FUNCTION_DECL)
4267         ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4268       (*targetm.asm_out.globalize_label) (file, name);
4269       TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4270     }
4271 }
4272 
4273 /* Parse the -mfixed-range= option string.  */
4274 
4275 static void
4276 fix_range (const_str)
4277      const char *const_str;
4278 {
4279   int i, first, last;
4280   char *str, *dash, *comma;
4281 
4282   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4283      REG2 are either register names or register numbers.  The effect
4284      of this option is to mark the registers in the range from REG1 to
4285      REG2 as ``fixed'' so they won't be used by the compiler.  This is
4286      used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
4287 
4288   i = strlen (const_str);
4289   str = (char *) alloca (i + 1);
4290   memcpy (str, const_str, i + 1);
4291 
4292   while (1)
4293     {
4294       dash = strchr (str, '-');
4295       if (!dash)
4296 	{
4297 	  warning ("value of -mfixed-range must have form REG1-REG2");
4298 	  return;
4299 	}
4300       *dash = '\0';
4301 
4302       comma = strchr (dash + 1, ',');
4303       if (comma)
4304 	*comma = '\0';
4305 
4306       first = decode_reg_name (str);
4307       if (first < 0)
4308 	{
4309 	  warning ("unknown register name: %s", str);
4310 	  return;
4311 	}
4312 
4313       last = decode_reg_name (dash + 1);
4314       if (last < 0)
4315 	{
4316 	  warning ("unknown register name: %s", dash + 1);
4317 	  return;
4318 	}
4319 
4320       *dash = '-';
4321 
4322       if (first > last)
4323 	{
4324 	  warning ("%s-%s is an empty range", str, dash + 1);
4325 	  return;
4326 	}
4327 
4328       for (i = first; i <= last; ++i)
4329 	fixed_regs[i] = call_used_regs[i] = 1;
4330 
4331       if (!comma)
4332 	break;
4333 
4334       *comma = ',';
4335       str = comma + 1;
4336     }
4337 }
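
/* Usage example (illustrative): -mfixed-range=f32-f127 marks the upper
   FP registers as fixed and call-used so generated code never touches
   them; multiple ranges may be given, e.g.
   -mfixed-range=f32-f63,f96-f127.  */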
4338 
4339 static struct machine_function *
4340 ia64_init_machine_status ()
4341 {
4342   return ggc_alloc_cleared (sizeof (struct machine_function));
4343 }
4344 
4345 /* Handle TARGET_OPTIONS switches.  */
4346 
4347 void
4348 ia64_override_options ()
4349 {
4350   if (TARGET_AUTO_PIC)
4351     target_flags |= MASK_CONST_GP;
4352 
4353   if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4354     {
4355       warning ("cannot optimize floating point division for both latency and throughput");
4356       target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4357     }
4358 
4359   if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4360     {
4361       warning ("cannot optimize integer division for both latency and throughput");
4362       target_flags &= ~MASK_INLINE_INT_DIV_THR;
4363     }
4364 
4365   if (ia64_fixed_range_string)
4366     fix_range (ia64_fixed_range_string);
4367 
4368   if (ia64_tls_size_string)
4369     {
4370       char *end;
4371       unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4372       if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4373 	error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4374       else
4375 	ia64_tls_size = tmp;
4376     }
4377 
4378   ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4379   flag_schedule_insns_after_reload = 0;
4380 
4381   ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4382 
4383   init_machine_status = ia64_init_machine_status;
4384 
4385   /* Tell the compiler which flavor of TFmode we're using.  */
4386   if (INTEL_EXTENDED_IEEE_FORMAT)
4387     real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
4388 }
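
/* Example (illustrative): -mtls-size=22 selects 22-bit TLS offsets; the
   check above accepts only 14, 22 and 64 and rejects anything else.  */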
4389 
4390 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
4391 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4392 static enum attr_type ia64_safe_type PARAMS((rtx));
4393 
4394 static enum attr_itanium_requires_unit0
4395 ia64_safe_itanium_requires_unit0 (insn)
4396      rtx insn;
4397 {
4398   if (recog_memoized (insn) >= 0)
4399     return get_attr_itanium_requires_unit0 (insn);
4400   else
4401     return ITANIUM_REQUIRES_UNIT0_NO;
4402 }
4403 
4404 static enum attr_itanium_class
4405 ia64_safe_itanium_class (insn)
4406      rtx insn;
4407 {
4408   if (recog_memoized (insn) >= 0)
4409     return get_attr_itanium_class (insn);
4410   else
4411     return ITANIUM_CLASS_UNKNOWN;
4412 }
4413 
4414 static enum attr_type
4415 ia64_safe_type (insn)
4416      rtx insn;
4417 {
4418   if (recog_memoized (insn) >= 0)
4419     return get_attr_type (insn);
4420   else
4421     return TYPE_UNKNOWN;
4422 }
4423 
4424 /* The following collection of routines emit instruction group stop bits as
4425    necessary to avoid dependencies.  */
4426 
4427 /* Need to track some additional registers as far as serialization is
4428    concerned so we can properly handle br.call and br.ret.  We could
4429    make these registers visible to gcc, but since these registers are
4430    never explicitly used in gcc generated code, it seems wasteful to
4431    do so (plus it would make the call and return patterns needlessly
4432    complex).  */
4433 #define REG_GP		(GR_REG (1))
4434 #define REG_RP		(BR_REG (0))
4435 #define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
4436 /* This is used for volatile asms which may require a stop bit immediately
4437    before and after them.  */
4438 #define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
4439 #define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
4440 #define NUM_REGS	(AR_UNAT_BIT_0 + 64)
4441 
4442 /* For each register, we keep track of how it has been written in the
4443    current instruction group.
4444 
4445    If a register is written unconditionally (no qualifying predicate),
4446    WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4447 
4448    If a register is written if its qualifying predicate P is true, we
4449    set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
4450    may be written again by the complement of P (P^1) and when this happens,
4451    WRITE_COUNT gets set to 2.
4452 
4453    The result of this is that whenever an insn attempts to write a register
4454    whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4455 
4456    If a predicate register is written by a floating-point insn, we set
4457    WRITTEN_BY_FP to true.
4458 
4459    If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4460    to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
4461 
4462 struct reg_write_state
4463 {
4464   unsigned int write_count : 2;
4465   unsigned int first_pred : 16;
4466   unsigned int written_by_fp : 1;
4467   unsigned int written_by_and : 1;
4468   unsigned int written_by_or : 1;
4469 };
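
/* State sketch (illustrative) for one register within one instruction
   group:

     write_count 0 --(unpredicated write)-------------> 2
     write_count 0 --((p6) write)---------------------> 1, first_pred = p6
     write_count 1 --(write under p6's complement)----> 2, no barrier
     write_count 1 --(write under any other predicate)> barrier needed
     write_count 2 --(any further write)--------------> barrier needed  */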
4470 
4471 /* Cumulative info for the current instruction group.  */
4472 struct reg_write_state rws_sum[NUM_REGS];
4473 /* Info for the current instruction.  This gets copied to rws_sum after a
4474    stop bit is emitted.  */
4475 struct reg_write_state rws_insn[NUM_REGS];
4476 
4477 /* Indicates whether this is the first instruction after a stop bit,
4478    in which case we don't need another stop bit.  Without this, we hit
4479    the abort in ia64_variable_issue when scheduling an alloc.  */
4480 static int first_instruction;
4481 
4482 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4483    RTL for one instruction.  */
4484 struct reg_flags
4485 {
4486   unsigned int is_write : 1;	/* Is register being written?  */
4487   unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
4488   unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
4489   unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
4490   unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
4491   unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
4492 };
4493 
4494 static void rws_update PARAMS ((struct reg_write_state *, int,
4495 				struct reg_flags, int));
4496 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4497 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4498 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4499 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4500 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4501 static void init_insn_group_barriers PARAMS ((void));
4502 static int group_barrier_needed_p PARAMS ((rtx));
4503 static int safe_group_barrier_needed_p PARAMS ((rtx));
4504 
4505 /* Update *RWS for REGNO, which is being written by the current instruction,
4506    with predicate PRED, and associated register flags in FLAGS.  */
4507 
4508 static void
4509 rws_update (rws, regno, flags, pred)
4510      struct reg_write_state *rws;
4511      int regno;
4512      struct reg_flags flags;
4513      int pred;
4514 {
4515   if (pred)
4516     rws[regno].write_count++;
4517   else
4518     rws[regno].write_count = 2;
4519   rws[regno].written_by_fp |= flags.is_fp;
4520   /* ??? Not tracking and/or across differing predicates.  */
4521   rws[regno].written_by_and = flags.is_and;
4522   rws[regno].written_by_or = flags.is_or;
4523   rws[regno].first_pred = pred;
4524 }
4525 
4526 /* Handle an access to register REGNO of type FLAGS using predicate register
4527    PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
4528    a dependency with an earlier instruction in the same group.  */
4529 
4530 static int
4531 rws_access_regno (regno, flags, pred)
4532      int regno;
4533      struct reg_flags flags;
4534      int pred;
4535 {
4536   int need_barrier = 0;
4537 
4538   if (regno >= NUM_REGS)
4539     abort ();
4540 
4541   if (! PR_REGNO_P (regno))
4542     flags.is_and = flags.is_or = 0;
4543 
4544   if (flags.is_write)
4545     {
4546       int write_count;
4547 
4548       /* One insn writes same reg multiple times?  */
4549       if (rws_insn[regno].write_count > 0)
4550 	abort ();
4551 
4552       /* Update info for current instruction.  */
4553       rws_update (rws_insn, regno, flags, pred);
4554       write_count = rws_sum[regno].write_count;
4555 
4556       switch (write_count)
4557 	{
4558 	case 0:
4559 	  /* The register has not been written yet.  */
4560 	  rws_update (rws_sum, regno, flags, pred);
4561 	  break;
4562 
4563 	case 1:
4564 	  /* The register has been written via a predicate.  If this is
4565 	     not a complementary predicate, then we need a barrier.  */
4566 	  /* ??? This assumes that P and P+1 are always complementary
4567 	     predicates for P even.  */
4568 	  if (flags.is_and && rws_sum[regno].written_by_and)
4569 	    ;
4570 	  else if (flags.is_or && rws_sum[regno].written_by_or)
4571 	    ;
4572 	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
4573 	    need_barrier = 1;
4574 	  rws_update (rws_sum, regno, flags, pred);
4575 	  break;
4576 
4577 	case 2:
4578 	  /* The register has been unconditionally written already.  We
4579 	     need a barrier.  */
4580 	  if (flags.is_and && rws_sum[regno].written_by_and)
4581 	    ;
4582 	  else if (flags.is_or && rws_sum[regno].written_by_or)
4583 	    ;
4584 	  else
4585 	    need_barrier = 1;
4586 	  rws_sum[regno].written_by_and = flags.is_and;
4587 	  rws_sum[regno].written_by_or = flags.is_or;
4588 	  break;
4589 
4590 	default:
4591 	  abort ();
4592 	}
4593     }
4594   else
4595     {
4596       if (flags.is_branch)
4597 	{
4598 	  /* Branches have several RAW exceptions that allow us to avoid
4599 	     barriers.  */
4600 
4601 	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4602 	    /* RAW dependencies on branch regs are permissible as long
4603 	       as the writer is a non-branch instruction.  Since we
4604 	       never generate code that uses a branch register written
4605 	       by a branch instruction, handling this case is
4606 	       easy.  */
4607 	    return 0;
4608 
4609 	  if (REGNO_REG_CLASS (regno) == PR_REGS
4610 	      && ! rws_sum[regno].written_by_fp)
4611 	    /* The predicates of a branch are available within the
4612 	       same insn group as long as the predicate was written by
4613 	       something other than a floating-point instruction.  */
4614 	    return 0;
4615 	}
4616 
4617       if (flags.is_and && rws_sum[regno].written_by_and)
4618 	return 0;
4619       if (flags.is_or && rws_sum[regno].written_by_or)
4620 	return 0;
4621 
4622       switch (rws_sum[regno].write_count)
4623 	{
4624 	case 0:
4625 	  /* The register has not been written yet.  */
4626 	  break;
4627 
4628 	case 1:
4629 	  /* The register has been written via a predicate.  If this is
4630 	     not a complementary predicate, then we need a barrier.  */
4631 	  /* ??? This assumes that P and P+1 are always complementary
4632 	     predicates for P even.  */
4633 	  if ((rws_sum[regno].first_pred ^ 1) != pred)
4634 	    need_barrier = 1;
4635 	  break;
4636 
4637 	case 2:
4638 	  /* The register has been unconditionally written already.  We
4639 	     need a barrier.  */
4640 	  need_barrier = 1;
4641 	  break;
4642 
4643 	default:
4644 	  abort ();
4645 	}
4646     }
4647 
4648   return need_barrier;
4649 }
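
/* Example (illustrative): within a single group

     (p6) mov r4 = r5
     (p7) mov r4 = r6

   needs no stop bit when p7 is p6's complement, whereas two
   unpredicated writes to r4 force an intervening ";;".  */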
4650 
4651 static int
4652 rws_access_reg (reg, flags, pred)
4653      rtx reg;
4654      struct reg_flags flags;
4655      int pred;
4656 {
4657   int regno = REGNO (reg);
4658   int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4659 
4660   if (n == 1)
4661     return rws_access_regno (regno, flags, pred);
4662   else
4663     {
4664       int need_barrier = 0;
4665       while (--n >= 0)
4666 	need_barrier |= rws_access_regno (regno + n, flags, pred);
4667       return need_barrier;
4668     }
4669 }
4670 
4671 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4672    the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
4673 
4674 static void
4675 update_set_flags (x, pflags, ppred, pcond)
4676      rtx x;
4677      struct reg_flags *pflags;
4678      int *ppred;
4679      rtx *pcond;
4680 {
4681   rtx src = SET_SRC (x);
4682 
4683   *pcond = 0;
4684 
4685   switch (GET_CODE (src))
4686     {
4687     case CALL:
4688       return;
4689 
4690     case IF_THEN_ELSE:
4691       if (SET_DEST (x) == pc_rtx)
4692 	/* X is a conditional branch.  */
4693 	return;
4694       else
4695 	{
4696 	  int is_complemented = 0;
4697 
4698 	  /* X is a conditional move.  */
4699 	  rtx cond = XEXP (src, 0);
4700 	  if (GET_CODE (cond) == EQ)
4701 	    is_complemented = 1;
4702 	  cond = XEXP (cond, 0);
4703 	  if (GET_CODE (cond) != REG
4704 	      && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4705 	    abort ();
4706 	  *pcond = cond;
4707 	  if (XEXP (src, 1) == SET_DEST (x)
4708 	      || XEXP (src, 2) == SET_DEST (x))
4709 	    {
4710 	      /* X is a conditional move that conditionally writes the
4711 		 destination.  */
4712 
4713 	      /* We need another complement in this case.  */
4714 	      if (XEXP (src, 1) == SET_DEST (x))
4715 		is_complemented = ! is_complemented;
4716 
4717 	      *ppred = REGNO (cond);
4718 	      if (is_complemented)
4719 		++*ppred;
4720 	    }
4721 
4722 	  /* ??? If this is a conditional write to the dest, then this
4723 	     instruction does not actually read one source.  This probably
4724 	     doesn't matter, because that source is also the dest.  */
4725 	  /* ??? Multiple writes to predicate registers are allowed
4726 	     if they are all AND type compares, or if they are all OR
4727 	     type compares.  We do not generate such instructions
4728 	     currently.  */
4729 	}
4730       /* ... fall through ...  */
4731 
4732     default:
4733       if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4734 	  && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4735 	/* Set pflags->is_fp to 1 so that we know we're dealing
4736 	   with a floating point comparison when processing the
4737 	   destination of the SET.  */
4738 	pflags->is_fp = 1;
4739 
4740       /* Discover if this is a parallel comparison.  We only handle
4741 	 and.orcm and or.andcm at present, since we must retain a
4742 	 strict inverse on the predicate pair.  */
4743       else if (GET_CODE (src) == AND)
4744 	pflags->is_and = 1;
4745       else if (GET_CODE (src) == IOR)
4746 	pflags->is_or = 1;
4747 
4748       break;
4749     }
4750 }
4751 
4752 /* Subroutine of rtx_needs_barrier; this function determines whether the
4753    source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
4754    are as in rtx_needs_barrier.  COND is an rtx that holds the condition
4755    for this insn.  */
4756 
4757 static int
4758 set_src_needs_barrier (x, flags, pred, cond)
4759      rtx x;
4760      struct reg_flags flags;
4761      int pred;
4762      rtx cond;
4763 {
4764   int need_barrier = 0;
4765   rtx dst;
4766   rtx src = SET_SRC (x);
4767 
4768   if (GET_CODE (src) == CALL)
4769     /* We don't need to worry about the result registers that
4770        get written by subroutine call.  */
4771     return rtx_needs_barrier (src, flags, pred);
4772   else if (SET_DEST (x) == pc_rtx)
4773     {
4774       /* X is a conditional branch.  */
4775       /* ??? This seems redundant, as the caller sets this bit for
4776 	 all JUMP_INSNs.  */
4777       flags.is_branch = 1;
4778       return rtx_needs_barrier (src, flags, pred);
4779     }
4780 
4781   need_barrier = rtx_needs_barrier (src, flags, pred);
4782 
4783   /* This instruction unconditionally uses a predicate register.  */
4784   if (cond)
4785     need_barrier |= rws_access_reg (cond, flags, 0);
4786 
4787   dst = SET_DEST (x);
4788   if (GET_CODE (dst) == ZERO_EXTRACT)
4789     {
4790       need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4791       need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4792       dst = XEXP (dst, 0);
4793     }
4794   return need_barrier;
4795 }
4796 
4797 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4798    Return 1 if this access creates a dependency with an earlier instruction
4799    in the same group.  */
4800 
4801 static int
4802 rtx_needs_barrier (x, flags, pred)
4803      rtx x;
4804      struct reg_flags flags;
4805      int pred;
4806 {
4807   int i, j;
4808   int is_complemented = 0;
4809   int need_barrier = 0;
4810   const char *format_ptr;
4811   struct reg_flags new_flags;
4812   rtx cond = 0;
4813 
4814   if (! x)
4815     return 0;
4816 
4817   new_flags = flags;
4818 
4819   switch (GET_CODE (x))
4820     {
4821     case SET:
4822       update_set_flags (x, &new_flags, &pred, &cond);
4823       need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4824       if (GET_CODE (SET_SRC (x)) != CALL)
4825 	{
4826 	  new_flags.is_write = 1;
4827 	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4828 	}
4829       break;
4830 
4831     case CALL:
4832       new_flags.is_write = 0;
4833       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4834 
4835       /* Avoid multiple register writes, in case this is a pattern with
4836 	 multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
4837       if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4838 	{
4839 	  new_flags.is_write = 1;
4840 	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4841 	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4842 	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4843 	}
4844       break;
4845 
4846     case COND_EXEC:
4847       /* X is a predicated instruction.  */
4848 
4849       cond = COND_EXEC_TEST (x);
4850       if (pred)
4851 	abort ();
4852       need_barrier = rtx_needs_barrier (cond, flags, 0);
4853 
4854       if (GET_CODE (cond) == EQ)
4855 	is_complemented = 1;
4856       cond = XEXP (cond, 0);
4857       if (GET_CODE (cond) != REG
4858 	  && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4859 	abort ();
4860       pred = REGNO (cond);
4861       if (is_complemented)
4862 	++pred;
4863 
4864       need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4865       return need_barrier;
4866 
4867     case CLOBBER:
4868     case USE:
4869       /* Clobber & use are for earlier compiler phases only.  */
4870       break;
4871 
4872     case ASM_OPERANDS:
4873     case ASM_INPUT:
4874       /* We always emit stop bits for traditional asms.  We emit stop bits
4875 	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
4876       if (GET_CODE (x) != ASM_OPERANDS
4877 	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4878 	{
4879 	  /* Avoid writing the register multiple times if we have multiple
4880 	     asm outputs.  This avoids an abort in rws_access_reg.  */
4881 	  if (! rws_insn[REG_VOLATILE].write_count)
4882 	    {
4883 	      new_flags.is_write = 1;
4884 	      rws_access_regno (REG_VOLATILE, new_flags, pred);
4885 	    }
4886 	  return 1;
4887 	}
4888 
4889       /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4890 	 We can not just fall through here, since then we would be confused
4891 	 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
4892 	 traditional asms, unlike their normal usage.  */
4893 
4894       for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4895 	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4896 	  need_barrier = 1;
4897       break;
4898 
4899     case PARALLEL:
4900       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4901 	{
4902 	  rtx pat = XVECEXP (x, 0, i);
4903 	  if (GET_CODE (pat) == SET)
4904 	    {
4905 	      update_set_flags (pat, &new_flags, &pred, &cond);
4906 	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4907 	    }
4908 	  else if (GET_CODE (pat) == USE
4909 		   || GET_CODE (pat) == CALL
4910 		   || GET_CODE (pat) == ASM_OPERANDS)
4911 	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
4912 	  else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4913 	    abort ();
4914 	}
4915       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4916 	{
4917 	  rtx pat = XVECEXP (x, 0, i);
4918 	  if (GET_CODE (pat) == SET)
4919 	    {
4920 	      if (GET_CODE (SET_SRC (pat)) != CALL)
4921 		{
4922 		  new_flags.is_write = 1;
4923 		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4924 						     pred);
4925 		}
4926 	    }
4927 	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4928 	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
4929 	}
4930       break;
4931 
4932     case SUBREG:
4933       x = SUBREG_REG (x);
4934       /* FALLTHRU */
4935     case REG:
4936       if (REGNO (x) == AR_UNAT_REGNUM)
4937 	{
4938 	  for (i = 0; i < 64; ++i)
4939 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4940 	}
4941       else
4942 	need_barrier = rws_access_reg (x, flags, pred);
4943       break;
4944 
4945     case MEM:
4946       /* Find the regs used in memory address computation.  */
4947       new_flags.is_write = 0;
4948       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4949       break;
4950 
4951     case CONST_INT:   case CONST_DOUBLE:
4952     case SYMBOL_REF:  case LABEL_REF:     case CONST:
4953       break;
4954 
4955       /* Operators with side-effects.  */
4956     case POST_INC:    case POST_DEC:
4957       if (GET_CODE (XEXP (x, 0)) != REG)
4958 	abort ();
4959 
4960       new_flags.is_write = 0;
4961       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
4962       new_flags.is_write = 1;
4963       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4964       break;
4965 
4966     case POST_MODIFY:
4967       if (GET_CODE (XEXP (x, 0)) != REG)
4968 	abort ();
4969 
4970       new_flags.is_write = 0;
4971       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
4972       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4973       new_flags.is_write = 1;
4974       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4975       break;
4976 
4977       /* Handle common unary and binary ops for efficiency.  */
4978     case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
4979     case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
4980     case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
4981     case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
4982     case NE:       case EQ:      case GE:      case GT:        case LE:
4983     case LT:       case GEU:     case GTU:     case LEU:       case LTU:
4984       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4985       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4986       break;
4987 
4988     case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
4989     case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
4990     case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
4991     case SQRT:     case FFS:
4992       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4993       break;
4994 
4995     case UNSPEC:
4996       switch (XINT (x, 1))
4997 	{
4998 	case UNSPEC_LTOFF_DTPMOD:
4999 	case UNSPEC_LTOFF_DTPREL:
5000 	case UNSPEC_DTPREL:
5001 	case UNSPEC_LTOFF_TPREL:
5002 	case UNSPEC_TPREL:
5003 	case UNSPEC_PRED_REL_MUTEX:
5004 	case UNSPEC_PIC_CALL:
5005         case UNSPEC_MF:
5006         case UNSPEC_FETCHADD_ACQ:
5007 	case UNSPEC_BSP_VALUE:
5008 	case UNSPEC_FLUSHRS:
5009 	case UNSPEC_BUNDLE_SELECTOR:
5010           break;
5011 
5012 	case UNSPEC_GR_SPILL:
5013 	case UNSPEC_GR_RESTORE:
5014 	  {
5015 	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5016 	    HOST_WIDE_INT bit = (offset >> 3) & 63;
5017 
5018 	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5019 	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5020 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5021 					      new_flags, pred);
5022 	    break;
5023 	  }
5024 
5025 	case UNSPEC_FR_SPILL:
5026 	case UNSPEC_FR_RESTORE:
5027 	case UNSPEC_POPCNT:
5028 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5029 	  break;
5030 
5031         case UNSPEC_ADDP4:
5032 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5033 	  break;
5034 
5035 	case UNSPEC_FR_RECIP_APPROX:
5036 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5037 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5038 	  break;
5039 
5040         case UNSPEC_CMPXCHG_ACQ:
5041 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5042 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5043 	  break;
5044 
5045 	default:
5046 	  abort ();
5047 	}
5048       break;
5049 
5050     case UNSPEC_VOLATILE:
5051       switch (XINT (x, 1))
5052 	{
5053 	case UNSPECV_ALLOC:
5054 	  /* Alloc must always be the first instruction of a group.
5055 	     We force this by always returning true.  */
5056 	  /* ??? We might get better scheduling if we explicitly check for
5057 	     input/local/output register dependencies, and modify the
5058 	     scheduler so that alloc is always reordered to the start of
5059 	     the current group.  We could then eliminate all of the
5060 	     first_instruction code.  */
5061 	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
5062 
5063 	  new_flags.is_write = 1;
5064 	  rws_access_regno (REG_AR_CFM, new_flags, pred);
5065 	  return 1;
5066 
5067 	case UNSPECV_SET_BSP:
5068 	  need_barrier = 1;
5069           break;
5070 
5071 	case UNSPECV_BLOCKAGE:
5072 	case UNSPECV_INSN_GROUP_BARRIER:
5073 	case UNSPECV_BREAK:
5074 	case UNSPECV_PSAC_ALL:
5075 	case UNSPECV_PSAC_NORMAL:
5076 	  return 0;
5077 
5078 	default:
5079 	  abort ();
5080 	}
5081       break;
5082 
5083     case RETURN:
5084       new_flags.is_write = 0;
5085       need_barrier  = rws_access_regno (REG_RP, flags, pred);
5086       need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5087 
5088       new_flags.is_write = 1;
5089       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5090       need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5091       break;
5092 
5093     default:
5094       format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5095       for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5096 	switch (format_ptr[i])
5097 	  {
5098 	  case '0':	/* unused field */
5099 	  case 'i':	/* integer */
5100 	  case 'n':	/* note */
5101 	  case 'w':	/* wide integer */
5102 	  case 's':	/* pointer to string */
5103 	  case 'S':	/* optional pointer to string */
5104 	    break;
5105 
5106 	  case 'e':
5107 	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5108 	      need_barrier = 1;
5109 	    break;
5110 
5111 	  case 'E':
5112 	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5113 	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5114 		need_barrier = 1;
5115 	    break;
5116 
5117 	  default:
5118 	    abort ();
5119 	  }
5120       break;
5121     }
5122   return need_barrier;
5123 }
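
/* For illustration only, a rough sketch (assuming the file-local rws
   state above) of the write-after-write case this function detects:
   two unpredicated writes to the same GR in one instruction group.
   The first call records the write in rws_sum; the second call finds
   the register's write_count already nonzero and returns 1.  */
#if 0
static int
example_waw_conflict (set1, set2)
     rtx set1, set2;		/* e.g. two (set (reg r14) ...) rtxs */
{
  struct reg_flags f;

  memset (&f, 0, sizeof (f));
  init_insn_group_barriers ();

  memset (rws_insn, 0, sizeof (rws_insn));
  if (rtx_needs_barrier (set1, f, 0))
    abort ();			/* No earlier access in this group.  */

  memset (rws_insn, 0, sizeof (rws_insn));
  return rtx_needs_barrier (set2, f, 0);	/* 1: WAW conflict.  */
}
#endif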
5124 
5125 /* Clear out the state for group_barrier_needed_p at the start of a
5126    sequence of insns.  */
5127 
5128 static void
5129 init_insn_group_barriers ()
5130 {
5131   memset (rws_sum, 0, sizeof (rws_sum));
5132   first_instruction = 1;
5133 }
5134 
5135 /* Given the current state, recorded by previous calls to this function,
5136    determine whether a group barrier (a stop bit) is necessary before INSN.
5137    Return nonzero if so.  */
5138 
5139 static int
5140 group_barrier_needed_p (insn)
5141      rtx insn;
5142 {
5143   rtx pat;
5144   int need_barrier = 0;
5145   struct reg_flags flags;
5146 
5147   memset (&flags, 0, sizeof (flags));
5148   switch (GET_CODE (insn))
5149     {
5150     case NOTE:
5151       break;
5152 
5153     case BARRIER:
5154       /* A barrier doesn't imply an instruction group boundary.  */
5155       break;
5156 
5157     case CODE_LABEL:
5158       memset (rws_insn, 0, sizeof (rws_insn));
5159       return 1;
5160 
5161     case CALL_INSN:
5162       flags.is_branch = 1;
5163       flags.is_sibcall = SIBLING_CALL_P (insn);
5164       memset (rws_insn, 0, sizeof (rws_insn));
5165 
5166       /* Don't bundle a call following another call.  */
5167       if ((pat = prev_active_insn (insn))
5168 	  && GET_CODE (pat) == CALL_INSN)
5169 	{
5170 	  need_barrier = 1;
5171 	  break;
5172 	}
5173 
5174       need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5175       break;
5176 
5177     case JUMP_INSN:
5178       flags.is_branch = 1;
5179 
5180       /* Don't bundle a jump following a call.  */
5181       if ((pat = prev_active_insn (insn))
5182 	  && GET_CODE (pat) == CALL_INSN)
5183 	{
5184 	  need_barrier = 1;
5185 	  break;
5186 	}
5187       /* FALLTHRU */
5188 
5189     case INSN:
5190       if (GET_CODE (PATTERN (insn)) == USE
5191 	  || GET_CODE (PATTERN (insn)) == CLOBBER)
5192 	/* Don't care about USE and CLOBBER "insns"---those are used to
5193 	   indicate to the optimizer that it shouldn't get rid of
5194 	   certain operations.  */
5195 	break;
5196 
5197       pat = PATTERN (insn);
5198 
5199       /* Ug.  Hack hacks hacked elsewhere.  */
5200       switch (recog_memoized (insn))
5201 	{
5202 	  /* We play dependency tricks with the epilogue in order
5203 	     to get proper schedules.  Undo this for dv analysis.  */
5204 	case CODE_FOR_epilogue_deallocate_stack:
5205 	case CODE_FOR_prologue_allocate_stack:
5206 	  pat = XVECEXP (pat, 0, 0);
5207 	  break;
5208 
5209 	  /* The pattern we use for br.cloop confuses the code above.
5210 	     The second element of the vector is representative.  */
5211 	case CODE_FOR_doloop_end_internal:
5212 	  pat = XVECEXP (pat, 0, 1);
5213 	  break;
5214 
5215 	  /* Doesn't generate code.  */
5216 	case CODE_FOR_pred_rel_mutex:
5217 	case CODE_FOR_prologue_use:
5218 	  return 0;
5219 
5220 	default:
5221 	  break;
5222 	}
5223 
5224       memset (rws_insn, 0, sizeof (rws_insn));
5225       need_barrier = rtx_needs_barrier (pat, flags, 0);
5226 
5227       /* Check to see if the previous instruction was a volatile
5228 	 asm.  */
5229       if (! need_barrier)
5230 	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5231       break;
5232 
5233     default:
5234       abort ();
5235     }
5236 
5237   if (first_instruction)
5238     {
5239       need_barrier = 0;
5240       first_instruction = 0;
5241     }
5242 
5243   return need_barrier;
5244 }
5245 
5246 /* Like group_barrier_needed_p, but do not clobber the current state.  */
5247 
5248 static int
5249 safe_group_barrier_needed_p (insn)
5250      rtx insn;
5251 {
5252   struct reg_write_state rws_saved[NUM_REGS];
5253   int saved_first_instruction;
5254   int t;
5255 
5256   memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5257   saved_first_instruction = first_instruction;
5258 
5259   t = group_barrier_needed_p (insn);
5260 
5261   memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5262   first_instruction = saved_first_instruction;
5263 
5264   return t;
5265 }
5266 
5267 /* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
5268    as necessary to eliminate dependencies.  This function assumes that
5269    a final instruction scheduling pass has been run which has already
5270    inserted most of the necessary stop bits.  This function only inserts
5271    new ones at basic block boundaries, since these are invisible to the
5272    scheduler.  */
5273 
5274 static void
5275 emit_insn_group_barriers (dump, insns)
5276      FILE *dump;
5277      rtx insns;
5278 {
5279   rtx insn;
5280   rtx last_label = 0;
5281   int insns_since_last_label = 0;
5282 
5283   init_insn_group_barriers ();
5284 
5285   for (insn = insns; insn; insn = NEXT_INSN (insn))
5286     {
5287       if (GET_CODE (insn) == CODE_LABEL)
5288 	{
5289 	  if (insns_since_last_label)
5290 	    last_label = insn;
5291 	  insns_since_last_label = 0;
5292 	}
5293       else if (GET_CODE (insn) == NOTE
5294 	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5295 	{
5296 	  if (insns_since_last_label)
5297 	    last_label = insn;
5298 	  insns_since_last_label = 0;
5299 	}
5300       else if (GET_CODE (insn) == INSN
5301 	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5302 	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5303 	{
5304 	  init_insn_group_barriers ();
5305 	  last_label = 0;
5306 	}
5307       else if (INSN_P (insn))
5308 	{
5309 	  insns_since_last_label = 1;
5310 
5311 	  if (group_barrier_needed_p (insn))
5312 	    {
5313 	      if (last_label)
5314 		{
5315 		  if (dump)
5316 		    fprintf (dump, "Emitting stop before label %d\n",
5317 			     INSN_UID (last_label));
5318 		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5319 		  insn = last_label;
5320 
5321 		  init_insn_group_barriers ();
5322 		  last_label = 0;
5323 		}
5324 	    }
5325 	}
5326     }
5327 }
5328 
5329 /* Like emit_insn_group_barriers, but used when no final scheduling pass
5330    has been run.  This function has to emit all necessary group barriers.  */
5331 
5332 static void
5333 emit_all_insn_group_barriers (dump, insns)
5334      FILE *dump ATTRIBUTE_UNUSED;
5335      rtx insns;
5336 {
5337   rtx insn;
5338 
5339   init_insn_group_barriers ();
5340 
5341   for (insn = insns; insn; insn = NEXT_INSN (insn))
5342     {
5343       if (GET_CODE (insn) == BARRIER)
5344 	{
5345 	  rtx last = prev_active_insn (insn);
5346 
5347 	  if (! last)
5348 	    continue;
5349 	  if (GET_CODE (last) == JUMP_INSN
5350 	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5351 	    last = prev_active_insn (last);
5352 	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5353 	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5354 
5355 	  init_insn_group_barriers ();
5356 	}
5357       else if (INSN_P (insn))
5358 	{
5359 	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5360 	    init_insn_group_barriers ();
5361 	  else if (group_barrier_needed_p (insn))
5362 	    {
5363 	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5364 	      init_insn_group_barriers ();
5365 	      group_barrier_needed_p (insn);
5366 	    }
5367 	}
5368     }
5369 }
5370 
5371 static int errata_find_address_regs PARAMS ((rtx *, void *));
5372 static void errata_emit_nops PARAMS ((rtx));
5373 static void fixup_errata PARAMS ((void));
5374 
5375 /* This structure is used to track some details about the previous insn
5376    groups so we can determine if it may be necessary to insert NOPs to
5377    work around hardware errata.  */
5378 static struct group
5379 {
5380   HARD_REG_SET p_reg_set;
5381   HARD_REG_SET gr_reg_conditionally_set;
5382 } last_group[2];
5383 
5384 /* Index into the last_group array.  */
5385 static int group_idx;
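
/* Note on the indexing used below: last_group acts as a two-entry ring.
   last_group[group_idx] accumulates state for the insn group currently
   being scanned, while last_group[group_idx ^ 1] holds the state of the
   group before it; fixup_errata flips group_idx at each stop bit.  */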
5386 
5387 /* Called through for_each_rtx; determines if a hard register that was
5388    conditionally set in the previous group is used as an address register.
5389    It ensures that for_each_rtx returns 1 in that case.  */
5390 static int
5391 errata_find_address_regs (xp, data)
5392      rtx *xp;
5393      void *data ATTRIBUTE_UNUSED;
5394 {
5395   rtx x = *xp;
5396   if (GET_CODE (x) != MEM)
5397     return 0;
5398   x = XEXP (x, 0);
5399   if (GET_CODE (x) == POST_MODIFY)
5400     x = XEXP (x, 0);
5401   if (GET_CODE (x) == REG)
5402     {
5403       struct group *prev_group = last_group + (group_idx ^ 1);
5404       if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5405 			     REGNO (x)))
5406 	return 1;
5407       return -1;
5408     }
5409   return 0;
5410 }
5411 
5412 /* Called for each insn; this function keeps track of the state in
5413    last_group and emits additional NOPs if necessary to work around
5414    an Itanium A/B step erratum.  */
5415 static void
5416 errata_emit_nops (insn)
5417      rtx insn;
5418 {
5419   struct group *this_group = last_group + group_idx;
5420   struct group *prev_group = last_group + (group_idx ^ 1);
5421   rtx pat = PATTERN (insn);
5422   rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5423   rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5424   enum attr_type type;
5425   rtx set = real_pat;
5426 
5427   if (GET_CODE (real_pat) == USE
5428       || GET_CODE (real_pat) == CLOBBER
5429       || GET_CODE (real_pat) == ASM_INPUT
5430       || GET_CODE (real_pat) == ADDR_VEC
5431       || GET_CODE (real_pat) == ADDR_DIFF_VEC
5432       || asm_noperands (PATTERN (insn)) >= 0)
5433     return;
5434 
5435   /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5436      parts of it.  */
5437 
5438   if (GET_CODE (set) == PARALLEL)
5439     {
5440       int i;
5441       set = XVECEXP (real_pat, 0, 0);
5442       for (i = 1; i < XVECLEN (real_pat, 0); i++)
5443 	if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5444 	    && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5445 	  {
5446 	    set = 0;
5447 	    break;
5448 	  }
5449     }
5450 
5451   if (set && GET_CODE (set) != SET)
5452     set = 0;
5453 
5454   type  = get_attr_type (insn);
5455 
5456   if (type == TYPE_F
5457       && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5458     SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5459 
5460   if ((type == TYPE_M || type == TYPE_A) && cond && set
5461       && REG_P (SET_DEST (set))
5462       && GET_CODE (SET_SRC (set)) != PLUS
5463       && GET_CODE (SET_SRC (set)) != MINUS
5464       && (GET_CODE (SET_SRC (set)) != ASHIFT
5465 	  || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5466       && (GET_CODE (SET_SRC (set)) != MEM
5467 	  || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5468       && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5469     {
5470       if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5471 	  || ! REG_P (XEXP (cond, 0)))
5472 	abort ();
5473 
5474       if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5475 	SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5476     }
5477   if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5478     {
5479       emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5480       emit_insn_before (gen_nop (), insn);
5481       emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5482       group_idx = 0;
5483       memset (last_group, 0, sizeof last_group);
5484     }
5485 }
5486 
5487 /* Emit extra nops if they are required to work around hardware errata.  */
5488 
5489 static void
5490 fixup_errata ()
5491 {
5492   rtx insn;
5493 
5494   if (! TARGET_B_STEP)
5495     return;
5496 
5497   group_idx = 0;
5498   memset (last_group, 0, sizeof last_group);
5499 
5500   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5501     {
5502       if (!INSN_P (insn))
5503 	continue;
5504 
5505       if (ia64_safe_type (insn) == TYPE_S)
5506 	{
5507 	  group_idx ^= 1;
5508 	  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5509 	}
5510       else
5511 	errata_emit_nops (insn);
5512     }
5513 }
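
/* An illustrative instance (assumed here, not quoted from the errata
   documentation) of the sequence this pass guards against: a predicate
   written by an FP compare, a GR conditionally set under that predicate
   in the next group, and the GR used as a load address in the group
   after that.

	fcmp.eq p6, p7 = f6, f7    // TYPE_F predicate write -> p_reg_set
	;;
   (p6) mov r14 = r20             // conditional GR write
	;;                         //   -> gr_reg_conditionally_set
	ld8 r15 = [r14]            // address use of r14

   errata_emit_nops then inserts ";; nop ;;" in front of the load.  */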
5514 
5515 /* Instruction scheduling support.  */
5516 /* Describe one bundle.  */
5517 
5518 struct bundle
5519 {
5520   /* Zero if there's no possibility of a stop in this bundle other than
5521      at the end, otherwise the position of the optional stop bit.  */
5522   int possible_stop;
5523   /* The types of the three slots.  */
5524   enum attr_type t[3];
5525   /* The pseudo op to be emitted into the assembler output.  */
5526   const char *name;
5527 };
5528 
5529 #define NR_BUNDLES 10
5530 
5531 /* A list of all available bundles.  */
5532 
5533 static const struct bundle bundle[NR_BUNDLES] =
5534 {
5535   { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5536   { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5537   { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5538   { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5539 #if NR_BUNDLES == 10
5540   { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5541   { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5542 #endif
5543   { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5544   { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5545   { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5546   /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5547      it matches an L type insn.  Otherwise we'll try to generate L type
5548      nops.  */
5549   { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5550 };
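
/* For illustration, how one entry reads: bundle[0], ".mii", is an M-unit
   slot followed by two I-unit slots, and possible_stop == 2 means the
   template variant with a mid-bundle stop places the stop immediately
   before slot 2:

	{ .mii
	  ld8 r16 = [r17]        // slot 0: TYPE_M
	  add r18 = r19, r20     // slot 1: TYPE_I
	  ;;                     // the optional stop, before slot 2
	  add r21 = r18, r22     // slot 2: TYPE_I
	}  */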
5551 
5552 /* Describe a packet of instructions.  Packets consist of two bundles that
5553    are visible to the hardware in one scheduling window.  */
5554 
5555 struct ia64_packet
5556 {
5557   const struct bundle *t1, *t2;
5558   /* Precomputed value of the first split issue in this packet if a cycle
5559      starts at its beginning.  */
5560   int first_split;
5561   /* For convenience, the insn types are replicated here so we don't have
5562      to go through T1 and T2 all the time.  */
5563   enum attr_type t[6];
5564 };
5565 
5566 /* An array containing all possible packets.  */
5567 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5568 static struct ia64_packet packets[NR_PACKETS];
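
/* The array is filled in ia64_sched_init below so that
   packets[b1 * NR_BUNDLES + b2] pairs bundle[b1] with bundle[b2];
   e.g. packets[0] is .mii/.mii with t[] = { M, I, I, M, I, I }.  */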
5569 
5570 /* Map attr_type to a string with the name.  */
5571 
5572 static const char *const type_names[] =
5573 {
5574   "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5575 };
5576 
5577 /* Nonzero if we should insert stop bits into the schedule.  */
5578 int ia64_final_schedule = 0;
5579 
5580 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5581 static rtx ia64_single_set PARAMS ((rtx));
5582 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5583 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5584 static void maybe_rotate PARAMS ((FILE *));
5585 static void finish_last_head PARAMS ((FILE *, int));
5586 static void rotate_one_bundle PARAMS ((FILE *));
5587 static void rotate_two_bundles PARAMS ((FILE *));
5588 static void nop_cycles_until PARAMS ((int, FILE *));
5589 static void cycle_end_fill_slots PARAMS ((FILE *));
5590 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5591 static int get_split PARAMS ((const struct ia64_packet *, int));
5592 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5593 				   const struct ia64_packet *, int));
5594 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5595 				      rtx *, enum attr_type *, int));
5596 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5597 static void dump_current_packet PARAMS ((FILE *));
5598 static void schedule_stop PARAMS ((FILE *));
5599 static rtx gen_nop_type PARAMS ((enum attr_type));
5600 static void ia64_emit_nops PARAMS ((void));
5601 
5602 /* Map a bundle number to its pseudo-op.  */
5603 
5604 const char *
5605 get_bundle_name (b)
5606      int b;
5607 {
5608   return bundle[b].name;
5609 }
5610 
5611 /* Compute the slot which will cause a split issue in packet P if the
5612    current cycle begins at slot BEGIN.  */
5613 
5614 static int
5615 itanium_split_issue (p, begin)
5616      const struct ia64_packet *p;
5617      int begin;
5618 {
5619   int type_count[TYPE_S];
5620   int i;
5621   int split = 6;
5622 
5623   if (begin < 3)
5624     {
5625       /* Always split before and after MMF.  */
5626       if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5627 	return 3;
5628       if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5629 	return 3;
5630       /* Always split after MBB and BBB.  */
5631       if (p->t[1] == TYPE_B)
5632 	return 3;
5633       /* Split after first bundle in MIB BBB combination.  */
5634       if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5635 	return 3;
5636     }
5637 
5638   memset (type_count, 0, sizeof type_count);
5639   for (i = begin; i < split; i++)
5640     {
5641       enum attr_type t0 = p->t[i];
5642       /* An MLX bundle reserves the same units as an MFI bundle.  */
5643       enum attr_type t = (t0 == TYPE_L ? TYPE_F
5644 			  : t0 == TYPE_X ? TYPE_I
5645 			  : t0);
5646 
5647       /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5648 	 2 integer per cycle.  */
5649       int max = (t == TYPE_B ? 3 : 2);
5650       if (type_count[t] == max)
5651 	return i;
5652 
5653       type_count[t]++;
5654     }
5655   return split;
5656 }
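
/* A worked example: for the .mii/.mii packet with BEGIN == 0 the slot
   types are M I I M I I.  Slots 0-3 count one M, two I's and a second
   M; slot 4 would be a third I, exceeding the two-I-per-cycle limit,
   so the function returns 4.  A packet with an MMF half instead
   returns 3 through the early checks above.  */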
5657 
5658 /* Return the maximum number of instructions a cpu can issue.  */
5659 
5660 static int
5661 ia64_issue_rate ()
5662 {
5663   return 6;
5664 }
5665 
5666 /* Helper function - like single_set, but look inside COND_EXEC.  */
5667 
5668 static rtx
5669 ia64_single_set (insn)
5670      rtx insn;
5671 {
5672   rtx x = PATTERN (insn), ret;
5673   if (GET_CODE (x) == COND_EXEC)
5674     x = COND_EXEC_CODE (x);
5675   if (GET_CODE (x) == SET)
5676     return x;
5677 
5678   /* Special-case prologue_allocate_stack and epilogue_deallocate_stack.
5679      Although not classical single sets, the second set is there just to
5680      protect the first from moving past FP-relative stack accesses.  */
5681   switch (recog_memoized (insn))
5682     {
5683     case CODE_FOR_prologue_allocate_stack:
5684     case CODE_FOR_epilogue_deallocate_stack:
5685       ret = XVECEXP (x, 0, 0);
5686       break;
5687 
5688     default:
5689       ret = single_set_2 (insn, x);
5690       break;
5691     }
5692 
5693   return ret;
5694 }
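
/* For example, given a predicated copy

	(cond_exec (ne (reg:BI p6) (const_int 0))
		   (set (reg:DI r14) (reg:DI r15)))

   ia64_single_set returns the inner SET, which plain single_set would
   not extract from the COND_EXEC wrapper.  */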
5695 
5696 /* Adjust the cost of a scheduling dependency.  Return the new cost of
5697    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
5698 
5699 static int
5700 ia64_adjust_cost (insn, link, dep_insn, cost)
5701      rtx insn, link, dep_insn;
5702      int cost;
5703 {
5704   enum attr_type dep_type;
5705   enum attr_itanium_class dep_class;
5706   enum attr_itanium_class insn_class;
5707   rtx dep_set, set, src, addr;
5708 
5709   if (GET_CODE (PATTERN (insn)) == CLOBBER
5710       || GET_CODE (PATTERN (insn)) == USE
5711       || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5712       || GET_CODE (PATTERN (dep_insn)) == USE
5713       /* @@@ Not accurate for indirect calls.  */
5714       || GET_CODE (insn) == CALL_INSN
5715       || ia64_safe_type (insn) == TYPE_S)
5716     return 0;
5717 
5718   if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5719       || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5720     return 0;
5721 
5722   dep_type = ia64_safe_type (dep_insn);
5723   dep_class = ia64_safe_itanium_class (dep_insn);
5724   insn_class = ia64_safe_itanium_class (insn);
5725 
5726   /* Compares that feed a conditional branch can execute in the same
5727      cycle.  */
5728   dep_set = ia64_single_set (dep_insn);
5729   set = ia64_single_set (insn);
5730 
5731   if (dep_type != TYPE_F
5732       && dep_set
5733       && GET_CODE (SET_DEST (dep_set)) == REG
5734       && PR_REG (REGNO (SET_DEST (dep_set)))
5735       && GET_CODE (insn) == JUMP_INSN)
5736     return 0;
5737 
5738   if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5739     {
5740       /* ??? Can't find any information in the documentation about whether
5741 	 a sequence
5742 	   st [rx] = ra
5743 	   ld rb = [ry]
5744 	 splits issue.  Assume it doesn't.  */
5745       return 0;
5746     }
5747 
5748   src = set ? SET_SRC (set) : 0;
5749   addr = 0;
5750   if (set)
5751     {
5752       if (GET_CODE (SET_DEST (set)) == MEM)
5753 	addr = XEXP (SET_DEST (set), 0);
5754       else if (GET_CODE (SET_DEST (set)) == SUBREG
5755 	       && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5756 	addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5757       else
5758 	{
5759 	  addr = src;
5760 	  if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5761 	    addr = XVECEXP (addr, 0, 0);
5762 	  while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5763 	    addr = XEXP (addr, 0);
5764 
5765 	  /* Note that LO_SUM is used for GOT loads.  */
5766 	  if (GET_CODE (addr) == MEM || GET_CODE (addr) == LO_SUM)
5767 	    addr = XEXP (addr, 0);
5768 	  else
5769 	    addr = 0;
5770 	}
5771     }
5772 
5773   if (addr && GET_CODE (addr) == POST_MODIFY)
5774     addr = XEXP (addr, 0);
5775 
5776   set = ia64_single_set (dep_insn);
5777 
5778   if ((dep_class == ITANIUM_CLASS_IALU
5779        || dep_class == ITANIUM_CLASS_ILOG
5780        || dep_class == ITANIUM_CLASS_LD)
5781       && (insn_class == ITANIUM_CLASS_LD
5782 	  || insn_class == ITANIUM_CLASS_ST))
5783     {
5784       if (! addr || ! set)
5785 	abort ();
5786       /* This isn't completely correct - an IALU that feeds an address has
5787 	 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5788 	 otherwise.  Unfortunately there's no good way to describe this.  */
5789       if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5790 	return cost + 1;
5791     }
5792 
5793   if ((dep_class == ITANIUM_CLASS_IALU
5794        || dep_class == ITANIUM_CLASS_ILOG
5795        || dep_class == ITANIUM_CLASS_LD)
5796       && (insn_class == ITANIUM_CLASS_MMMUL
5797 	  || insn_class == ITANIUM_CLASS_MMSHF
5798 	  || insn_class == ITANIUM_CLASS_MMSHFI))
5799     return 3;
5800 
5801   if (dep_class == ITANIUM_CLASS_FMAC
5802       && (insn_class == ITANIUM_CLASS_FMISC
5803 	  || insn_class == ITANIUM_CLASS_FCVTFX
5804 	  || insn_class == ITANIUM_CLASS_XMPY))
5805     return 7;
5806 
5807   if ((dep_class == ITANIUM_CLASS_FMAC
5808        || dep_class == ITANIUM_CLASS_FMISC
5809        || dep_class == ITANIUM_CLASS_FCVTFX
5810        || dep_class == ITANIUM_CLASS_XMPY)
5811       && insn_class == ITANIUM_CLASS_STF)
5812     return 8;
5813 
5814   /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5815      but HP engineers say any non-MM operation.  */
5816   if ((dep_class == ITANIUM_CLASS_MMMUL
5817        || dep_class == ITANIUM_CLASS_MMSHF
5818        || dep_class == ITANIUM_CLASS_MMSHFI)
5819       && insn_class != ITANIUM_CLASS_MMMUL
5820       && insn_class != ITANIUM_CLASS_MMSHF
5821       && insn_class != ITANIUM_CLASS_MMSHFI)
5822     return 4;
5823 
5824   return cost;
5825 }
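
/* Two worked examples of the rules above, using plausible insns for the
   classes named: an fma feeding an FP store hits the eight-cycle
   FMAC->STF path, and an MM shift feeding an ordinary add hits the
   four-cycle MM->non-MM path.

	fma.d f6 = f7, f8, f9      // ITANIUM_CLASS_FMAC
	stfd [r14] = f6            // ITANIUM_CLASS_STF   -> cost 8

	pshr2 r16 = r17, 3         // ITANIUM_CLASS_MMSHF
	add r18 = r16, r19         // ITANIUM_CLASS_IALU  -> cost 4
*/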
5826 
5827 /* Describe the current state of the Itanium pipeline.  */
5828 static struct
5829 {
5830   /* The first slot that is used in the current cycle.  */
5831   int first_slot;
5832   /* The next slot to fill.  */
5833   int cur;
5834   /* The packet we have selected for the current issue window.  */
5835   const struct ia64_packet *packet;
5836   /* The position of the split issue that occurs due to issue width
5837      limitations (6 if there's no split issue).  */
5838   int split;
5839   /* Record data about the insns scheduled so far in the same issue
5840      window.  The elements up to but not including FIRST_SLOT belong
5841      to the previous cycle, the ones starting with FIRST_SLOT belong
5842      to the current cycle.  */
5843   enum attr_type types[6];
5844   rtx insns[6];
5845   int stopbit[6];
5846   /* Nonzero if we decided to schedule a stop bit.  */
5847   int last_was_stop;
5848 } sched_data;
5849 
5850 /* Temporary arrays; they have enough elements to hold all insns that
5851    can be ready at the same time while scheduling the current block.
5852    SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
5853 static rtx *sched_ready;
5854 static enum attr_type *sched_types;
5855 
5856 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5857    of packet P.  */
5858 
5859 static int
5860 insn_matches_slot (p, itype, slot, insn)
5861      const struct ia64_packet *p;
5862      enum attr_type itype;
5863      int slot;
5864      rtx insn;
5865 {
5866   enum attr_itanium_requires_unit0 u0;
5867   enum attr_type stype = p->t[slot];
5868 
5869   if (insn)
5870     {
5871       u0 = ia64_safe_itanium_requires_unit0 (insn);
5872       if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5873 	{
5874 	  int i;
5875 	  for (i = sched_data.first_slot; i < slot; i++)
5876 	    if (p->t[i] == stype
5877 		|| (stype == TYPE_F && p->t[i] == TYPE_L)
5878 		|| (stype == TYPE_I && p->t[i] == TYPE_X))
5879 	      return 0;
5880 	}
5881       if (GET_CODE (insn) == CALL_INSN)
5882 	{
5883 	  /* Reject calls in multiway branch packets.  We want to limit
5884 	     the number of multiway branches we generate (since the branch
5885 	     predictor is limited), and this seems to work fairly well.
5886 	     (If we didn't do this, we'd have to add another test here to
5887 	     force calls into the third slot of the bundle.)  */
5888 	  if (slot < 3)
5889 	    {
5890 	      if (p->t[1] == TYPE_B)
5891 		return 0;
5892 	    }
5893 	  else
5894 	    {
5895 	      if (p->t[4] == TYPE_B)
5896 		return 0;
5897 	    }
5898 	}
5899     }
5900 
5901   if (itype == stype)
5902     return 1;
5903   if (itype == TYPE_A)
5904     return stype == TYPE_M || stype == TYPE_I;
5905   return 0;
5906 }
5907 
5908 /* Like emit_insn_before, but skip cycle_display notes.
5909    ??? When cycle display notes are implemented, update this.  */
5910 
5911 static void
5912 ia64_emit_insn_before (insn, before)
5913      rtx insn, before;
5914 {
5915   emit_insn_before (insn, before);
5916 }
5917 
5918 /* When rotating a bundle out of the issue window, insert a bundle selector
5919    insn in front of it.  DUMP is the scheduling dump file or NULL.  START
5920    is either 0 or 3, depending on whether we want to emit a bundle selector
5921    for the first bundle or the second bundle in the current issue window.
5922 
5923    The selector insns are emitted this late because the selected packet can
5924    be changed until parts of it get rotated out.  */
5925 
5926 static void
5927 finish_last_head (dump, start)
5928      FILE *dump;
5929      int start;
5930 {
5931   const struct ia64_packet *p = sched_data.packet;
5932   const struct bundle *b = start == 0 ? p->t1 : p->t2;
5933   int bundle_type = b - bundle;
5934   rtx insn;
5935   int i;
5936 
5937   if (! ia64_final_schedule)
5938     return;
5939 
5940   for (i = start; sched_data.insns[i] == 0; i++)
5941     if (i == start + 3)
5942       abort ();
5943   insn = sched_data.insns[i];
5944 
5945   if (dump)
5946     fprintf (dump, "//    Emitting template before %d: %s\n",
5947 	     INSN_UID (insn), b->name);
5948 
5949   ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5950 }
5951 
5952 /* We can't schedule more insns this cycle.  Fix up the scheduling state
5953    and advance FIRST_SLOT and CUR.
5954    We have to distribute the insns that are currently found between
5955    FIRST_SLOT and CUR into the slots of the packet we have selected.  So
5956    far, they are stored successively in the fields starting at FIRST_SLOT;
5957    now they must be moved to the correct slots.
5958    DUMP is the current scheduling dump file, or NULL.  */
5959 
5960 static void
5961 cycle_end_fill_slots (dump)
5962      FILE *dump;
5963 {
5964   const struct ia64_packet *packet = sched_data.packet;
5965   int slot, i;
5966   enum attr_type tmp_types[6];
5967   rtx tmp_insns[6];
5968 
5969   memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5970   memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5971 
5972   for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5973     {
5974       enum attr_type t = tmp_types[i];
5975       if (t != ia64_safe_type (tmp_insns[i]))
5976 	abort ();
5977       while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5978 	{
5979 	  if (slot > sched_data.split)
5980 	    abort ();
5981 	  if (dump)
5982 	    fprintf (dump, "// Packet needs %s, have %s\n",
5983 		     type_names[packet->t[slot]], type_names[t]);
5984 	  sched_data.types[slot] = packet->t[slot];
5985 	  sched_data.insns[slot] = 0;
5986 	  sched_data.stopbit[slot] = 0;
5987 
5988 	  /* ??? TYPE_L instructions always fill up two slots, but we don't
5989 	     support TYPE_L nops.  */
5990 	  if (packet->t[slot] == TYPE_L)
5991 	    abort ();
5992 
5993 	  slot++;
5994 	}
5995 
5996       /* Do _not_ use T here.  If T == TYPE_A, then we'd risk changing the
5997 	 actual slot type later.  */
5998       sched_data.types[slot] = packet->t[slot];
5999       sched_data.insns[slot] = tmp_insns[i];
6000       sched_data.stopbit[slot] = 0;
6001       slot++;
6002 
6003       /* TYPE_L instructions always fill up two slots.  */
6004       if (t == TYPE_L)
6005 	{
6006 	  sched_data.types[slot] = packet->t[slot];
6007 	  sched_data.insns[slot] = 0;
6008 	  sched_data.stopbit[slot] = 0;
6009 	  slot++;
6010 	}
6011     }
6012 
6013   /* This isn't right - there's no need to pad out until the forced split;
6014      the CPU will automatically split if an insn isn't ready.  */
6015 #if 0
6016   while (slot < sched_data.split)
6017     {
6018       sched_data.types[slot] = packet->t[slot];
6019       sched_data.insns[slot] = 0;
6020       sched_data.stopbit[slot] = 0;
6021       slot++;
6022     }
6023 #endif
6024 
6025   sched_data.first_slot = sched_data.cur = slot;
6026 }
6027 
6028 /* Bundle rotations, as described in the Itanium optimization manual.
6029    We can rotate either one or both bundles out of the issue window.
6030    DUMP is the current scheduling dump file, or NULL.  */
6031 
6032 static void
6033 rotate_one_bundle (dump)
6034      FILE *dump;
6035 {
6036   if (dump)
6037     fprintf (dump, "// Rotating one bundle.\n");
6038 
6039   finish_last_head (dump, 0);
6040   if (sched_data.cur > 3)
6041     {
6042       sched_data.cur -= 3;
6043       sched_data.first_slot -= 3;
6044       memmove (sched_data.types,
6045 	       sched_data.types + 3,
6046 	       sched_data.cur * sizeof *sched_data.types);
6047       memmove (sched_data.stopbit,
6048 	       sched_data.stopbit + 3,
6049 	       sched_data.cur * sizeof *sched_data.stopbit);
6050       memmove (sched_data.insns,
6051 	       sched_data.insns + 3,
6052 	       sched_data.cur * sizeof *sched_data.insns);
6053       sched_data.packet
6054 	= &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
6055     }
6056   else
6057     {
6058       sched_data.cur = 0;
6059       sched_data.first_slot = 0;
6060     }
6061 }
6062 
6063 static void
6064 rotate_two_bundles (dump)
6065      FILE *dump;
6066 {
6067   if (dump)
6068     fprintf (dump, "// Rotating two bundles.\n");
6069 
6070   if (sched_data.cur == 0)
6071     return;
6072 
6073   finish_last_head (dump, 0);
6074   if (sched_data.cur > 3)
6075     finish_last_head (dump, 3);
6076   sched_data.cur = 0;
6077   sched_data.first_slot = 0;
6078 }
6079 
6080 /* We're beginning a new block.  Initialize data structures as necessary.  */
6081 
6082 static void
6083 ia64_sched_init (dump, sched_verbose, max_ready)
6084      FILE *dump ATTRIBUTE_UNUSED;
6085      int sched_verbose ATTRIBUTE_UNUSED;
6086      int max_ready;
6087 {
6088   static int initialized = 0;
6089 
6090   if (! initialized)
6091     {
6092       int b1, b2, i;
6093 
6094       initialized = 1;
6095 
6096       for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
6097 	{
6098 	  const struct bundle *t1 = bundle + b1;
6099 	  for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
6100 	    {
6101 	      const struct bundle *t2 = bundle + b2;
6102 
6103 	      packets[i].t1 = t1;
6104 	      packets[i].t2 = t2;
6105 	    }
6106 	}
6107       for (i = 0; i < NR_PACKETS; i++)
6108 	{
6109 	  int j;
6110 	  for (j = 0; j < 3; j++)
6111 	    packets[i].t[j] = packets[i].t1->t[j];
6112 	  for (j = 0; j < 3; j++)
6113 	    packets[i].t[j + 3] = packets[i].t2->t[j];
6114 	  packets[i].first_split = itanium_split_issue (packets + i, 0);
6115 	}
6116 
6117     }
6118 
6119   init_insn_group_barriers ();
6120 
6121   memset (&sched_data, 0, sizeof sched_data);
6122   sched_types = (enum attr_type *) xmalloc (max_ready
6123 					    * sizeof (enum attr_type));
6124   sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
6125 }
6126 
6127 /* See if the packet P can match the insns we have already scheduled.  Return
6128    nonzero if so.  In *PSLOT, we store the first slot that is available for
6129    more instructions if we choose this packet.
6130    SPLIT holds the last slot we can use; there's a split issue after it, so
6131    scheduling beyond it would cause us to use more than one cycle.  */
6132 
6133 static int
6134 packet_matches_p (p, split, pslot)
6135      const struct ia64_packet *p;
6136      int split;
6137      int *pslot;
6138 {
6139   int filled = sched_data.cur;
6140   int first = sched_data.first_slot;
6141   int i, slot;
6142 
6143   /* First, check if the first of the two bundles must be a specific one (due
6144      to stop bits).  */
6145   if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
6146     return 0;
6147   if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
6148     return 0;
6149 
6150   for (i = 0; i < first; i++)
6151     if (! insn_matches_slot (p, sched_data.types[i], i,
6152 			     sched_data.insns[i]))
6153       return 0;
6154   for (i = slot = first; i < filled; i++)
6155     {
6156       while (slot < split)
6157 	{
6158 	  if (insn_matches_slot (p, sched_data.types[i], slot,
6159 				 sched_data.insns[i]))
6160 	    break;
6161 	  slot++;
6162 	}
6163       if (slot == split)
6164 	return 0;
6165       slot++;
6166     }
6167 
6168   if (pslot)
6169     *pslot = slot;
6170   return 1;
6171 }
6172 
6173 /* A frontend for itanium_split_issue.  For a packet P and a slot
6174    number FIRST that describes the start of the current clock cycle,
6175    return the slot number of the first split issue.  This function
6176    uses the cached number found in P if possible.  */
6177 
6178 static int
6179 get_split (p, first)
6180      const struct ia64_packet *p;
6181      int first;
6182 {
6183   if (first == 0)
6184     return p->first_split;
6185   return itanium_split_issue (p, first);
6186 }
6187 
6188 /* Given N_READY insns in the array READY, whose types are found in the
6189    corresponding array TYPES, return the insn that is best suited to be
6190    scheduled in slot SLOT of packet P.  */
6191 
6192 static int
6193 find_best_insn (ready, types, n_ready, p, slot)
6194      rtx *ready;
6195      enum attr_type *types;
6196      int n_ready;
6197      const struct ia64_packet *p;
6198      int slot;
6199 {
6200   int best = -1;
6201   int best_pri = 0;
6202   while (n_ready-- > 0)
6203     {
6204       rtx insn = ready[n_ready];
6205       if (! insn)
6206 	continue;
6207       if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
6208 	break;
6209       /* If we have equally good insns, one of which has a stricter
6210 	 slot requirement, prefer the one with the stricter requirement.  */
6211       if (best >= 0 && types[n_ready] == TYPE_A)
6212 	continue;
6213       if (insn_matches_slot (p, types[n_ready], slot, insn))
6214 	{
6215 	  best = n_ready;
6216 	  best_pri = INSN_PRIORITY (ready[best]);
6217 
6218 	  /* If there's no way we could get a stricter requirement, stop
6219 	     looking now.  */
6220 	  if (types[n_ready] != TYPE_A
6221 	      && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
6222 	    break;
6223 	  break;
6224 	}
6225     }
6226   return best;
6227 }
6228 
6229 /* Select the best packet to use given the current scheduler state and the
6230    current ready list.
6231    READY is an array holding N_READY ready insns; TYPES is a corresponding
6232    array that holds their types.  Store the best packet in *PPACKET and the
6233    number of insns that can be scheduled in the current cycle in *PBEST.  */
6234 
6235 static void
6236 find_best_packet (pbest, ppacket, ready, types, n_ready)
6237      int *pbest;
6238      const struct ia64_packet **ppacket;
6239      rtx *ready;
6240      enum attr_type *types;
6241      int n_ready;
6242 {
6243   int first = sched_data.first_slot;
6244   int best = 0;
6245   int lowest_end = 6;
6246   const struct ia64_packet *best_packet = NULL;
6247   int i;
6248 
6249   for (i = 0; i < NR_PACKETS; i++)
6250     {
6251       const struct ia64_packet *p = packets + i;
6252       int slot;
6253       int split = get_split (p, first);
6254       int win = 0;
6255       int first_slot, last_slot;
6256       int b_nops = 0;
6257 
6258       if (! packet_matches_p (p, split, &first_slot))
6259 	continue;
6260 
6261       memcpy (sched_ready, ready, n_ready * sizeof (rtx));
6262 
6263       win = 0;
6264       last_slot = 6;
6265       for (slot = first_slot; slot < split; slot++)
6266 	{
6267 	  int insn_nr;
6268 
6269 	  /* Disallow a degenerate case where the first bundle doesn't
6270 	     contain anything but NOPs!  */
6271 	  if (first_slot == 0 && win == 0 && slot == 3)
6272 	    {
6273 	      win = -1;
6274 	      break;
6275 	    }
6276 
6277 	  insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
6278 	  if (insn_nr >= 0)
6279 	    {
6280 	      sched_ready[insn_nr] = 0;
6281 	      last_slot = slot;
6282 	      win++;
6283 	    }
6284 	  else if (p->t[slot] == TYPE_B)
6285 	    b_nops++;
6286 	}
6287       /* We must disallow MBB/BBB packets if any of their B slots would be
6288 	 filled with nops.  */
6289       if (last_slot < 3)
6290 	{
6291 	  if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
6292 	    win = -1;
6293 	}
6294       else
6295 	{
6296 	  if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
6297 	    win = -1;
6298 	}
6299 
6300       if (win > best
6301 	  || (win == best && last_slot < lowest_end))
6302 	{
6303 	  best = win;
6304 	  lowest_end = last_slot;
6305 	  best_packet = p;
6306 	}
6307     }
6308   *pbest = best;
6309   *ppacket = best_packet;
6310 }
6311 
6312 /* Reorder the ready list so that the insns that can be issued in this cycle
6313    are found in the correct order at the end of the list.
6314    DUMP is the scheduling dump file, or NULL.  READY points to the start,
6315    E_READY to the end of the ready list.  MAY_FAIL determines what should be
6316    done if no insns can be scheduled in this cycle: if it is zero, we abort,
6317    otherwise we return 0.
6318    Return 1 if any insns can be scheduled in this cycle.  */
6319 
6320 static int
6321 itanium_reorder (dump, ready, e_ready, may_fail)
6322      FILE *dump;
6323      rtx *ready;
6324      rtx *e_ready;
6325      int may_fail;
6326 {
6327   const struct ia64_packet *best_packet;
6328   int n_ready = e_ready - ready;
6329   int first = sched_data.first_slot;
6330   int i, best, best_split, filled;
6331 
6332   for (i = 0; i < n_ready; i++)
6333     sched_types[i] = ia64_safe_type (ready[i]);
6334 
6335   find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
6336 
6337   if (best == 0)
6338     {
6339       if (may_fail)
6340 	return 0;
6341       abort ();
6342     }
6343 
6344   if (dump)
6345     {
6346       fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
6347 	       best_packet->t1->name,
6348 	       best_packet->t2 ? best_packet->t2->name : NULL, best);
6349     }
6350 
6351   best_split = itanium_split_issue (best_packet, first);
6352   packet_matches_p (best_packet, best_split, &filled);
6353 
6354   for (i = filled; i < best_split; i++)
6355     {
6356       int insn_nr;
6357 
6358       insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
6359       if (insn_nr >= 0)
6360 	{
6361 	  rtx insn = ready[insn_nr];
6362 	  memmove (ready + insn_nr, ready + insn_nr + 1,
6363 		   (n_ready - insn_nr - 1) * sizeof (rtx));
6364 	  memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
6365 		   (n_ready - insn_nr - 1) * sizeof (enum attr_type));
6366 	  ready[--n_ready] = insn;
6367 	}
6368     }
6369 
6370   sched_data.packet = best_packet;
6371   sched_data.split = best_split;
6372   return 1;
6373 }
6374 
6375 /* Dump information about the current scheduling state to file DUMP.  */
6376 
6377 static void
6378 dump_current_packet (dump)
6379      FILE *dump;
6380 {
6381   int i;
6382   fprintf (dump, "//    %d slots filled:", sched_data.cur);
6383   for (i = 0; i < sched_data.first_slot; i++)
6384     {
6385       rtx insn = sched_data.insns[i];
6386       fprintf (dump, " %s", type_names[sched_data.types[i]]);
6387       if (insn)
6388 	fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
6389       if (sched_data.stopbit[i])
6390 	fprintf (dump, " ;;");
6391     }
6392   fprintf (dump, " :::");
6393   for (i = sched_data.first_slot; i < sched_data.cur; i++)
6394     {
6395       rtx insn = sched_data.insns[i];
6396       enum attr_type t = ia64_safe_type (insn);
6397       fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
6398     }
6399   fprintf (dump, "\n");
6400 }
6401 
6402 /* Schedule a stop bit.  DUMP is the current scheduling dump file, or
6403    NULL.  */
6404 
6405 static void
6406 schedule_stop (dump)
6407      FILE *dump;
6408 {
6409   const struct ia64_packet *best = sched_data.packet;
6410   int i;
6411   int best_stop = 6;
6412 
6413   if (dump)
6414     fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
6415 
6416   if (sched_data.cur == 0)
6417     {
6418       if (dump)
6419 	fprintf (dump, "//   At start of bundle, so nothing to do.\n");
6420 
6421       rotate_two_bundles (NULL);
6422       return;
6423     }
6424 
6425   for (i = -1; i < NR_PACKETS; i++)
6426     {
6427       /* This is a slight hack to give the current packet the first chance.
6428 	 This is done to avoid e.g. switching from MIB to MBB bundles.  */
6429       const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6430       int split = get_split (p, sched_data.first_slot);
6431       const struct bundle *compare;
6432       int next, stoppos;
6433 
6434       if (! packet_matches_p (p, split, &next))
6435 	continue;
6436 
6437       compare = next > 3 ? p->t2 : p->t1;
6438 
6439       stoppos = 3;
6440       if (compare->possible_stop)
6441 	stoppos = compare->possible_stop;
6442       if (next > 3)
6443 	stoppos += 3;
6444 
6445       if (stoppos < next || stoppos >= best_stop)
6446 	{
6447 	  if (compare->possible_stop == 0)
6448 	    continue;
6449 	  stoppos = (next > 3 ? 6 : 3);
6450 	}
6451       if (stoppos < next || stoppos >= best_stop)
6452 	continue;
6453 
6454       if (dump)
6455 	fprintf (dump, "//   switching from %s %s to %s %s (stop at %d)\n",
6456 		 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6457 		 stoppos);
6458 
6459       best_stop = stoppos;
6460       best = p;
6461     }
6462 
6463   sched_data.packet = best;
6464   cycle_end_fill_slots (dump);
6465   while (sched_data.cur < best_stop)
6466     {
6467       sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6468       sched_data.insns[sched_data.cur] = 0;
6469       sched_data.stopbit[sched_data.cur] = 0;
6470       sched_data.cur++;
6471     }
6472   sched_data.stopbit[sched_data.cur - 1] = 1;
6473   sched_data.first_slot = best_stop;
6474 
6475   if (dump)
6476     dump_current_packet (dump);
6477 }
6478 
6479 /* If necessary, perform one or two rotations on the scheduling state.
6480    This should only be called if we are starting a new cycle.  */
6481 
6482 static void
6483 maybe_rotate (dump)
6484      FILE *dump;
6485 {
6486   cycle_end_fill_slots (dump);
6487   if (sched_data.cur == 6)
6488     rotate_two_bundles (dump);
6489   else if (sched_data.cur >= 3)
6490     rotate_one_bundle (dump);
6491   sched_data.first_slot = sched_data.cur;
6492 }
6493 
6494 /* The clock cycle when ia64_sched_reorder was last called.  */
6495 static int prev_cycle;
6496 
6497 /* The first insn scheduled in the previous cycle.  This is the saved
6498    value of sched_data.first_slot.  */
6499 static int prev_first;
6500 
6501 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR.  Used to
6502    pad out the delay between MM (shifts, etc.) and integer operations.  */
6503 
6504 static void
6505 nop_cycles_until (clock_var, dump)
6506      int clock_var;
6507      FILE *dump;
6508 {
6509   int prev_clock = prev_cycle;
6510   int cycles_left = clock_var - prev_clock;
6511   bool did_stop = false;
6512 
6513   /* Finish the previous cycle; pad it out with NOPs.  */
6514   if (sched_data.cur == 3)
6515     {
6516       sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6517       did_stop = true;
6518       maybe_rotate (dump);
6519     }
6520   else if (sched_data.cur > 0)
6521     {
6522       int need_stop = 0;
6523       int split = itanium_split_issue (sched_data.packet, prev_first);
6524 
6525       if (sched_data.cur < 3 && split > 3)
6526 	{
6527 	  split = 3;
6528 	  need_stop = 1;
6529 	}
6530 
6531       if (split > sched_data.cur)
6532 	{
6533 	  int i;
6534 	  for (i = sched_data.cur; i < split; i++)
6535 	    {
6536 	      rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6537 	      sched_data.types[i] = sched_data.packet->t[i];
6538 	      sched_data.insns[i] = t;
6539 	      sched_data.stopbit[i] = 0;
6540 	    }
6541 	  sched_data.cur = split;
6542 	}
6543 
6544       if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6545 	  && cycles_left > 1)
6546 	{
6547 	  int i;
6548 	  for (i = sched_data.cur; i < 6; i++)
6549 	    {
6550 	      rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6551 	      sched_data.types[i] = sched_data.packet->t[i];
6552 	      sched_data.insns[i] = t;
6553 	      sched_data.stopbit[i] = 0;
6554 	    }
6555 	  sched_data.cur = 6;
6556 	  cycles_left--;
6557 	  need_stop = 1;
6558 	}
6559 
6560       if (need_stop || sched_data.cur == 6)
6561 	{
6562 	  sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6563 	  did_stop = true;
6564 	}
6565       maybe_rotate (dump);
6566     }
6567 
6568   cycles_left--;
6569   while (cycles_left > 0)
6570     {
6571       sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6572       sched_emit_insn (gen_nop_type (TYPE_M));
6573       sched_emit_insn (gen_nop_type (TYPE_I));
6574       if (cycles_left > 1)
6575 	{
6576 	  sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6577 	  cycles_left--;
6578 	}
6579       sched_emit_insn (gen_nop_type (TYPE_I));
6580       sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6581       did_stop = true;
6582       cycles_left--;
6583     }
6584 
6585   if (did_stop)
6586     init_insn_group_barriers ();
6587 }
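
/* Each fully idle cycle emitted by the loop above comes out as an
   explicit .mii bundle of nops terminated by a stop bit, roughly:

	{ .mii
	  nop.m 0
	  nop.i 0
	  nop.i 0
	}  ;;

   gen_bundle_selector (GEN_INT (0)) names ".mii" (bundle[0]) and the
   final insn_group_barrier supplies the ";;".  */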
6588 
6589 /* We are about to begin issuing insns for this clock cycle.
6590    Override the default sort algorithm to better slot instructions.  */
6591 
6592 static int
6593 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6594 		    reorder_type, clock_var)
6595      FILE *dump ATTRIBUTE_UNUSED;
6596      int sched_verbose ATTRIBUTE_UNUSED;
6597      rtx *ready;
6598      int *pn_ready;
6599      int reorder_type, clock_var;
6600 {
6601   int n_asms;
6602   int n_ready = *pn_ready;
6603   rtx *e_ready = ready + n_ready;
6604   rtx *insnp;
6605 
6606   if (sched_verbose)
6607     {
6608       fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6609       dump_current_packet (dump);
6610     }
6611 
6612   /* Work around the pipeline flush that will occur if the results of
6613      an MM instruction are accessed before the result is ready.  Intel
6614      documentation says this only happens with IALU, ISHF, ILOG, LD,
6615      and ST consumers, but experimental evidence shows that *any* non-MM
6616      type instruction will incur the flush.  */
6617   if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6618     {
6619       for (insnp = ready; insnp < e_ready; insnp++)
6620 	{
6621 	  rtx insn = *insnp, link;
6622 	  enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6623 
6624 	  if (t == ITANIUM_CLASS_MMMUL
6625 	      || t == ITANIUM_CLASS_MMSHF
6626 	      || t == ITANIUM_CLASS_MMSHFI)
6627 	    continue;
6628 
6629 	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6630 	    if (REG_NOTE_KIND (link) == 0)
6631 	      {
6632 		rtx other = XEXP (link, 0);
6633 		enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6634 		if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
6635 		  {
6636 		    nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6637 		    goto out;
6638 		  }
6639 	      }
6640 	}
6641     }
6642  out:
6643 
6644   prev_first = sched_data.first_slot;
6645   prev_cycle = clock_var;
6646 
6647   if (reorder_type == 0)
6648     maybe_rotate (sched_verbose ? dump : NULL);
6649 
6650   /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6651   n_asms = 0;
6652   for (insnp = ready; insnp < e_ready; insnp++)
6653     if (insnp < e_ready)
6654       {
6655 	rtx insn = *insnp;
6656 	enum attr_type t = ia64_safe_type (insn);
6657 	if (t == TYPE_UNKNOWN)
6658 	  {
6659 	    if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6660 		|| asm_noperands (PATTERN (insn)) >= 0)
6661 	      {
6662 		rtx lowest = ready[n_asms];
6663 		ready[n_asms] = insn;
6664 		*insnp = lowest;
6665 		n_asms++;
6666 	      }
6667 	    else
6668 	      {
6669 		rtx highest = ready[n_ready - 1];
6670 		ready[n_ready - 1] = insn;
6671 		*insnp = highest;
6672 		if (ia64_final_schedule && group_barrier_needed_p (insn))
6673 		  {
6674 		    schedule_stop (sched_verbose ? dump : NULL);
6675 		    sched_data.last_was_stop = 1;
6676 		    maybe_rotate (sched_verbose ? dump : NULL);
6677 		  }
6678 
6679 		return 1;
6680 	      }
6681 	  }
6682       }
6683   if (n_asms < n_ready)
6684     {
6685       /* Some normal insns to process.  Skip the asms.  */
6686       ready += n_asms;
6687       n_ready -= n_asms;
6688     }
6689   else if (n_ready > 0)
6690     {
6691       /* Only asm insns left.  */
6692       if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6693 	{
6694 	  schedule_stop (sched_verbose ? dump : NULL);
6695 	  sched_data.last_was_stop = 1;
6696 	  maybe_rotate (sched_verbose ? dump : NULL);
6697 	}
6698       cycle_end_fill_slots (sched_verbose ? dump : NULL);
6699       return 1;
6700     }
6701 
6702   if (ia64_final_schedule)
6703     {
6704       int nr_need_stop = 0;
6705 
6706       for (insnp = ready; insnp < e_ready; insnp++)
6707 	if (safe_group_barrier_needed_p (*insnp))
6708 	  nr_need_stop++;
6709 
6710       /* Schedule a stop bit if
6711           - all insns require a stop bit, or
6712           - we are starting a new cycle and _any_ insns require a stop bit.
6713          The reason for the latter is that if our schedule is accurate, then
6714          the additional stop won't decrease performance at this point (since
6715 	 there's a split issue at this point anyway), but it gives us more
6716          freedom when scheduling the currently ready insns.  */
6717       if ((reorder_type == 0 && nr_need_stop)
6718 	  || (reorder_type == 1 && n_ready == nr_need_stop))
6719 	{
6720 	  schedule_stop (sched_verbose ? dump : NULL);
6721 	  sched_data.last_was_stop = 1;
6722 	  maybe_rotate (sched_verbose ? dump : NULL);
6723 	  if (reorder_type == 1)
6724 	    return 0;
6725 	}
6726       else
6727 	{
6728 	  int deleted = 0;
6729 	  insnp = e_ready;
6730 	  /* Move down everything that needs a stop bit, preserving relative
6731 	     order.  */
6732 	  while (insnp-- > ready + deleted)
6733 	    while (insnp >= ready + deleted)
6734 	      {
6735 		rtx insn = *insnp;
6736 		if (! safe_group_barrier_needed_p (insn))
6737 		  break;
6738 		memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6739 		*ready = insn;
6740 		deleted++;
6741 	      }
6742 	  n_ready -= deleted;
6743 	  ready += deleted;
6744 	  if (deleted != nr_need_stop)
6745 	    abort ();
6746 	}
6747     }
6748 
6749   return itanium_reorder (sched_verbose ? dump : NULL,
6750 			  ready, e_ready, reorder_type == 1);
6751 }
6752 
6753 static int
6754 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6755      FILE *dump;
6756      int sched_verbose;
6757      rtx *ready;
6758      int *pn_ready;
6759      int clock_var;
6760 {
6761   return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6762 				      pn_ready, 0, clock_var);
6763 }
6764 
6765 /* Like ia64_sched_reorder, but called after issuing each insn.
6766    Override the default sort algorithm to better slot instructions.  */
6767 
6768 static int
6769 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6770      FILE *dump ATTRIBUTE_UNUSED;
6771      int sched_verbose ATTRIBUTE_UNUSED;
6772      rtx *ready;
6773      int *pn_ready;
6774      int clock_var;
6775 {
6776   if (sched_data.last_was_stop)
6777     return 0;
6778 
6779   /* Detect one special case and try to optimize it.
6780      If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6781      then we can get better code by transforming this to 1.MFB;; 2.MIx.  */
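  /* Informal sketch (not an exact trace): a schedule currently shaped as

	{ .mmi  insn1 ;;  insn2  insn3 }  ...

     is rebuilt so insn1 ends an MFB bundle with a clean trailing stop
     and insn2/insn3 begin the next bundle:

	{ .mfb  insn1  nop.f  nop.b ;; }  { .mii  insn2  insn3  ... }

     To match the new end-of-bundle position, the stop bit's slot
     selector is rewritten from 1 to 3 below.  */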
6782   if (sched_data.first_slot == 1
6783       && sched_data.stopbit[0]
6784       && ((sched_data.cur == 4
6785 	   && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6786 	   && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6787 	   && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6788 	  || (sched_data.cur == 3
6789 	      && (sched_data.types[1] == TYPE_M
6790 		  || sched_data.types[1] == TYPE_A)
6791 	      && (sched_data.types[2] != TYPE_M
6792 		  && sched_data.types[2] != TYPE_I
6793 		  && sched_data.types[2] != TYPE_A))))
6794 
6795     {
6796       int i, best;
6797       rtx stop = sched_data.insns[1];
6798 
6799       /* Search backward for the stop bit that must be there.  */
6800       while (1)
6801 	{
6802 	  int insn_code;
6803 
6804 	  stop = PREV_INSN (stop);
6805 	  if (GET_CODE (stop) != INSN)
6806 	    abort ();
6807 	  insn_code = recog_memoized (stop);
6808 
6809 	  /* Ignore .pred.rel.mutex.
6810 
6811 	     ??? Update this to ignore cycle display notes too
6812 	     ??? once those are implemented.  */
6813 	  if (insn_code == CODE_FOR_pred_rel_mutex
6814 	      || insn_code == CODE_FOR_prologue_use)
6815 	    continue;
6816 
6817 	  if (insn_code == CODE_FOR_insn_group_barrier)
6818 	    break;
6819 	  abort ();
6820 	}
6821 
6822       /* Adjust the stop bit's slot selector.  */
6823       if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6824 	abort ();
6825       XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6826 
6827       sched_data.stopbit[0] = 0;
6828       sched_data.stopbit[2] = 1;
6829 
6830       sched_data.types[5] = sched_data.types[3];
6831       sched_data.types[4] = sched_data.types[2];
6832       sched_data.types[3] = sched_data.types[1];
6833       sched_data.insns[5] = sched_data.insns[3];
6834       sched_data.insns[4] = sched_data.insns[2];
6835       sched_data.insns[3] = sched_data.insns[1];
6836       sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6837       sched_data.cur += 2;
6838       sched_data.first_slot = 3;
6839       for (i = 0; i < NR_PACKETS; i++)
6840 	{
6841 	  const struct ia64_packet *p = packets + i;
6842 	  if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6843 	    {
6844 	      sched_data.packet = p;
6845 	      break;
6846 	    }
6847 	}
6848       rotate_one_bundle (sched_verbose ? dump : NULL);
6849 
6850       best = 6;
6851       for (i = 0; i < NR_PACKETS; i++)
6852 	{
6853 	  const struct ia64_packet *p = packets + i;
6854 	  int split = get_split (p, sched_data.first_slot);
6855 	  int next;
6856 
6857 	  /* Disallow multiway branches here.  */
6858 	  if (p->t[1] == TYPE_B)
6859 	    continue;
6860 
6861 	  if (packet_matches_p (p, split, &next) && next < best)
6862 	    {
6863 	      best = next;
6864 	      sched_data.packet = p;
6865 	      sched_data.split = split;
6866 	    }
6867 	}
6868       if (best == 6)
6869 	abort ();
6870     }
6871 
6872   if (*pn_ready > 0)
6873     {
6874       int more = ia64_internal_sched_reorder (dump, sched_verbose,
6875 					      ready, pn_ready, 1,
6876 					      clock_var);
6877       if (more)
6878 	return more;
6879       /* Did we schedule a stop?  If so, finish this cycle.  */
6880       if (sched_data.cur == sched_data.first_slot)
6881 	return 0;
6882     }
6883 
6884   if (sched_verbose)
6885     fprintf (dump, "//   Can't issue more this cycle; updating type array.\n");
6886 
6887   cycle_end_fill_slots (sched_verbose ? dump : NULL);
6888   if (sched_verbose)
6889     dump_current_packet (dump);
6890   return 0;
6891 }
6892 
6893 /* We are about to issue INSN.  Return the number of insns left on the
6894    ready queue that can be issued this cycle.  */
6895 
6896 static int
6897 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6898      FILE *dump;
6899      int sched_verbose;
6900      rtx insn;
6901      int can_issue_more ATTRIBUTE_UNUSED;
6902 {
6903   enum attr_type t = ia64_safe_type (insn);
6904 
6905   if (sched_data.last_was_stop)
6906     {
6907       int t = sched_data.first_slot;
6908       if (t == 0)
6909 	t = 3;
6910       ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6911       init_insn_group_barriers ();
6912       sched_data.last_was_stop = 0;
6913     }
6914 
6915   if (t == TYPE_UNKNOWN)
6916     {
6917       if (sched_verbose)
6918 	fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6919       if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6920 	  || asm_noperands (PATTERN (insn)) >= 0)
6921 	{
6922 	  /* This must be some kind of asm.  Clear the scheduling state.  */
6923 	  rotate_two_bundles (sched_verbose ? dump : NULL);
6924 	  if (ia64_final_schedule)
6925 	    group_barrier_needed_p (insn);
6926 	}
6927       return 1;
6928     }
6929 
6930   /* This is _not_ just a sanity check.  group_barrier_needed_p will update
6931      important state info.  Don't delete this test.  */
6932   if (ia64_final_schedule
6933       && group_barrier_needed_p (insn))
6934     abort ();
6935 
6936   sched_data.stopbit[sched_data.cur] = 0;
6937   sched_data.insns[sched_data.cur] = insn;
6938   sched_data.types[sched_data.cur] = t;
6939 
6940   sched_data.cur++;
6941   if (sched_verbose)
6942     fprintf (dump, "// Scheduling insn %d of type %s\n",
6943 	     INSN_UID (insn), type_names[t]);
6944 
6945   if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6946     {
6947       schedule_stop (sched_verbose ? dump : NULL);
6948       sched_data.last_was_stop = 1;
6949     }
6950 
6951   return 1;
6952 }
6953 
6954 /* Free data allocated by ia64_sched_init.  */
6955 
6956 static void
6957 ia64_sched_finish (dump, sched_verbose)
6958      FILE *dump;
6959      int sched_verbose;
6960 {
6961   if (sched_verbose)
6962     fprintf (dump, "// Finishing schedule.\n");
6963   rotate_two_bundles (NULL);
6964   free (sched_types);
6965   free (sched_ready);
6966 }
6967 
6968 /* Emit pseudo-ops for the assembler to describe predicate relations.
6969    At present this assumes that we only consider predicate pairs to
6970    be mutex, and that the assembler can deduce proper values from
6971    straight-line code.  */
6972 
6973 static void
6974 emit_predicate_relation_info ()
6975 {
6976   basic_block bb;
6977 
6978   FOR_EACH_BB_REVERSE (bb)
6979     {
6980       int r;
6981       rtx head = bb->head;
6982 
6983       /* We only need such notes at code labels.  */
6984       if (GET_CODE (head) != CODE_LABEL)
6985 	continue;
6986       if (GET_CODE (NEXT_INSN (head)) == NOTE
6987 	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6988 	head = NEXT_INSN (head);
6989 
6990       for (r = PR_REG (0); r < PR_REG (64); r += 2)
6991 	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6992 	  {
6993 	    rtx p = gen_rtx_REG (BImode, r);
6994 	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6995 	    if (head == bb->end)
6996 	      bb->end = n;
6997 	    head = n;
6998 	  }
6999     }
7000 
7001   /* Look for conditional calls that do not return, and protect predicate
7002      relations around them.  Otherwise the assembler will assume the call
7003      returns, and complain about uses of call-clobbered predicates after
7004      the call.  */
7005   FOR_EACH_BB_REVERSE (bb)
7006     {
7007       rtx insn = bb->head;
7008 
7009       while (1)
7010 	{
7011 	  if (GET_CODE (insn) == CALL_INSN
7012 	      && GET_CODE (PATTERN (insn)) == COND_EXEC
7013 	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7014 	    {
7015 	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7016 	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7017 	      if (bb->head == insn)
7018 		bb->head = b;
7019 	      if (bb->end == insn)
7020 		bb->end = a;
7021 	    }
7022 
7023 	  if (insn == bb->end)
7024 	    break;
7025 	  insn = NEXT_INSN (insn);
7026 	}
7027     }
7028 }
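
/* A hedged sketch of the output: for each live predicate pair this
   emits an annotation of the form

	.pred.rel.mutex p6, p7

   (the exact template comes from the pred_rel_mutex pattern in
   ia64.md), and a no-return conditional call is bracketed with
   ".pred.safe_across_calls"-style directives so the assembler does not
   assume the call returns.  */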
7029 
7030 /* Generate a NOP instruction of type T.  We will never generate L type
7031    nops.  */
7032 
7033 static rtx
7034 gen_nop_type (t)
7035      enum attr_type t;
7036 {
7037   switch (t)
7038     {
7039     case TYPE_M:
7040       return gen_nop_m ();
7041     case TYPE_I:
7042       return gen_nop_i ();
7043     case TYPE_B:
7044       return gen_nop_b ();
7045     case TYPE_F:
7046       return gen_nop_f ();
7047     case TYPE_X:
7048       return gen_nop_x ();
7049     default:
7050       abort ();
7051     }
7052 }
7053 
7054 /* After the last scheduling pass, fill in NOPs.  It's easier to do this
7055    here than while scheduling.  */
7056 
7057 static void
7058 ia64_emit_nops ()
7059 {
7060   rtx insn;
7061   const struct bundle *b = 0;
7062   int bundle_pos = 0;
7063 
7064   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7065     {
7066       rtx pat;
7067       enum attr_type t;
7068       pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
7069       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
7070 	continue;
7071       if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
7072 	  || GET_CODE (insn) == CODE_LABEL)
7073 	{
7074 	  if (b)
7075 	    while (bundle_pos < 3)
7076 	      {
7077 		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7078 		bundle_pos++;
7079 	      }
7080 	  if (GET_CODE (insn) != CODE_LABEL)
7081 	    b = bundle + INTVAL (XVECEXP (pat, 0, 0));
7082 	  else
7083 	    b = 0;
7084 	  bundle_pos = 0;
7085 	  continue;
7086 	}
7087       else if (GET_CODE (pat) == UNSPEC_VOLATILE
7088 	       && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
7089 	{
7090 	  int t = INTVAL (XVECEXP (pat, 0, 0));
7091 	  if (b)
7092 	    while (bundle_pos < t)
7093 	      {
7094 		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7095 		bundle_pos++;
7096 	      }
7097 	  continue;
7098 	}
7099 
7100       if (bundle_pos == 3)
7101 	b = 0;
7102 
7103       if (b && INSN_P (insn))
7104 	{
7105 	  t = ia64_safe_type (insn);
7106 	  if (asm_noperands (PATTERN (insn)) >= 0
7107 	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
7108 	    {
7109 	      while (bundle_pos < 3)
7110 		{
7111 		  if (b->t[bundle_pos] != TYPE_L)
7112 		    emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7113 		  bundle_pos++;
7114 		}
7115 	      continue;
7116 	    }
7117 
7118 	  if (t == TYPE_UNKNOWN)
7119 	    continue;
7120 	  while (bundle_pos < 3)
7121 	    {
7122 	      if (t == b->t[bundle_pos]
7123 		  || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
7124 				      || b->t[bundle_pos] == TYPE_I)))
7125 		break;
7126 
7127 	      emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7128 	      bundle_pos++;
7129 	    }
7130 	  if (bundle_pos < 3)
7131 	    bundle_pos++;
7132 	}
7133     }
7134 }
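
/* Illustration (assumed output): if the scheduler committed to an MII
   bundle but the cycle ended after the first insn, this pass inserts
   the missing NOPs so the assembler sees a complete bundle:

	{ .mii
	  ld8 r14 = [r15] ;;
	  nop.i 0
	  nop.i 0
	}
*/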
7135 
7136 /* Perform machine dependent operations on the rtl chain INSNS.  */
7137 
7138 void
7139 ia64_reorg (insns)
7140      rtx insns;
7141 {
7142   /* We are freeing block_for_insn in the toplev to keep compatibility
7143      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
7144   compute_bb_for_insn ();
7145 
7146   /* If optimizing, we'll have split before scheduling.  */
7147   if (optimize == 0)
7148     split_all_insns (0);
7149 
7150   /* ??? update_life_info_in_dirty_blocks fails to terminate during
7151      non-optimizing bootstrap.  */
7152   update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7153 
7154   if (ia64_flag_schedule_insns2)
7155     {
7156       timevar_push (TV_SCHED2);
7157       ia64_final_schedule = 1;
7158       schedule_ebbs (rtl_dump_file);
7159       ia64_final_schedule = 0;
7160       timevar_pop (TV_SCHED2);
7161 
7162       /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
7163 	 place as they were during scheduling.  */
7164       emit_insn_group_barriers (rtl_dump_file, insns);
7165       ia64_emit_nops ();
7166     }
7167   else
7168     emit_all_insn_group_barriers (rtl_dump_file, insns);
7169 
7170   /* A call must not be the last instruction in a function, so that the
7171      return address remains within the function and unwinding works
7172      properly.  Note that IA-64 differs from dwarf2 on this point.  */
7173   if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7174     {
7175       rtx insn;
7176       int saw_stop = 0;
7177 
7178       insn = get_last_insn ();
7179       if (! INSN_P (insn))
7180         insn = prev_active_insn (insn);
7181       /* Skip over insns that expand to nothing.  */
7182       while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7183         {
7184 	  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7185 	      && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7186 	    saw_stop = 1;
7187 	  insn = prev_active_insn (insn);
7188 	}
7189       if (GET_CODE (insn) == CALL_INSN)
7190 	{
7191 	  if (! saw_stop)
7192 	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7193 	  emit_insn (gen_break_f ());
7194 	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7195 	}
7196     }
7197 
7198   fixup_errata ();
7199   emit_predicate_relation_info ();
7200 }
7201 
7202 /* Return true if REGNO is used by the epilogue.  */
7203 
7204 int
7205 ia64_epilogue_uses (regno)
7206      int regno;
7207 {
7208   switch (regno)
7209     {
7210     case R_GR (1):
7211       /* With a call to a function in another module, we will write a new
7212 	 value to "gp".  After returning from such a call, we need to make
7213 	 sure the function restores the original gp-value, even if the
7214 	 function itself does not use the gp anymore.  */
7215       return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7216 
7217     case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7218     case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7219       /* For functions defined with the syscall_linkage attribute, all
7220 	 input registers are marked as live at all function exits.  This
7221 	 prevents the register allocator from using the input registers,
7222 	 which in turn makes it possible to restart a system call after
7223 	 an interrupt without having to save/restore the input registers.
7224 	 This also prevents kernel data from leaking to application code.  */
7225       return lookup_attribute ("syscall_linkage",
7226 	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7227 
7228     case R_BR (0):
7229       /* Conditional return patterns can't represent the use of `b0' as
7230          the return address, so we force the value live this way.  */
7231       return 1;
7232 
7233     case AR_PFS_REGNUM:
7234       /* Likewise for ar.pfs, which is used by br.ret.  */
7235       return 1;
7236 
7237     default:
7238       return 0;
7239     }
7240 }
7241 
7242 /* Return true if REGNO is used by the frame unwinder.  */
7243 
7244 int
7245 ia64_eh_uses (regno)
7246      int regno;
7247 {
7248   if (! reload_completed)
7249     return 0;
7250 
7251   if (current_frame_info.reg_save_b0
7252       && regno == current_frame_info.reg_save_b0)
7253     return 1;
7254   if (current_frame_info.reg_save_pr
7255       && regno == current_frame_info.reg_save_pr)
7256     return 1;
7257   if (current_frame_info.reg_save_ar_pfs
7258       && regno == current_frame_info.reg_save_ar_pfs)
7259     return 1;
7260   if (current_frame_info.reg_save_ar_unat
7261       && regno == current_frame_info.reg_save_ar_unat)
7262     return 1;
7263   if (current_frame_info.reg_save_ar_lc
7264       && regno == current_frame_info.reg_save_ar_lc)
7265     return 1;
7266 
7267   return 0;
7268 }
7269 
7270 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
7271 
7272    We add @ to the name if this goes in small data/bss.  We can only put
7273    a variable in small data/bss if it is defined in this module or a module
7274    that we are statically linked with.  We can't check the second condition,
7275    but TREE_STATIC gives us the first one.  */
7276 
7277 /* ??? If we had IPA, we could check the second condition.  We could support
7278    programmer added section attributes if the variable is not defined in this
7279    module.  */
7280 
7281 /* ??? See the v850 port for a cleaner way to do this.  */
7282 
7283 /* ??? We could also support our own long data here, generating movl/add/ld8
7284    instead of addl,ld8/ld8.  This makes the code bigger, but should make it
7285    faster because there is one less load.  This would also cover incomplete
7286    types, which can't go in sdata/sbss.  */
7287 
7288 static bool
7289 ia64_in_small_data_p (exp)
7290      tree exp;
7291 {
7292   if (TARGET_NO_SDATA)
7293     return false;
7294 
7295   /* Functions are never small data.  */
7296   if (TREE_CODE (exp) == FUNCTION_DECL)
7297     return false;
7298 
7299   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7300     {
7301       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7302       if (strcmp (section, ".sdata") == 0
7303 	  || strcmp (section, ".sbss") == 0)
7304 	return true;
7305     }
7306   else
7307     {
7308       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7309 
7310       /* If this is an incomplete type with size 0, then we can't put it
7311 	 in sdata because it might be too big when completed.  */
7312       if (size > 0 && size <= ia64_section_threshold)
7313 	return true;
7314     }
7315 
7316   return false;
7317 }
7318 
7319 static void
7320 ia64_encode_section_info (decl, first)
7321      tree decl;
7322      int first ATTRIBUTE_UNUSED;
7323 {
7324   const char *symbol_str;
7325   bool is_local;
7326   rtx symbol;
7327   char encoding = 0;
7328 
7329   if (TREE_CODE (decl) == FUNCTION_DECL)
7330     {
7331       SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
7332       return;
7333     }
7334 
7335   /* Careful not to prod global register variables.  */
7336   if (TREE_CODE (decl) != VAR_DECL
7337       || GET_CODE (DECL_RTL (decl)) != MEM
7338       || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
7339     return;
7340 
7341   symbol = XEXP (DECL_RTL (decl), 0);
7342   symbol_str = XSTR (symbol, 0);
7343 
7344   is_local = (*targetm.binds_local_p) (decl);
7345 
7346   if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
7347     encoding = " GLil"[decl_tls_model (decl)];
7348   /* Determine if DECL will wind up in .sdata/.sbss.  */
7349   else if (is_local && ia64_in_small_data_p (decl))
7350     encoding = 's';
7351 
7352   /* Finally, encode this into the symbol string.  */
7353   if (encoding)
7354     {
7355       char *newstr;
7356       size_t len;
7357 
7358       if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7359 	{
7360 	  if (encoding == symbol_str[1])
7361 	    return;
7362 	  /* ??? Sdata became thread or thread became not thread.  Lose.  */
7363 	  if (encoding == 's' || symbol_str[1] == 's')
7364 	    abort ();
7365 	}
7366 
7367       len = strlen (symbol_str);
7368       newstr = alloca (len + 3);
7369       newstr[0] = ENCODE_SECTION_INFO_CHAR;
7370       newstr[1] = encoding;
7371       memcpy (newstr + 2, symbol_str, len + 1);
7372 
7373       XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2);
7374     }
7375 
7376   /* This decl is marked as being in small data/bss but it shouldn't be;
7377      one likely explanation for this is that the decl has been moved into
7378      a different section from the one it was in when encode_section_info
7379      was first called.  Remove the encoding.  */
7380   else if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7381     XSTR (symbol, 0) = ggc_strdup (symbol_str + 2);
7382 }
7383 
7384 static const char *
7385 ia64_strip_name_encoding (str)
7386      const char *str;
7387 {
7388   if (str[0] == ENCODE_SECTION_INFO_CHAR)
7389     str += 2;
7390   if (str[0] == '*')
7391     str++;
7392   return str;
7393 }
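
/* Assumed example: a small-data variable `foo' carries the encoded
   assembler name "<C>sfoo", and a thread-local one "<C>Gfoo" (one of
   "GLil", by TLS model), where <C> stands for ENCODE_SECTION_INFO_CHAR.
   ia64_strip_name_encoding drops that two-character prefix, plus any
   leading '*', to recover "foo".  */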
7394 
7395 /* True if it is OK to do sibling call optimization for the specified
7396    call expression EXP.  DECL will be the called function, or NULL if
7397    this is an indirect call.  */
7398 bool
7399 ia64_function_ok_for_sibcall (decl)
7400      tree decl;
7401 {
7402   /* We can't perform a sibcall if the current function has the syscall_linkage
7403      attribute.  */
7404   if (lookup_attribute ("syscall_linkage",
7405 			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7406     return false;
7407 
7408   /* We must always return with our current GP.  This means we can
7409      only sibcall to functions defined in the current module.  */
7410   return decl && (*targetm.binds_local_p) (decl);
7411 }
7412 
7413 /* Output assembly directives for prologue regions.  */
7414 
7415 /* True if the current basic block is the last block of the function.  */
7416 
7417 static bool last_block;
7418 
7419 /* True if we need a copy_state command at the start of the next block.  */
7420 
7421 static bool need_copy_state;
7422 
7423 /* The function emits unwind directives for the start of an epilogue.  */
7424 
7425 static void
7426 process_epilogue ()
7427 {
7428   /* If this isn't the last block of the function, then we need to label the
7429      current state, and copy it back in at the start of the next block.  */
7430 
7431   if (!last_block)
7432     {
7433       fprintf (asm_out_file, "\t.label_state 1\n");
7434       need_copy_state = true;
7435     }
7436 
7437   fprintf (asm_out_file, "\t.restore sp\n");
7438 }
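
/* For an epilogue in the middle of a function this emits, e.g.:

	.label_state 1
	.restore sp

   and process_for_unwind_directive later emits ".copy_state 1" at the
   start of the next block, restoring the unwind state from just before
   the epilogue.  */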
7439 
7440 /* This function processes a SET pattern looking for specific patterns
7441    which result in emitting an assembly directive required for unwinding.  */
7442 
7443 static int
7444 process_set (asm_out_file, pat)
7445      FILE *asm_out_file;
7446      rtx pat;
7447 {
7448   rtx src = SET_SRC (pat);
7449   rtx dest = SET_DEST (pat);
7450   int src_regno, dest_regno;
7451 
7452   /* Look for the ALLOC insn.  */
7453   if (GET_CODE (src) == UNSPEC_VOLATILE
7454       && XINT (src, 1) == UNSPECV_ALLOC
7455       && GET_CODE (dest) == REG)
7456     {
7457       dest_regno = REGNO (dest);
7458 
7459       /* If this is the final destination for ar.pfs, then this must
7460 	 be the alloc in the prologue.  */
7461       if (dest_regno == current_frame_info.reg_save_ar_pfs)
7462 	fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7463 		 ia64_dbx_register_number (dest_regno));
7464       else
7465 	{
7466 	  /* This must be an alloc before a sibcall.  We must drop the
7467 	     old frame info.  The easiest way to drop the old frame
7468 	     info is to ensure we had a ".restore sp" directive
7469 	     followed by a new prologue.  If the procedure doesn't
7470 	     have a memory-stack frame, we'll issue a dummy ".restore
7471 	     sp" now.  */
7472 	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
7473 	    /* If we haven't done process_epilogue () yet, do it now.  */
7474 	    process_epilogue ();
7475 	  fprintf (asm_out_file, "\t.prologue\n");
7476 	}
7477       return 1;
7478     }
7479 
7480   /* Look for SP = ....  */
7481   if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7482     {
7483       if (GET_CODE (src) == PLUS)
7484         {
7485 	  rtx op0 = XEXP (src, 0);
7486 	  rtx op1 = XEXP (src, 1);
7487 	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
7488 	    {
7489 	      if (INTVAL (op1) < 0)
7490 		{
7491 		  fputs ("\t.fframe ", asm_out_file);
7492 		  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
7493 			   -INTVAL (op1));
7494 		  fputc ('\n', asm_out_file);
7495 		}
7496 	      else
7497 		process_epilogue ();
7498 	    }
7499 	  else
7500 	    abort ();
7501 	}
7502       else if (GET_CODE (src) == REG
7503 	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7504 	process_epilogue ();
7505       else
7506 	abort ();
7507 
7508       return 1;
7509     }
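
  /* Examples of the cases above (illustrative): a prologue insn
     "sp = sp + (-32)" produces "\t.fframe 32", while "sp = sp + 32" or
     a copy from the hard frame pointer marks an epilogue via
     process_epilogue.  */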
7510 
7511   /* Register move we need to look at.  */
7512   if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7513     {
7514       src_regno = REGNO (src);
7515       dest_regno = REGNO (dest);
7516 
7517       switch (src_regno)
7518 	{
7519 	case BR_REG (0):
7520 	  /* Saving return address pointer.  */
7521 	  if (dest_regno != current_frame_info.reg_save_b0)
7522 	    abort ();
7523 	  fprintf (asm_out_file, "\t.save rp, r%d\n",
7524 		   ia64_dbx_register_number (dest_regno));
7525 	  return 1;
7526 
7527 	case PR_REG (0):
7528 	  if (dest_regno != current_frame_info.reg_save_pr)
7529 	    abort ();
7530 	  fprintf (asm_out_file, "\t.save pr, r%d\n",
7531 		   ia64_dbx_register_number (dest_regno));
7532 	  return 1;
7533 
7534 	case AR_UNAT_REGNUM:
7535 	  if (dest_regno != current_frame_info.reg_save_ar_unat)
7536 	    abort ();
7537 	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7538 		   ia64_dbx_register_number (dest_regno));
7539 	  return 1;
7540 
7541 	case AR_LC_REGNUM:
7542 	  if (dest_regno != current_frame_info.reg_save_ar_lc)
7543 	    abort ();
7544 	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7545 		   ia64_dbx_register_number (dest_regno));
7546 	  return 1;
7547 
7548 	case STACK_POINTER_REGNUM:
7549 	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
7550 	      || ! frame_pointer_needed)
7551 	    abort ();
7552 	  fprintf (asm_out_file, "\t.vframe r%d\n",
7553 		   ia64_dbx_register_number (dest_regno));
7554 	  return 1;
7555 
7556 	default:
7557 	  /* Everything else should indicate being stored to memory.  */
7558 	  abort ();
7559 	}
7560     }
7561 
7562   /* Memory store we need to look at.  */
7563   if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7564     {
7565       long off;
7566       rtx base;
7567       const char *saveop;
7568 
7569       if (GET_CODE (XEXP (dest, 0)) == REG)
7570 	{
7571 	  base = XEXP (dest, 0);
7572 	  off = 0;
7573 	}
7574       else if (GET_CODE (XEXP (dest, 0)) == PLUS
7575 	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7576 	{
7577 	  base = XEXP (XEXP (dest, 0), 0);
7578 	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
7579 	}
7580       else
7581 	abort ();
7582 
7583       if (base == hard_frame_pointer_rtx)
7584 	{
7585 	  saveop = ".savepsp";
7586 	  off = - off;
7587 	}
7588       else if (base == stack_pointer_rtx)
7589 	saveop = ".savesp";
7590       else
7591 	abort ();
7592 
7593       src_regno = REGNO (src);
7594       switch (src_regno)
7595 	{
7596 	case BR_REG (0):
7597 	  if (current_frame_info.reg_save_b0 != 0)
7598 	    abort ();
7599 	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7600 	  return 1;
7601 
7602 	case PR_REG (0):
7603 	  if (current_frame_info.reg_save_pr != 0)
7604 	    abort ();
7605 	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7606 	  return 1;
7607 
7608 	case AR_LC_REGNUM:
7609 	  if (current_frame_info.reg_save_ar_lc != 0)
7610 	    abort ();
7611 	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7612 	  return 1;
7613 
7614 	case AR_PFS_REGNUM:
7615 	  if (current_frame_info.reg_save_ar_pfs != 0)
7616 	    abort ();
7617 	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7618 	  return 1;
7619 
7620 	case AR_UNAT_REGNUM:
7621 	  if (current_frame_info.reg_save_ar_unat != 0)
7622 	    abort ();
7623 	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7624 	  return 1;
7625 
7626 	case GR_REG (4):
7627 	case GR_REG (5):
7628 	case GR_REG (6):
7629 	case GR_REG (7):
7630 	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
7631 		   1 << (src_regno - GR_REG (4)));
7632 	  return 1;
7633 
7634 	case BR_REG (1):
7635 	case BR_REG (2):
7636 	case BR_REG (3):
7637 	case BR_REG (4):
7638 	case BR_REG (5):
7639 	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
7640 		   1 << (src_regno - BR_REG (1)));
7641 	  return 1;
7642 
7643 	case FR_REG (2):
7644 	case FR_REG (3):
7645 	case FR_REG (4):
7646 	case FR_REG (5):
7647 	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
7648 		   1 << (src_regno - FR_REG (2)));
7649 	  return 1;
7650 
7651 	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7652 	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7653 	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7654 	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7655 	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7656 		   1 << (src_regno - FR_REG (12)));
7657 	  return 1;
7658 
7659 	default:
7660 	  return 0;
7661 	}
7662     }
7663 
7664   return 0;
7665 }
7666 
7667 
7668 /* This function looks at a single insn and emits any directives
7669    required to unwind this insn.  */
7670 void
7671 process_for_unwind_directive (asm_out_file, insn)
7672      FILE *asm_out_file;
7673      rtx insn;
7674 {
7675   if (flag_unwind_tables
7676       || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7677     {
7678       rtx pat;
7679 
7680       if (GET_CODE (insn) == NOTE
7681 	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7682 	{
7683 	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7684 
7685 	  /* Restore unwind state from immediately before the epilogue.  */
7686 	  if (need_copy_state)
7687 	    {
7688 	      fprintf (asm_out_file, "\t.body\n");
7689 	      fprintf (asm_out_file, "\t.copy_state 1\n");
7690 	      need_copy_state = false;
7691 	    }
7692 	}
7693 
7694       if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7695 	return;
7696 
7697       pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7698       if (pat)
7699 	pat = XEXP (pat, 0);
7700       else
7701 	pat = PATTERN (insn);
7702 
7703       switch (GET_CODE (pat))
7704         {
7705 	case SET:
7706 	  process_set (asm_out_file, pat);
7707 	  break;
7708 
7709 	case PARALLEL:
7710 	  {
7711 	    int par_index;
7712 	    int limit = XVECLEN (pat, 0);
7713 	    for (par_index = 0; par_index < limit; par_index++)
7714 	      {
7715 		rtx x = XVECEXP (pat, 0, par_index);
7716 		if (GET_CODE (x) == SET)
7717 		  process_set (asm_out_file, x);
7718 	      }
7719 	    break;
7720 	  }
7721 
7722 	default:
7723 	  abort ();
7724 	}
7725     }
7726 }
7727 
7728 
7729 void
7730 ia64_init_builtins ()
7731 {
7732   tree psi_type_node = build_pointer_type (integer_type_node);
7733   tree pdi_type_node = build_pointer_type (long_integer_type_node);
7734 
7735   /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7736   tree si_ftype_psi_si_si
7737     = build_function_type_list (integer_type_node,
7738 				psi_type_node, integer_type_node,
7739 				integer_type_node, NULL_TREE);
7740 
7741   /* __sync_val_compare_and_swap_di */
7742   tree di_ftype_pdi_di_di
7743     = build_function_type_list (long_integer_type_node,
7744 				pdi_type_node, long_integer_type_node,
7745 				long_integer_type_node, NULL_TREE);
7746   /* __sync_bool_compare_and_swap_di */
7747   tree si_ftype_pdi_di_di
7748     = build_function_type_list (integer_type_node,
7749 				pdi_type_node, long_integer_type_node,
7750 				long_integer_type_node, NULL_TREE);
7751   /* __sync_synchronize */
7752   tree void_ftype_void
7753     = build_function_type (void_type_node, void_list_node);
7754 
7755   /* __sync_lock_test_and_set_si */
7756   tree si_ftype_psi_si
7757     = build_function_type_list (integer_type_node,
7758 				psi_type_node, integer_type_node, NULL_TREE);
7759 
7760   /* __sync_lock_test_and_set_di */
7761   tree di_ftype_pdi_di
7762     = build_function_type_list (long_integer_type_node,
7763 				pdi_type_node, long_integer_type_node,
7764 				NULL_TREE);
7765 
7766   /* __sync_lock_release_si */
7767   tree void_ftype_psi
7768     = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7769 
7770   /* __sync_lock_release_di */
7771   tree void_ftype_pdi
7772     = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7773 
7774 #define def_builtin(name, type, code) \
7775   builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7776 
7777   def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7778 	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7779   def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7780 	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7781   def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7782 	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7783   def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7784 	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7785 
7786   def_builtin ("__sync_synchronize", void_ftype_void,
7787 	       IA64_BUILTIN_SYNCHRONIZE);
7788 
7789   def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7790 	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7791   def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7792 	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7793   def_builtin ("__sync_lock_release_si", void_ftype_psi,
7794 	       IA64_BUILTIN_LOCK_RELEASE_SI);
7795   def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7796 	       IA64_BUILTIN_LOCK_RELEASE_DI);
7797 
7798   def_builtin ("__builtin_ia64_bsp",
7799 	       build_function_type (ptr_type_node, void_list_node),
7800 	       IA64_BUILTIN_BSP);
7801 
7802   def_builtin ("__builtin_ia64_flushrs",
7803 	       build_function_type (void_type_node, void_list_node),
7804 	       IA64_BUILTIN_FLUSHRS);
7805 
7806   def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7807 	       IA64_BUILTIN_FETCH_AND_ADD_SI);
7808   def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7809 	       IA64_BUILTIN_FETCH_AND_SUB_SI);
7810   def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7811 	       IA64_BUILTIN_FETCH_AND_OR_SI);
7812   def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7813 	       IA64_BUILTIN_FETCH_AND_AND_SI);
7814   def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7815 	       IA64_BUILTIN_FETCH_AND_XOR_SI);
7816   def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7817 	       IA64_BUILTIN_FETCH_AND_NAND_SI);
7818 
7819   def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7820 	       IA64_BUILTIN_ADD_AND_FETCH_SI);
7821   def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7822 	       IA64_BUILTIN_SUB_AND_FETCH_SI);
7823   def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7824 	       IA64_BUILTIN_OR_AND_FETCH_SI);
7825   def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7826 	       IA64_BUILTIN_AND_AND_FETCH_SI);
7827   def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7828 	       IA64_BUILTIN_XOR_AND_FETCH_SI);
7829   def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7830 	       IA64_BUILTIN_NAND_AND_FETCH_SI);
7831 
7832   def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7833 	       IA64_BUILTIN_FETCH_AND_ADD_DI);
7834   def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7835 	       IA64_BUILTIN_FETCH_AND_SUB_DI);
7836   def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7837 	       IA64_BUILTIN_FETCH_AND_OR_DI);
7838   def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7839 	       IA64_BUILTIN_FETCH_AND_AND_DI);
7840   def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7841 	       IA64_BUILTIN_FETCH_AND_XOR_DI);
7842   def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7843 	       IA64_BUILTIN_FETCH_AND_NAND_DI);
7844 
7845   def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7846 	       IA64_BUILTIN_ADD_AND_FETCH_DI);
7847   def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7848 	       IA64_BUILTIN_SUB_AND_FETCH_DI);
7849   def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7850 	       IA64_BUILTIN_OR_AND_FETCH_DI);
7851   def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7852 	       IA64_BUILTIN_AND_AND_FETCH_DI);
7853   def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7854 	       IA64_BUILTIN_XOR_AND_FETCH_DI);
7855   def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7856 	       IA64_BUILTIN_NAND_AND_FETCH_DI);
7857 
7858 #undef def_builtin
7859 }
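
/* Hedged usage sketch for the builtins registered above (user code,
   not part of the compiler; `counter' and `lock' are hypothetical):

	static int counter, lock;

	int  bump (void)     { return __sync_fetch_and_add_si (&counter, 1); }
	int  try_lock (void) { return __sync_lock_test_and_set_si (&lock, 1); }
	void unlock (void)   { __sync_lock_release_si (&lock); }
*/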
7860 
7861 /* Expand fetch_and_op intrinsics.  The basic code sequence is:
7862 
7863      mf
7864      tmp = [ptr];
7865      do {
7866        ret = tmp;
7867        ar.ccv = tmp;
7868        tmp <op>= value;
7869        cmpxchgsz.acq tmp = [ptr], tmp
7870      } while (tmp != ret)
7871 */
7872 
7873 static rtx
7874 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7875      optab binoptab;
7876      enum machine_mode mode;
7877      tree arglist;
7878      rtx target;
7879 {
7880   rtx ret, label, tmp, ccv, insn, mem, value;
7881   tree arg0, arg1;
7882 
7883   arg0 = TREE_VALUE (arglist);
7884   arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7885   mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7886 #ifdef POINTERS_EXTEND_UNSIGNED
7887   if (GET_MODE(mem) != Pmode)
7888     mem = convert_memory_address (Pmode, mem);
7889 #endif
7890   value = expand_expr (arg1, NULL_RTX, mode, 0);
7891 
7892   mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7893   MEM_VOLATILE_P (mem) = 1;
7894 
7895   if (target && register_operand (target, mode))
7896     ret = target;
7897   else
7898     ret = gen_reg_rtx (mode);
7899 
7900   emit_insn (gen_mf ());
7901 
7902   /* Special case for fetchadd instructions.  */
7903   if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7904     {
7905       if (mode == SImode)
7906         insn = gen_fetchadd_acq_si (ret, mem, value);
7907       else
7908         insn = gen_fetchadd_acq_di (ret, mem, value);
7909       emit_insn (insn);
7910       return ret;
7911     }
7912 
7913   tmp = gen_reg_rtx (mode);
7914   /* ar.ccv must always be loaded with a zero-extended DImode value.  */
7915   ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7916   emit_move_insn (tmp, mem);
7917 
7918   label = gen_label_rtx ();
7919   emit_label (label);
7920   emit_move_insn (ret, tmp);
7921   convert_move (ccv, tmp, /*unsignedp=*/1);
7922 
7923   /* Perform the specific operation.  NAND is special-cased: it arrives
7924      here as one_cmpl_optab and is expanded as (~tmp) & value.  */
7925   if (binoptab == one_cmpl_optab)
7926     {
7927       tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7928       binoptab = and_optab;
7929     }
7930   tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7931 
7932   if (mode == SImode)
7933     insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7934   else
7935     insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7936   emit_insn (insn);
7937 
7938   emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7939 
7940   return ret;
7941 }
7942 
7943 /* Expand op_and_fetch intrinsics.  The basic code sequence is:
7944 
7945      mf
7946      tmp = [ptr];
7947      do {
7948        old = tmp;
7949        ar.ccv = tmp;
7950        ret = tmp <op> value;
7951        cmpxchgsz.acq tmp = [ptr], ret
7952      } while (tmp != old)
7953 */
7954 
7955 static rtx
7956 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7957      optab binoptab;
7958      enum machine_mode mode;
7959      tree arglist;
7960      rtx target;
7961 {
7962   rtx old, label, tmp, ret, ccv, insn, mem, value;
7963   tree arg0, arg1;
7964 
7965   arg0 = TREE_VALUE (arglist);
7966   arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7967   mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7968 #ifdef POINTERS_EXTEND_UNSIGNED
7969   if (GET_MODE(mem) != Pmode)
7970     mem = convert_memory_address (Pmode, mem);
7971 #endif
7972 
7973   value = expand_expr (arg1, NULL_RTX, mode, 0);
7974 
7975   mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7976   MEM_VOLATILE_P (mem) = 1;
7977 
7978   if (target && ! register_operand (target, mode))
7979     target = NULL_RTX;
7980 
7981   emit_insn (gen_mf ());
7982   tmp = gen_reg_rtx (mode);
7983   old = gen_reg_rtx (mode);
7984   /* ar.ccv must always be loaded with a zero-extended DImode value.  */
7985   ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7986 
7987   emit_move_insn (tmp, mem);
7988 
7989   label = gen_label_rtx ();
7990   emit_label (label);
7991   emit_move_insn (old, tmp);
7992   convert_move (ccv, tmp, /*unsignedp=*/1);
7993 
7994   /* Perform the specific operation.  NAND is special-cased: it arrives
7995      here as one_cmpl_optab and is expanded as (~tmp) & value.  */
7996   if (binoptab == one_cmpl_optab)
7997     {
7998       tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7999       binoptab = and_optab;
8000     }
8001   ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
8002 
8003   if (mode == SImode)
8004     insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
8005   else
8006     insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8007   emit_insn (insn);
8008 
8009   emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
8010 
8011   return ret;
8012 }
8013 
8014 /* Expand val_ and bool_compare_and_swap.  For val_ we want:
8015 
8016      ar.ccv = oldval
8017      mf
8018      cmpxchgsz.acq ret = [ptr], newval, ar.ccv
8019      return ret
8020 
8021    For bool_ it's the same except return ret == oldval.
8022 */
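
/* In user terms (an assumed example; `p', `o', `n' hypothetical):

	long v = __sync_val_compare_and_swap_di (p, o, n);
	// v is the old value of *p; the store happened iff v == o
	int ok = __sync_bool_compare_and_swap_di (p, o, n);
	// ok is nonzero iff the store happened
*/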
8023 
8024 static rtx
8025 ia64_expand_compare_and_swap (rmode, mode, boolp, arglist, target)
8026      enum machine_mode rmode;
8027      enum machine_mode mode;
8028      int boolp;
8029      tree arglist;
8030      rtx target;
8031 {
8032   tree arg0, arg1, arg2;
8033   rtx mem, old, new, ccv, tmp, insn;
8034 
8035   arg0 = TREE_VALUE (arglist);
8036   arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8037   arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8038   mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8039   old = expand_expr (arg1, NULL_RTX, mode, 0);
8040   new = expand_expr (arg2, NULL_RTX, mode, 0);
8041 
8042   mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8043   MEM_VOLATILE_P (mem) = 1;
8044 
8045   if (GET_MODE (old) != mode)
8046     old = convert_to_mode (mode, old, /*unsignedp=*/1);
8047   if (GET_MODE (new) != mode)
8048     new = convert_to_mode (mode, new, /*unsignedp=*/1);
8049 
8050   if (! register_operand (old, mode))
8051     old = copy_to_mode_reg (mode, old);
8052   if (! register_operand (new, mode))
8053     new = copy_to_mode_reg (mode, new);
8054 
8055   if (! boolp && target && register_operand (target, mode))
8056     tmp = target;
8057   else
8058     tmp = gen_reg_rtx (mode);
8059 
8060   ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8061   convert_move (ccv, old, /*unsignedp=*/1);
8062   emit_insn (gen_mf ());
8063   if (mode == SImode)
8064     insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8065   else
8066     insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8067   emit_insn (insn);
8068 
8069   if (boolp)
8070     {
8071       if (! target)
8072 	target = gen_reg_rtx (rmode);
8073       return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8074     }
8075   else
8076     return tmp;
8077 }
8078 
8079 /* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */
8080 
8081 static rtx
8082 ia64_expand_lock_test_and_set (mode, arglist, target)
8083      enum machine_mode mode;
8084      tree arglist;
8085      rtx target;
8086 {
8087   tree arg0, arg1;
8088   rtx mem, new, ret, insn;
8089 
8090   arg0 = TREE_VALUE (arglist);
8091   arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8092   mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8093   new = expand_expr (arg1, NULL_RTX, mode, 0);
8094 
8095   mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8096   MEM_VOLATILE_P (mem) = 1;
8097   if (! register_operand (new, mode))
8098     new = copy_to_mode_reg (mode, new);
8099 
8100   if (target && register_operand (target, mode))
8101     ret = target;
8102   else
8103     ret = gen_reg_rtx (mode);
8104 
8105   if (mode == SImode)
8106     insn = gen_xchgsi (ret, mem, new);
8107   else
8108     insn = gen_xchgdi (ret, mem, new);
8109   emit_insn (insn);
8110 
8111   return ret;
8112 }
8113 
8114 /* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */
8115 
8116 static rtx
8117 ia64_expand_lock_release (mode, arglist, target)
8118      enum machine_mode mode;
8119      tree arglist;
8120      rtx target ATTRIBUTE_UNUSED;
8121 {
8122   tree arg0;
8123   rtx mem;
8124 
8125   arg0 = TREE_VALUE (arglist);
8126   mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8127 
8128   mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8129   MEM_VOLATILE_P (mem) = 1;
8130 
8131   emit_move_insn (mem, const0_rtx);
8132 
8133   return const0_rtx;
8134 }
8135 
8136 rtx
8137 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
8138      tree exp;
8139      rtx target;
8140      rtx subtarget ATTRIBUTE_UNUSED;
8141      enum machine_mode mode ATTRIBUTE_UNUSED;
8142      int ignore ATTRIBUTE_UNUSED;
8143 {
8144   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8145   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8146   tree arglist = TREE_OPERAND (exp, 1);
8147   enum machine_mode rmode = VOIDmode;
8148 
8149   switch (fcode)
8150     {
8151     case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8152     case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8153       mode = SImode;
8154       rmode = SImode;
8155       break;
8156 
8157     case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8158     case IA64_BUILTIN_LOCK_RELEASE_SI:
8159     case IA64_BUILTIN_FETCH_AND_ADD_SI:
8160     case IA64_BUILTIN_FETCH_AND_SUB_SI:
8161     case IA64_BUILTIN_FETCH_AND_OR_SI:
8162     case IA64_BUILTIN_FETCH_AND_AND_SI:
8163     case IA64_BUILTIN_FETCH_AND_XOR_SI:
8164     case IA64_BUILTIN_FETCH_AND_NAND_SI:
8165     case IA64_BUILTIN_ADD_AND_FETCH_SI:
8166     case IA64_BUILTIN_SUB_AND_FETCH_SI:
8167     case IA64_BUILTIN_OR_AND_FETCH_SI:
8168     case IA64_BUILTIN_AND_AND_FETCH_SI:
8169     case IA64_BUILTIN_XOR_AND_FETCH_SI:
8170     case IA64_BUILTIN_NAND_AND_FETCH_SI:
8171       mode = SImode;
8172       break;
8173 
8174     case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8175       mode = DImode;
8176       rmode = SImode;
8177       break;
8178 
8179     case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8180       mode = DImode;
8181       rmode = DImode;
8182       break;
8183 
8184     case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8185     case IA64_BUILTIN_LOCK_RELEASE_DI:
8186     case IA64_BUILTIN_FETCH_AND_ADD_DI:
8187     case IA64_BUILTIN_FETCH_AND_SUB_DI:
8188     case IA64_BUILTIN_FETCH_AND_OR_DI:
8189     case IA64_BUILTIN_FETCH_AND_AND_DI:
8190     case IA64_BUILTIN_FETCH_AND_XOR_DI:
8191     case IA64_BUILTIN_FETCH_AND_NAND_DI:
8192     case IA64_BUILTIN_ADD_AND_FETCH_DI:
8193     case IA64_BUILTIN_SUB_AND_FETCH_DI:
8194     case IA64_BUILTIN_OR_AND_FETCH_DI:
8195     case IA64_BUILTIN_AND_AND_FETCH_DI:
8196     case IA64_BUILTIN_XOR_AND_FETCH_DI:
8197     case IA64_BUILTIN_NAND_AND_FETCH_DI:
8198       mode = DImode;
8199       break;
8200 
8201     default:
8202       break;
8203     }
8204 
8205   switch (fcode)
8206     {
8207     case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8208     case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8209       return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8210 					   target);
8211 
8212     case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8213     case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8214       return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8215 					   target);
8216 
8217     case IA64_BUILTIN_SYNCHRONIZE:
8218       emit_insn (gen_mf ());
8219       return const0_rtx;
8220 
8221     case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8222     case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8223       return ia64_expand_lock_test_and_set (mode, arglist, target);
8224 
8225     case IA64_BUILTIN_LOCK_RELEASE_SI:
8226     case IA64_BUILTIN_LOCK_RELEASE_DI:
8227       return ia64_expand_lock_release (mode, arglist, target);
8228 
8229     case IA64_BUILTIN_BSP:
8230       if (! target || ! register_operand (target, DImode))
8231 	target = gen_reg_rtx (DImode);
8232       emit_insn (gen_bsp_value (target));
8233       return target;
8234 
8235     case IA64_BUILTIN_FLUSHRS:
8236       emit_insn (gen_flushrs ());
8237       return const0_rtx;
8238 
8239     case IA64_BUILTIN_FETCH_AND_ADD_SI:
8240     case IA64_BUILTIN_FETCH_AND_ADD_DI:
8241       return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8242 
8243     case IA64_BUILTIN_FETCH_AND_SUB_SI:
8244     case IA64_BUILTIN_FETCH_AND_SUB_DI:
8245       return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8246 
8247     case IA64_BUILTIN_FETCH_AND_OR_SI:
8248     case IA64_BUILTIN_FETCH_AND_OR_DI:
8249       return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8250 
8251     case IA64_BUILTIN_FETCH_AND_AND_SI:
8252     case IA64_BUILTIN_FETCH_AND_AND_DI:
8253       return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8254 
8255     case IA64_BUILTIN_FETCH_AND_XOR_SI:
8256     case IA64_BUILTIN_FETCH_AND_XOR_DI:
8257       return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8258 
8259     case IA64_BUILTIN_FETCH_AND_NAND_SI:
8260     case IA64_BUILTIN_FETCH_AND_NAND_DI:
8261       return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8262 
8263     case IA64_BUILTIN_ADD_AND_FETCH_SI:
8264     case IA64_BUILTIN_ADD_AND_FETCH_DI:
8265       return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8266 
8267     case IA64_BUILTIN_SUB_AND_FETCH_SI:
8268     case IA64_BUILTIN_SUB_AND_FETCH_DI:
8269       return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8270 
8271     case IA64_BUILTIN_OR_AND_FETCH_SI:
8272     case IA64_BUILTIN_OR_AND_FETCH_DI:
8273       return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8274 
8275     case IA64_BUILTIN_AND_AND_FETCH_SI:
8276     case IA64_BUILTIN_AND_AND_FETCH_DI:
8277       return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8278 
8279     case IA64_BUILTIN_XOR_AND_FETCH_SI:
8280     case IA64_BUILTIN_XOR_AND_FETCH_DI:
8281       return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8282 
8283     case IA64_BUILTIN_NAND_AND_FETCH_SI:
8284     case IA64_BUILTIN_NAND_AND_FETCH_DI:
8285       return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8286 
8287     default:
8288       break;
8289     }
8290 
8291   return NULL_RTX;
8292 }
8293 
8294 /* For HP-UX IA64, aggregate parameters are passed in the most
8295    significant bits of the stack slot.  */
8296 
8297 enum direction
8298 ia64_hpux_function_arg_padding (mode, type)
8299      enum machine_mode mode;
8300      tree type;
8301 {
8302    /* Exception to normal case for structures/unions/etc.  */
8303 
8304    if (type && AGGREGATE_TYPE_P (type)
8305        && int_size_in_bytes (type) < UNITS_PER_WORD)
8306      return upward;
8307 
8308    /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
8309       hardwired to be true.  */
8310 
8311    return((mode == BLKmode
8312        ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8313           && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
8314        : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
8315       ? downward : upward);
8316 }
8317 
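/* Editor's example (an assumption, not original source): for a
   three-byte aggregate such as

     struct rgb { unsigned char r, g, b; };

   int_size_in_bytes (type) is 3, below UNITS_PER_WORD, so the early
   test above returns upward and the data lands in the most significant
   end of the slot, per the HP-UX convention described above.  */
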
8318 /* Linked list of all external functions that are to be emitted by GCC.
8319    We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8320    order to avoid putting out names that are never really used.  */
8321 
8322 struct extern_func_list
8323 {
8324   struct extern_func_list *next; /* next external */
8325   char *name;                    /* name of the external */
8326 } *extern_func_head = 0;
8327 
8328 static void
8329 ia64_hpux_add_extern_decl (name)
8330         const char *name;
8331 {
8332   struct extern_func_list *p;
8333 
8334   p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
8335   p->name = xmalloc (strlen (name) + 1);
8336   strcpy (p->name, name);
8337   p->next = extern_func_head;
8338   extern_func_head = p;
8339 }
8340 
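/* Editor's note: each call prepends, so after, say,

     ia64_hpux_add_extern_decl ("foo");
     ia64_hpux_add_extern_decl ("bar");

   the list reads bar -> foo, and ia64_hpux_asm_file_end below emits
   the names in that reverse-registration order.  */
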
8341 /* Print out the list of used global functions.  */
8342 
8343 void
8344 ia64_hpux_asm_file_end (file)
8345 	FILE *file;
8346 {
8347   while (extern_func_head)
8348     {
8349       const char *real_name;
8350       tree decl;
8351 
8352       real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
8353       decl = maybe_get_identifier (real_name);
8354 
8355       if (!decl
8356 	  || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
8357         {
8358 	  if (decl)
8359 	    TREE_ASM_WRITTEN (decl) = 1;
8360 	  (*targetm.asm_out.globalize_label) (file, extern_func_head->name);
8361 	  fprintf (file, "%s", TYPE_ASM_OP);
8362 	  assemble_name (file, extern_func_head->name);
8363 	  putc (',', file);
8364 	  fprintf (file, TYPE_OPERAND_FMT, "function");
8365 	  putc ('\n', file);
8366         }
8367       extern_func_head = extern_func_head->next;
8368     }
8369 }
8370 
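/* Editor's sketch of the directives the loop above produces (an
   assumption; the exact text depends on the target's globalize_label
   hook, TYPE_ASM_OP and TYPE_OPERAND_FMT):

     .global memcpy
     .type   memcpy, @function

   One such pair is emitted for each extern that was referenced but
   never actually written out.  */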
8371 
8372 /* Switch to the section to which we should output X.  The only thing
8373    special we do here is to honor small data.  */
8374 
8375 static void
8376 ia64_select_rtx_section (mode, x, align)
8377      enum machine_mode mode;
8378      rtx x;
8379      unsigned HOST_WIDE_INT align;
8380 {
8381   if (GET_MODE_SIZE (mode) > 0
8382       && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8383     sdata_section ();
8384   else
8385     default_elf_select_rtx_section (mode, x, align);
8386 }
8387 
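/* Editor's note: for example, an 8-byte DImode constant-pool entry is
   placed in .sdata whenever ia64_section_threshold is at least 8,
   keeping it reachable with a cheap gp-relative access; larger (or
   zero-sized) entries fall through to the generic ELF placement.  */
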
8388 /* It is illegal to have relocations in shared segments on AIX and HP-UX.
8389    Pretend flag_pic is always set.  */
8390 
8391 static void
8392 ia64_rwreloc_select_section (exp, reloc, align)
8393      tree exp;
8394      int reloc;
8395      unsigned HOST_WIDE_INT align;
8396 {
8397   default_elf_select_section_1 (exp, reloc, align, true);
8398 }
8399 
8400 static void
8401 ia64_rwreloc_unique_section (decl, reloc)
8402      tree decl;
8403      int reloc;
8404 {
8405   default_unique_section_1 (decl, reloc, true);
8406 }
8407 
8408 static void
8409 ia64_rwreloc_select_rtx_section (mode, x, align)
8410      enum machine_mode mode;
8411      rtx x;
8412      unsigned HOST_WIDE_INT align;
8413 {
8414   int save_pic = flag_pic;
8415   flag_pic = 1;
8416   ia64_select_rtx_section (mode, x, align);
8417   flag_pic = save_pic;
8418 }
8419 
8420 static unsigned int
8421 ia64_rwreloc_section_type_flags (decl, name, reloc)
8422      tree decl;
8423      const char *name;
8424      int reloc;
8425 {
8426   return default_section_type_flags_1 (decl, name, reloc, true);
8427 }
8428 
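/* Editor's note: the _rwreloc variants above are thin wrappers that
   call the generic ELF hooks with their trailing boolean forced true,
   or force flag_pic on around ia64_select_rtx_section, so anything
   that needs a runtime relocation is steered into a writable
   section.  */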
8429 
8430 /* Output the assembler code for a thunk function.  THUNK_DECL is the
8431    declaration for the thunk function itself, FUNCTION is the decl for
8432    the target function.  DELTA is an immediate constant offset to be
8433    added to THIS.  If VCALL_OFFSET is non-zero, the word at
8434    *(*this + vcall_offset) should be added to THIS.  */
8435 
8436 static void
8437 ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
8438      FILE *file;
8439      tree thunk ATTRIBUTE_UNUSED;
8440      HOST_WIDE_INT delta;
8441      HOST_WIDE_INT vcall_offset;
8442      tree function;
8443 {
8444   rtx this, insn, funexp;
8445 
8446   reload_completed = 1;
8447   no_new_pseudos = 1;
8448 
8449   /* Set things up as ia64_expand_prologue might.  */
8450   last_scratch_gr_reg = 15;
8451 
8452   memset (&current_frame_info, 0, sizeof (current_frame_info));
8453   current_frame_info.spill_cfa_off = -16;
8454   current_frame_info.n_input_regs = 1;
8455   current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8456 
8457   if (!TARGET_REG_NAMES)
8458     reg_names[IN_REG (0)] = ia64_reg_numbers[0];
8459 
8460   /* Mark the end of the (empty) prologue.  */
8461   emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8462 
8463   this = gen_rtx_REG (Pmode, IN_REG (0));
8464   if (TARGET_ILP32)
8465     emit_insn (gen_ptr_extend (this,
8466 			       gen_rtx_REG (ptr_mode, IN_REG (0))));
8467 
8468   /* Apply the constant offset, if required.  */
8469   if (delta)
8470     {
8471       rtx delta_rtx = GEN_INT (delta);
8472 
8473       if (!CONST_OK_FOR_I (delta))
8474 	{
8475 	  rtx tmp = gen_rtx_REG (Pmode, 2);
8476 	  emit_move_insn (tmp, delta_rtx);
8477 	  delta_rtx = tmp;
8478 	}
8479       emit_insn (gen_adddi3 (this, this, delta_rtx));
8480     }
8481 
8482   /* Apply the offset from the vtable, if required.  */
8483   if (vcall_offset)
8484     {
8485       rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8486       rtx tmp = gen_rtx_REG (Pmode, 2);
8487 
8488       if (TARGET_ILP32)
8489 	{
8490 	  rtx t = gen_rtx_REG (ptr_mode, 2);
8491 	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8492 	  emit_insn (gen_ptr_extend (tmp, t));
8493 	}
8494       else
8495 	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8496 
8497       if (!CONST_OK_FOR_J (vcall_offset))
8498 	{
8499 	  rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8500 	  emit_move_insn (tmp2, vcall_offset_rtx);
8501 	  vcall_offset_rtx = tmp2;
8502 	}
8503       emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8504 
8505       if (TARGET_ILP32)
8506 	emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8507 			gen_rtx_MEM (ptr_mode, tmp));
8508       else
8509 	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8510 
8511       emit_insn (gen_adddi3 (this, this, tmp));
8512     }
8513 
8514   /* Generate a tail call to the target function.  */
8515   if (! TREE_USED (function))
8516     {
8517       assemble_external (function);
8518       TREE_USED (function) = 1;
8519     }
8520   funexp = XEXP (DECL_RTL (function), 0);
8521   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8522   ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8523   insn = get_last_insn ();
8524   SIBLING_CALL_P (insn) = 1;
8525 
8526   /* Code generation for calls relies on splitting.  */
8527   reload_completed = 1;
8528   try_split (PATTERN (insn), insn, 0);
8529 
8530   emit_barrier ();
8531 
8532   /* Run just enough of rest_of_compilation to get the insns emitted.
8533      There's not really enough bulk here to make other passes such as
8534      instruction scheduling worthwhile.  Note that use_thunk calls
8535      assemble_start_function and assemble_end_function.  */
8536 
8537   insn = get_insns ();
8538   emit_all_insn_group_barriers (NULL, insn);
8539   shorten_branches (insn);
8540   final_start_function (insn, file, 1);
8541   final (insn, file, 1, 0);
8542   final_end_function ();
8543 
8544   reload_completed = 0;
8545   no_new_pseudos = 0;
8546 }
8547 
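/* Editor's illustration (an assumption; no C++ source appears in this
   file): the thunk built above is what a secondary-base vtable slot
   points at in a case like

     struct A { virtual void f (); };
     struct B { virtual void g (); };
     struct C : A, B { void g (); };

   where calling g through a B* must first adjust `this' by DELTA (and,
   for virtual bases, by a vtable-loaded VCALL_OFFSET) before
   tail-calling C::g.  */
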
8548 #include "gt-ia64.h"
8549