1 /* Definitions of target machine for GNU compiler.
2    Copyright (C) 1999-2017 Free Software Foundation, Inc.
3    Contributed by James E. Wilson <wilson@cygnus.com> and
4 		  David Mosberger <davidm@hpl.hp.com>.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12 
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "cfghooks.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "emit-rtl.h"
37 #include "recog.h"
38 #include "diagnostic-core.h"
39 #include "alias.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "output.h"
45 #include "insn-attr.h"
46 #include "flags.h"
47 #include "explow.h"
48 #include "expr.h"
49 #include "cfgrtl.h"
50 #include "libfuncs.h"
51 #include "sched-int.h"
52 #include "common/common-target.h"
53 #include "langhooks.h"
54 #include "gimplify.h"
55 #include "intl.h"
56 #include "debug.h"
57 #include "params.h"
58 #include "dbgcnt.h"
59 #include "tm-constrs.h"
60 #include "sel-sched.h"
61 #include "reload.h"
62 #include "opts.h"
63 #include "dumpfile.h"
64 #include "builtins.h"
65 
66 /* This file should be included last.  */
67 #include "target-def.h"
68 
69 /* This is used for communication between ASM_OUTPUT_LABEL and
70    ASM_OUTPUT_LABELREF.  */
71 int ia64_asm_output_label = 0;
72 
73 /* Register names for ia64_expand_prologue.  */
74 static const char * const ia64_reg_numbers[96] =
75 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
76   "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
77   "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
78   "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
79   "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
80   "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
81   "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
82   "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
83   "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
84   "r104","r105","r106","r107","r108","r109","r110","r111",
85   "r112","r113","r114","r115","r116","r117","r118","r119",
86   "r120","r121","r122","r123","r124","r125","r126","r127"};
87 
88 /* ??? These strings could be shared with REGISTER_NAMES.  */
89 static const char * const ia64_input_reg_names[8] =
90 { "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
91 
92 /* ??? These strings could be shared with REGISTER_NAMES.  */
93 static const char * const ia64_local_reg_names[80] =
94 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
95   "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
96   "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
97   "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
98   "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
99   "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
100   "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
101   "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
102   "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
103   "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
104 
105 /* ??? These strings could be shared with REGISTER_NAMES.  */
106 static const char * const ia64_output_reg_names[8] =
107 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
108 
109 /* Variables which are this size or smaller are put in the sdata/sbss
110    sections.  */
111 
112 unsigned int ia64_section_threshold;
113 
114 /* The following variable is used by the DFA insn scheduler.  The value is
115    TRUE if we do insn bundling instead of insn scheduling.  */
116 int bundling_p = 0;
117 
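/* Symbolic names for the frame-related registers; they index the r[] array
   in struct ia64_frame_info and emitted_frame_related_regs below.  */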
118 enum ia64_frame_regs
119 {
120    reg_fp,
121    reg_save_b0,
122    reg_save_pr,
123    reg_save_ar_pfs,
124    reg_save_ar_unat,
125    reg_save_ar_lc,
126    reg_save_gp,
127    number_of_ia64_frame_regs
128 };
129 
130 /* Structure to be filled in by ia64_compute_frame_size with register
131    save masks and offsets for the current function.  */
132 
133 struct ia64_frame_info
134 {
135   HOST_WIDE_INT total_size;	/* size of the stack frame, not including
136 				   the caller's scratch area.  */
137   HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
138   HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
139   HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
140   HARD_REG_SET mask;		/* mask of saved registers.  */
141   unsigned int gr_used_mask;	/* mask of registers in use as gr spill
142 				   registers or long-term scratches.  */
143   int n_spilled;		/* number of spilled registers.  */
144   int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
145   int n_input_regs;		/* number of input registers used.  */
146   int n_local_regs;		/* number of local registers used.  */
147   int n_output_regs;		/* number of output registers used.  */
148   int n_rotate_regs;		/* number of rotating registers used.  */
149 
150   char need_regstk;		/* true if a .regstk directive needed.  */
151   char initialized;		/* true if the data is finalized.  */
152 };
153 
154 /* Current frame information calculated by ia64_compute_frame_size.  */
155 static struct ia64_frame_info current_frame_info;
156 /* The actual registers that are emitted.  */
157 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
158 
159 static int ia64_first_cycle_multipass_dfa_lookahead (void);
160 static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
161 static void ia64_init_dfa_pre_cycle_insn (void);
162 static rtx ia64_dfa_pre_cycle_insn (void);
163 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
164 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
165 static void ia64_h_i_d_extended (void);
166 static void * ia64_alloc_sched_context (void);
167 static void ia64_init_sched_context (void *, bool);
168 static void ia64_set_sched_context (void *);
169 static void ia64_clear_sched_context (void *);
170 static void ia64_free_sched_context (void *);
171 static int ia64_mode_to_int (machine_mode);
172 static void ia64_set_sched_flags (spec_info_t);
173 static ds_t ia64_get_insn_spec_ds (rtx_insn *);
174 static ds_t ia64_get_insn_checked_ds (rtx_insn *);
175 static bool ia64_skip_rtx_p (const_rtx);
176 static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
177 static bool ia64_needs_block_p (ds_t);
178 static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
179 static int ia64_spec_check_p (rtx);
180 static int ia64_spec_check_src_p (rtx);
181 static rtx gen_tls_get_addr (void);
182 static rtx gen_thread_pointer (void);
183 static int find_gr_spill (enum ia64_frame_regs, int);
184 static int next_scratch_gr_reg (void);
185 static void mark_reg_gr_used_mask (rtx, void *);
186 static void ia64_compute_frame_size (HOST_WIDE_INT);
187 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
188 static void finish_spill_pointers (void);
189 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
190 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
191 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
192 static rtx gen_movdi_x (rtx, rtx, rtx);
193 static rtx gen_fr_spill_x (rtx, rtx, rtx);
194 static rtx gen_fr_restore_x (rtx, rtx, rtx);
195 
196 static void ia64_option_override (void);
197 static bool ia64_can_eliminate (const int, const int);
198 static machine_mode hfa_element_mode (const_tree, bool);
199 static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
200 					 tree, int *, int);
201 static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
202 				   tree, bool);
203 static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
204 				const_tree, bool, bool);
205 static rtx ia64_function_arg (cumulative_args_t, machine_mode,
206 			      const_tree, bool);
207 static rtx ia64_function_incoming_arg (cumulative_args_t,
208 				       machine_mode, const_tree, bool);
209 static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
210 				       const_tree, bool);
211 static unsigned int ia64_function_arg_boundary (machine_mode,
212 						const_tree);
213 static bool ia64_function_ok_for_sibcall (tree, tree);
214 static bool ia64_return_in_memory (const_tree, const_tree);
215 static rtx ia64_function_value (const_tree, const_tree, bool);
216 static rtx ia64_libcall_value (machine_mode, const_rtx);
217 static bool ia64_function_value_regno_p (const unsigned int);
218 static int ia64_register_move_cost (machine_mode, reg_class_t,
219                                     reg_class_t);
220 static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
221 				  bool);
222 static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
223 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
224 static void fix_range (const char *);
225 static struct machine_function * ia64_init_machine_status (void);
226 static void emit_insn_group_barriers (FILE *);
227 static void emit_all_insn_group_barriers (FILE *);
228 static void final_emit_insn_group_barriers (FILE *);
229 static void emit_predicate_relation_info (void);
230 static void ia64_reorg (void);
231 static bool ia64_in_small_data_p (const_tree);
232 static void process_epilogue (FILE *, rtx, bool, bool);
233 
234 static bool ia64_assemble_integer (rtx, unsigned int, int);
235 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
236 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
237 static void ia64_output_function_end_prologue (FILE *);
238 
239 static void ia64_print_operand (FILE *, rtx, int);
240 static void ia64_print_operand_address (FILE *, machine_mode, rtx);
241 static bool ia64_print_operand_punct_valid_p (unsigned char code);
242 
243 static int ia64_issue_rate (void);
244 static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
245 static void ia64_sched_init (FILE *, int, int);
246 static void ia64_sched_init_global (FILE *, int, int);
247 static void ia64_sched_finish_global (FILE *, int);
248 static void ia64_sched_finish (FILE *, int);
249 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
250 static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
251 static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
252 static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
253 
254 static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
255 static void ia64_asm_emit_except_personality (rtx);
256 static void ia64_asm_init_sections (void);
257 
258 static enum unwind_info_type ia64_debug_unwind_info (void);
259 
260 static struct bundle_state *get_free_bundle_state (void);
261 static void free_bundle_state (struct bundle_state *);
262 static void initiate_bundle_states (void);
263 static void finish_bundle_states (void);
264 static int insert_bundle_state (struct bundle_state *);
265 static void initiate_bundle_state_table (void);
266 static void finish_bundle_state_table (void);
267 static int try_issue_nops (struct bundle_state *, int);
268 static int try_issue_insn (struct bundle_state *, rtx);
269 static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
270 				 int, int);
271 static int get_max_pos (state_t);
272 static int get_template (state_t, int);
273 
274 static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
275 static bool important_for_bundling_p (rtx_insn *);
276 static bool unknown_for_bundling_p (rtx_insn *);
277 static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
278 
279 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
280 				  HOST_WIDE_INT, tree);
281 static void ia64_file_start (void);
282 static void ia64_globalize_decl_name (FILE *, tree);
283 
284 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
285 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
286 static section *ia64_select_rtx_section (machine_mode, rtx,
287 					 unsigned HOST_WIDE_INT);
288 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
289      ATTRIBUTE_UNUSED;
290 static unsigned int ia64_section_type_flags (tree, const char *, int);
291 static void ia64_init_libfuncs (void)
292      ATTRIBUTE_UNUSED;
293 static void ia64_hpux_init_libfuncs (void)
294      ATTRIBUTE_UNUSED;
295 static void ia64_sysv4_init_libfuncs (void)
296      ATTRIBUTE_UNUSED;
297 static void ia64_vms_init_libfuncs (void)
298      ATTRIBUTE_UNUSED;
299 static void ia64_soft_fp_init_libfuncs (void)
300      ATTRIBUTE_UNUSED;
301 static bool ia64_vms_valid_pointer_mode (machine_mode mode)
302      ATTRIBUTE_UNUSED;
303 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
304      ATTRIBUTE_UNUSED;
305 
306 static bool ia64_attribute_takes_identifier_p (const_tree);
307 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
308 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
309 static void ia64_encode_section_info (tree, rtx, int);
310 static rtx ia64_struct_value_rtx (tree, int);
311 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
312 static bool ia64_scalar_mode_supported_p (machine_mode mode);
313 static bool ia64_vector_mode_supported_p (machine_mode mode);
314 static bool ia64_legitimate_constant_p (machine_mode, rtx);
315 static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
316 static bool ia64_cannot_force_const_mem (machine_mode, rtx);
317 static const char *ia64_mangle_type (const_tree);
318 static const char *ia64_invalid_conversion (const_tree, const_tree);
319 static const char *ia64_invalid_unary_op (int, const_tree);
320 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
321 static machine_mode ia64_c_mode_for_suffix (char);
322 static void ia64_trampoline_init (rtx, tree, rtx);
323 static void ia64_override_options_after_change (void);
324 static bool ia64_member_type_forces_blk (const_tree, machine_mode);
325 
326 static tree ia64_fold_builtin (tree, int, tree *, bool);
327 static tree ia64_builtin_decl (unsigned, bool);
328 
329 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
330 static machine_mode ia64_get_reg_raw_mode (int regno);
331 static section * ia64_hpux_function_section (tree, enum node_frequency,
332 					     bool, bool);
333 
334 static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
335 					      const unsigned char *sel);
336 
337 #define MAX_VECT_LEN	8
338 
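/* Describes one constant vector permutation for the vec_perm expanders:
   the destination and source operands, the element selector, the vector
   mode and element count, plus flags noting whether both inputs are the
   same operand and whether we are only testing for expandability.  */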
339 struct expand_vec_perm_d
340 {
341   rtx target, op0, op1;
342   unsigned char perm[MAX_VECT_LEN];
343   machine_mode vmode;
344   unsigned char nelt;
345   bool one_operand_p;
346   bool testing_p;
347 };
348 
349 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
350 
351 
352 /* Table of valid machine attributes.  */
353 static const struct attribute_spec ia64_attribute_table[] =
354 {
355   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
356        affects_type_identity } */
357   { "syscall_linkage", 0, 0, false, true,  true,  NULL, false },
358   { "model",	       1, 1, true, false, false, ia64_handle_model_attribute,
359     false },
360 #if TARGET_ABI_OPEN_VMS
361   { "common_object",   1, 1, true, false, false,
362     ia64_vms_common_object_attribute, false },
363 #endif
364   { "version_id",      1, 1, true, false, false,
365     ia64_handle_version_id_attribute, false },
366   { NULL,	       0, 0, false, false, false, NULL, false }
367 };
368 
369 /* Initialize the GCC target structure.  */
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
372 
373 #undef TARGET_INIT_BUILTINS
374 #define TARGET_INIT_BUILTINS ia64_init_builtins
375 
376 #undef TARGET_FOLD_BUILTIN
377 #define TARGET_FOLD_BUILTIN ia64_fold_builtin
378 
379 #undef TARGET_EXPAND_BUILTIN
380 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
381 
382 #undef TARGET_BUILTIN_DECL
383 #define TARGET_BUILTIN_DECL ia64_builtin_decl
384 
385 #undef TARGET_ASM_BYTE_OP
386 #define TARGET_ASM_BYTE_OP "\tdata1\t"
387 #undef TARGET_ASM_ALIGNED_HI_OP
388 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
389 #undef TARGET_ASM_ALIGNED_SI_OP
390 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
391 #undef TARGET_ASM_ALIGNED_DI_OP
392 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
393 #undef TARGET_ASM_UNALIGNED_HI_OP
394 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
395 #undef TARGET_ASM_UNALIGNED_SI_OP
396 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
397 #undef TARGET_ASM_UNALIGNED_DI_OP
398 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
399 #undef TARGET_ASM_INTEGER
400 #define TARGET_ASM_INTEGER ia64_assemble_integer
401 
402 #undef TARGET_OPTION_OVERRIDE
403 #define TARGET_OPTION_OVERRIDE ia64_option_override
404 
405 #undef TARGET_ASM_FUNCTION_PROLOGUE
406 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
407 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
408 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
411 
412 #undef TARGET_PRINT_OPERAND
413 #define TARGET_PRINT_OPERAND ia64_print_operand
414 #undef TARGET_PRINT_OPERAND_ADDRESS
415 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
416 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
417 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
418 
419 #undef TARGET_IN_SMALL_DATA_P
420 #define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
421 
422 #undef TARGET_SCHED_ADJUST_COST
423 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
424 #undef TARGET_SCHED_ISSUE_RATE
425 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
426 #undef TARGET_SCHED_VARIABLE_ISSUE
427 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
428 #undef TARGET_SCHED_INIT
429 #define TARGET_SCHED_INIT ia64_sched_init
430 #undef TARGET_SCHED_FINISH
431 #define TARGET_SCHED_FINISH ia64_sched_finish
432 #undef TARGET_SCHED_INIT_GLOBAL
433 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
434 #undef TARGET_SCHED_FINISH_GLOBAL
435 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
436 #undef TARGET_SCHED_REORDER
437 #define TARGET_SCHED_REORDER ia64_sched_reorder
438 #undef TARGET_SCHED_REORDER2
439 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
440 
441 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
442 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
443 
444 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
445 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
446 
447 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
448 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
449 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
450 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
451 
452 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
453 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
454   ia64_first_cycle_multipass_dfa_lookahead_guard
455 
456 #undef TARGET_SCHED_DFA_NEW_CYCLE
457 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
458 
459 #undef TARGET_SCHED_H_I_D_EXTENDED
460 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
461 
462 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
463 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
464 
465 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
466 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
467 
468 #undef TARGET_SCHED_SET_SCHED_CONTEXT
469 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
470 
471 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
472 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
473 
474 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
475 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
476 
477 #undef TARGET_SCHED_SET_SCHED_FLAGS
478 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
479 
480 #undef TARGET_SCHED_GET_INSN_SPEC_DS
481 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
482 
483 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
484 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
485 
486 #undef TARGET_SCHED_SPECULATE_INSN
487 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
488 
489 #undef TARGET_SCHED_NEEDS_BLOCK_P
490 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
491 
492 #undef TARGET_SCHED_GEN_SPEC_CHECK
493 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
494 
495 #undef TARGET_SCHED_SKIP_RTX_P
496 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
497 
498 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
499 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
500 #undef TARGET_ARG_PARTIAL_BYTES
501 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
502 #undef TARGET_FUNCTION_ARG
503 #define TARGET_FUNCTION_ARG ia64_function_arg
504 #undef TARGET_FUNCTION_INCOMING_ARG
505 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
506 #undef TARGET_FUNCTION_ARG_ADVANCE
507 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
508 #undef TARGET_FUNCTION_ARG_BOUNDARY
509 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
510 
511 #undef TARGET_ASM_OUTPUT_MI_THUNK
512 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
513 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
514 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
515 
516 #undef TARGET_ASM_FILE_START
517 #define TARGET_ASM_FILE_START ia64_file_start
518 
519 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
520 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
521 
522 #undef TARGET_REGISTER_MOVE_COST
523 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
524 #undef TARGET_MEMORY_MOVE_COST
525 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
526 #undef TARGET_RTX_COSTS
527 #define TARGET_RTX_COSTS ia64_rtx_costs
528 #undef TARGET_ADDRESS_COST
529 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
530 
531 #undef TARGET_UNSPEC_MAY_TRAP_P
532 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
533 
534 #undef TARGET_MACHINE_DEPENDENT_REORG
535 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
536 
537 #undef TARGET_ENCODE_SECTION_INFO
538 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
539 
540 #undef  TARGET_SECTION_TYPE_FLAGS
541 #define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
542 
543 #ifdef HAVE_AS_TLS
544 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
545 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
546 #endif
547 
548 /* ??? Investigate.  */
549 #if 0
550 #undef TARGET_PROMOTE_PROTOTYPES
551 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
552 #endif
553 
554 #undef TARGET_FUNCTION_VALUE
555 #define TARGET_FUNCTION_VALUE ia64_function_value
556 #undef TARGET_LIBCALL_VALUE
557 #define TARGET_LIBCALL_VALUE ia64_libcall_value
558 #undef TARGET_FUNCTION_VALUE_REGNO_P
559 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
560 
561 #undef TARGET_STRUCT_VALUE_RTX
562 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
563 #undef TARGET_RETURN_IN_MEMORY
564 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
565 #undef TARGET_SETUP_INCOMING_VARARGS
566 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
567 #undef TARGET_STRICT_ARGUMENT_NAMING
568 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
569 #undef TARGET_MUST_PASS_IN_STACK
570 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
571 #undef TARGET_GET_RAW_RESULT_MODE
572 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
573 #undef TARGET_GET_RAW_ARG_MODE
574 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
575 
576 #undef TARGET_MEMBER_TYPE_FORCES_BLK
577 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
578 
579 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
580 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
581 
582 #undef TARGET_ASM_UNWIND_EMIT
583 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
584 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
585 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY  ia64_asm_emit_except_personality
586 #undef TARGET_ASM_INIT_SECTIONS
587 #define TARGET_ASM_INIT_SECTIONS  ia64_asm_init_sections
588 
589 #undef TARGET_DEBUG_UNWIND_INFO
590 #define TARGET_DEBUG_UNWIND_INFO  ia64_debug_unwind_info
591 
592 #undef TARGET_SCALAR_MODE_SUPPORTED_P
593 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
594 #undef TARGET_VECTOR_MODE_SUPPORTED_P
595 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
596 
597 #undef TARGET_LEGITIMATE_CONSTANT_P
598 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
599 #undef TARGET_LEGITIMATE_ADDRESS_P
600 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
601 
602 #undef TARGET_LRA_P
603 #define TARGET_LRA_P hook_bool_void_false
604 
605 #undef TARGET_CANNOT_FORCE_CONST_MEM
606 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
607 
608 #undef TARGET_MANGLE_TYPE
609 #define TARGET_MANGLE_TYPE ia64_mangle_type
610 
611 #undef TARGET_INVALID_CONVERSION
612 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
613 #undef TARGET_INVALID_UNARY_OP
614 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
615 #undef TARGET_INVALID_BINARY_OP
616 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
617 
618 #undef TARGET_C_MODE_FOR_SUFFIX
619 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
620 
621 #undef TARGET_CAN_ELIMINATE
622 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
623 
624 #undef TARGET_TRAMPOLINE_INIT
625 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
626 
627 #undef TARGET_CAN_USE_DOLOOP_P
628 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
629 #undef TARGET_INVALID_WITHIN_DOLOOP
630 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
631 
632 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
633 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
634 
635 #undef TARGET_PREFERRED_RELOAD_CLASS
636 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
637 
638 #undef TARGET_DELAY_SCHED2
639 #define TARGET_DELAY_SCHED2 true
640 
641 /* Variable tracking should be run after all optimizations which
642    change order of insns.  It also needs a valid CFG.  */
643 #undef TARGET_DELAY_VARTRACK
644 #define TARGET_DELAY_VARTRACK true
645 
646 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
647 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
648 
649 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
650 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
651 
652 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
653 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0
654 
655 struct gcc_target targetm = TARGET_INITIALIZER;
656 
657 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
658    identifier as an argument, so the front end shouldn't look it up.  */
659 
660 static bool
661 ia64_attribute_takes_identifier_p (const_tree attr_id)
662 {
663   if (is_attribute_p ("model", attr_id))
664     return true;
665 #if TARGET_ABI_OPEN_VMS
666   if (is_attribute_p ("common_object", attr_id))
667     return true;
668 #endif
669   return false;
670 }
671 
672 typedef enum
673   {
674     ADDR_AREA_NORMAL,	/* normal address area */
675     ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
676   }
677 ia64_addr_area;
678 
679 static GTY(()) tree small_ident1;
680 static GTY(()) tree small_ident2;
681 
682 static void
683 init_idents (void)
684 {
685   if (small_ident1 == 0)
686     {
687       small_ident1 = get_identifier ("small");
688       small_ident2 = get_identifier ("__small__");
689     }
690 }
691 
692 /* Retrieve the address area that has been chosen for the given decl.  */
693 
694 static ia64_addr_area
695 ia64_get_addr_area (tree decl)
696 {
697   tree model_attr;
698 
699   model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
700   if (model_attr)
701     {
702       tree id;
703 
704       init_idents ();
705       id = TREE_VALUE (TREE_VALUE (model_attr));
706       if (id == small_ident1 || id == small_ident2)
707 	return ADDR_AREA_SMALL;
708     }
709   return ADDR_AREA_NORMAL;
710 }
711 
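/* Handle a "model" attribute, e.g.

     static int foo __attribute__ ((model (small)));

   which asks for FOO to be placed in the small address area.  Only the
   "small" (or "__small__") model is recognized; anything else draws a
   warning, and the attribute is rejected for functions and for non-static
   local variables.  */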
712 static tree
713 ia64_handle_model_attribute (tree *node, tree name, tree args,
714 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
715 {
716   ia64_addr_area addr_area = ADDR_AREA_NORMAL;
717   ia64_addr_area area;
718   tree arg, decl = *node;
719 
720   init_idents ();
721   arg = TREE_VALUE (args);
722   if (arg == small_ident1 || arg == small_ident2)
723     {
724       addr_area = ADDR_AREA_SMALL;
725     }
726   else
727     {
728       warning (OPT_Wattributes, "invalid argument of %qE attribute",
729 	       name);
730       *no_add_attrs = true;
731     }
732 
733   switch (TREE_CODE (decl))
734     {
735     case VAR_DECL:
736       if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
737 	   == FUNCTION_DECL)
738 	  && !TREE_STATIC (decl))
739 	{
740 	  error_at (DECL_SOURCE_LOCATION (decl),
741 		    "an address area attribute cannot be specified for "
742 		    "local variables");
743 	  *no_add_attrs = true;
744 	}
745       area = ia64_get_addr_area (decl);
746       if (area != ADDR_AREA_NORMAL && addr_area != area)
747 	{
748 	  error ("address area of %q+D conflicts with previous "
749 		 "declaration", decl);
750 	  *no_add_attrs = true;
751 	}
752       break;
753 
754     case FUNCTION_DECL:
755       error_at (DECL_SOURCE_LOCATION (decl),
756 		"address area attribute cannot be specified for "
757 		"functions");
758       *no_add_attrs = true;
759       break;
760 
761     default:
762       warning (OPT_Wattributes, "%qE attribute ignored",
763 	       name);
764       *no_add_attrs = true;
765       break;
766     }
767 
768   return NULL_TREE;
769 }
770 
771 /* Part of the low level implementation of DEC Ada pragma Common_Object which
772    enables the shared use of variables stored in overlaid linker areas
773    corresponding to the use of Fortran COMMON.  */
774 
775 static tree
776 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
777 				  int flags ATTRIBUTE_UNUSED,
778 				  bool *no_add_attrs)
779 {
780     tree decl = *node;
781     tree id;
782 
783     gcc_assert (DECL_P (decl));
784 
785     DECL_COMMON (decl) = 1;
786     id = TREE_VALUE (args);
787     if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
788       {
789 	error ("%qE attribute requires a string constant argument", name);
790 	*no_add_attrs = true;
791 	return NULL_TREE;
792       }
793     return NULL_TREE;
794 }
795 
796 /* Part of the low level implementation of DEC Ada pragma Common_Object.  */
797 
798 void
799 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
800 				     unsigned HOST_WIDE_INT size,
801 				     unsigned int align)
802 {
803   tree attr = DECL_ATTRIBUTES (decl);
804 
805   if (attr)
806     attr = lookup_attribute ("common_object", attr);
807   if (attr)
808     {
809       tree id = TREE_VALUE (TREE_VALUE (attr));
810       const char *name;
811 
812       if (TREE_CODE (id) == IDENTIFIER_NODE)
813         name = IDENTIFIER_POINTER (id);
814       else if (TREE_CODE (id) == STRING_CST)
815         name = TREE_STRING_POINTER (id);
816       else
817         abort ();
818 
819       fprintf (file, "\t.vms_common\t\"%s\",", name);
820     }
821   else
822     fprintf (file, "%s", COMMON_ASM_OP);
823 
824   /*  Code from elfos.h.  */
825   assemble_name (file, name);
826   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
827            size, align / BITS_PER_UNIT);
828 
829   fputc ('\n', file);
830 }
831 
832 static void
833 ia64_encode_addr_area (tree decl, rtx symbol)
834 {
835   int flags;
836 
837   flags = SYMBOL_REF_FLAGS (symbol);
838   switch (ia64_get_addr_area (decl))
839     {
840     case ADDR_AREA_NORMAL: break;
841     case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
842     default: gcc_unreachable ();
843     }
844   SYMBOL_REF_FLAGS (symbol) = flags;
845 }
846 
847 static void
848 ia64_encode_section_info (tree decl, rtx rtl, int first)
849 {
850   default_encode_section_info (decl, rtl, first);
851 
852   /* Careful not to prod global register variables.  */
853   if (TREE_CODE (decl) == VAR_DECL
854       && GET_CODE (DECL_RTL (decl)) == MEM
855       && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
856       && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
857     ia64_encode_addr_area (decl, XEXP (rtl, 0));
858 }
859 
860 /* Return 1 if the operands of a move are ok.  */
861 
862 int
863 ia64_move_ok (rtx dst, rtx src)
864 {
865   /* If we're under init_recog_no_volatile, we'll not be able to use
866      memory_operand.  So check the code directly and don't worry about
867      the validity of the underlying address, which should have been
868      checked elsewhere anyway.  */
869   if (GET_CODE (dst) != MEM)
870     return 1;
871   if (GET_CODE (src) == MEM)
872     return 0;
873   if (register_operand (src, VOIDmode))
874     return 1;
875 
876   /* Otherwise, this must be a constant, and that must be either 0 or 0.0 or 1.0.  */
877   if (INTEGRAL_MODE_P (GET_MODE (dst)))
878     return src == const0_rtx;
879   else
880     return satisfies_constraint_G (src);
881 }
882 
883 /* Return 1 if the operands are ok for a floating point load pair.  */
884 
885 int
886 ia64_load_pair_ok (rtx dst, rtx src)
887 {
888   /* ??? There is a thinko in the implementation of the "x" constraint and the
889      FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
890      also return false for it.  */
891   if (GET_CODE (dst) != REG
892       || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
893     return 0;
894   if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
895     return 0;
896   switch (GET_CODE (XEXP (src, 0)))
897     {
898     case REG:
899     case POST_INC:
900       break;
901     case POST_DEC:
902       return 0;
903     case POST_MODIFY:
904       {
905 	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
906 
907 	if (GET_CODE (adjust) != CONST_INT
908 	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
909 	  return 0;
910       }
911       break;
912     default:
913       abort ();
914     }
915   return 1;
916 }
917 
918 int
919 addp4_optimize_ok (rtx op1, rtx op2)
920 {
921   return (basereg_operand (op1, GET_MODE (op1)) !=
922 	  basereg_operand (op2, GET_MODE (op2)));
923 }
924 
925 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
926    Return the length of the field, or <= 0 on failure.  */
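/* For instance, a mask of 0xff0 with a shift count of 4 becomes 0xff after
   the shift, and exact_log2 (0xff + 1) gives a field length of 8.  A
   non-contiguous mask such as 0xf0f fails: the shifted value plus one is
   not a power of two, so exact_log2 returns -1.  */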
927 
928 int
929 ia64_depz_field_mask (rtx rop, rtx rshift)
930 {
931   unsigned HOST_WIDE_INT op = INTVAL (rop);
932   unsigned HOST_WIDE_INT shift = INTVAL (rshift);
933 
934   /* Get rid of the zero bits we're shifting in.  */
935   op >>= shift;
936 
937   /* We must now have a solid block of 1's at bit 0.  */
938   return exact_log2 (op + 1);
939 }
940 
941 /* Return the TLS model to use for ADDR.  */
942 
943 static enum tls_model
944 tls_symbolic_operand_type (rtx addr)
945 {
946   enum tls_model tls_kind = TLS_MODEL_NONE;
947 
948   if (GET_CODE (addr) == CONST)
949     {
950       if (GET_CODE (XEXP (addr, 0)) == PLUS
951 	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
952         tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
953     }
954   else if (GET_CODE (addr) == SYMBOL_REF)
955     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
956 
957   return tls_kind;
958 }
959 
960 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
961    as a base register.  */
962 
963 static inline bool
964 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
965 {
966   if (strict
967       && REGNO_OK_FOR_BASE_P (REGNO (reg)))
968     return true;
969   else if (!strict
970 	   && (GENERAL_REGNO_P (REGNO (reg))
971 	       || !HARD_REGISTER_P (reg)))
972     return true;
973   else
974     return false;
975 }
976 
977 static bool
978 ia64_legitimate_address_reg (const_rtx reg, bool strict)
979 {
980   if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
981       || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
982 	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
983     return true;
984 
985   return false;
986 }
987 
988 static bool
989 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
990 {
991   if (GET_CODE (disp) == PLUS
992       && rtx_equal_p (reg, XEXP (disp, 0))
993       && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
994 	  || (CONST_INT_P (XEXP (disp, 1))
995 	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
996     return true;
997 
998   return false;
999 }
1000 
1001 /* Implement TARGET_LEGITIMATE_ADDRESS_P.  */
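/* The only address forms accepted are a base register (possibly a SUBREG
   of one), post-increment and post-decrement of a base register, and
   post-modify of a base register by either another register or a constant
   in the range [-256, 255], as checked below.  */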
1002 
1003 static bool
1004 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
1005 			   rtx x, bool strict)
1006 {
1007   if (ia64_legitimate_address_reg (x, strict))
1008     return true;
1009   else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1010 	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1011 	   && XEXP (x, 0) != arg_pointer_rtx)
1012     return true;
1013   else if (GET_CODE (x) == POST_MODIFY
1014 	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1015 	   && XEXP (x, 0) != arg_pointer_rtx
1016 	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1017     return true;
1018   else
1019     return false;
1020 }
1021 
1022 /* Return true if X is a constant that is valid for some immediate
1023    field in an instruction.  */
1024 
1025 static bool
1026 ia64_legitimate_constant_p (machine_mode mode, rtx x)
1027 {
1028   switch (GET_CODE (x))
1029     {
1030     case CONST_INT:
1031     case LABEL_REF:
1032       return true;
1033 
1034     case CONST_DOUBLE:
1035       if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1036 	return true;
1037       return satisfies_constraint_G (x);
1038 
1039     case CONST:
1040     case SYMBOL_REF:
1041       /* ??? Short term workaround for PR 28490.  We must make the code here
1042 	 match the code in ia64_expand_move and move_operand, even though they
1043 	 are both technically wrong.  */
1044       if (tls_symbolic_operand_type (x) == 0)
1045 	{
1046 	  HOST_WIDE_INT addend = 0;
1047 	  rtx op = x;
1048 
1049 	  if (GET_CODE (op) == CONST
1050 	      && GET_CODE (XEXP (op, 0)) == PLUS
1051 	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1052 	    {
1053 	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
1054 	      op = XEXP (XEXP (op, 0), 0);
1055 	    }
1056 
1057           if (any_offset_symbol_operand (op, mode)
1058               || function_operand (op, mode))
1059             return true;
1060 	  if (aligned_offset_symbol_operand (op, mode))
1061 	    return (addend & 0x3fff) == 0;
1062 	  return false;
1063 	}
1064       return false;
1065 
1066     case CONST_VECTOR:
1067       if (mode == V2SFmode)
1068 	return satisfies_constraint_Y (x);
1069 
1070       return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1071 	      && GET_MODE_SIZE (mode) <= 8);
1072 
1073     default:
1074       return false;
1075     }
1076 }
1077 
1078 /* Don't allow TLS addresses to get spilled to memory.  */
1079 
1080 static bool
1081 ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1082 {
1083   if (mode == RFmode)
1084     return true;
1085   return tls_symbolic_operand_type (x) != 0;
1086 }
1087 
1088 /* Expand a symbolic constant load.  */
1089 
1090 bool
1091 ia64_expand_load_address (rtx dest, rtx src)
1092 {
1093   gcc_assert (GET_CODE (dest) == REG);
1094 
1095   /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
1096      having to pointer-extend the value afterward.  Other forms of address
1097      computation below are also more natural to compute as 64-bit quantities.
1098      If we've been given an SImode destination register, change it.  */
1099   if (GET_MODE (dest) != Pmode)
1100     dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1101 			       byte_lowpart_offset (Pmode, GET_MODE (dest)));
1102 
1103   if (TARGET_NO_PIC)
1104     return false;
1105   if (small_addr_symbolic_operand (src, VOIDmode))
1106     return false;
1107 
1108   if (TARGET_AUTO_PIC)
1109     emit_insn (gen_load_gprel64 (dest, src));
1110   else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1111     emit_insn (gen_load_fptr (dest, src));
1112   else if (sdata_symbolic_operand (src, VOIDmode))
1113     emit_insn (gen_load_gprel (dest, src));
1114   else if (local_symbolic_operand64 (src, VOIDmode))
1115     {
1116       /* We want to use @gprel rather than @ltoff relocations for local
1117 	 symbols:
1118 	  - @gprel does not require the dynamic linker
1119 	  - and does not use the .sdata section
1120 	 https://gcc.gnu.org/bugzilla/60465 */
1121       emit_insn (gen_load_gprel64 (dest, src));
1122     }
1123   else
1124     {
1125       HOST_WIDE_INT addend = 0;
1126       rtx tmp;
1127 
1128       /* We did split constant offsets in ia64_expand_move, and we did try
1129 	 to keep them split in move_operand, but we also allowed reload to
1130 	 rematerialize arbitrary constants rather than spill the value to
1131 	 the stack and reload it.  So we have to be prepared here to split
1132 	 them apart again.  */
1133       if (GET_CODE (src) == CONST)
1134 	{
1135 	  HOST_WIDE_INT hi, lo;
1136 
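	  /* Sign-extend the low 14 bits of the offset (bit 13 is the sign
	     bit), leaving HI a multiple of 0x4000; for example, an offset
	     of 0x6000 splits into lo = -0x2000 and hi = 0x8000.  */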
1137 	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
1138 	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1139 	  hi = hi - lo;
1140 
1141 	  if (lo != 0)
1142 	    {
1143 	      addend = lo;
1144 	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1145 	    }
1146 	}
1147 
1148       tmp = gen_rtx_HIGH (Pmode, src);
1149       tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1150       emit_insn (gen_rtx_SET (dest, tmp));
1151 
1152       tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1153       emit_insn (gen_rtx_SET (dest, tmp));
1154 
1155       if (addend)
1156 	{
1157 	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1158 	  emit_insn (gen_rtx_SET (dest, tmp));
1159 	}
1160     }
1161 
1162   return true;
1163 }
1164 
1165 static GTY(()) rtx gen_tls_tga;
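/* Return (and cache in gen_tls_tga) the libfunc symbol for __tls_get_addr.  */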
1166 static rtx
1167 gen_tls_get_addr (void)
1168 {
1169   if (!gen_tls_tga)
1170     gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1171   return gen_tls_tga;
1172 }
1173 
1174 static GTY(()) rtx thread_pointer_rtx;
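/* Return the thread pointer, which the IA-64 ABI keeps in r13.  */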
1175 static rtx
1176 gen_thread_pointer (void)
1177 {
1178   if (!thread_pointer_rtx)
1179     thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1180   return thread_pointer_rtx;
1181 }
1182 
1183 static rtx
1184 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1185 			 rtx orig_op1, HOST_WIDE_INT addend)
1186 {
1187   rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1188   rtx_insn *insns;
1189   rtx orig_op0 = op0;
1190   HOST_WIDE_INT addend_lo, addend_hi;
1191 
1192   switch (tls_kind)
1193     {
1194     case TLS_MODEL_GLOBAL_DYNAMIC:
1195       start_sequence ();
1196 
1197       tga_op1 = gen_reg_rtx (Pmode);
1198       emit_insn (gen_load_dtpmod (tga_op1, op1));
1199 
1200       tga_op2 = gen_reg_rtx (Pmode);
1201       emit_insn (gen_load_dtprel (tga_op2, op1));
1202 
1203       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1204 					 LCT_CONST, Pmode, 2, tga_op1,
1205 					 Pmode, tga_op2, Pmode);
1206 
1207       insns = get_insns ();
1208       end_sequence ();
1209 
1210       if (GET_MODE (op0) != Pmode)
1211 	op0 = tga_ret;
1212       emit_libcall_block (insns, op0, tga_ret, op1);
1213       break;
1214 
1215     case TLS_MODEL_LOCAL_DYNAMIC:
1216       /* ??? This isn't the completely proper way to do local-dynamic.
1217 	 If the call to __tls_get_addr is used only by a single symbol,
1218 	 then we should (somehow) move the dtprel to the second arg
1219 	 to avoid the extra add.  */
1220       start_sequence ();
1221 
1222       tga_op1 = gen_reg_rtx (Pmode);
1223       emit_insn (gen_load_dtpmod (tga_op1, op1));
1224 
1225       tga_op2 = const0_rtx;
1226 
1227       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1228 					 LCT_CONST, Pmode, 2, tga_op1,
1229 					 Pmode, tga_op2, Pmode);
1230 
1231       insns = get_insns ();
1232       end_sequence ();
1233 
1234       tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1235 				UNSPEC_LD_BASE);
1236       tmp = gen_reg_rtx (Pmode);
1237       emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1238 
1239       if (!register_operand (op0, Pmode))
1240 	op0 = gen_reg_rtx (Pmode);
1241       if (TARGET_TLS64)
1242 	{
1243 	  emit_insn (gen_load_dtprel (op0, op1));
1244 	  emit_insn (gen_adddi3 (op0, tmp, op0));
1245 	}
1246       else
1247 	emit_insn (gen_add_dtprel (op0, op1, tmp));
1248       break;
1249 
1250     case TLS_MODEL_INITIAL_EXEC:
1251       addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1252       addend_hi = addend - addend_lo;
1253 
1254       op1 = plus_constant (Pmode, op1, addend_hi);
1255       addend = addend_lo;
1256 
1257       tmp = gen_reg_rtx (Pmode);
1258       emit_insn (gen_load_tprel (tmp, op1));
1259 
1260       if (!register_operand (op0, Pmode))
1261 	op0 = gen_reg_rtx (Pmode);
1262       emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1263       break;
1264 
1265     case TLS_MODEL_LOCAL_EXEC:
1266       if (!register_operand (op0, Pmode))
1267 	op0 = gen_reg_rtx (Pmode);
1268 
1269       op1 = orig_op1;
1270       addend = 0;
1271       if (TARGET_TLS64)
1272 	{
1273 	  emit_insn (gen_load_tprel (op0, op1));
1274 	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1275 	}
1276       else
1277 	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1278       break;
1279 
1280     default:
1281       gcc_unreachable ();
1282     }
1283 
1284   if (addend)
1285     op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1286 			       orig_op0, 1, OPTAB_DIRECT);
1287   if (orig_op0 == op0)
1288     return NULL_RTX;
1289   if (GET_MODE (orig_op0) == Pmode)
1290     return op0;
1291   return gen_lowpart (GET_MODE (orig_op0), op0);
1292 }
1293 
1294 rtx
1295 ia64_expand_move (rtx op0, rtx op1)
1296 {
1297   machine_mode mode = GET_MODE (op0);
1298 
1299   if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1300     op1 = force_reg (mode, op1);
1301 
1302   if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1303     {
1304       HOST_WIDE_INT addend = 0;
1305       enum tls_model tls_kind;
1306       rtx sym = op1;
1307 
1308       if (GET_CODE (op1) == CONST
1309 	  && GET_CODE (XEXP (op1, 0)) == PLUS
1310 	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1311 	{
1312 	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1313 	  sym = XEXP (XEXP (op1, 0), 0);
1314 	}
1315 
1316       tls_kind = tls_symbolic_operand_type (sym);
1317       if (tls_kind)
1318 	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1319 
1320       if (any_offset_symbol_operand (sym, mode))
1321 	addend = 0;
1322       else if (aligned_offset_symbol_operand (sym, mode))
1323 	{
1324 	  HOST_WIDE_INT addend_lo, addend_hi;
1325 
1326 	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1327 	  addend_hi = addend - addend_lo;
1328 
1329 	  if (addend_lo != 0)
1330 	    {
1331 	      op1 = plus_constant (mode, sym, addend_hi);
1332 	      addend = addend_lo;
1333 	    }
1334 	  else
1335 	    addend = 0;
1336 	}
1337       else
1338 	op1 = sym;
1339 
1340       if (reload_completed)
1341 	{
1342 	  /* We really should have taken care of this offset earlier.  */
1343 	  gcc_assert (addend == 0);
1344 	  if (ia64_expand_load_address (op0, op1))
1345 	    return NULL_RTX;
1346 	}
1347 
1348       if (addend)
1349 	{
1350 	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1351 
1352 	  emit_insn (gen_rtx_SET (subtarget, op1));
1353 
1354 	  op1 = expand_simple_binop (mode, PLUS, subtarget,
1355 				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1356 	  if (op0 == op1)
1357 	    return NULL_RTX;
1358 	}
1359     }
1360 
1361   return op1;
1362 }
1363 
1364 /* Split a move from OP1 to OP0 conditional on COND.  */
1365 
1366 void
1367 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1368 {
1369   rtx_insn *insn, *first = get_last_insn ();
1370 
1371   emit_move_insn (op0, op1);
1372 
1373   for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1374     if (INSN_P (insn))
1375       PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1376 					  PATTERN (insn));
1377 }
1378 
1379 /* Split a post-reload TImode or TFmode reference into two DImode
1380    components.  This is made extra difficult by the fact that we do
1381    not get any scratch registers to work with, because reload cannot
1382    be prevented from giving us a scratch that overlaps the register
1383    pair involved.  So instead, when addressing memory, we tweak the
1384    pointer register up and back down with POST_INCs.  Or up and not
1385    back down when we can get away with it.
1386 
1387    REVERSED is true when the loads must be done in reversed order
1388    (high word first) for correctness.  DEAD is true when the pointer
1389    dies with the second insn we generate and therefore the second
1390    address must not carry a postmodify.
1391 
1392    May return an insn which is to be emitted after the moves.  */
1393 
1394 static rtx
1395 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1396 {
1397   rtx fixup = 0;
1398 
1399   switch (GET_CODE (in))
1400     {
1401     case REG:
1402       out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1403       out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1404       break;
1405 
1406     case CONST_INT:
1407     case CONST_DOUBLE:
1408       /* Cannot occur reversed.  */
1409       gcc_assert (!reversed);
1410 
1411       if (GET_MODE (in) != TFmode)
1412 	split_double (in, &out[0], &out[1]);
1413       else
1414 	/* split_double does not understand how to split a TFmode
1415 	   quantity into a pair of DImode constants.  */
1416 	{
1417 	  unsigned HOST_WIDE_INT p[2];
1418 	  long l[4];  /* TFmode is 128 bits */
1419 
1420 	  real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);
1421 
1422 	  if (FLOAT_WORDS_BIG_ENDIAN)
1423 	    {
1424 	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1425 	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1426 	    }
1427 	  else
1428 	    {
1429 	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1430 	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1431 	    }
1432 	  out[0] = GEN_INT (p[0]);
1433 	  out[1] = GEN_INT (p[1]);
1434 	}
1435       break;
1436 
1437     case MEM:
1438       {
1439 	rtx base = XEXP (in, 0);
1440 	rtx offset;
1441 
1442 	switch (GET_CODE (base))
1443 	  {
1444 	  case REG:
1445 	    if (!reversed)
1446 	      {
1447 		out[0] = adjust_automodify_address
1448 		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1449 		out[1] = adjust_automodify_address
1450 		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1451 	      }
1452 	    else
1453 	      {
1454 		/* Reversal requires a pre-increment, which can only
1455 		   be done as a separate insn.  */
1456 		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1457 		out[0] = adjust_automodify_address
1458 		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1459 		out[1] = adjust_address (in, DImode, 0);
1460 	      }
1461 	    break;
1462 
1463 	  case POST_INC:
1464 	    gcc_assert (!reversed && !dead);
1465 
1466 	    /* Just do the increment in two steps.  */
1467 	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1468 	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1469 	    break;
1470 
1471 	  case POST_DEC:
1472 	    gcc_assert (!reversed && !dead);
1473 
1474 	    /* Add 8, subtract 24.  */
1475 	    base = XEXP (base, 0);
1476 	    out[0] = adjust_automodify_address
1477 	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1478 	    out[1] = adjust_automodify_address
1479 	      (in, DImode,
1480 	       gen_rtx_POST_MODIFY (Pmode, base,
1481 				    plus_constant (Pmode, base, -24)),
1482 	       8);
1483 	    break;
1484 
1485 	  case POST_MODIFY:
1486 	    gcc_assert (!reversed && !dead);
1487 
1488 	    /* Extract and adjust the modification.  This case is
1489 	       trickier than the others, because we might have an
1490 	       index register, or we might have a combined offset that
1491 	       doesn't fit a signed 9-bit displacement field.  We can
1492 	       assume the incoming expression is already legitimate.  */
1493 	    offset = XEXP (base, 1);
1494 	    base = XEXP (base, 0);
1495 
1496 	    out[0] = adjust_automodify_address
1497 	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1498 
1499 	    if (GET_CODE (XEXP (offset, 1)) == REG)
1500 	      {
1501 		/* Can't adjust the postmodify to match.  Emit the
1502 		   original, then a separate addition insn.  */
1503 		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1504 		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1505 	      }
1506 	    else
1507 	      {
1508 		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1509 		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1510 		  {
1511 		    /* Again the postmodify cannot be made to match,
1512 		       but in this case it's more efficient to get rid
1513 		       of the postmodify entirely and fix up with an
1514 		       add insn.  */
1515 		    out[1] = adjust_automodify_address (in, DImode, base, 8);
1516 		    fixup = gen_adddi3
1517 		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1518 		  }
1519 		else
1520 		  {
1521 		    /* Combined offset still fits in the displacement field.
1522 		       (We cannot overflow it at the high end.)  */
1523 		    out[1] = adjust_automodify_address
1524 		      (in, DImode, gen_rtx_POST_MODIFY
1525 		       (Pmode, base, gen_rtx_PLUS
1526 			(Pmode, base,
1527 			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1528 		       8);
1529 		  }
1530 	      }
1531 	    break;
1532 
1533 	  default:
1534 	    gcc_unreachable ();
1535 	  }
1536 	break;
1537       }
1538 
1539     default:
1540       gcc_unreachable ();
1541     }
1542 
1543   return fixup;
1544 }
1545 
1546 /* Split a TImode or TFmode move instruction after reload.
1547    This is used by *movtf_internal and *movti_internal.  */
1548 void
1549 ia64_split_tmode_move (rtx operands[])
1550 {
1551   rtx in[2], out[2], insn;
1552   rtx fixup[2];
1553   bool dead = false;
1554   bool reversed = false;
1555 
1556   /* It is possible for reload to decide to overwrite a pointer with
1557      the value it points to.  In that case we have to do the loads in
1558      the appropriate order so that the pointer is not destroyed too
1559      early.  Also we must not generate a postmodify for that second
1560      load, or rws_access_regno will die.  And we must not generate a
1561      postmodify for the second load if the destination register
1562      overlaps with the base register.  */
1563   if (GET_CODE (operands[1]) == MEM
1564       && reg_overlap_mentioned_p (operands[0], operands[1]))
1565     {
1566       rtx base = XEXP (operands[1], 0);
1567       while (GET_CODE (base) != REG)
1568 	base = XEXP (base, 0);
1569 
1570       if (REGNO (base) == REGNO (operands[0]))
1571 	reversed = true;
1572 
1573       if (refers_to_regno_p (REGNO (operands[0]),
1574 			     REGNO (operands[0])+2,
1575 			     base, 0))
1576 	dead = true;
1577     }
1578   /* Another reason to do the moves in reversed order is if the first
1579      element of the target register pair is also the second element of
1580      the source register pair.  */
1581   if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1582       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1583     reversed = true;
1584 
1585   fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1586   fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1587 
1588 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1589   if (GET_CODE (EXP) == MEM						\
1590       && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1591 	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1592 	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1593     add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1594 
1595   insn = emit_insn (gen_rtx_SET (out[0], in[0]));
1596   MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1597   MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1598 
1599   insn = emit_insn (gen_rtx_SET (out[1], in[1]));
1600   MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1601   MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1602 
1603   if (fixup[0])
1604     emit_insn (fixup[0]);
1605   if (fixup[1])
1606     emit_insn (fixup[1]);
1607 
1608 #undef MAYBE_ADD_REG_INC_NOTE
1609 }
1610 
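/* Worked example (a sketch, not taken from the sources): if reload asks
   for

	(set (reg:TI r14) (mem:TI (reg:DI r14)))

   the base register is also the first destination register, so REVERSED
   is set and the high DImode half (at offset 8) is moved before the low
   half; otherwise the first load would clobber the pointer still needed
   by the second.  DEAD is set as well, which suppresses a postmodify on
   the second access, as the comment in ia64_split_tmode_move explains.  */
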
1611 /* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1612    through memory plus an extra GR scratch register.  Except that you can
1613    either get the first from SECONDARY_MEMORY_NEEDED or the second from
1614    SECONDARY_RELOAD_CLASS, but not both.
1615 
1616    We got into problems in the first place by allowing a construct like
1617    (subreg:XF (reg:TI)), which we got from a union containing a long double.
1618    This solution attempts to prevent this situation from occurring.  When
1619    we see something like the above, we spill the inner register to memory.  */
1620 
1621 static rtx
1622 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1623 {
1624   if (GET_CODE (in) == SUBREG
1625       && GET_MODE (SUBREG_REG (in)) == TImode
1626       && GET_CODE (SUBREG_REG (in)) == REG)
1627     {
1628       rtx memt = assign_stack_temp (TImode, 16);
1629       emit_move_insn (memt, SUBREG_REG (in));
1630       return adjust_address (memt, mode, 0);
1631     }
1632   else if (force && GET_CODE (in) == REG)
1633     {
1634       rtx memx = assign_stack_temp (mode, 16);
1635       emit_move_insn (memx, in);
1636       return memx;
1637     }
1638   else
1639     return in;
1640 }
1641 
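/* For illustration only: source along the lines of

	union { long double ld; struct { long lo, hi; } s; } u;

   where the union is copied around as a TImode value can leave the
   middle end with a (subreg:XF (reg:TI ...)) operand.  The helper above
   then spills the TImode register to a 16-byte stack slot and rereads
   the slot in XFmode, so reload is never asked to perform the
   problematic GR->FR XFmode move described above.  */
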
1642 /* Expand the movxf or movrf pattern (MODE says which) with the given
1643    OPERANDS, returning true if the pattern should then invoke
1644    DONE.  */
1645 
1646 bool
1647 ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1648 {
1649   rtx op0 = operands[0];
1650 
1651   if (GET_CODE (op0) == SUBREG)
1652     op0 = SUBREG_REG (op0);
1653 
1654   /* We must support XFmode loads into general registers for stdarg/vararg,
1655      unprototyped calls, and a rare case where a long double is passed as
1656      an argument after a float HFA fills the FP registers.  We split them into
1657      DImode loads for convenience.  We also need to support XFmode stores
1658      for the last case.  This case does not happen for stdarg/vararg routines,
1659      because we do a block store to memory of unnamed arguments.  */
1660 
1661   if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1662     {
1663       rtx out[2];
1664 
1665       /* We're hoping to transform everything that deals with XFmode
1666 	 quantities and GR registers early in the compiler.  */
1667       gcc_assert (can_create_pseudo_p ());
1668 
1669       /* Struct to register can just use TImode instead.  */
1670       if ((GET_CODE (operands[1]) == SUBREG
1671 	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1672 	  || (GET_CODE (operands[1]) == REG
1673 	      && GR_REGNO_P (REGNO (operands[1]))))
1674 	{
1675 	  rtx op1 = operands[1];
1676 
1677 	  if (GET_CODE (op1) == SUBREG)
1678 	    op1 = SUBREG_REG (op1);
1679 	  else
1680 	    op1 = gen_rtx_REG (TImode, REGNO (op1));
1681 
1682 	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1683 	  return true;
1684 	}
1685 
1686       if (GET_CODE (operands[1]) == CONST_DOUBLE)
1687 	{
1688 	  /* Don't word-swap when reading in the constant.  */
1689 	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1690 			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
1691 					   0, mode));
1692 	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1693 			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1694 					   0, mode));
1695 	  return true;
1696 	}
1697 
1698       /* If the quantity is in a register not known to be GR, spill it.  */
1699       if (register_operand (operands[1], mode))
1700 	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1701 
1702       gcc_assert (GET_CODE (operands[1]) == MEM);
1703 
1704       /* Don't word-swap when reading in the value.  */
1705       out[0] = gen_rtx_REG (DImode, REGNO (op0));
1706       out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1707 
1708       emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1709       emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1710       return true;
1711     }
1712 
1713   if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1714     {
1715       /* We're hoping to transform everything that deals with XFmode
1716 	 quantities and GR registers early in the compiler.  */
1717       gcc_assert (can_create_pseudo_p ());
1718 
1719       /* Op0 can't be a GR_REG here, as that case is handled above.
1720 	 If op0 is a register, then we spill op1, so that we now have a
1721 	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
1722 	 to force the spill.  */
1723       if (register_operand (operands[0], mode))
1724 	{
1725 	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1726 	  op1 = gen_rtx_SUBREG (mode, op1, 0);
1727 	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1728 	}
1729 
1730       else
1731 	{
1732 	  rtx in[2];
1733 
1734 	  gcc_assert (GET_CODE (operands[0]) == MEM);
1735 
1736 	  /* Don't word-swap when writing out the value.  */
1737 	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1738 	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1739 
1740 	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1741 	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1742 	  return true;
1743 	}
1744     }
1745 
1746   if (!reload_in_progress && !reload_completed)
1747     {
1748       operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1749 
1750       if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1751 	{
1752 	  rtx memt, memx, in = operands[1];
1753 	  if (CONSTANT_P (in))
1754 	    in = validize_mem (force_const_mem (mode, in));
1755 	  if (GET_CODE (in) == MEM)
1756 	    memt = adjust_address (in, TImode, 0);
1757 	  else
1758 	    {
1759 	      memt = assign_stack_temp (TImode, 16);
1760 	      memx = adjust_address (memt, mode, 0);
1761 	      emit_move_insn (memx, in);
1762 	    }
1763 	  emit_move_insn (op0, memt);
1764 	  return true;
1765 	}
1766 
1767       if (!ia64_move_ok (operands[0], operands[1]))
1768 	operands[1] = force_reg (mode, operands[1]);
1769     }
1770 
1771   return false;
1772 }
1773 
1774 /* Emit a comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1775    with the expression that holds the compare result (in VOIDmode).  */
1776 
1777 static GTY(()) rtx cmptf_libfunc;
1778 
1779 void
1780 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1781 {
1782   enum rtx_code code = GET_CODE (*expr);
1783   rtx cmp;
1784 
1785   /* If we have a BImode input, then we already have a compare result, and
1786      do not need to emit another comparison.  */
1787   if (GET_MODE (*op0) == BImode)
1788     {
1789       gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1790       cmp = *op0;
1791     }
1792   /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1793      magic number as its third argument, that indicates what to do.
1794      The return value is an integer to be compared against zero.  */
1795   else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1796     {
1797       enum qfcmp_magic {
1798 	QCMP_INV = 1,	/* Raise FP_INVALID on NaNs as a side effect.  */
1799 	QCMP_UNORD = 2,
1800 	QCMP_EQ = 4,
1801 	QCMP_LT = 8,
1802 	QCMP_GT = 16
1803       };
1804       int magic;
1805       enum rtx_code ncode;
1806       rtx ret;
1807 
1808       gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1809       switch (code)
1810 	{
1811 	  /* 1 = equal, 0 = not equal.  Equality operators do
1812 	     not raise FP_INVALID when given a NaN operand.  */
1813 	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1814 	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1815 	  /* isunordered() from C99.  */
1816 	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1817 	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1818 	  /* Relational operators raise FP_INVALID when given
1819 	     a NaN operand.  */
1820 	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1821 	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1822 	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1823 	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1824           /* Unordered relational operators do not raise FP_INVALID
1825 	     when given a NaN operand.  */
1826 	case UNLT:    magic = QCMP_LT        |QCMP_UNORD; ncode = NE; break;
1827 	case UNLE:    magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1828 	case UNGT:    magic = QCMP_GT        |QCMP_UNORD; ncode = NE; break;
1829 	case UNGE:    magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1830 	  /* Not supported.  */
1831 	case UNEQ:
1832 	case LTGT:
1833 	default: gcc_unreachable ();
1834 	}
1835 
1836       start_sequence ();
1837 
1838       ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1839 				     *op0, TFmode, *op1, TFmode,
1840 				     GEN_INT (magic), DImode);
1841       cmp = gen_reg_rtx (BImode);
1842       emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1843 						   ret, const0_rtx)));
1844 
1845       rtx_insn *insns = get_insns ();
1846       end_sequence ();
1847 
1848       emit_libcall_block (insns, cmp, cmp,
1849 			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1850       code = NE;
1851     }
1852   else
1853     {
1854       cmp = gen_reg_rtx (BImode);
1855       emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1856       code = NE;
1857     }
1858 
1859   *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1860   *op0 = cmp;
1861   *op1 = const0_rtx;
1862 }
1863 
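/* Example of the HP-UX TFmode path above (a sketch, not verbatim output):
   for "a <= b" the expander emits, in effect,

	ret = _U_Qfcmp (a, b, QCMP_LT | QCMP_EQ | QCMP_INV);
	cmp = (ret != 0);

   and then rewrites the original comparison as "cmp != 0" on BImode, so
   every later branch or conditional-move pattern only ever sees a
   predicate register compared against zero.  */
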
1864 /* Generate an integral vector comparison.  Return true if the condition has
1865    been reversed, and so the sense of the comparison should be inverted.  */
1866 
1867 static bool
1868 ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1869 			    rtx dest, rtx op0, rtx op1)
1870 {
1871   bool negate = false;
1872   rtx x;
1873 
1874   /* Canonicalize the comparison to EQ, GT, GTU.  */
1875   switch (code)
1876     {
1877     case EQ:
1878     case GT:
1879     case GTU:
1880       break;
1881 
1882     case NE:
1883     case LE:
1884     case LEU:
1885       code = reverse_condition (code);
1886       negate = true;
1887       break;
1888 
1889     case GE:
1890     case GEU:
1891       code = reverse_condition (code);
1892       negate = true;
1893       /* FALLTHRU */
1894 
1895     case LT:
1896     case LTU:
1897       code = swap_condition (code);
1898       x = op0, op0 = op1, op1 = x;
1899       break;
1900 
1901     default:
1902       gcc_unreachable ();
1903     }
1904 
1905   /* Unsigned parallel compare is not supported by the hardware.  Play some
1906      tricks to turn this into a signed comparison against 0.  */
1907   if (code == GTU)
1908     {
1909       switch (mode)
1910 	{
1911 	case V2SImode:
1912 	  {
1913 	    rtx t1, t2, mask;
1914 
1915 	    /* Subtract (-(INT MAX) - 1) from both operands to make
1916 	       them signed.  */
1917 	    mask = gen_int_mode (0x80000000, SImode);
1918 	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1919 	    mask = force_reg (mode, mask);
1920 	    t1 = gen_reg_rtx (mode);
1921 	    emit_insn (gen_subv2si3 (t1, op0, mask));
1922 	    t2 = gen_reg_rtx (mode);
1923 	    emit_insn (gen_subv2si3 (t2, op1, mask));
1924 	    op0 = t1;
1925 	    op1 = t2;
1926 	    code = GT;
1927 	  }
1928 	  break;
1929 
1930 	case V8QImode:
1931 	case V4HImode:
1932 	  /* Perform a parallel unsigned saturating subtraction.  */
1933 	  x = gen_reg_rtx (mode);
1934 	  emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
1935 
1936 	  code = EQ;
1937 	  op0 = x;
1938 	  op1 = CONST0_RTX (mode);
1939 	  negate = !negate;
1940 	  break;
1941 
1942 	default:
1943 	  gcc_unreachable ();
1944 	}
1945     }
1946 
1947   x = gen_rtx_fmt_ee (code, mode, op0, op1);
1948   emit_insn (gen_rtx_SET (dest, x));
1949 
1950   return negate;
1951 }
1952 
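/* The GTU handling above relies on two identities, sketched here:
   for V2SI, biasing both operands by 0x80000000 (subtracting INT_MIN)
   turns the unsigned comparison into a signed one,

	(a >u b)  ==  ((int) (a - 0x80000000) > (int) (b - 0x80000000))

   and for V8QI/V4HI, unsigned saturating subtraction gives

	(a >u b)  ==  ((a -us b) != 0)

   which the code expresses as an EQ against zero with NEGATE flipped.  */
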
1953 /* Emit an integral vector conditional move.  */
1954 
1955 void
1956 ia64_expand_vecint_cmov (rtx operands[])
1957 {
1958   machine_mode mode = GET_MODE (operands[0]);
1959   enum rtx_code code = GET_CODE (operands[3]);
1960   bool negate;
1961   rtx cmp, x, ot, of;
1962 
1963   cmp = gen_reg_rtx (mode);
1964   negate = ia64_expand_vecint_compare (code, mode, cmp,
1965 				       operands[4], operands[5]);
1966 
1967   ot = operands[1+negate];
1968   of = operands[2-negate];
1969 
1970   if (ot == CONST0_RTX (mode))
1971     {
1972       if (of == CONST0_RTX (mode))
1973 	{
1974 	  emit_move_insn (operands[0], ot);
1975 	  return;
1976 	}
1977 
1978       x = gen_rtx_NOT (mode, cmp);
1979       x = gen_rtx_AND (mode, x, of);
1980       emit_insn (gen_rtx_SET (operands[0], x));
1981     }
1982   else if (of == CONST0_RTX (mode))
1983     {
1984       x = gen_rtx_AND (mode, cmp, ot);
1985       emit_insn (gen_rtx_SET (operands[0], x));
1986     }
1987   else
1988     {
1989       rtx t, f;
1990 
1991       t = gen_reg_rtx (mode);
1992       x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1993       emit_insn (gen_rtx_SET (t, x));
1994 
1995       f = gen_reg_rtx (mode);
1996       x = gen_rtx_NOT (mode, cmp);
1997       x = gen_rtx_AND (mode, x, operands[2-negate]);
1998       emit_insn (gen_rtx_SET (f, x));
1999 
2000       x = gen_rtx_IOR (mode, t, f);
2001       emit_insn (gen_rtx_SET (operands[0], x));
2002     }
2003 }
2004 
2005 /* Emit an integral vector min or max operation.  Return true if all done.  */
2006 
2007 bool
2008 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2009 			   rtx operands[])
2010 {
2011   rtx xops[6];
2012 
2013   /* These four combinations are supported directly.  */
2014   if (mode == V8QImode && (code == UMIN || code == UMAX))
2015     return false;
2016   if (mode == V4HImode && (code == SMIN || code == SMAX))
2017     return false;
2018 
2019   /* This combination can be implemented with only saturating subtraction.  */
2020   if (mode == V4HImode && code == UMAX)
2021     {
2022       rtx x, tmp = gen_reg_rtx (mode);
2023 
2024       x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2025       emit_insn (gen_rtx_SET (tmp, x));
2026 
2027       emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2028       return true;
2029     }
2030 
2031   /* Everything else is implemented via vector comparisons.  */
2032   xops[0] = operands[0];
2033   xops[4] = xops[1] = operands[1];
2034   xops[5] = xops[2] = operands[2];
2035 
2036   switch (code)
2037     {
2038     case UMIN:
2039       code = LTU;
2040       break;
2041     case UMAX:
2042       code = GTU;
2043       break;
2044     case SMIN:
2045       code = LT;
2046       break;
2047     case SMAX:
2048       code = GT;
2049       break;
2050     default:
2051       gcc_unreachable ();
2052     }
2053   xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2054 
2055   ia64_expand_vecint_cmov (xops);
2056   return true;
2057 }
2058 
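/* Quick check of the V4HI UMAX identity used above:

	umax (a, b) = (a -us b) + b

   The unsigned saturating subtraction yields a - b when a >= b and 0
   otherwise, so the sum is a in the first case and b in the second.  */
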
2059 /* The vectors LO and HI each contain N halves of a double-wide vector.
2060    Reassemble either the first N/2 or the second N/2 elements.  */
2061 
2062 void
2063 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2064 {
2065   machine_mode vmode = GET_MODE (lo);
2066   unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2067   struct expand_vec_perm_d d;
2068   bool ok;
2069 
2070   d.target = gen_lowpart (vmode, out);
2071   d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2072   d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2073   d.vmode = vmode;
2074   d.nelt = nelt;
2075   d.one_operand_p = false;
2076   d.testing_p = false;
2077 
2078   high = (highp ? nelt / 2 : 0);
2079   for (i = 0; i < nelt / 2; ++i)
2080     {
2081       d.perm[i * 2] = i + high;
2082       d.perm[i * 2 + 1] = i + high + nelt;
2083     }
2084 
2085   ok = ia64_expand_vec_perm_const_1 (&d);
2086   gcc_assert (ok);
2087 }
2088 
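/* Example (for the little-endian case, as a sketch): with V8QImode and
   HIGHP false the loop above builds the permutation

	{ 0, 8, 1, 9, 2, 10, 3, 11 }

   which interleaves the low halves of the two operands; with HIGHP true
   the indices start at NELT/2 = 4 and the high halves are interleaved
   instead.  ia64_expand_vec_perm_const_1 is then expected to match this
   to a single mix/unpack instruction.  */
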
2089 /* Return a vector of the sign-extension of VEC.  */
2090 
2091 static rtx
2092 ia64_unpack_sign (rtx vec, bool unsignedp)
2093 {
2094   machine_mode mode = GET_MODE (vec);
2095   rtx zero = CONST0_RTX (mode);
2096 
2097   if (unsignedp)
2098     return zero;
2099   else
2100     {
2101       rtx sign = gen_reg_rtx (mode);
2102       bool neg;
2103 
2104       neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2105       gcc_assert (!neg);
2106 
2107       return sign;
2108     }
2109 }
2110 
2111 /* Emit an integral vector unpack operation.  */
2112 
2113 void
2114 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2115 {
2116   rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2117   ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2118 }
2119 
2120 /* Emit an integral vector widening sum operation.  */
2121 
2122 void
2123 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2124 {
2125   machine_mode wmode;
2126   rtx l, h, t, sign;
2127 
2128   sign = ia64_unpack_sign (operands[1], unsignedp);
2129 
2130   wmode = GET_MODE (operands[0]);
2131   l = gen_reg_rtx (wmode);
2132   h = gen_reg_rtx (wmode);
2133 
2134   ia64_unpack_assemble (l, operands[1], sign, false);
2135   ia64_unpack_assemble (h, operands[1], sign, true);
2136 
2137   t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2138   t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2139   if (t != operands[0])
2140     emit_move_insn (operands[0], t);
2141 }
2142 
2143 /* Emit the appropriate sequence for a call.  */
2144 
2145 void
2146 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2147 		  int sibcall_p)
2148 {
2149   rtx insn, b0;
2150 
2151   addr = XEXP (addr, 0);
2152   addr = convert_memory_address (DImode, addr);
2153   b0 = gen_rtx_REG (DImode, R_BR (0));
2154 
2155   /* ??? Should do this for functions known to bind local too.  */
2156   if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2157     {
2158       if (sibcall_p)
2159 	insn = gen_sibcall_nogp (addr);
2160       else if (! retval)
2161 	insn = gen_call_nogp (addr, b0);
2162       else
2163 	insn = gen_call_value_nogp (retval, addr, b0);
2164       insn = emit_call_insn (insn);
2165     }
2166   else
2167     {
2168       if (sibcall_p)
2169 	insn = gen_sibcall_gp (addr);
2170       else if (! retval)
2171 	insn = gen_call_gp (addr, b0);
2172       else
2173 	insn = gen_call_value_gp (retval, addr, b0);
2174       insn = emit_call_insn (insn);
2175 
2176       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2177     }
2178 
2179   if (sibcall_p)
2180     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2181 
2182   if (TARGET_ABI_OPEN_VMS)
2183     use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2184 	     gen_rtx_REG (DImode, GR_REG (25)));
2185 }
2186 
2187 static void
2188 reg_emitted (enum ia64_frame_regs r)
2189 {
2190   if (emitted_frame_related_regs[r] == 0)
2191     emitted_frame_related_regs[r] = current_frame_info.r[r];
2192   else
2193     gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2194 }
2195 
2196 static int
2197 get_reg (enum ia64_frame_regs r)
2198 {
2199   reg_emitted (r);
2200   return current_frame_info.r[r];
2201 }
2202 
2203 static bool
2204 is_emitted (int regno)
2205 {
2206   unsigned int r;
2207 
2208   for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2209     if (emitted_frame_related_regs[r] == regno)
2210       return true;
2211   return false;
2212 }
2213 
2214 void
2215 ia64_reload_gp (void)
2216 {
2217   rtx tmp;
2218 
2219   if (current_frame_info.r[reg_save_gp])
2220     {
2221       tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2222     }
2223   else
2224     {
2225       HOST_WIDE_INT offset;
2226       rtx offset_r;
2227 
2228       offset = (current_frame_info.spill_cfa_off
2229 	        + current_frame_info.spill_size);
2230       if (frame_pointer_needed)
2231         {
2232           tmp = hard_frame_pointer_rtx;
2233           offset = -offset;
2234         }
2235       else
2236         {
2237           tmp = stack_pointer_rtx;
2238           offset = current_frame_info.total_size - offset;
2239         }
2240 
2241       offset_r = GEN_INT (offset);
2242       if (satisfies_constraint_I (offset_r))
2243         emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2244       else
2245         {
2246           emit_move_insn (pic_offset_table_rtx, offset_r);
2247           emit_insn (gen_adddi3 (pic_offset_table_rtx,
2248 			         pic_offset_table_rtx, tmp));
2249         }
2250 
2251       tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2252     }
2253 
2254   emit_move_insn (pic_offset_table_rtx, tmp);
2255 }
2256 
2257 void
2258 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2259 		 rtx scratch_b, int noreturn_p, int sibcall_p)
2260 {
2261   rtx insn;
2262   bool is_desc = false;
2263 
2264   /* If we find we're calling through a register, then we're actually
2265      calling through a descriptor, so load up the values.  */
2266   if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2267     {
2268       rtx tmp;
2269       bool addr_dead_p;
2270 
2271       /* ??? We are currently constrained to *not* use peep2, because
2272 	 we can legitimately change the global lifetime of the GP
2273 	 (in the form of killing where previously live).  This is
2274 	 because a call through a descriptor doesn't use the previous
2275 	 value of the GP, while a direct call does, and we do not
2276 	 commit to either form until the split here.
2277 
2278 	 That said, this means that we lack precise life info for
2279 	 whether ADDR is dead after this call.  This is not terribly
2280 	 important, since we can fix things up essentially for free
2281 	 with the POST_DEC below, but it's nice to not use it when we
2282 	 can immediately tell it's not necessary.  */
2283       addr_dead_p = ((noreturn_p || sibcall_p
2284 		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2285 					    REGNO (addr)))
2286 		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2287 
2288       /* Load the code address into scratch_b.  */
2289       tmp = gen_rtx_POST_INC (Pmode, addr);
2290       tmp = gen_rtx_MEM (Pmode, tmp);
2291       emit_move_insn (scratch_r, tmp);
2292       emit_move_insn (scratch_b, scratch_r);
2293 
2294       /* Load the GP address.  If ADDR is not dead here, then we must
2295 	 revert the change made above via the POST_INCREMENT.  */
2296       if (!addr_dead_p)
2297 	tmp = gen_rtx_POST_DEC (Pmode, addr);
2298       else
2299 	tmp = addr;
2300       tmp = gen_rtx_MEM (Pmode, tmp);
2301       emit_move_insn (pic_offset_table_rtx, tmp);
2302 
2303       is_desc = true;
2304       addr = scratch_b;
2305     }
2306 
2307   if (sibcall_p)
2308     insn = gen_sibcall_nogp (addr);
2309   else if (retval)
2310     insn = gen_call_value_nogp (retval, addr, retaddr);
2311   else
2312     insn = gen_call_nogp (addr, retaddr);
2313   emit_call_insn (insn);
2314 
2315   if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2316     ia64_reload_gp ();
2317 }
2318 
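/* Background for the descriptor handling above: in the Itanium software
   conventions an indirect function "pointer" is really the address of a
   16-byte function descriptor,

	fptr + 0 : entry point of the callee
	fptr + 8 : gp value the callee expects

   which is why the split loads the code address through a post-increment
   and then reloads gp from the second word, undoing the increment when
   ADDR must be preserved.  */
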
2319 /* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2320 
2321    This differs from the generic code in that we know about the zero-extending
2322    properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2323    also know that ld.acq+cmpxchg.rel equals a full barrier.
2324 
2325    The loop we want to generate looks like
2326 
2327 	cmp_reg = mem;
2328       label:
2329         old_reg = cmp_reg;
2330 	new_reg = cmp_reg op val;
2331 	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2332 	if (cmp_reg != old_reg)
2333 	  goto label;
2334 
2335    Note that we only do the plain load from memory once.  Subsequent
2336    iterations use the value loaded by the compare-and-swap pattern.  */
2337 
2338 void
2339 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2340 		       rtx old_dst, rtx new_dst, enum memmodel model)
2341 {
2342   machine_mode mode = GET_MODE (mem);
2343   rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2344   enum insn_code icode;
2345 
2346   /* Special case for using fetchadd.  */
2347   if ((mode == SImode || mode == DImode)
2348       && (code == PLUS || code == MINUS)
2349       && fetchadd_operand (val, mode))
2350     {
2351       if (code == MINUS)
2352 	val = GEN_INT (-INTVAL (val));
2353 
2354       if (!old_dst)
2355         old_dst = gen_reg_rtx (mode);
2356 
2357       switch (model)
2358 	{
2359 	case MEMMODEL_ACQ_REL:
2360 	case MEMMODEL_SEQ_CST:
2361 	case MEMMODEL_SYNC_SEQ_CST:
2362 	  emit_insn (gen_memory_barrier ());
2363 	  /* FALLTHRU */
2364 	case MEMMODEL_RELAXED:
2365 	case MEMMODEL_ACQUIRE:
2366 	case MEMMODEL_SYNC_ACQUIRE:
2367 	case MEMMODEL_CONSUME:
2368 	  if (mode == SImode)
2369 	    icode = CODE_FOR_fetchadd_acq_si;
2370 	  else
2371 	    icode = CODE_FOR_fetchadd_acq_di;
2372 	  break;
2373 	case MEMMODEL_RELEASE:
2374 	case MEMMODEL_SYNC_RELEASE:
2375 	  if (mode == SImode)
2376 	    icode = CODE_FOR_fetchadd_rel_si;
2377 	  else
2378 	    icode = CODE_FOR_fetchadd_rel_di;
2379 	  break;
2380 
2381 	default:
2382 	  gcc_unreachable ();
2383 	}
2384 
2385       emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2386 
2387       if (new_dst)
2388 	{
2389 	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2390 					 true, OPTAB_WIDEN);
2391 	  if (new_reg != new_dst)
2392 	    emit_move_insn (new_dst, new_reg);
2393 	}
2394       return;
2395     }
2396 
2397   /* Because of the volatile mem read, we get an ld.acq, which is the
2398      front half of the full barrier.  The end half is the cmpxchg.rel.
2399      For relaxed and release memory models, we don't need this.  But we
2400      also don't bother trying to prevent it either.  */
2401   gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
2402 	      || MEM_VOLATILE_P (mem));
2403 
2404   old_reg = gen_reg_rtx (DImode);
2405   cmp_reg = gen_reg_rtx (DImode);
2406   label = gen_label_rtx ();
2407 
2408   if (mode != DImode)
2409     {
2410       val = simplify_gen_subreg (DImode, val, mode, 0);
2411       emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2412     }
2413   else
2414     emit_move_insn (cmp_reg, mem);
2415 
2416   emit_label (label);
2417 
2418   ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2419   emit_move_insn (old_reg, cmp_reg);
2420   emit_move_insn (ar_ccv, cmp_reg);
2421 
2422   if (old_dst)
2423     emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2424 
2425   new_reg = cmp_reg;
2426   if (code == NOT)
2427     {
2428       new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2429 				     true, OPTAB_DIRECT);
2430       new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2431     }
2432   else
2433     new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2434 				   true, OPTAB_DIRECT);
2435 
2436   if (mode != DImode)
2437     new_reg = gen_lowpart (mode, new_reg);
2438   if (new_dst)
2439     emit_move_insn (new_dst, new_reg);
2440 
2441   switch (model)
2442     {
2443     case MEMMODEL_RELAXED:
2444     case MEMMODEL_ACQUIRE:
2445     case MEMMODEL_SYNC_ACQUIRE:
2446     case MEMMODEL_CONSUME:
2447       switch (mode)
2448 	{
2449 	case QImode: icode = CODE_FOR_cmpxchg_acq_qi;  break;
2450 	case HImode: icode = CODE_FOR_cmpxchg_acq_hi;  break;
2451 	case SImode: icode = CODE_FOR_cmpxchg_acq_si;  break;
2452 	case DImode: icode = CODE_FOR_cmpxchg_acq_di;  break;
2453 	default:
2454 	  gcc_unreachable ();
2455 	}
2456       break;
2457 
2458     case MEMMODEL_RELEASE:
2459     case MEMMODEL_SYNC_RELEASE:
2460     case MEMMODEL_ACQ_REL:
2461     case MEMMODEL_SEQ_CST:
2462     case MEMMODEL_SYNC_SEQ_CST:
2463       switch (mode)
2464 	{
2465 	case QImode: icode = CODE_FOR_cmpxchg_rel_qi;  break;
2466 	case HImode: icode = CODE_FOR_cmpxchg_rel_hi;  break;
2467 	case SImode: icode = CODE_FOR_cmpxchg_rel_si;  break;
2468 	case DImode: icode = CODE_FOR_cmpxchg_rel_di;  break;
2469 	default:
2470 	  gcc_unreachable ();
2471 	}
2472       break;
2473 
2474     default:
2475       gcc_unreachable ();
2476     }
2477 
2478   emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2479 
2480   emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2481 }
2482 
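/* Usage sketch (illustrative only): __atomic_fetch_add (&x, 8,
   __ATOMIC_ACQUIRE) on an int takes the fetchadd path above, since 8 is
   one of the immediates accepted by fetchadd_operand, and becomes a
   single

	fetchadd4.acq  r_old = [r_x], 8

   while an addend such as 3 cannot use fetchadd and falls through to the
   generic ld.acq / cmpxchg4.acq retry loop described before the
   function.  */
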
2483 /* Begin the assembly file.  */
2484 
2485 static void
2486 ia64_file_start (void)
2487 {
2488   default_file_start ();
2489   emit_safe_across_calls ();
2490 }
2491 
2492 void
2493 emit_safe_across_calls (void)
2494 {
2495   unsigned int rs, re;
2496   int out_state;
2497 
2498   rs = 1;
2499   out_state = 0;
2500   while (1)
2501     {
2502       while (rs < 64 && call_used_regs[PR_REG (rs)])
2503 	rs++;
2504       if (rs >= 64)
2505 	break;
2506       for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2507 	continue;
2508       if (out_state == 0)
2509 	{
2510 	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
2511 	  out_state = 1;
2512 	}
2513       else
2514 	fputc (',', asm_out_file);
2515       if (re == rs + 1)
2516 	fprintf (asm_out_file, "p%u", rs);
2517       else
2518 	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2519       rs = re + 1;
2520     }
2521   if (out_state)
2522     fputc ('\n', asm_out_file);
2523 }
2524 
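/* With the usual predicate partition this prints a single directive along
   the lines of

	.pred.safe_across_calls p1-p5,p16-p63

   (the exact ranges depend on -ffixed-* / -fcall-used-* options), which
   tells the assembler's dependency checker which predicate registers may
   be assumed to survive calls.  */
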
2525 /* Globalize a declaration.  */
2526 
2527 static void
2528 ia64_globalize_decl_name (FILE * stream, tree decl)
2529 {
2530   const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2531   tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2532   if (version_attr)
2533     {
2534       tree v = TREE_VALUE (TREE_VALUE (version_attr));
2535       const char *p = TREE_STRING_POINTER (v);
2536       fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2537     }
2538   targetm.asm_out.globalize_label (stream, name);
2539   if (TREE_CODE (decl) == FUNCTION_DECL)
2540     ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2541 }
2542 
2543 /* Helper function for ia64_compute_frame_size: find an appropriate general
2544    register to spill some special register to.  current_frame_info.gr_used_mask
2545    tracks which of GR0 to GR31 have already been allocated by this routine.
2546    TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2547 
2548 static int
2549 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2550 {
2551   int regno;
2552 
2553   if (emitted_frame_related_regs[r] != 0)
2554     {
2555       regno = emitted_frame_related_regs[r];
2556       if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2557 	  && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2558         current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2559       else if (crtl->is_leaf
2560                && regno >= GR_REG (1) && regno <= GR_REG (31))
2561         current_frame_info.gr_used_mask |= 1 << regno;
2562 
2563       return regno;
2564     }
2565 
2566   /* If this is a leaf function, first try an otherwise unused
2567      call-clobbered register.  */
2568   if (crtl->is_leaf)
2569     {
2570       for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2571 	if (! df_regs_ever_live_p (regno)
2572 	    && call_used_regs[regno]
2573 	    && ! fixed_regs[regno]
2574 	    && ! global_regs[regno]
2575 	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2576             && ! is_emitted (regno))
2577 	  {
2578 	    current_frame_info.gr_used_mask |= 1 << regno;
2579 	    return regno;
2580 	  }
2581     }
2582 
2583   if (try_locals)
2584     {
2585       regno = current_frame_info.n_local_regs;
2586       /* If there is a frame pointer, then we can't use loc79, because
2587 	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2588 	 reg_name switching code in ia64_expand_prologue.  */
2589       while (regno < (80 - frame_pointer_needed))
2590 	if (! is_emitted (LOC_REG (regno++)))
2591 	  {
2592 	    current_frame_info.n_local_regs = regno;
2593 	    return LOC_REG (regno - 1);
2594 	  }
2595     }
2596 
2597   /* Failed to find a general register to spill to.  Must use stack.  */
2598   return 0;
2599 }
2600 
2601 /* In order to make for nice schedules, we try to allocate every temporary
2602    to a different register.  We must of course stay away from call-saved,
2603    fixed, and global registers.  We must also stay away from registers
2604    allocated in current_frame_info.gr_used_mask, since those include regs
2605    used all through the prologue.
2606 
2607    Any register allocated here must be used immediately.  The idea is to
2608    aid scheduling, not to solve data flow problems.  */
2609 
2610 static int last_scratch_gr_reg;
2611 
2612 static int
2613 next_scratch_gr_reg (void)
2614 {
2615   int i, regno;
2616 
2617   for (i = 0; i < 32; ++i)
2618     {
2619       regno = (last_scratch_gr_reg + i + 1) & 31;
2620       if (call_used_regs[regno]
2621 	  && ! fixed_regs[regno]
2622 	  && ! global_regs[regno]
2623 	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2624 	{
2625 	  last_scratch_gr_reg = regno;
2626 	  return regno;
2627 	}
2628     }
2629 
2630   /* There must be _something_ available.  */
2631   gcc_unreachable ();
2632 }
2633 
2634 /* Helper function for ia64_compute_frame_size, called through
2635    diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2636 
2637 static void
2638 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2639 {
2640   unsigned int regno = REGNO (reg);
2641   if (regno < 32)
2642     {
2643       unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2644       for (i = 0; i < n; ++i)
2645 	current_frame_info.gr_used_mask |= 1 << (regno + i);
2646     }
2647 }
2648 
2649 
2650 /* Compute the current function's frame layout, including the offset between
2651    the frame pointer and the stack pointer, and record it in current_frame_info.
2652    SIZE is the number of bytes of space needed for local variables.  */
2653 
2654 static void
2655 ia64_compute_frame_size (HOST_WIDE_INT size)
2656 {
2657   HOST_WIDE_INT total_size;
2658   HOST_WIDE_INT spill_size = 0;
2659   HOST_WIDE_INT extra_spill_size = 0;
2660   HOST_WIDE_INT pretend_args_size;
2661   HARD_REG_SET mask;
2662   int n_spilled = 0;
2663   int spilled_gr_p = 0;
2664   int spilled_fr_p = 0;
2665   unsigned int regno;
2666   int min_regno;
2667   int max_regno;
2668   int i;
2669 
2670   if (current_frame_info.initialized)
2671     return;
2672 
2673   memset (&current_frame_info, 0, sizeof current_frame_info);
2674   CLEAR_HARD_REG_SET (mask);
2675 
2676   /* Don't allocate scratches to the return register.  */
2677   diddle_return_value (mark_reg_gr_used_mask, NULL);
2678 
2679   /* Don't allocate scratches to the EH scratch registers.  */
2680   if (cfun->machine->ia64_eh_epilogue_sp)
2681     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2682   if (cfun->machine->ia64_eh_epilogue_bsp)
2683     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2684 
2685   /* Static stack checking uses r2 and r3.  */
2686   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2687     current_frame_info.gr_used_mask |= 0xc;
2688 
2689   /* Find the size of the register stack frame.  We have only 80 local
2690      registers, because we reserve 8 for the inputs and 8 for the
2691      outputs.  */
2692 
2693   /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2694      since we'll be adjusting that down later.  */
2695   regno = LOC_REG (78) + ! frame_pointer_needed;
2696   for (; regno >= LOC_REG (0); regno--)
2697     if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2698       break;
2699   current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2700 
2701   /* For functions marked with the syscall_linkage attribute, we must mark
2702      all eight input registers as in use, so that locals aren't visible to
2703      the caller.  */
2704 
2705   if (cfun->machine->n_varargs > 0
2706       || lookup_attribute ("syscall_linkage",
2707 			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2708     current_frame_info.n_input_regs = 8;
2709   else
2710     {
2711       for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2712 	if (df_regs_ever_live_p (regno))
2713 	  break;
2714       current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2715     }
2716 
2717   for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2718     if (df_regs_ever_live_p (regno))
2719       break;
2720   i = regno - OUT_REG (0) + 1;
2721 
2722 #ifndef PROFILE_HOOK
2723   /* When -p profiling, we need one output register for the mcount argument.
2724      Likewise for -a profiling for the bb_init_func argument.  For -ax
2725      profiling, we need two output registers for the two bb_init_trace_func
2726      arguments.  */
2727   if (crtl->profile)
2728     i = MAX (i, 1);
2729 #endif
2730   current_frame_info.n_output_regs = i;
2731 
2732   /* ??? No rotating register support yet.  */
2733   current_frame_info.n_rotate_regs = 0;
2734 
2735   /* Discover which registers need spilling, and how much room that
2736      will take.  Begin with floating point and general registers,
2737      which will always wind up on the stack.  */
2738 
2739   for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2740     if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2741       {
2742 	SET_HARD_REG_BIT (mask, regno);
2743 	spill_size += 16;
2744 	n_spilled += 1;
2745 	spilled_fr_p = 1;
2746       }
2747 
2748   for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2749     if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2750       {
2751 	SET_HARD_REG_BIT (mask, regno);
2752 	spill_size += 8;
2753 	n_spilled += 1;
2754 	spilled_gr_p = 1;
2755       }
2756 
2757   for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2758     if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2759       {
2760 	SET_HARD_REG_BIT (mask, regno);
2761 	spill_size += 8;
2762 	n_spilled += 1;
2763       }
2764 
2765   /* Now come all special registers that might get saved in other
2766      general registers.  */
2767 
2768   if (frame_pointer_needed)
2769     {
2770       current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2771       /* If we did not get a register, then we take LOC79.  This is guaranteed
2772 	 to be free, even if regs_ever_live is already set, because this is
2773 	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2774 	 as we don't count loc79 above.  */
2775       if (current_frame_info.r[reg_fp] == 0)
2776 	{
2777 	  current_frame_info.r[reg_fp] = LOC_REG (79);
2778 	  current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2779 	}
2780     }
2781 
2782   if (! crtl->is_leaf)
2783     {
2784       /* Emit a save of BR0 if we call other functions.  Do this even
2785 	 if this function doesn't return, as EH depends on this to be
2786 	 able to unwind the stack.  */
2787       SET_HARD_REG_BIT (mask, BR_REG (0));
2788 
2789       current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2790       if (current_frame_info.r[reg_save_b0] == 0)
2791 	{
2792 	  extra_spill_size += 8;
2793 	  n_spilled += 1;
2794 	}
2795 
2796       /* Similarly for ar.pfs.  */
2797       SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2798       current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2799       if (current_frame_info.r[reg_save_ar_pfs] == 0)
2800 	{
2801 	  extra_spill_size += 8;
2802 	  n_spilled += 1;
2803 	}
2804 
2805       /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2806 	 registers are clobbered, so we fall back to the stack.  */
2807       current_frame_info.r[reg_save_gp]
2808 	= (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2809       if (current_frame_info.r[reg_save_gp] == 0)
2810 	{
2811 	  SET_HARD_REG_BIT (mask, GR_REG (1));
2812 	  spill_size += 8;
2813 	  n_spilled += 1;
2814 	}
2815     }
2816   else
2817     {
2818       if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2819 	{
2820 	  SET_HARD_REG_BIT (mask, BR_REG (0));
2821 	  extra_spill_size += 8;
2822 	  n_spilled += 1;
2823 	}
2824 
2825       if (df_regs_ever_live_p (AR_PFS_REGNUM))
2826 	{
2827 	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2828  	  current_frame_info.r[reg_save_ar_pfs]
2829             = find_gr_spill (reg_save_ar_pfs, 1);
2830 	  if (current_frame_info.r[reg_save_ar_pfs] == 0)
2831 	    {
2832 	      extra_spill_size += 8;
2833 	      n_spilled += 1;
2834 	    }
2835 	}
2836     }
2837 
2838   /* Unwind descriptor hackery: things are most efficient if we allocate
2839      consecutive GR save registers for RP, PFS, FP in that order. However,
2840      it is absolutely critical that FP get the only hard register that's
2841      guaranteed to be free, so we allocated it first.  If all three did
2842      happen to be allocated hard regs, and are consecutive, rearrange them
2843      into the preferred order now.
2844 
2845      If we have already emitted code for any of those registers,
2846      then it's already too late to change.  */
2847   min_regno = MIN (current_frame_info.r[reg_fp],
2848 		   MIN (current_frame_info.r[reg_save_b0],
2849 			current_frame_info.r[reg_save_ar_pfs]));
2850   max_regno = MAX (current_frame_info.r[reg_fp],
2851 		   MAX (current_frame_info.r[reg_save_b0],
2852 			current_frame_info.r[reg_save_ar_pfs]));
2853   if (min_regno > 0
2854       && min_regno + 2 == max_regno
2855       && (current_frame_info.r[reg_fp] == min_regno + 1
2856 	  || current_frame_info.r[reg_save_b0] == min_regno + 1
2857 	  || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2858       && (emitted_frame_related_regs[reg_save_b0] == 0
2859 	  || emitted_frame_related_regs[reg_save_b0] == min_regno)
2860       && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2861 	  || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2862       && (emitted_frame_related_regs[reg_fp] == 0
2863 	  || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2864     {
2865       current_frame_info.r[reg_save_b0] = min_regno;
2866       current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2867       current_frame_info.r[reg_fp] = min_regno + 2;
2868     }
2869 
2870   /* See if we need to store the predicate register block.  */
2871   for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2872     if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2873       break;
2874   if (regno <= PR_REG (63))
2875     {
2876       SET_HARD_REG_BIT (mask, PR_REG (0));
2877       current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2878       if (current_frame_info.r[reg_save_pr] == 0)
2879 	{
2880 	  extra_spill_size += 8;
2881 	  n_spilled += 1;
2882 	}
2883 
2884       /* ??? Mark them all as used so that register renaming and such
2885 	 are free to use them.  */
2886       for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2887 	df_set_regs_ever_live (regno, true);
2888     }
2889 
2890   /* If we're forced to use st8.spill, we're forced to save and restore
2891      ar.unat as well.  The check for existing liveness allows inline asm
2892      to touch ar.unat.  */
2893   if (spilled_gr_p || cfun->machine->n_varargs
2894       || df_regs_ever_live_p (AR_UNAT_REGNUM))
2895     {
2896       df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2897       SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2898       current_frame_info.r[reg_save_ar_unat]
2899         = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2900       if (current_frame_info.r[reg_save_ar_unat] == 0)
2901 	{
2902 	  extra_spill_size += 8;
2903 	  n_spilled += 1;
2904 	}
2905     }
2906 
2907   if (df_regs_ever_live_p (AR_LC_REGNUM))
2908     {
2909       SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2910       current_frame_info.r[reg_save_ar_lc]
2911         = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2912       if (current_frame_info.r[reg_save_ar_lc] == 0)
2913 	{
2914 	  extra_spill_size += 8;
2915 	  n_spilled += 1;
2916 	}
2917     }
2918 
2919   /* If we have an odd number of words of pretend arguments written to
2920      the stack, then the FR save area will be unaligned.  We round the
2921      size of this area up to keep things 16 byte aligned.  */
2922   if (spilled_fr_p)
2923     pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2924   else
2925     pretend_args_size = crtl->args.pretend_args_size;
2926 
2927   total_size = (spill_size + extra_spill_size + size + pretend_args_size
2928 		+ crtl->outgoing_args_size);
2929   total_size = IA64_STACK_ALIGN (total_size);
2930 
2931   /* We always use the 16-byte scratch area provided by the caller, but
2932      if we are a leaf function, there's no one to which we need to provide
2933      a scratch area.  However, if the function allocates dynamic stack space,
2934      the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2935      so we need to cope.  */
2936   if (crtl->is_leaf && !cfun->calls_alloca)
2937     total_size = MAX (0, total_size - 16);
2938 
2939   current_frame_info.total_size = total_size;
2940   current_frame_info.spill_cfa_off = pretend_args_size - 16;
2941   current_frame_info.spill_size = spill_size;
2942   current_frame_info.extra_spill_size = extra_spill_size;
2943   COPY_HARD_REG_SET (current_frame_info.mask, mask);
2944   current_frame_info.n_spilled = n_spilled;
2945   current_frame_info.initialized = reload_completed;
2946 }
2947 
2948 /* Worker function for TARGET_CAN_ELIMINATE.  */
2949 
2950 bool
2951 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2952 {
2953   return (to == BR_REG (0) ? crtl->is_leaf : true);
2954 }
2955 
2956 /* Compute the initial difference between the specified pair of registers.  */
2957 
2958 HOST_WIDE_INT
2959 ia64_initial_elimination_offset (int from, int to)
2960 {
2961   HOST_WIDE_INT offset;
2962 
2963   ia64_compute_frame_size (get_frame_size ());
2964   switch (from)
2965     {
2966     case FRAME_POINTER_REGNUM:
2967       switch (to)
2968 	{
2969 	case HARD_FRAME_POINTER_REGNUM:
2970 	  offset = -current_frame_info.total_size;
2971 	  if (!crtl->is_leaf || cfun->calls_alloca)
2972 	    offset += 16 + crtl->outgoing_args_size;
2973 	  break;
2974 
2975 	case STACK_POINTER_REGNUM:
2976 	  offset = 0;
2977 	  if (!crtl->is_leaf || cfun->calls_alloca)
2978 	    offset += 16 + crtl->outgoing_args_size;
2979 	  break;
2980 
2981 	default:
2982 	  gcc_unreachable ();
2983 	}
2984       break;
2985 
2986     case ARG_POINTER_REGNUM:
2987       /* Arguments start above the 16 byte save area, unless stdarg
2988 	 in which case we store through the 16 byte save area.  */
2989       switch (to)
2990 	{
2991 	case HARD_FRAME_POINTER_REGNUM:
2992 	  offset = 16 - crtl->args.pretend_args_size;
2993 	  break;
2994 
2995 	case STACK_POINTER_REGNUM:
2996 	  offset = (current_frame_info.total_size
2997 		    + 16 - crtl->args.pretend_args_size);
2998 	  break;
2999 
3000 	default:
3001 	  gcc_unreachable ();
3002 	}
3003       break;
3004 
3005     default:
3006       gcc_unreachable ();
3007     }
3008 
3009   return offset;
3010 }
3011 
3012 /* If there are more than a trivial number of register spills, we use
3013    two interleaved iterators so that we can get two memory references
3014    per insn group.
3015 
3016    In order to simplify things in the prologue and epilogue expanders,
3017    we use helper functions to fix up the memory references after the
3018    fact with the appropriate offsets to a POST_MODIFY memory mode.
3019    The following data structure tracks the state of the two iterators
3020    while insns are being emitted.  */
3021 
3022 struct spill_fill_data
3023 {
3024   rtx_insn *init_after;		/* point at which to emit initializations */
3025   rtx init_reg[2];		/* initial base register */
3026   rtx iter_reg[2];		/* the iterator registers */
3027   rtx *prev_addr[2];		/* address of last memory use */
3028   rtx_insn *prev_insn[2];	/* the insn corresponding to prev_addr */
3029   HOST_WIDE_INT prev_off[2];	/* last offset */
3030   int n_iter;			/* number of iterators in use */
3031   int next_iter;		/* next iterator to use */
3032   unsigned int save_gr_used_mask;
3033 };
3034 
3035 static struct spill_fill_data spill_fill_data;
3036 
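/* Sketch of the intended effect (illustrative, register numbers made up):
   with more than two spills setup_spill_pointers grabs two scratch
   iterators, say r16 and r17, and successive spill_restore_mem calls
   alternate between them, roughly

	st8.spill [r16] = ..., 16	// iterator 0
	st8.spill [r17] = ..., 16	// iterator 1, same insn group
	st8.spill [r16] = ..., 16
	...

   so that two memory references can issue in the same insn group instead
   of serializing on one address register.  */
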
3037 static void
3038 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3039 {
3040   int i;
3041 
3042   spill_fill_data.init_after = get_last_insn ();
3043   spill_fill_data.init_reg[0] = init_reg;
3044   spill_fill_data.init_reg[1] = init_reg;
3045   spill_fill_data.prev_addr[0] = NULL;
3046   spill_fill_data.prev_addr[1] = NULL;
3047   spill_fill_data.prev_insn[0] = NULL;
3048   spill_fill_data.prev_insn[1] = NULL;
3049   spill_fill_data.prev_off[0] = cfa_off;
3050   spill_fill_data.prev_off[1] = cfa_off;
3051   spill_fill_data.next_iter = 0;
3052   spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3053 
3054   spill_fill_data.n_iter = 1 + (n_spills > 2);
3055   for (i = 0; i < spill_fill_data.n_iter; ++i)
3056     {
3057       int regno = next_scratch_gr_reg ();
3058       spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3059       current_frame_info.gr_used_mask |= 1 << regno;
3060     }
3061 }
3062 
3063 static void
3064 finish_spill_pointers (void)
3065 {
3066   current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3067 }
3068 
3069 static rtx
3070 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3071 {
3072   int iter = spill_fill_data.next_iter;
3073   HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3074   rtx disp_rtx = GEN_INT (disp);
3075   rtx mem;
3076 
3077   if (spill_fill_data.prev_addr[iter])
3078     {
3079       if (satisfies_constraint_N (disp_rtx))
3080 	{
3081 	  *spill_fill_data.prev_addr[iter]
3082 	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3083 				   gen_rtx_PLUS (DImode,
3084 						 spill_fill_data.iter_reg[iter],
3085 						 disp_rtx));
3086 	  add_reg_note (spill_fill_data.prev_insn[iter],
3087 			REG_INC, spill_fill_data.iter_reg[iter]);
3088 	}
3089       else
3090 	{
3091 	  /* ??? Could use register post_modify for loads.  */
3092 	  if (!satisfies_constraint_I (disp_rtx))
3093 	    {
3094 	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3095 	      emit_move_insn (tmp, disp_rtx);
3096 	      disp_rtx = tmp;
3097 	    }
3098 	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3099 				 spill_fill_data.iter_reg[iter], disp_rtx));
3100 	}
3101     }
3102   /* Micro-optimization: if we've created a frame pointer, it's at
3103      CFA 0, which may allow the real iterator to be initialized lower,
3104      slightly increasing parallelism.  Also, if there are few saves
3105      it may eliminate the iterator entirely.  */
3106   else if (disp == 0
3107 	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3108 	   && frame_pointer_needed)
3109     {
3110       mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3111       set_mem_alias_set (mem, get_varargs_alias_set ());
3112       return mem;
3113     }
3114   else
3115     {
3116       rtx seq;
3117       rtx_insn *insn;
3118 
3119       if (disp == 0)
3120 	seq = gen_movdi (spill_fill_data.iter_reg[iter],
3121 			 spill_fill_data.init_reg[iter]);
3122       else
3123 	{
3124 	  start_sequence ();
3125 
3126 	  if (!satisfies_constraint_I (disp_rtx))
3127 	    {
3128 	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3129 	      emit_move_insn (tmp, disp_rtx);
3130 	      disp_rtx = tmp;
3131 	    }
3132 
3133 	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3134 				 spill_fill_data.init_reg[iter],
3135 				 disp_rtx));
3136 
3137 	  seq = get_insns ();
3138 	  end_sequence ();
3139 	}
3140 
3141       /* Careful for being the first insn in a sequence.  */
3142       if (spill_fill_data.init_after)
3143 	insn = emit_insn_after (seq, spill_fill_data.init_after);
3144       else
3145 	{
3146 	  rtx_insn *first = get_insns ();
3147 	  if (first)
3148 	    insn = emit_insn_before (seq, first);
3149 	  else
3150 	    insn = emit_insn (seq);
3151 	}
3152       spill_fill_data.init_after = insn;
3153     }
3154 
3155   mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3156 
3157   /* ??? Not all of the spills are for varargs, but some of them are.
3158      The rest of the spills belong in an alias set of their own.  But
3159      it doesn't actually hurt to include them here.  */
3160   set_mem_alias_set (mem, get_varargs_alias_set ());
3161 
3162   spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3163   spill_fill_data.prev_off[iter] = cfa_off;
3164 
3165   if (++iter >= spill_fill_data.n_iter)
3166     iter = 0;
3167   spill_fill_data.next_iter = iter;
3168 
3169   return mem;
3170 }
3171 
3172 static void
3173 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3174 	  rtx frame_reg)
3175 {
3176   int iter = spill_fill_data.next_iter;
3177   rtx mem;
3178   rtx_insn *insn;
3179 
3180   mem = spill_restore_mem (reg, cfa_off);
3181   insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3182   spill_fill_data.prev_insn[iter] = insn;
3183 
3184   if (frame_reg)
3185     {
3186       rtx base;
3187       HOST_WIDE_INT off;
3188 
3189       RTX_FRAME_RELATED_P (insn) = 1;
3190 
3191       /* Don't even pretend that the unwind code can intuit its way
3192 	 through a pair of interleaved post_modify iterators.  Just
3193 	 provide the correct answer.  */
3194 
3195       if (frame_pointer_needed)
3196 	{
3197 	  base = hard_frame_pointer_rtx;
3198 	  off = - cfa_off;
3199 	}
3200       else
3201 	{
3202 	  base = stack_pointer_rtx;
3203 	  off = current_frame_info.total_size - cfa_off;
3204 	}
3205 
3206       add_reg_note (insn, REG_CFA_OFFSET,
3207 		    gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
3208 					      plus_constant (Pmode,
3209 							     base, off)),
3210 				 frame_reg));
3211     }
3212 }
3213 
3214 static void
3215 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3216 {
3217   int iter = spill_fill_data.next_iter;
3218   rtx_insn *insn;
3219 
3220   insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3221 				GEN_INT (cfa_off)));
3222   spill_fill_data.prev_insn[iter] = insn;
3223 }
3224 
3225 /* Wrapper functions that discards the CONST_INT spill offset.  These
3226    exist so that we can give gr_spill/gr_fill the offset they need and
3227    use a consistent function interface.  */
3228 
3229 static rtx
3230 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3231 {
3232   return gen_movdi (dest, src);
3233 }
3234 
3235 static rtx
3236 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3237 {
3238   return gen_fr_spill (dest, src);
3239 }
3240 
3241 static rtx
3242 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3243 {
3244   return gen_fr_restore (dest, src);
3245 }
3246 
3247 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3248 
3249 /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2.  */
3250 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
3251 
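/* Worked example for BACKING_STORE_SIZE: the RSE interleaves one 8-byte
   NaT collection slot with every 63 saved registers, so for N = 96
   stacked registers the macro allows (96 + 96/63 + 1) * 8 = 98 * 8 = 784
   bytes, the "+ 1" covering a partial collection group.  */
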
3252 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3253    inclusive.  These are offsets from the current stack pointer.  BS_SIZE
3254    is the size of the backing store.  ??? This clobbers r2 and r3.  */
3255 
3256 static void
3257 ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3258 			     int bs_size)
3259 {
3260   rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3261   rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3262   rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3263 
3264   /* On the IA-64 there is a second stack in memory, namely the Backing Store
3265      of the Register Stack Engine.  We also need to probe it after checking
3266      that the 2 stacks don't overlap.  */
3267   emit_insn (gen_bsp_value (r3));
3268   emit_move_insn (r2, GEN_INT (-(first + size)));
3269 
3270   /* Compare current value of BSP and SP registers.  */
3271   emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3272 					      r3, stack_pointer_rtx)));
3273 
3274   /* Compute the address of the probe for the Backing Store (which grows
3275      towards higher addresses).  We probe only at the first offset of
3276      the next page because some OSes (e.g. Linux/ia64) only extend the
3277      backing store when this specific address is hit (but generate a SEGV
3278      on other addresses).  Page size is the worst case (4KB).  The reserve
3279      size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3280      Also compute the address of the last probe for the memory stack
3281      (which grows towards lower addresses).  */
3282   emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3283   emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3284 
3285   /* Compare them and raise SEGV if the former has topped the latter.  */
3286   emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3287 				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3288 				gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3289 								 r3, r2))));
3290   emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3291 						const0_rtx),
3292 			  const0_rtx));
3293   emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3294 				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3295 				gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3296 						 GEN_INT (11))));
3297 
3298   /* Probe the Backing Store if necessary.  */
3299   if (bs_size > 0)
3300     emit_stack_probe (r3);
3301 
3302   /* Probe the memory stack if necessary.  */
3303   if (size == 0)
3304     ;
3305 
3306   /* See if we have a constant small number of probes to generate.  If so,
3307      that's the easy case.  */
3308   else if (size <= PROBE_INTERVAL)
3309     emit_stack_probe (r2);
3310 
3311   /* The run-time loop is made up of 9 insns in the generic case while this
3312      compile-time loop is made up of 5+2*(n-2) insns for n intervals.  */
3313   else if (size <= 4 * PROBE_INTERVAL)
3314     {
3315       HOST_WIDE_INT i;
3316 
3317       emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3318       emit_insn (gen_rtx_SET (r2,
3319 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3320       emit_stack_probe (r2);
3321 
3322       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3323 	 it exceeds SIZE.  If only two probes are needed, this will not
3324 	 generate any code.  Then probe at FIRST + SIZE.  */
3325       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3326 	{
3327 	  emit_insn (gen_rtx_SET (r2,
3328 				  plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3329 	  emit_stack_probe (r2);
3330 	}
3331 
3332       emit_insn (gen_rtx_SET (r2,
3333 			      plus_constant (Pmode, r2,
3334 					     (i - PROBE_INTERVAL) - size)));
3335       emit_stack_probe (r2);
3336     }
3337 
3338   /* Otherwise, do the same as above, but in a loop.  Note that we must be
3339      extra careful with variables wrapping around because we might be at
3340      the very top (or the very bottom) of the address space and we have
3341      to be able to handle this case properly; in particular, we use an
3342      equality test for the loop condition.  */
3343   else
3344     {
3345       HOST_WIDE_INT rounded_size;
3346 
3347       emit_move_insn (r2, GEN_INT (-first));
3348 
3349 
3350       /* Step 1: round SIZE to the previous multiple of the interval.  */
3351 
3352       rounded_size = size & -PROBE_INTERVAL;
3353 
3354 
3355       /* Step 2: compute initial and final value of the loop counter.  */
3356 
3357       /* TEST_ADDR = SP + FIRST.  */
3358       emit_insn (gen_rtx_SET (r2,
3359 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3360 
3361       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
3362       if (rounded_size > (1 << 21))
3363 	{
3364 	  emit_move_insn (r3, GEN_INT (-rounded_size));
3365 	  emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
3366 	}
3367       else
3368         emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3369 						  GEN_INT (-rounded_size))));
3370 
3371 
3372       /* Step 3: the loop
3373 
3374 	 do
3375 	   {
3376 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3377 	     probe at TEST_ADDR
3378 	   }
3379 	 while (TEST_ADDR != LAST_ADDR)
3380 
3381 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3382 	 until it is equal to ROUNDED_SIZE.  */
3383 
3384       emit_insn (gen_probe_stack_range (r2, r2, r3));
3385 
3386 
3387       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3388 	 that SIZE is equal to ROUNDED_SIZE.  */
3389 
3390       /* TEMP = SIZE - ROUNDED_SIZE.  */
3391       if (size != rounded_size)
3392 	{
3393 	  emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3394 						     rounded_size - size)));
3395 	  emit_stack_probe (r2);
3396 	}
3397     }
3398 
3399   /* Make sure nothing is scheduled before we are done.  */
3400   emit_insn (gen_blockage ());
3401 }
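
/* As an illustrative sketch of the memory-stack probes only: for a constant
   SIZE equal to three probe intervals, the code above emits roughly

	r2 = sp - (FIRST + PROBE_INTERVAL)
	probe r2			-- sp - (FIRST + 1*PROBE_INTERVAL)
	r2 = r2 - PROBE_INTERVAL
	probe r2			-- sp - (FIRST + 2*PROBE_INTERVAL)
	r2 = r2 - PROBE_INTERVAL
	probe r2			-- sp - (FIRST + SIZE)

   preceded by the backing store check and probe described above and
   followed by a scheduling barrier.  */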
3402 
3403 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
3404    absolute addresses.  */
3405 
3406 const char *
3407 output_probe_stack_range (rtx reg1, rtx reg2)
3408 {
3409   static int labelno = 0;
3410   char loop_lab[32];
3411   rtx xops[3];
3412 
3413   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
3414 
3415   /* Loop.  */
3416   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3417 
3418   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
3419   xops[0] = reg1;
3420   xops[1] = GEN_INT (-PROBE_INTERVAL);
3421   output_asm_insn ("addl %0 = %1, %0", xops);
3422   fputs ("\t;;\n", asm_out_file);
3423 
3424   /* Probe at TEST_ADDR.  */
3425   output_asm_insn ("probe.w.fault %0, 0", xops);
3426 
3427   /* Test if TEST_ADDR == LAST_ADDR.  */
3428   xops[1] = reg2;
3429   xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3430   output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3431 
3432   /* Branch.  */
3433   fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
3434   assemble_name_raw (asm_out_file, loop_lab);
3435   fputc ('\n', asm_out_file);
3436 
3437   return "";
3438 }
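
/* With REG1 = r2, REG2 = r3 and a 4 KB probe interval (assumed here purely
   for illustration), the loop emitted above looks roughly like:

	.LPSRL0:
		addl r2 = -4096, r2
		;;
		probe.w.fault r2, 0
		cmp.eq p6, p7 = r2, r3
		(p7) br.cond.dpnt .LPSRL0
   */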
3439 
3440 /* Called after register allocation to add any instructions needed for the
3441    prologue.  Using a prologue insn is favored compared to putting all of the
3442    instructions in output_function_prologue(), since it allows the scheduler
3443    to intermix instructions with the saves of the caller saved registers.  In
3444    some cases, it might be necessary to emit a barrier instruction as the last
3445    insn to prevent such scheduling.
3446 
3447    Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3448    so that the debug info generation code can handle them properly.
3449 
3450    The register save area is laid out like so:
3451    cfa+16
3452 	[ varargs spill area ]
3453 	[ fr register spill area ]
3454 	[ br register spill area ]
3455 	[ ar register spill area ]
3456 	[ pr register spill area ]
3457 	[ gr register spill area ] */
3458 
3459 /* ??? We get inefficient code when the frame size is larger than can fit in an
3460    adds instruction.  */
3461 
3462 void
3463 ia64_expand_prologue (void)
3464 {
3465   rtx_insn *insn;
3466   rtx ar_pfs_save_reg, ar_unat_save_reg;
3467   int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3468   rtx reg, alt_reg;
3469 
3470   ia64_compute_frame_size (get_frame_size ());
3471   last_scratch_gr_reg = 15;
3472 
3473   if (flag_stack_usage_info)
3474     current_function_static_stack_size = current_frame_info.total_size;
3475 
3476   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3477     {
3478       HOST_WIDE_INT size = current_frame_info.total_size;
3479       int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3480 					  + current_frame_info.n_local_regs);
3481 
3482       if (crtl->is_leaf && !cfun->calls_alloca)
3483 	{
3484 	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
3485 	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3486 					 size - STACK_CHECK_PROTECT,
3487 					 bs_size);
3488 	  else if (size + bs_size > STACK_CHECK_PROTECT)
3489 	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
3490 	}
3491       else if (size + bs_size > 0)
3492 	ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
3493     }
3494 
3495   if (dump_file)
3496     {
3497       fprintf (dump_file, "ia64 frame related registers "
3498                "recorded in current_frame_info.r[]:\n");
3499 #define PRINTREG(a) if (current_frame_info.r[a]) \
3500         fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3501       PRINTREG(reg_fp);
3502       PRINTREG(reg_save_b0);
3503       PRINTREG(reg_save_pr);
3504       PRINTREG(reg_save_ar_pfs);
3505       PRINTREG(reg_save_ar_unat);
3506       PRINTREG(reg_save_ar_lc);
3507       PRINTREG(reg_save_gp);
3508 #undef PRINTREG
3509     }
3510 
3511   /* If there is no epilogue, then we don't need some prologue insns.
3512      We need to avoid emitting the dead prologue insns, because flow
3513      will complain about them.  */
3514   if (optimize)
3515     {
3516       edge e;
3517       edge_iterator ei;
3518 
3519       FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3520 	if ((e->flags & EDGE_FAKE) == 0
3521 	    && (e->flags & EDGE_FALLTHRU) != 0)
3522 	  break;
3523       epilogue_p = (e != NULL);
3524     }
3525   else
3526     epilogue_p = 1;
3527 
3528   /* Set the local, input, and output register names.  We need to do this
3529      for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3530      half.  If we use in/loc/out register names, then we get assembler errors
3531      in crtn.S because there is no alloc insn or regstk directive in there.  */
3532   if (! TARGET_REG_NAMES)
3533     {
3534       int inputs = current_frame_info.n_input_regs;
3535       int locals = current_frame_info.n_local_regs;
3536       int outputs = current_frame_info.n_output_regs;
3537 
3538       for (i = 0; i < inputs; i++)
3539 	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3540       for (i = 0; i < locals; i++)
3541 	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3542       for (i = 0; i < outputs; i++)
3543 	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3544     }
3545 
3546   /* Set the frame pointer register name.  The regnum is logically loc79,
3547      but of course we'll not have allocated that many locals.  Rather than
3548      worrying about renumbering the existing rtxs, we adjust the name.  */
3549   /* ??? This code means that we can never use one local register when
3550      there is a frame pointer.  loc79 gets wasted in this case, as it is
3551      renamed to a register that will never be used.  See also the try_locals
3552      code in find_gr_spill.  */
3553   if (current_frame_info.r[reg_fp])
3554     {
3555       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3556       reg_names[HARD_FRAME_POINTER_REGNUM]
3557 	= reg_names[current_frame_info.r[reg_fp]];
3558       reg_names[current_frame_info.r[reg_fp]] = tmp;
3559     }
3560 
3561   /* We don't need an alloc instruction if we've used no outputs or locals.  */
3562   if (current_frame_info.n_local_regs == 0
3563       && current_frame_info.n_output_regs == 0
3564       && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3565       && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3566     {
3567       /* If there is no alloc, but there are input registers used, then we
3568 	 need a .regstk directive.  */
3569       current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3570       ar_pfs_save_reg = NULL_RTX;
3571     }
3572   else
3573     {
3574       current_frame_info.need_regstk = 0;
3575 
3576       if (current_frame_info.r[reg_save_ar_pfs])
3577         {
3578 	  regno = current_frame_info.r[reg_save_ar_pfs];
3579 	  reg_emitted (reg_save_ar_pfs);
3580 	}
3581       else
3582 	regno = next_scratch_gr_reg ();
3583       ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3584 
3585       insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3586 				   GEN_INT (current_frame_info.n_input_regs),
3587 				   GEN_INT (current_frame_info.n_local_regs),
3588 				   GEN_INT (current_frame_info.n_output_regs),
3589 				   GEN_INT (current_frame_info.n_rotate_regs)));
3590       if (current_frame_info.r[reg_save_ar_pfs])
3591 	{
3592 	  RTX_FRAME_RELATED_P (insn) = 1;
3593 	  add_reg_note (insn, REG_CFA_REGISTER,
3594 			gen_rtx_SET (ar_pfs_save_reg,
3595 				     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3596 	}
3597     }
3598 
3599   /* Set up frame pointer, stack pointer, and spill iterators.  */
3600 
3601   n_varargs = cfun->machine->n_varargs;
3602   setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3603 			stack_pointer_rtx, 0);
3604 
3605   if (frame_pointer_needed)
3606     {
3607       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3608       RTX_FRAME_RELATED_P (insn) = 1;
3609 
3610       /* Force the unwind info to recognize this as defining a new CFA,
3611 	 rather than some temp register setup.  */
3612       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3613     }
3614 
3615   if (current_frame_info.total_size != 0)
3616     {
3617       rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3618       rtx offset;
3619 
3620       if (satisfies_constraint_I (frame_size_rtx))
3621 	offset = frame_size_rtx;
3622       else
3623 	{
3624 	  regno = next_scratch_gr_reg ();
3625 	  offset = gen_rtx_REG (DImode, regno);
3626 	  emit_move_insn (offset, frame_size_rtx);
3627 	}
3628 
3629       insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3630 				    stack_pointer_rtx, offset));
3631 
3632       if (! frame_pointer_needed)
3633 	{
3634 	  RTX_FRAME_RELATED_P (insn) = 1;
3635 	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
3636 			gen_rtx_SET (stack_pointer_rtx,
3637 				     gen_rtx_PLUS (DImode,
3638 						   stack_pointer_rtx,
3639 						   frame_size_rtx)));
3640 	}
3641 
3642       /* ??? At this point we must generate a magic insn that appears to
3643 	 modify the stack pointer, the frame pointer, and all spill
3644 	 iterators.  This would allow the most scheduling freedom.  For
3645 	 now, just hard stop.  */
3646       emit_insn (gen_blockage ());
3647     }
3648 
3649   /* Must copy out ar.unat before doing any integer spills.  */
3650   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3651     {
3652       if (current_frame_info.r[reg_save_ar_unat])
3653         {
3654 	  ar_unat_save_reg
3655 	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3656 	  reg_emitted (reg_save_ar_unat);
3657 	}
3658       else
3659 	{
3660 	  alt_regno = next_scratch_gr_reg ();
3661 	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3662 	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3663 	}
3664 
3665       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3666       insn = emit_move_insn (ar_unat_save_reg, reg);
3667       if (current_frame_info.r[reg_save_ar_unat])
3668 	{
3669 	  RTX_FRAME_RELATED_P (insn) = 1;
3670 	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3671 	}
3672 
3673       /* Even if we're not going to generate an epilogue, we still
3674 	 need to save the register so that EH works.  */
3675       if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3676 	emit_insn (gen_prologue_use (ar_unat_save_reg));
3677     }
3678   else
3679     ar_unat_save_reg = NULL_RTX;
3680 
3681   /* Spill all varargs registers.  Do this before spilling any GR registers,
3682      since we want the UNAT bits for the GR registers to override the UNAT
3683      bits from varargs, which we don't care about.  */
3684 
3685   cfa_off = -16;
3686   for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3687     {
3688       reg = gen_rtx_REG (DImode, regno);
3689       do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3690     }
3691 
3692   /* Locate the bottom of the register save area.  */
3693   cfa_off = (current_frame_info.spill_cfa_off
3694 	     + current_frame_info.spill_size
3695 	     + current_frame_info.extra_spill_size);
3696 
3697   /* Save the predicate register block either in a register or in memory.  */
3698   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3699     {
3700       reg = gen_rtx_REG (DImode, PR_REG (0));
3701       if (current_frame_info.r[reg_save_pr] != 0)
3702 	{
3703 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3704 	  reg_emitted (reg_save_pr);
3705 	  insn = emit_move_insn (alt_reg, reg);
3706 
3707 	  /* ??? Denote pr spill/fill by a DImode move that modifies all
3708 	     64 hard registers.  */
3709 	  RTX_FRAME_RELATED_P (insn) = 1;
3710 	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3711 
3712 	  /* Even if we're not going to generate an epilogue, we still
3713 	     need to save the register so that EH works.  */
3714 	  if (! epilogue_p)
3715 	    emit_insn (gen_prologue_use (alt_reg));
3716 	}
3717       else
3718 	{
3719 	  alt_regno = next_scratch_gr_reg ();
3720 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3721 	  insn = emit_move_insn (alt_reg, reg);
3722 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3723 	  cfa_off -= 8;
3724 	}
3725     }
3726 
3727   /* Handle AR regs in numerical order.  All of them get special handling.  */
3728   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3729       && current_frame_info.r[reg_save_ar_unat] == 0)
3730     {
3731       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3732       do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3733       cfa_off -= 8;
3734     }
3735 
3736   /* The alloc insn already copied ar.pfs into a general register.  The
3737      only thing we have to do now is copy that register to a stack slot
3738      if we'd not allocated a local register for the job.  */
3739   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3740       && current_frame_info.r[reg_save_ar_pfs] == 0)
3741     {
3742       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3743       do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3744       cfa_off -= 8;
3745     }
3746 
3747   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3748     {
3749       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3750       if (current_frame_info.r[reg_save_ar_lc] != 0)
3751 	{
3752 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3753 	  reg_emitted (reg_save_ar_lc);
3754 	  insn = emit_move_insn (alt_reg, reg);
3755 	  RTX_FRAME_RELATED_P (insn) = 1;
3756 	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3757 
3758 	  /* Even if we're not going to generate an epilogue, we still
3759 	     need to save the register so that EH works.  */
3760 	  if (! epilogue_p)
3761 	    emit_insn (gen_prologue_use (alt_reg));
3762 	}
3763       else
3764 	{
3765 	  alt_regno = next_scratch_gr_reg ();
3766 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3767 	  emit_move_insn (alt_reg, reg);
3768 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3769 	  cfa_off -= 8;
3770 	}
3771     }
3772 
3773   /* Save the return pointer.  */
3774   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3775     {
3776       reg = gen_rtx_REG (DImode, BR_REG (0));
3777       if (current_frame_info.r[reg_save_b0] != 0)
3778 	{
3779           alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3780           reg_emitted (reg_save_b0);
3781 	  insn = emit_move_insn (alt_reg, reg);
3782 	  RTX_FRAME_RELATED_P (insn) = 1;
3783 	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
3784 
3785 	  /* Even if we're not going to generate an epilogue, we still
3786 	     need to save the register so that EH works.  */
3787 	  if (! epilogue_p)
3788 	    emit_insn (gen_prologue_use (alt_reg));
3789 	}
3790       else
3791 	{
3792 	  alt_regno = next_scratch_gr_reg ();
3793 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3794 	  emit_move_insn (alt_reg, reg);
3795 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3796 	  cfa_off -= 8;
3797 	}
3798     }
3799 
3800   if (current_frame_info.r[reg_save_gp])
3801     {
3802       reg_emitted (reg_save_gp);
3803       insn = emit_move_insn (gen_rtx_REG (DImode,
3804 					  current_frame_info.r[reg_save_gp]),
3805 			     pic_offset_table_rtx);
3806     }
3807 
3808   /* We should now be at the base of the gr/br/fr spill area.  */
3809   gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3810 			  + current_frame_info.spill_size));
3811 
3812   /* Spill all general registers.  */
3813   for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3814     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3815       {
3816 	reg = gen_rtx_REG (DImode, regno);
3817 	do_spill (gen_gr_spill, reg, cfa_off, reg);
3818 	cfa_off -= 8;
3819       }
3820 
3821   /* Spill the rest of the BR registers.  */
3822   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3823     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3824       {
3825 	alt_regno = next_scratch_gr_reg ();
3826 	alt_reg = gen_rtx_REG (DImode, alt_regno);
3827 	reg = gen_rtx_REG (DImode, regno);
3828 	emit_move_insn (alt_reg, reg);
3829 	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3830 	cfa_off -= 8;
3831       }
3832 
3833   /* Align the frame and spill all FR registers.  */
3834   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3835     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3836       {
3837         gcc_assert (!(cfa_off & 15));
3838 	reg = gen_rtx_REG (XFmode, regno);
3839 	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3840 	cfa_off -= 16;
3841       }
3842 
3843   gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3844 
3845   finish_spill_pointers ();
3846 }
3847 
3848 /* Output the textual info surrounding the prologue.  */
3849 
3850 void
3851 ia64_start_function (FILE *file, const char *fnname,
3852 		     tree decl ATTRIBUTE_UNUSED)
3853 {
3854 #if TARGET_ABI_OPEN_VMS
3855   vms_start_function (fnname);
3856 #endif
3857 
3858   fputs ("\t.proc ", file);
3859   assemble_name (file, fnname);
3860   fputc ('\n', file);
3861   ASM_OUTPUT_LABEL (file, fnname);
3862 }
3863 
3864 /* Called after register allocation to add any instructions needed for the
3865    epilogue.  Using an epilogue insn is favored compared to putting all of the
3866    instructions in output_function_epilogue(), since it allows the scheduler
3867    to intermix instructions with the restores of the caller saved registers.  In
3868    some cases, it might be necessary to emit a barrier instruction as the last
3869    insn to prevent such scheduling.  */
3870 
3871 void
3872 ia64_expand_epilogue (int sibcall_p)
3873 {
3874   rtx_insn *insn;
3875   rtx reg, alt_reg, ar_unat_save_reg;
3876   int regno, alt_regno, cfa_off;
3877 
3878   ia64_compute_frame_size (get_frame_size ());
3879 
3880   /* If there is a frame pointer, then we use it instead of the stack
3881      pointer, so that the stack pointer does not need to be valid when
3882      the epilogue starts.  See EXIT_IGNORE_STACK.  */
3883   if (frame_pointer_needed)
3884     setup_spill_pointers (current_frame_info.n_spilled,
3885 			  hard_frame_pointer_rtx, 0);
3886   else
3887     setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3888 			  current_frame_info.total_size);
3889 
3890   if (current_frame_info.total_size != 0)
3891     {
3892       /* ??? At this point we must generate a magic insn that appears to
3893          modify the spill iterators and the frame pointer.  This would
3894 	 allow the most scheduling freedom.  For now, just hard stop.  */
3895       emit_insn (gen_blockage ());
3896     }
3897 
3898   /* Locate the bottom of the register save area.  */
3899   cfa_off = (current_frame_info.spill_cfa_off
3900 	     + current_frame_info.spill_size
3901 	     + current_frame_info.extra_spill_size);
3902 
3903   /* Restore the predicate registers.  */
3904   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3905     {
3906       if (current_frame_info.r[reg_save_pr] != 0)
3907         {
3908 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3909 	  reg_emitted (reg_save_pr);
3910 	}
3911       else
3912 	{
3913 	  alt_regno = next_scratch_gr_reg ();
3914 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3915 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3916 	  cfa_off -= 8;
3917 	}
3918       reg = gen_rtx_REG (DImode, PR_REG (0));
3919       emit_move_insn (reg, alt_reg);
3920     }
3921 
3922   /* Restore the application registers.  */
3923 
3924   /* Load the saved unat from the stack, but do not restore it until
3925      after the GRs have been restored.  */
3926   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3927     {
3928       if (current_frame_info.r[reg_save_ar_unat] != 0)
3929         {
3930           ar_unat_save_reg
3931 	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3932 	  reg_emitted (reg_save_ar_unat);
3933 	}
3934       else
3935 	{
3936 	  alt_regno = next_scratch_gr_reg ();
3937 	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3938 	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3939 	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3940 	  cfa_off -= 8;
3941 	}
3942     }
3943   else
3944     ar_unat_save_reg = NULL_RTX;
3945 
3946   if (current_frame_info.r[reg_save_ar_pfs] != 0)
3947     {
3948       reg_emitted (reg_save_ar_pfs);
3949       alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3950       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3951       emit_move_insn (reg, alt_reg);
3952     }
3953   else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3954     {
3955       alt_regno = next_scratch_gr_reg ();
3956       alt_reg = gen_rtx_REG (DImode, alt_regno);
3957       do_restore (gen_movdi_x, alt_reg, cfa_off);
3958       cfa_off -= 8;
3959       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3960       emit_move_insn (reg, alt_reg);
3961     }
3962 
3963   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3964     {
3965       if (current_frame_info.r[reg_save_ar_lc] != 0)
3966         {
3967 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3968           reg_emitted (reg_save_ar_lc);
3969 	}
3970       else
3971 	{
3972 	  alt_regno = next_scratch_gr_reg ();
3973 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3974 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3975 	  cfa_off -= 8;
3976 	}
3977       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3978       emit_move_insn (reg, alt_reg);
3979     }
3980 
3981   /* Restore the return pointer.  */
3982   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3983     {
3984       if (current_frame_info.r[reg_save_b0] != 0)
3985         {
3986          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3987          reg_emitted (reg_save_b0);
3988         }
3989       else
3990 	{
3991 	  alt_regno = next_scratch_gr_reg ();
3992 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3993 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3994 	  cfa_off -= 8;
3995 	}
3996       reg = gen_rtx_REG (DImode, BR_REG (0));
3997       emit_move_insn (reg, alt_reg);
3998     }
3999 
4000   /* We should now be at the base of the gr/br/fr spill area.  */
4001   gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4002 			  + current_frame_info.spill_size));
4003 
4004   /* The GP may be stored on the stack in the prologue, but it's
4005      never restored in the epilogue.  Skip the stack slot.  */
4006   if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4007     cfa_off -= 8;
4008 
4009   /* Restore all general registers.  */
4010   for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4011     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4012       {
4013 	reg = gen_rtx_REG (DImode, regno);
4014 	do_restore (gen_gr_restore, reg, cfa_off);
4015 	cfa_off -= 8;
4016       }
4017 
4018   /* Restore the branch registers.  */
4019   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4020     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4021       {
4022 	alt_regno = next_scratch_gr_reg ();
4023 	alt_reg = gen_rtx_REG (DImode, alt_regno);
4024 	do_restore (gen_movdi_x, alt_reg, cfa_off);
4025 	cfa_off -= 8;
4026 	reg = gen_rtx_REG (DImode, regno);
4027 	emit_move_insn (reg, alt_reg);
4028       }
4029 
4030   /* Restore floating point registers.  */
4031   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4032     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4033       {
4034         gcc_assert (!(cfa_off & 15));
4035 	reg = gen_rtx_REG (XFmode, regno);
4036 	do_restore (gen_fr_restore_x, reg, cfa_off);
4037 	cfa_off -= 16;
4038       }
4039 
4040   /* Restore ar.unat for real.  */
4041   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4042     {
4043       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4044       emit_move_insn (reg, ar_unat_save_reg);
4045     }
4046 
4047   gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4048 
4049   finish_spill_pointers ();
4050 
4051   if (current_frame_info.total_size
4052       || cfun->machine->ia64_eh_epilogue_sp
4053       || frame_pointer_needed)
4054     {
4055       /* ??? At this point we must generate a magic insn that appears to
4056          modify the spill iterators, the stack pointer, and the frame
4057 	 pointer.  This would allow the most scheduling freedom.  For now,
4058 	 just hard stop.  */
4059       emit_insn (gen_blockage ());
4060     }
4061 
4062   if (cfun->machine->ia64_eh_epilogue_sp)
4063     emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4064   else if (frame_pointer_needed)
4065     {
4066       insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4067       RTX_FRAME_RELATED_P (insn) = 1;
4068       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4069     }
4070   else if (current_frame_info.total_size)
4071     {
4072       rtx offset, frame_size_rtx;
4073 
4074       frame_size_rtx = GEN_INT (current_frame_info.total_size);
4075       if (satisfies_constraint_I (frame_size_rtx))
4076 	offset = frame_size_rtx;
4077       else
4078 	{
4079 	  regno = next_scratch_gr_reg ();
4080 	  offset = gen_rtx_REG (DImode, regno);
4081 	  emit_move_insn (offset, frame_size_rtx);
4082 	}
4083 
4084       insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4085 				    offset));
4086 
4087       RTX_FRAME_RELATED_P (insn) = 1;
4088       add_reg_note (insn, REG_CFA_ADJUST_CFA,
4089 		    gen_rtx_SET (stack_pointer_rtx,
4090 				 gen_rtx_PLUS (DImode,
4091 					       stack_pointer_rtx,
4092 					       frame_size_rtx)));
4093     }
4094 
4095   if (cfun->machine->ia64_eh_epilogue_bsp)
4096     emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4097 
4098   if (! sibcall_p)
4099     emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4100   else
4101     {
4102       int fp = GR_REG (2);
4103       /* We need a throw-away register here; r0 and r1 are reserved,
4104	 so r2 is the first available call-clobbered register.  If
4105 	 there was a frame_pointer register, we may have swapped the
4106 	 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4107 	 sure we're using the string "r2" when emitting the register
4108 	 name for the assembler.  */
4109       if (current_frame_info.r[reg_fp]
4110           && current_frame_info.r[reg_fp] == GR_REG (2))
4111 	fp = HARD_FRAME_POINTER_REGNUM;
4112 
4113       /* We must emit an alloc to force the input registers to become output
4114 	 registers.  Otherwise, if the callee tries to pass its parameters
4115 	 through to another call without an intervening alloc, then these
4116 	 values get lost.  */
4117       /* ??? We don't need to preserve all input registers.  We only need to
4118 	 preserve those input registers used as arguments to the sibling call.
4119 	 It is unclear how to compute that number here.  */
4120       if (current_frame_info.n_input_regs != 0)
4121 	{
4122 	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4123 
4124 	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4125 				const0_rtx, const0_rtx,
4126 				n_inputs, const0_rtx));
4127 	  RTX_FRAME_RELATED_P (insn) = 1;
4128 
4129 	  /* ??? We need to mark the alloc as frame-related so that it gets
4130 	     passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4131 	     But there's nothing dwarf2 related to be done wrt the register
4132 	     windows.  If we do nothing, dwarf2out will abort on the UNSPEC;
4133 	     the empty parallel means dwarf2out will not see anything.  */
4134 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4135 			gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4136 	}
4137     }
4138 }
4139 
4140 /* Return 1 if br.ret can do all the work required to return from a
4141    function.  */
4142 
4143 int
4144 ia64_direct_return (void)
4145 {
4146   if (reload_completed && ! frame_pointer_needed)
4147     {
4148       ia64_compute_frame_size (get_frame_size ());
4149 
4150       return (current_frame_info.total_size == 0
4151 	      && current_frame_info.n_spilled == 0
4152 	      && current_frame_info.r[reg_save_b0] == 0
4153 	      && current_frame_info.r[reg_save_pr] == 0
4154 	      && current_frame_info.r[reg_save_ar_pfs] == 0
4155 	      && current_frame_info.r[reg_save_ar_unat] == 0
4156 	      && current_frame_info.r[reg_save_ar_lc] == 0);
4157     }
4158   return 0;
4159 }
4160 
4161 /* Return the magic cookie that we use to hold the return address
4162    during early compilation.  */
4163 
4164 rtx
4165 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4166 {
4167   if (count != 0)
4168     return NULL;
4169   return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4170 }
4171 
4172 /* Split this value after reload, now that we know where the return
4173    address is saved.  */
4174 
4175 void
4176 ia64_split_return_addr_rtx (rtx dest)
4177 {
4178   rtx src;
4179 
4180   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4181     {
4182       if (current_frame_info.r[reg_save_b0] != 0)
4183         {
4184 	  src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4185 	  reg_emitted (reg_save_b0);
4186 	}
4187       else
4188 	{
4189 	  HOST_WIDE_INT off;
4190 	  unsigned int regno;
4191 	  rtx off_r;
4192 
4193 	  /* Compute offset from CFA for BR0.  */
4194 	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
4195 	  off = (current_frame_info.spill_cfa_off
4196 		 + current_frame_info.spill_size);
4197 	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4198 	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4199 	      off -= 8;
4200 
4201 	  /* Convert CFA offset to a register based offset.  */
4202 	  if (frame_pointer_needed)
4203 	    src = hard_frame_pointer_rtx;
4204 	  else
4205 	    {
4206 	      src = stack_pointer_rtx;
4207 	      off += current_frame_info.total_size;
4208 	    }
4209 
4210 	  /* Load address into scratch register.  */
4211 	  off_r = GEN_INT (off);
4212 	  if (satisfies_constraint_I (off_r))
4213 	    emit_insn (gen_adddi3 (dest, src, off_r));
4214 	  else
4215 	    {
4216 	      emit_move_insn (dest, off_r);
4217 	      emit_insn (gen_adddi3 (dest, src, dest));
4218 	    }
4219 
4220 	  src = gen_rtx_MEM (Pmode, dest);
4221 	}
4222     }
4223   else
4224     src = gen_rtx_REG (DImode, BR_REG (0));
4225 
4226   emit_move_insn (dest, src);
4227 }
4228 
4229 int
4230 ia64_hard_regno_rename_ok (int from, int to)
4231 {
4232   /* Don't clobber any of the registers we reserved for the prologue.  */
4233   unsigned int r;
4234 
4235   for (r = reg_fp; r <= reg_save_ar_lc; r++)
4236     if (to == current_frame_info.r[r]
4237         || from == current_frame_info.r[r]
4238         || to == emitted_frame_related_regs[r]
4239         || from == emitted_frame_related_regs[r])
4240       return 0;
4241 
4242   /* Don't use output registers outside the register frame.  */
4243   if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4244     return 0;
4245 
4246   /* Retain even/oddness on predicate register pairs.  */
4247   if (PR_REGNO_P (from) && PR_REGNO_P (to))
4248     return (from & 1) == (to & 1);
4249 
4250   return 1;
4251 }
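
/* For illustration: renaming p2 to p4 is allowed, while renaming p2 to p5
   is rejected, since the rename must preserve whether the predicate
   register number is even or odd.  */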
4252 
4253 /* Target hook for assembling integer objects.  Handle word-sized
4254    aligned objects and detect the cases when @fptr is needed.  */
4255 
4256 static bool
4257 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4258 {
4259   if (size == POINTER_SIZE / BITS_PER_UNIT
4260       && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4261       && GET_CODE (x) == SYMBOL_REF
4262       && SYMBOL_REF_FUNCTION_P (x))
4263     {
4264       static const char * const directive[2][2] = {
4265 	  /* 64-bit pointer */  /* 32-bit pointer */
4266 	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
4267 	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
4268       };
4269       fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4270       output_addr_const (asm_out_file, x);
4271       fputs (")\n", asm_out_file);
4272       return true;
4273     }
4274   return default_assemble_integer (x, size, aligned_p);
4275 }
4276 
4277 /* Emit the function prologue.  */
4278 
4279 static void
4280 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4281 {
4282   int mask, grsave, grsave_prev;
4283 
4284   if (current_frame_info.need_regstk)
4285     fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4286 	     current_frame_info.n_input_regs,
4287 	     current_frame_info.n_local_regs,
4288 	     current_frame_info.n_output_regs,
4289 	     current_frame_info.n_rotate_regs);
4290 
4291   if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4292     return;
4293 
4294   /* Emit the .prologue directive.  */
4295 
4296   mask = 0;
4297   grsave = grsave_prev = 0;
4298   if (current_frame_info.r[reg_save_b0] != 0)
4299     {
4300       mask |= 8;
4301       grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4302     }
4303   if (current_frame_info.r[reg_save_ar_pfs] != 0
4304       && (grsave_prev == 0
4305 	  || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4306     {
4307       mask |= 4;
4308       if (grsave_prev == 0)
4309 	grsave = current_frame_info.r[reg_save_ar_pfs];
4310       grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4311     }
4312   if (current_frame_info.r[reg_fp] != 0
4313       && (grsave_prev == 0
4314 	  || current_frame_info.r[reg_fp] == grsave_prev + 1))
4315     {
4316       mask |= 2;
4317       if (grsave_prev == 0)
4318 	grsave = HARD_FRAME_POINTER_REGNUM;
4319       grsave_prev = current_frame_info.r[reg_fp];
4320     }
4321   if (current_frame_info.r[reg_save_pr] != 0
4322       && (grsave_prev == 0
4323 	  || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4324     {
4325       mask |= 1;
4326       if (grsave_prev == 0)
4327 	grsave = current_frame_info.r[reg_save_pr];
4328     }
4329 
4330   if (mask && TARGET_GNU_AS)
4331     fprintf (file, "\t.prologue %d, %d\n", mask,
4332 	     ia64_dbx_register_number (grsave));
4333   else
4334     fputs ("\t.prologue\n", file);
4335 
4336   /* Emit a .spill directive, if necessary, to relocate the base of
4337      the register spill area.  */
4338   if (current_frame_info.spill_cfa_off != -16)
4339     fprintf (file, "\t.spill %ld\n",
4340 	     (long) (current_frame_info.spill_cfa_off
4341 		     + current_frame_info.spill_size));
4342 }
4343 
4344 /* Emit the .body directive at the scheduled end of the prologue.  */
4345 
4346 static void
4347 ia64_output_function_end_prologue (FILE *file)
4348 {
4349   if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4350     return;
4351 
4352   fputs ("\t.body\n", file);
4353 }
4354 
4355 /* Emit the function epilogue.  */
4356 
4357 static void
4358 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4359 			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4360 {
4361   int i;
4362 
4363   if (current_frame_info.r[reg_fp])
4364     {
4365       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4366       reg_names[HARD_FRAME_POINTER_REGNUM]
4367 	= reg_names[current_frame_info.r[reg_fp]];
4368       reg_names[current_frame_info.r[reg_fp]] = tmp;
4369       reg_emitted (reg_fp);
4370     }
4371   if (! TARGET_REG_NAMES)
4372     {
4373       for (i = 0; i < current_frame_info.n_input_regs; i++)
4374 	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4375       for (i = 0; i < current_frame_info.n_local_regs; i++)
4376 	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4377       for (i = 0; i < current_frame_info.n_output_regs; i++)
4378 	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4379     }
4380 
4381   current_frame_info.initialized = 0;
4382 }
4383 
4384 int
4385 ia64_dbx_register_number (int regno)
4386 {
4387   /* In ia64_expand_prologue we quite literally renamed the frame pointer
4388      from its home at loc79 to something inside the register frame.  We
4389      must perform the same renumbering here for the debug info.  */
4390   if (current_frame_info.r[reg_fp])
4391     {
4392       if (regno == HARD_FRAME_POINTER_REGNUM)
4393 	regno = current_frame_info.r[reg_fp];
4394       else if (regno == current_frame_info.r[reg_fp])
4395 	regno = HARD_FRAME_POINTER_REGNUM;
4396     }
4397 
4398   if (IN_REGNO_P (regno))
4399     return 32 + regno - IN_REG (0);
4400   else if (LOC_REGNO_P (regno))
4401     return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4402   else if (OUT_REGNO_P (regno))
4403     return (32 + current_frame_info.n_input_regs
4404 	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
4405   else
4406     return regno;
4407 }
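
/* For illustration, with 2 input and 3 local registers and no renamed
   frame pointer: in1 maps to debug register 33, loc0 to 32 + 2 = 34,
   and out0 to 32 + 2 + 3 = 37.  */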
4408 
4409 /* Implement TARGET_TRAMPOLINE_INIT.
4410 
4411    The trampoline should set the static chain pointer to value placed
4412    into the trampoline and should branch to the specified routine.
4413    To make the normal indirect-subroutine calling convention work,
4414    the trampoline must look like a function descriptor; the first
4415    word being the target address and the second being the target's
4416    global pointer.
4417 
4418    We abuse the concept of a global pointer by arranging for it
4419    to point to the data we need to load.  The complete trampoline
4420    has the following form:
4421 
4422 		+-------------------+ \
4423 	TRAMP:	| __ia64_trampoline | |
4424 		+-------------------+  > fake function descriptor
4425 		| TRAMP+16          | |
4426 		+-------------------+ /
4427 		| target descriptor |
4428 		+-------------------+
4429 		| static link	    |
4430 		+-------------------+
4431 */
4432 
4433 static void
4434 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4435 {
4436   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4437   rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4438 
4439   /* The Intel assembler requires that the global __ia64_trampoline symbol
4440      be declared explicitly.  */
4441   if (!TARGET_GNU_AS)
4442     {
4443       static bool declared_ia64_trampoline = false;
4444 
4445       if (!declared_ia64_trampoline)
4446 	{
4447 	  declared_ia64_trampoline = true;
4448 	  (*targetm.asm_out.globalize_label) (asm_out_file,
4449 					      "__ia64_trampoline");
4450 	}
4451     }
4452 
4453   /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4454   addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4455   fnaddr = convert_memory_address (Pmode, fnaddr);
4456   static_chain = convert_memory_address (Pmode, static_chain);
4457 
4458   /* Load up our iterator.  */
4459   addr_reg = copy_to_reg (addr);
4460   m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4461 
4462   /* The first two words are the fake descriptor:
4463      __ia64_trampoline, ADDR+16.  */
4464   tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4465   if (TARGET_ABI_OPEN_VMS)
4466     {
4467       /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4468 	 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4469 	 relocation against function symbols to make it identical to the
4470 	 LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
4471 	 strict ELF and dereference to get the bare code address.  */
4472       rtx reg = gen_reg_rtx (Pmode);
4473       SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4474       emit_move_insn (reg, tramp);
4475       emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4476       tramp = reg;
4477    }
4478   emit_move_insn (m_tramp, tramp);
4479   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4480   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4481 
4482   emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4483   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4484   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4485 
4486   /* The third word is the target descriptor.  */
4487   emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4488   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4489   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4490 
4491   /* The fourth word is the static chain.  */
4492   emit_move_insn (m_tramp, static_chain);
4493 }
4494 
4495 /* Do any needed setup for a variadic function.  CUM has not been updated
4496    for the last named argument which has type TYPE and mode MODE.
4497 
4498    We generate the actual spill instructions during prologue generation.  */
4499 
4500 static void
4501 ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4502 			     tree type, int * pretend_size,
4503 			     int second_time ATTRIBUTE_UNUSED)
4504 {
4505   CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4506 
4507   /* Skip the current argument.  */
4508   ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4509 
4510   if (next_cum.words < MAX_ARGUMENT_SLOTS)
4511     {
4512       int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4513       *pretend_size = n * UNITS_PER_WORD;
4514       cfun->machine->n_varargs = n;
4515     }
4516 }
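
/* For illustration: assuming MAX_ARGUMENT_SLOTS == 8 and an 8-byte
   UNITS_PER_WORD, a variadic function whose named arguments occupy a
   single slot ends up with next_cum.words == 1, so *pretend_size becomes
   7 * 8 = 56 bytes and the prologue spills the seven remaining argument
   registers (see the varargs spill loop in ia64_expand_prologue).  */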
4517 
4518 /* Check whether TYPE is a homogeneous floating point aggregate.  If
4519    it is, return the mode of the floating point type that appears
4520    in all leaves.  If it is not, return VOIDmode.
4521 
4522    An aggregate is a homogeneous floating point aggregate if all
4523    fields/elements in it have the same floating point type (e.g.,
4524    SFmode).  128-bit quad-precision floats are excluded.
4525 
4526    Variable sized aggregates should never arrive here, since we should
4527    have already decided to pass them by reference.  Top-level zero-sized
4528    aggregates are excluded because our parallels crash the middle-end.  */
4529 
4530 static machine_mode
4531 hfa_element_mode (const_tree type, bool nested)
4532 {
4533   machine_mode element_mode = VOIDmode;
4534   machine_mode mode;
4535   enum tree_code code = TREE_CODE (type);
4536   int know_element_mode = 0;
4537   tree t;
4538 
4539   if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4540     return VOIDmode;
4541 
4542   switch (code)
4543     {
4544     case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
4545     case BOOLEAN_TYPE:	case POINTER_TYPE:
4546     case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
4547     case LANG_TYPE:		case FUNCTION_TYPE:
4548       return VOIDmode;
4549 
4550       /* Fortran complex types are supposed to be HFAs, so we need to handle
4551 	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
4552 	 types though.  */
4553     case COMPLEX_TYPE:
4554       if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4555 	  && TYPE_MODE (type) != TCmode)
4556 	return GET_MODE_INNER (TYPE_MODE (type));
4557       else
4558 	return VOIDmode;
4559 
4560     case REAL_TYPE:
4561       /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4562 	 mode if this is contained within an aggregate.  */
4563       if (nested && TYPE_MODE (type) != TFmode)
4564 	return TYPE_MODE (type);
4565       else
4566 	return VOIDmode;
4567 
4568     case ARRAY_TYPE:
4569       return hfa_element_mode (TREE_TYPE (type), 1);
4570 
4571     case RECORD_TYPE:
4572     case UNION_TYPE:
4573     case QUAL_UNION_TYPE:
4574       for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4575 	{
4576 	  if (TREE_CODE (t) != FIELD_DECL)
4577 	    continue;
4578 
4579 	  mode = hfa_element_mode (TREE_TYPE (t), 1);
4580 	  if (know_element_mode)
4581 	    {
4582 	      if (mode != element_mode)
4583 		return VOIDmode;
4584 	    }
4585 	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4586 	    return VOIDmode;
4587 	  else
4588 	    {
4589 	      know_element_mode = 1;
4590 	      element_mode = mode;
4591 	    }
4592 	}
4593       return element_mode;
4594 
4595     default:
4596       /* If we reach here, we probably have some front-end specific type
4597 	 that the backend doesn't know about.  This can happen via the
4598 	 aggregate_value_p call in init_function_start.  All we can do is
4599 	 ignore unknown tree types.  */
4600       return VOIDmode;
4601     }
4602 
4603   return VOIDmode;
4604 }
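
/* For illustration, under the rules above:

	struct { float x, y, z; }          -> SFmode  (HFA of three floats)
	struct { double d; double e[2]; }  -> DFmode
	_Complex double                    -> DFmode
	struct { float f; double d; }      -> VOIDmode (mixed element types)
   */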
4605 
4606 /* Return the number of words required to hold a quantity of TYPE and MODE
4607    when passed as an argument.  */
4608 static int
4609 ia64_function_arg_words (const_tree type, machine_mode mode)
4610 {
4611   int words;
4612 
4613   if (mode == BLKmode)
4614     words = int_size_in_bytes (type);
4615   else
4616     words = GET_MODE_SIZE (mode);
4617 
4618   return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
4619 }
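
/* E.g. a 12-byte BLKmode aggregate occupies (12 + 7) / 8 = 2 argument
   words, assuming the usual 8-byte UNITS_PER_WORD.  */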
4620 
4621 /* Return the number of registers that should be skipped so the current
4622    argument (described by TYPE and WORDS) will be properly aligned.
4623 
4624    Integer and float arguments larger than 8 bytes start at the next
4625    even boundary.  Aggregates larger than 8 bytes start at the next
4626    even boundary if the aggregate has 16 byte alignment.  Note that
4627    in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4628    but are still to be aligned in registers.
4629 
4630    ??? The ABI does not specify how to handle aggregates with
4631    alignment from 9 to 15 bytes, or greater than 16.  We handle them
4632    all as if they had 16 byte alignment.  Such aggregates can occur
4633    only if gcc extensions are used.  */
4634 static int
4635 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4636 			  const_tree type, int words)
4637 {
4638   /* No registers are skipped on VMS.  */
4639   if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4640     return 0;
4641 
4642   if (type
4643       && TREE_CODE (type) != INTEGER_TYPE
4644       && TREE_CODE (type) != REAL_TYPE)
4645     return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4646   else
4647     return words > 1;
4648 }
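
/* For illustration: a __int128 argument (two words) arriving when
   cum->words is odd yields an offset of 1, so it starts on an even
   argument slot; a 16-byte aligned aggregate is treated the same way.  */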
4649 
4650 /* Return rtx for register where argument is passed, or zero if it is passed
4651    on the stack.  */
4652 /* ??? 128-bit quad-precision floats are always passed in general
4653    registers.  */
4654 
4655 static rtx
4656 ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
4657 		     const_tree type, bool named, bool incoming)
4658 {
4659   const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4660 
4661   int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4662   int words = ia64_function_arg_words (type, mode);
4663   int offset = ia64_function_arg_offset (cum, type, words);
4664   machine_mode hfa_mode = VOIDmode;
4665 
4666   /* For OpenVMS, emit the instruction setting up the argument register here,
4667      when we know it will be emitted together with the other argument setup
4668      insns.  This is not conceptually the best place to do this, but it is
4669      the easiest as we have convenient access to cumulative args info.  */
4670 
4671   if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4672       && named == 1)
4673     {
4674       unsigned HOST_WIDE_INT regval = cum->words;
4675       int i;
4676 
4677       for (i = 0; i < 8; i++)
4678 	regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4679 
4680       emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4681 		      GEN_INT (regval));
4682     }
4683 
4684   /* If all argument slots are used, then it must go on the stack.  */
4685   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4686     return 0;
4687 
4688   /* On OpenVMS argument is either in Rn or Fn.  */
4689   if (TARGET_ABI_OPEN_VMS)
4690     {
4691       if (FLOAT_MODE_P (mode))
4692 	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4693       else
4694 	return gen_rtx_REG (mode, basereg + cum->words);
4695     }
4696 
4697   /* Check for and handle homogeneous FP aggregates.  */
4698   if (type)
4699     hfa_mode = hfa_element_mode (type, 0);
4700 
4701   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4702      and unprototyped hfas are passed specially.  */
4703   if (hfa_mode != VOIDmode && (! cum->prototype || named))
4704     {
4705       rtx loc[16];
4706       int i = 0;
4707       int fp_regs = cum->fp_regs;
4708       int int_regs = cum->words + offset;
4709       int hfa_size = GET_MODE_SIZE (hfa_mode);
4710       int byte_size;
4711       int args_byte_size;
4712 
4713       /* If prototyped, pass it in FR regs then GR regs.
4714 	 If not prototyped, pass it in both FR and GR regs.
4715 
4716 	 If this is an SFmode aggregate, then it is possible to run out of
4717 	 FR regs while GR regs are still left.  In that case, we pass the
4718 	 remaining part in the GR regs.  */
4719 
4720       /* Fill the FP regs.  We do this always.  We stop if we reach the end
4721 	 of the argument, the last FP register, or the last argument slot.  */
4722 
4723       byte_size = ((mode == BLKmode)
4724 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4725       args_byte_size = int_regs * UNITS_PER_WORD;
4726       offset = 0;
4727       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4728 	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4729 	{
4730 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4731 				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4732 							      + fp_regs)),
4733 				      GEN_INT (offset));
4734 	  offset += hfa_size;
4735 	  args_byte_size += hfa_size;
4736 	  fp_regs++;
4737 	}
4738 
4739       /* If no prototype, then the whole thing must go in GR regs.  */
4740       if (! cum->prototype)
4741 	offset = 0;
4742       /* If this is an SFmode aggregate, then we might have some left over
4743 	 that needs to go in GR regs.  */
4744       else if (byte_size != offset)
4745 	int_regs += offset / UNITS_PER_WORD;
4746 
4747       /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4748 
4749       for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4750 	{
4751 	  machine_mode gr_mode = DImode;
4752 	  unsigned int gr_size;
4753 
4754 	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
4755 	     then this goes in a GR reg left adjusted/little endian, right
4756 	     adjusted/big endian.  */
4757 	  /* ??? Currently this is handled wrong, because 4-byte hunks are
4758 	     always right adjusted/little endian.  */
4759 	  if (offset & 0x4)
4760 	    gr_mode = SImode;
4761 	  /* If we have an even 4 byte hunk because the aggregate is a
4762 	     multiple of 4 bytes in size, then this goes in a GR reg right
4763 	     adjusted/little endian.  */
4764 	  else if (byte_size - offset == 4)
4765 	    gr_mode = SImode;
4766 
4767 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4768 				      gen_rtx_REG (gr_mode, (basereg
4769 							     + int_regs)),
4770 				      GEN_INT (offset));
4771 
4772 	  gr_size = GET_MODE_SIZE (gr_mode);
4773 	  offset += gr_size;
4774 	  if (gr_size == UNITS_PER_WORD
4775 	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4776 	    int_regs++;
4777 	  else if (gr_size > UNITS_PER_WORD)
4778 	    int_regs += gr_size / UNITS_PER_WORD;
4779 	}
4780       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4781     }
4782 
4783   /* Integral values and aggregates go in general registers.  If we have run
4784      out of FR registers, then FP values must also go in general registers.
4785      This can happen when we have an SFmode HFA.  */
4786   else if (mode == TFmode || mode == TCmode
4787 	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4788     {
4789       int byte_size = ((mode == BLKmode)
4790                        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4791       if (BYTES_BIG_ENDIAN
4792 	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4793 	&& byte_size < UNITS_PER_WORD
4794 	&& byte_size > 0)
4795 	{
4796 	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4797 					  gen_rtx_REG (DImode,
4798 						       (basereg + cum->words
4799 							+ offset)),
4800 					  const0_rtx);
4801 	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4802 	}
4803       else
4804 	return gen_rtx_REG (mode, basereg + cum->words + offset);
4805 
4806     }
4807 
4808   /* If there is a prototype, then FP values go in a FR register when
4809      named, and in a GR register when unnamed.  */
4810   else if (cum->prototype)
4811     {
4812       if (named)
4813 	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4814       /* In big-endian mode, an anonymous SFmode value must be represented
4815          as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4816 	 the value into the high half of the general register.  */
4817       else if (BYTES_BIG_ENDIAN && mode == SFmode)
4818 	return gen_rtx_PARALLEL (mode,
4819 		 gen_rtvec (1,
4820                    gen_rtx_EXPR_LIST (VOIDmode,
4821 		     gen_rtx_REG (DImode, basereg + cum->words + offset),
4822 				      const0_rtx)));
4823       else
4824 	return gen_rtx_REG (mode, basereg + cum->words + offset);
4825     }
4826   /* If there is no prototype, then FP values go in both FR and GR
4827      registers.  */
4828   else
4829     {
4830       /* See comment above.  */
4831       machine_mode inner_mode =
4832 	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4833 
4834       rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4835 				      gen_rtx_REG (mode, (FR_ARG_FIRST
4836 							  + cum->fp_regs)),
4837 				      const0_rtx);
4838       rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4839 				      gen_rtx_REG (inner_mode,
4840 						   (basereg + cum->words
4841 						    + offset)),
4842 				      const0_rtx);
4843 
4844       return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4845     }
4846 }
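
/* As a sketch of the HFA case above (assuming the usual ia64 convention of
   eight 8-byte argument slots and FP argument registers starting at f8): a
   named, prototyped argument of type struct { double a, b; } whose slots
   are still free comes back from ia64_function_arg_1 as roughly

     (parallel [(expr_list (reg:DF f8) (const_int 0))
                (expr_list (reg:DF f9) (const_int 8))])

   one FP register per element.  In the unprototyped case the same bytes
   are additionally described in general registers, since the callee may
   not know to look in the FP registers.  */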
4847 
4848 /* Implement TARGET_FUNCTION_ARG target hook.  */
4849 
4850 static rtx
4851 ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4852 		   const_tree type, bool named)
4853 {
4854   return ia64_function_arg_1 (cum, mode, type, named, false);
4855 }
4856 
4857 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */
4858 
4859 static rtx
4860 ia64_function_incoming_arg (cumulative_args_t cum,
4861 			    machine_mode mode,
4862 			    const_tree type, bool named)
4863 {
4864   return ia64_function_arg_1 (cum, mode, type, named, true);
4865 }
4866 
4867 /* Return number of bytes, at the beginning of the argument, that must be
4868    put in registers.  0 if the argument is entirely in registers or entirely
4869    in memory.  */
4870 
4871 static int
4872 ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4873 			tree type, bool named ATTRIBUTE_UNUSED)
4874 {
4875   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4876 
4877   int words = ia64_function_arg_words (type, mode);
4878   int offset = ia64_function_arg_offset (cum, type, words);
4879 
4880   /* If all argument slots are used, then it must go on the stack.  */
4881   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4882     return 0;
4883 
4884   /* It doesn't matter whether the argument goes in FR or GR regs.  If
4885      it fits within the 8 argument slots, then it goes entirely in
4886      registers.  If it extends past the last argument slot, then the rest
4887      goes on the stack.  */
4888 
4889   if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4890     return 0;
4891 
4892   return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4893 }
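
/* For example, with eight 8-byte argument slots: a 24-byte aggregate of
   doubles that starts when cum->words is 6 needs three slots but only two
   remain, so this returns (8 - 6) * 8 = 16 -- the first 16 bytes are
   passed in registers and the remaining 8 bytes go on the stack.  */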
4894 
4895 /* Return ivms_arg_type based on machine_mode.  */
4896 
4897 static enum ivms_arg_type
4898 ia64_arg_type (machine_mode mode)
4899 {
4900   switch (mode)
4901     {
4902     case SFmode:
4903       return FS;
4904     case DFmode:
4905       return FT;
4906     default:
4907       return I64;
4908     }
4909 }
4910 
4911 /* Update CUM to point after this argument.  This is patterned after
4912    ia64_function_arg.  */
4913 
4914 static void
4915 ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
4916 			   const_tree type, bool named)
4917 {
4918   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4919   int words = ia64_function_arg_words (type, mode);
4920   int offset = ia64_function_arg_offset (cum, type, words);
4921   machine_mode hfa_mode = VOIDmode;
4922 
4923   /* If all arg slots are already full, then there is nothing to do.  */
4924   if (cum->words >= MAX_ARGUMENT_SLOTS)
4925     {
4926       cum->words += words + offset;
4927       return;
4928     }
4929 
4930   cum->atypes[cum->words] = ia64_arg_type (mode);
4931   cum->words += words + offset;
4932 
4933   /* On OpenVMS argument is either in Rn or Fn.  */
4934   if (TARGET_ABI_OPEN_VMS)
4935     {
4936       cum->int_regs = cum->words;
4937       cum->fp_regs = cum->words;
4938       return;
4939     }
4940 
4941   /* Check for and handle homogeneous FP aggregates.  */
4942   if (type)
4943     hfa_mode = hfa_element_mode (type, 0);
4944 
4945   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4946      and unprototyped hfas are passed specially.  */
4947   if (hfa_mode != VOIDmode && (! cum->prototype || named))
4948     {
4949       int fp_regs = cum->fp_regs;
4950       /* This is the original value of cum->words + offset.  */
4951       int int_regs = cum->words - words;
4952       int hfa_size = GET_MODE_SIZE (hfa_mode);
4953       int byte_size;
4954       int args_byte_size;
4955 
4956       /* If prototyped, pass it in FR regs then GR regs.
4957 	 If not prototyped, pass it in both FR and GR regs.
4958 
4959 	 If this is an SFmode aggregate, then it is possible to run out of
4960 	 FR regs while GR regs are still left.  In that case, we pass the
4961 	 remaining part in the GR regs.  */
4962 
4963       /* Fill the FP regs.  We do this always.  We stop if we reach the end
4964 	 of the argument, the last FP register, or the last argument slot.  */
4965 
4966       byte_size = ((mode == BLKmode)
4967 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4968       args_byte_size = int_regs * UNITS_PER_WORD;
4969       offset = 0;
4970       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4971 	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4972 	{
4973 	  offset += hfa_size;
4974 	  args_byte_size += hfa_size;
4975 	  fp_regs++;
4976 	}
4977 
4978       cum->fp_regs = fp_regs;
4979     }
4980 
4981   /* Integral values and aggregates go in general registers.  So do TFmode
4982      FP values.  If we have run out of FR registers, then other FP values
4983      must also go in general registers.  This can happen with an SFmode HFA.  */
4984   else if (mode == TFmode || mode == TCmode
4985            || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4986     cum->int_regs = cum->words;
4987 
4988   /* If there is a prototype, then FP values go in a FR register when
4989      named, and in a GR register when unnamed.  */
4990   else if (cum->prototype)
4991     {
4992       if (! named)
4993 	cum->int_regs = cum->words;
4994       else
4995 	/* ??? Complex types should not reach here.  */
4996 	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4997     }
4998   /* If there is no prototype, then FP values go in both FR and GR
4999      registers.  */
5000   else
5001     {
5002       /* ??? Complex types should not reach here.  */
5003       cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5004       cum->int_regs = cum->words;
5005     }
5006 }
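
/* For instance, advancing past a named, prototyped HFA of two doubles
   bumps cum->words by two slots and, via the loop above, cum->fp_regs by
   two FP argument registers.  */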
5007 
5008 /* Arguments with alignment larger than 8 bytes start at the next even
5009    boundary.  On ILP32 HPUX, TFmode arguments start on next even boundary
5010    even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
5011 
5012 static unsigned int
5013 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5014 {
5015   if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5016     return PARM_BOUNDARY * 2;
5017 
5018   if (type)
5019     {
5020       if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5021         return PARM_BOUNDARY * 2;
5022       else
5023         return PARM_BOUNDARY;
5024     }
5025 
5026   if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5027     return PARM_BOUNDARY * 2;
5028   else
5029     return PARM_BOUNDARY;
5030 }
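
/* With ia64's PARM_BOUNDARY of 64 bits this means, for example, that a
   type aligned to 128 bits (such as long double) starts on an even
   argument slot, i.e. a 16-byte boundary, while ordinary 8-byte arguments
   may use any slot.  */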
5031 
5032 /* True if it is OK to do sibling call optimization for the specified
5033    call expression EXP.  DECL will be the called function, or NULL if
5034    this is an indirect call.  */
5035 static bool
5036 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5037 {
5038   /* We can't perform a sibcall if the current function has the syscall_linkage
5039      attribute.  */
5040   if (lookup_attribute ("syscall_linkage",
5041 			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5042     return false;
5043 
5044   /* We must always return with our current GP.  This means we can
5045      only sibcall to functions defined in the current module unless
5046      TARGET_CONST_GP is set to true.  */
5047   return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5048 }
5049 
5050 
5051 /* Implement va_arg.  */
5052 
5053 static tree
5054 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5055 		      gimple_seq *post_p)
5056 {
5057   /* Variable sized types are passed by reference.  */
5058   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5059     {
5060       tree ptrtype = build_pointer_type (type);
5061       tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5062       return build_va_arg_indirect_ref (addr);
5063     }
5064 
5065   /* Aggregate arguments with alignment larger than 8 bytes start at
5066      the next even boundary.  Integer and floating point arguments
5067      do so if they are larger than 8 bytes, whether or not they are
5068      also aligned larger than 8 bytes.  */
5069   if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5070       ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5071     {
5072       tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5073       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5074 		  build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5075       gimplify_assign (unshare_expr (valist), t, pre_p);
5076     }
5077 
5078   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5079 }
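
/* The alignment fixup above computes, in effect,

     valist = (valist + 2 * UNITS_PER_WORD - 1) & -(2 * UNITS_PER_WORD);

   i.e. with 8-byte words it rounds the argument pointer up to the next
   16-byte boundary before the standard va_arg handling takes over.  */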
5080 
5081 /* Return 1 if the function return value is returned in memory.  Return 0 if
5082    it is in a register.  */
5083 
5084 static bool
5085 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5086 {
5087   machine_mode mode;
5088   machine_mode hfa_mode;
5089   HOST_WIDE_INT byte_size;
5090 
5091   mode = TYPE_MODE (valtype);
5092   byte_size = GET_MODE_SIZE (mode);
5093   if (mode == BLKmode)
5094     {
5095       byte_size = int_size_in_bytes (valtype);
5096       if (byte_size < 0)
5097 	return true;
5098     }
5099 
5100   /* HFAs with up to 8 elements are returned in the FP argument registers.  */
5101 
5102   hfa_mode = hfa_element_mode (valtype, 0);
5103   if (hfa_mode != VOIDmode)
5104     {
5105       int hfa_size = GET_MODE_SIZE (hfa_mode);
5106 
5107       if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5108 	return true;
5109       else
5110 	return false;
5111     }
5112   else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5113     return true;
5114   else
5115     return false;
5116 }
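
/* For example, an HFA of ten floats has more elements than the eight FP
   return registers can hold, so it comes back in memory; a non-HFA
   aggregate is returned in memory once it exceeds
   UNITS_PER_WORD * MAX_INT_RETURN_SLOTS bytes, i.e. 32 bytes with the four
   8-byte integer return slots r8-r11.  */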
5117 
5118 /* Return rtx for register that holds the function return value.  */
5119 
5120 static rtx
5121 ia64_function_value (const_tree valtype,
5122 		     const_tree fn_decl_or_type,
5123 		     bool outgoing ATTRIBUTE_UNUSED)
5124 {
5125   machine_mode mode;
5126   machine_mode hfa_mode;
5127   int unsignedp;
5128   const_tree func = fn_decl_or_type;
5129 
5130   if (fn_decl_or_type
5131       && !DECL_P (fn_decl_or_type))
5132     func = NULL;
5133 
5134   mode = TYPE_MODE (valtype);
5135   hfa_mode = hfa_element_mode (valtype, 0);
5136 
5137   if (hfa_mode != VOIDmode)
5138     {
5139       rtx loc[8];
5140       int i;
5141       int hfa_size;
5142       int byte_size;
5143       int offset;
5144 
5145       hfa_size = GET_MODE_SIZE (hfa_mode);
5146       byte_size = ((mode == BLKmode)
5147 		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5148       offset = 0;
5149       for (i = 0; offset < byte_size; i++)
5150 	{
5151 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5152 				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5153 				      GEN_INT (offset));
5154 	  offset += hfa_size;
5155 	}
5156       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5157     }
5158   else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5159     return gen_rtx_REG (mode, FR_ARG_FIRST);
5160   else
5161     {
5162       bool need_parallel = false;
5163 
5164       /* In big-endian mode, we need to manage the layout of aggregates
5165 	 in the registers so that we get the bits properly aligned in
5166 	 the highpart of the registers.  */
5167       if (BYTES_BIG_ENDIAN
5168 	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5169 	need_parallel = true;
5170 
5171       /* Something like struct S { long double x; char a[0] } is not an
5172 	 HFA structure, and therefore doesn't go in fp registers.  But
5173 	 the middle-end will give it XFmode anyway, and XFmode values
5174 	 don't normally fit in integer registers.  So we need to smuggle
5175 	 the value inside a parallel.  */
5176       else if (mode == XFmode || mode == XCmode || mode == RFmode)
5177 	need_parallel = true;
5178 
5179       if (need_parallel)
5180 	{
5181 	  rtx loc[8];
5182 	  int offset;
5183 	  int bytesize;
5184 	  int i;
5185 
5186 	  offset = 0;
5187 	  bytesize = int_size_in_bytes (valtype);
5188 	  /* An empty PARALLEL is invalid here, but the return value
5189 	     doesn't matter for empty structs.  */
5190 	  if (bytesize == 0)
5191 	    return gen_rtx_REG (mode, GR_RET_FIRST);
5192 	  for (i = 0; offset < bytesize; i++)
5193 	    {
5194 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5195 					  gen_rtx_REG (DImode,
5196 						       GR_RET_FIRST + i),
5197 					  GEN_INT (offset));
5198 	      offset += UNITS_PER_WORD;
5199 	    }
5200 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5201 	}
5202 
5203       mode = promote_function_mode (valtype, mode, &unsignedp,
5204                                     func ? TREE_TYPE (func) : NULL_TREE,
5205                                     true);
5206 
5207       return gen_rtx_REG (mode, GR_RET_FIRST);
5208     }
5209 }
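
/* As an example of the HFA return case above, struct { float x, y, z; }
   is described as roughly

     (parallel [(expr_list (reg:SF f8)  (const_int 0))
                (expr_list (reg:SF f9)  (const_int 4))
                (expr_list (reg:SF f10) (const_int 8))])

   assuming the FP return registers start at f8 as usual on ia64.  */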
5210 
5211 /* Worker function for TARGET_LIBCALL_VALUE.  */
5212 
5213 static rtx
5214 ia64_libcall_value (machine_mode mode,
5215 		    const_rtx fun ATTRIBUTE_UNUSED)
5216 {
5217   return gen_rtx_REG (mode,
5218 		      (((GET_MODE_CLASS (mode) == MODE_FLOAT
5219 			 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5220 			&& (mode) != TFmode)
5221 		       ? FR_RET_FIRST : GR_RET_FIRST));
5222 }
5223 
5224 /* Worker function for FUNCTION_VALUE_REGNO_P.  */
5225 
5226 static bool
5227 ia64_function_value_regno_p (const unsigned int regno)
5228 {
5229   return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5230           || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5231 }
5232 
5233 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5234    We need to emit DTP-relative relocations.  */
5235 
5236 static void
5237 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5238 {
5239   gcc_assert (size == 4 || size == 8);
5240   if (size == 4)
5241     fputs ("\tdata4.ua\t@dtprel(", file);
5242   else
5243     fputs ("\tdata8.ua\t@dtprel(", file);
5244   output_addr_const (file, x);
5245   fputs (")", file);
5246 }
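
/* For a symbol foo and SIZE of 8 this emits

     data8.ua  @dtprel(foo)

   and data4.ua for SIZE of 4.  */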
5247 
5248 /* Print a memory address as an operand to reference that memory location.  */
5249 
5250 /* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
5251    also call this from ia64_print_operand for memory addresses.  */
5252 
5253 static void
5254 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5255 			    machine_mode /*mode*/,
5256 			    rtx address ATTRIBUTE_UNUSED)
5257 {
5258 }
5259 
5260 /* Print an operand to an assembler instruction.
5261    C	Swap and print a comparison operator.
5262    D	Print an FP comparison operator.
5263    E    Print 32 - constant, for SImode shifts as extract.
5264    e    Print 64 - constant, for DImode rotates.
5265    F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5266         a floating point register emitted normally.
5267    G	A floating point constant.
5268    I	Invert a predicate register by adding 1.
5269    J    Select the proper predicate register for a condition.
5270    j    Select the inverse predicate register for a condition.
5271    O	Append .acq for volatile load.
5272    P	Postincrement of a MEM.
5273    Q	Append .rel for volatile store.
5274    R	Print .s .d or nothing for a single, double or no truncation.
5275    S	Shift amount for shladd instruction.
5276    T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5277 	for Intel assembler.
5278    U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5279 	for Intel assembler.
5280    X	A pair of floating point registers.
5281    r	Print register name, or constant 0 as r0.  HP compatibility for
5282 	Linux kernel.
5283    v    Print vector constant value as an 8-byte integer value.  */
5284 
5285 static void
5286 ia64_print_operand (FILE * file, rtx x, int code)
5287 {
5288   const char *str;
5289 
5290   switch (code)
5291     {
5292     case 0:
5293       /* Handled below.  */
5294       break;
5295 
5296     case 'C':
5297       {
5298 	enum rtx_code c = swap_condition (GET_CODE (x));
5299 	fputs (GET_RTX_NAME (c), file);
5300 	return;
5301       }
5302 
5303     case 'D':
5304       switch (GET_CODE (x))
5305 	{
5306 	case NE:
5307 	  str = "neq";
5308 	  break;
5309 	case UNORDERED:
5310 	  str = "unord";
5311 	  break;
5312 	case ORDERED:
5313 	  str = "ord";
5314 	  break;
5315 	case UNLT:
5316 	  str = "nge";
5317 	  break;
5318 	case UNLE:
5319 	  str = "ngt";
5320 	  break;
5321 	case UNGT:
5322 	  str = "nle";
5323 	  break;
5324 	case UNGE:
5325 	  str = "nlt";
5326 	  break;
5327 	case UNEQ:
5328 	case LTGT:
5329 	  gcc_unreachable ();
5330 	default:
5331 	  str = GET_RTX_NAME (GET_CODE (x));
5332 	  break;
5333 	}
5334       fputs (str, file);
5335       return;
5336 
5337     case 'E':
5338       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5339       return;
5340 
5341     case 'e':
5342       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5343       return;
5344 
5345     case 'F':
5346       if (x == CONST0_RTX (GET_MODE (x)))
5347 	str = reg_names [FR_REG (0)];
5348       else if (x == CONST1_RTX (GET_MODE (x)))
5349 	str = reg_names [FR_REG (1)];
5350       else
5351 	{
5352 	  gcc_assert (GET_CODE (x) == REG);
5353 	  str = reg_names [REGNO (x)];
5354 	}
5355       fputs (str, file);
5356       return;
5357 
5358     case 'G':
5359       {
5360 	long val[4];
5361 	real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
5362 	if (GET_MODE (x) == SFmode)
5363 	  fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5364 	else if (GET_MODE (x) == DFmode)
5365 	  fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5366 					  & 0xffffffff,
5367 					 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5368 					  & 0xffffffff);
5369 	else
5370 	  output_operand_lossage ("invalid %%G mode");
5371       }
5372       return;
5373 
5374     case 'I':
5375       fputs (reg_names [REGNO (x) + 1], file);
5376       return;
5377 
5378     case 'J':
5379     case 'j':
5380       {
5381 	unsigned int regno = REGNO (XEXP (x, 0));
5382 	if (GET_CODE (x) == EQ)
5383 	  regno += 1;
5384 	if (code == 'j')
5385 	  regno ^= 1;
5386         fputs (reg_names [regno], file);
5387       }
5388       return;
5389 
5390     case 'O':
5391       if (MEM_VOLATILE_P (x))
5392 	fputs(".acq", file);
5393       return;
5394 
5395     case 'P':
5396       {
5397 	HOST_WIDE_INT value;
5398 
5399 	switch (GET_CODE (XEXP (x, 0)))
5400 	  {
5401 	  default:
5402 	    return;
5403 
5404 	  case POST_MODIFY:
5405 	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5406 	    if (GET_CODE (x) == CONST_INT)
5407 	      value = INTVAL (x);
5408 	    else
5409 	      {
5410 		gcc_assert (GET_CODE (x) == REG);
5411 		fprintf (file, ", %s", reg_names[REGNO (x)]);
5412 		return;
5413 	      }
5414 	    break;
5415 
5416 	  case POST_INC:
5417 	    value = GET_MODE_SIZE (GET_MODE (x));
5418 	    break;
5419 
5420 	  case POST_DEC:
5421 	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5422 	    break;
5423 	  }
5424 
5425 	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5426 	return;
5427       }
5428 
5429     case 'Q':
5430       if (MEM_VOLATILE_P (x))
5431 	fputs(".rel", file);
5432       return;
5433 
5434     case 'R':
5435       if (x == CONST0_RTX (GET_MODE (x)))
5436 	fputs(".s", file);
5437       else if (x == CONST1_RTX (GET_MODE (x)))
5438 	fputs(".d", file);
5439       else if (x == CONST2_RTX (GET_MODE (x)))
5440 	;
5441       else
5442 	output_operand_lossage ("invalid %%R value");
5443       return;
5444 
5445     case 'S':
5446       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5447       return;
5448 
5449     case 'T':
5450       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5451 	{
5452 	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5453 	  return;
5454 	}
5455       break;
5456 
5457     case 'U':
5458       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5459 	{
5460 	  const char *prefix = "0x";
5461 	  if (INTVAL (x) & 0x80000000)
5462 	    {
5463 	      fprintf (file, "0xffffffff");
5464 	      prefix = "";
5465 	    }
5466 	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5467 	  return;
5468 	}
5469       break;
5470 
5471     case 'X':
5472       {
5473 	unsigned int regno = REGNO (x);
5474 	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5475       }
5476       return;
5477 
5478     case 'r':
5479       /* If this operand is the constant zero, write it as register zero.
5480 	 Any register, zero, or CONST_INT value is OK here.  */
5481       if (GET_CODE (x) == REG)
5482 	fputs (reg_names[REGNO (x)], file);
5483       else if (x == CONST0_RTX (GET_MODE (x)))
5484 	fputs ("r0", file);
5485       else if (GET_CODE (x) == CONST_INT)
5486 	output_addr_const (file, x);
5487       else
5488 	output_operand_lossage ("invalid %%r value");
5489       return;
5490 
5491     case 'v':
5492       gcc_assert (GET_CODE (x) == CONST_VECTOR);
5493       x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5494       break;
5495 
5496     case '+':
5497       {
5498 	const char *which;
5499 
5500 	/* For conditional branches, returns or calls, substitute
5501 	   sptk, dptk, dpnt, or spnt for %s.  */
5502 	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5503 	if (x)
5504 	  {
5505 	    int pred_val = XINT (x, 0);
5506 
5507 	    /* Guess top and bottom 2% statically predicted.  */
5508 	    if (pred_val < REG_BR_PROB_BASE / 50
5509 		&& br_prob_note_reliable_p (x))
5510 	      which = ".spnt";
5511 	    else if (pred_val < REG_BR_PROB_BASE / 2)
5512 	      which = ".dpnt";
5513 	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5514 		     || !br_prob_note_reliable_p (x))
5515 	      which = ".dptk";
5516 	    else
5517 	      which = ".sptk";
5518 	  }
5519 	else if (CALL_P (current_output_insn))
5520 	  which = ".sptk";
5521 	else
5522 	  which = ".dptk";
5523 
5524 	fputs (which, file);
5525 	return;
5526       }
5527 
5528     case ',':
5529       x = current_insn_predicate;
5530       if (x)
5531 	{
5532 	  unsigned int regno = REGNO (XEXP (x, 0));
5533 	  if (GET_CODE (x) == EQ)
5534 	    regno += 1;
5535           fprintf (file, "(%s) ", reg_names [regno]);
5536 	}
5537       return;
5538 
5539     default:
5540       output_operand_lossage ("ia64_print_operand: unknown code");
5541       return;
5542     }
5543 
5544   switch (GET_CODE (x))
5545     {
5546       /* This happens for the spill/restore instructions.  */
5547     case POST_INC:
5548     case POST_DEC:
5549     case POST_MODIFY:
5550       x = XEXP (x, 0);
5551       /* fall through */
5552 
5553     case REG:
5554       fputs (reg_names [REGNO (x)], file);
5555       break;
5556 
5557     case MEM:
5558       {
5559 	rtx addr = XEXP (x, 0);
5560 	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5561 	  addr = XEXP (addr, 0);
5562 	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5563 	break;
5564       }
5565 
5566     default:
5567       output_addr_const (file, x);
5568       break;
5569     }
5570 
5571   return;
5572 }
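
/* For the '+' code above: a branch whose REG_BR_PROB note is reliable and
   below 2% of REG_BR_PROB_BASE gets the static not-taken hint ".spnt", one
   that is reliable and at or above 98% gets ".sptk", and anything in
   between, or with an unreliable note, gets one of the dynamic hints
   ".dpnt"/".dptk".  */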
5573 
5574 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
5575 
5576 static bool
5577 ia64_print_operand_punct_valid_p (unsigned char code)
5578 {
5579   return (code == '+' || code == ',');
5580 }
5581 
5582 /* Compute a (partial) cost for rtx X.  Return true if the complete
5583    cost has been computed, and false if subexpressions should be
5584    scanned.  In either case, *TOTAL contains the cost result.  */
5585 /* ??? This is incomplete.  */
5586 
5587 static bool
5588 ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
5589 		int opno ATTRIBUTE_UNUSED,
5590 		int *total, bool speed ATTRIBUTE_UNUSED)
5591 {
5592   int code = GET_CODE (x);
5593 
5594   switch (code)
5595     {
5596     case CONST_INT:
5597       switch (outer_code)
5598         {
5599         case SET:
5600 	  *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5601 	  return true;
5602         case PLUS:
5603 	  if (satisfies_constraint_I (x))
5604 	    *total = 0;
5605 	  else if (satisfies_constraint_J (x))
5606 	    *total = 1;
5607 	  else
5608 	    *total = COSTS_N_INSNS (1);
5609 	  return true;
5610         default:
5611 	  if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5612 	    *total = 0;
5613 	  else
5614 	    *total = COSTS_N_INSNS (1);
5615 	  return true;
5616 	}
5617 
5618     case CONST_DOUBLE:
5619       *total = COSTS_N_INSNS (1);
5620       return true;
5621 
5622     case CONST:
5623     case SYMBOL_REF:
5624     case LABEL_REF:
5625       *total = COSTS_N_INSNS (3);
5626       return true;
5627 
5628     case FMA:
5629       *total = COSTS_N_INSNS (4);
5630       return true;
5631 
5632     case MULT:
5633       /* For multiplies wider than HImode, we have to go to the FPU,
5634          which normally involves copies.  Plus there's the latency
5635          of the multiply itself, and the latency of the instructions to
5636          transfer integer regs to FP regs.  */
5637       if (FLOAT_MODE_P (mode))
5638 	*total = COSTS_N_INSNS (4);
5639       else if (GET_MODE_SIZE (mode) > 2)
5640         *total = COSTS_N_INSNS (10);
5641       else
5642 	*total = COSTS_N_INSNS (2);
5643       return true;
5644 
5645     case PLUS:
5646     case MINUS:
5647       if (FLOAT_MODE_P (mode))
5648 	{
5649 	  *total = COSTS_N_INSNS (4);
5650 	  return true;
5651 	}
5652       /* FALLTHRU */
5653 
5654     case ASHIFT:
5655     case ASHIFTRT:
5656     case LSHIFTRT:
5657       *total = COSTS_N_INSNS (1);
5658       return true;
5659 
5660     case DIV:
5661     case UDIV:
5662     case MOD:
5663     case UMOD:
5664       /* We make divide expensive, so that divide-by-constant will be
5665          optimized to a multiply.  */
5666       *total = COSTS_N_INSNS (60);
5667       return true;
5668 
5669     default:
5670       return false;
5671     }
5672 }
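
/* For instance, in a SET a CONST_INT that satisfies constraint J (roughly,
   an immediate that a single addl can materialize) is treated as free,
   while any other integer constant costs one insn; divisions are priced at
   60 insns so that division by a constant gets rewritten as a multiply.  */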
5673 
5674 /* Calculate the cost of moving data from a register in class FROM to
5675    one in class TO, using MODE.  */
5676 
5677 static int
5678 ia64_register_move_cost (machine_mode mode, reg_class_t from,
5679 			 reg_class_t to)
5680 {
5681   /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
5682   if (to == ADDL_REGS)
5683     to = GR_REGS;
5684   if (from == ADDL_REGS)
5685     from = GR_REGS;
5686 
5687   /* All costs are symmetric, so reduce cases by putting the
5688      lower number class as the destination.  */
5689   if (from < to)
5690     {
5691       reg_class_t tmp = to;
5692       to = from, from = tmp;
5693     }
5694 
5695   /* Moving from FR<->GR in XFmode must be more expensive than 2,
5696      so that we get secondary memory reloads.  Between FR_REGS,
5697      we have to make this at least as expensive as memory_move_cost
5698      to avoid spectacularly poor register class preferencing.  */
5699   if (mode == XFmode || mode == RFmode)
5700     {
5701       if (to != GR_REGS || from != GR_REGS)
5702         return memory_move_cost (mode, to, false);
5703       else
5704 	return 3;
5705     }
5706 
5707   switch (to)
5708     {
5709     case PR_REGS:
5710       /* Moving between PR registers takes two insns.  */
5711       if (from == PR_REGS)
5712 	return 3;
5713       /* Moving between PR and anything but GR is impossible.  */
5714       if (from != GR_REGS)
5715 	return memory_move_cost (mode, to, false);
5716       break;
5717 
5718     case BR_REGS:
5719       /* Moving between BR and anything but GR is impossible.  */
5720       if (from != GR_REGS && from != GR_AND_BR_REGS)
5721 	return memory_move_cost (mode, to, false);
5722       break;
5723 
5724     case AR_I_REGS:
5725     case AR_M_REGS:
5726       /* Moving between AR and anything but GR is impossible.  */
5727       if (from != GR_REGS)
5728 	return memory_move_cost (mode, to, false);
5729       break;
5730 
5731     case GR_REGS:
5732     case FR_REGS:
5733     case FP_REGS:
5734     case GR_AND_FR_REGS:
5735     case GR_AND_BR_REGS:
5736     case ALL_REGS:
5737       break;
5738 
5739     default:
5740       gcc_unreachable ();
5741     }
5742 
5743   return 2;
5744 }
5745 
5746 /* Calculate the cost of moving data of MODE from a register to or from
5747    memory.  */
5748 
5749 static int
5750 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5751 		       reg_class_t rclass,
5752 		       bool in ATTRIBUTE_UNUSED)
5753 {
5754   if (rclass == GENERAL_REGS
5755       || rclass == FR_REGS
5756       || rclass == FP_REGS
5757       || rclass == GR_AND_FR_REGS)
5758     return 4;
5759   else
5760     return 10;
5761 }
5762 
5763 /* Implement TARGET_PREFERRED_RELOAD_CLASS.  Place additional restrictions
5764    on RCLASS to use when copying X into that class.  */
5765 
5766 static reg_class_t
5767 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5768 {
5769   switch (rclass)
5770     {
5771     case FR_REGS:
5772     case FP_REGS:
5773       /* Don't allow volatile mem reloads into floating point registers.
5774 	 This is defined to force reload to choose the r/m case instead
5775 	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
5776       if (MEM_P (x) && MEM_VOLATILE_P (x))
5777 	return NO_REGS;
5778 
5779       /* Force all unrecognized constants into the constant pool.  */
5780       if (CONSTANT_P (x))
5781 	return NO_REGS;
5782       break;
5783 
5784     case AR_M_REGS:
5785     case AR_I_REGS:
5786       if (!OBJECT_P (x))
5787 	return NO_REGS;
5788       break;
5789 
5790     default:
5791       break;
5792     }
5793 
5794   return rclass;
5795 }
5796 
5797 /* This function returns the register class required for a secondary
5798    register when copying between one of the registers in RCLASS, and X,
5799    using MODE.  A return value of NO_REGS means that no secondary register
5800    is required.  */
5801 
5802 enum reg_class
5803 ia64_secondary_reload_class (enum reg_class rclass,
5804 			     machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5805 {
5806   int regno = -1;
5807 
5808   if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5809     regno = true_regnum (x);
5810 
5811   switch (rclass)
5812     {
5813     case BR_REGS:
5814     case AR_M_REGS:
5815     case AR_I_REGS:
5816       /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5817 	 interaction.  We end up with two pseudos with overlapping lifetimes
5818 	 both of which are equiv to the same constant, and both which need
5819 	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
5820 	 changes depending on the path length, which means the qty_first_reg
5821 	 check in make_regs_eqv can give different answers at different times.
5822 	 At some point I'll probably need a reload_indi pattern to handle
5823 	 this.
5824 
5825 	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5826 	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
5827 	 non-general registers for good measure.  */
5828       if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5829 	return GR_REGS;
5830 
5831       /* This is needed if a pseudo used as a call_operand gets spilled to a
5832 	 stack slot.  */
5833       if (GET_CODE (x) == MEM)
5834 	return GR_REGS;
5835       break;
5836 
5837     case FR_REGS:
5838     case FP_REGS:
5839       /* Need to go through general registers to get to other class regs.  */
5840       if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5841 	return GR_REGS;
5842 
5843       /* This can happen when a paradoxical subreg is an operand to the
5844 	 muldi3 pattern.  */
5845       /* ??? This shouldn't be necessary after instruction scheduling is
5846 	 enabled, because paradoxical subregs are not accepted by
5847 	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
5848 	 stop the paradoxical subreg stupidity in the *_operand functions
5849 	 in recog.c.  */
5850       if (GET_CODE (x) == MEM
5851 	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5852 	      || GET_MODE (x) == QImode))
5853 	return GR_REGS;
5854 
5855       /* This can happen because of the ior/and/etc patterns that accept FP
5856 	 registers as operands.  If the third operand is a constant, then it
5857 	 needs to be reloaded into a FP register.  */
5858       if (GET_CODE (x) == CONST_INT)
5859 	return GR_REGS;
5860 
5861       /* This can happen because of register elimination in a muldi3 insn.
5862 	 E.g. `26107 * (unsigned long)&u'.  */
5863       if (GET_CODE (x) == PLUS)
5864 	return GR_REGS;
5865       break;
5866 
5867     case PR_REGS:
5868       /* ??? This happens if we cse/gcse a BImode value across a call,
5869 	 and the function has a nonlocal goto.  This is because global
5870 	 does not allocate call crossing pseudos to hard registers when
5871 	 crtl->has_nonlocal_goto is true.  This is relatively
5872 	 common for C++ programs that use exceptions.  To reproduce,
5873 	 return NO_REGS and compile libstdc++.  */
5874       if (GET_CODE (x) == MEM)
5875 	return GR_REGS;
5876 
5877       /* This can happen when we take a BImode subreg of a DImode value,
5878 	 and that DImode value winds up in some non-GR register.  */
5879       if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5880 	return GR_REGS;
5881       break;
5882 
5883     default:
5884       break;
5885     }
5886 
5887   return NO_REGS;
5888 }
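
/* For example, a CONST_INT cannot be moved into an FR register directly,
   so for FR_REGS/FP_REGS the function returns GR_REGS above and the
   constant is routed through a general register first.  */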
5889 
5890 
5891 /* Implement targetm.unspec_may_trap_p hook.  */
5892 static int
5893 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5894 {
5895   switch (XINT (x, 1))
5896     {
5897     case UNSPEC_LDA:
5898     case UNSPEC_LDS:
5899     case UNSPEC_LDSA:
5900     case UNSPEC_LDCCLR:
5901     case UNSPEC_CHKACLR:
5902     case UNSPEC_CHKS:
5903       /* These unspecs are just wrappers.  */
5904       return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5905     }
5906 
5907   return default_unspec_may_trap_p (x, flags);
5908 }
5909 
5910 
5911 /* Parse the -mfixed-range= option string.  */
5912 
5913 static void
5914 fix_range (const char *const_str)
5915 {
5916   int i, first, last;
5917   char *str, *dash, *comma;
5918 
5919   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5920      REG2 are either register names or register numbers.  The effect
5921      of this option is to mark the registers in the range from REG1 to
5922      REG2 as ``fixed'' so they won't be used by the compiler.  This is
5923      used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
5924 
5925   i = strlen (const_str);
5926   str = (char *) alloca (i + 1);
5927   memcpy (str, const_str, i + 1);
5928 
5929   while (1)
5930     {
5931       dash = strchr (str, '-');
5932       if (!dash)
5933 	{
5934 	  warning (0, "value of -mfixed-range must have form REG1-REG2");
5935 	  return;
5936 	}
5937       *dash = '\0';
5938 
5939       comma = strchr (dash + 1, ',');
5940       if (comma)
5941 	*comma = '\0';
5942 
5943       first = decode_reg_name (str);
5944       if (first < 0)
5945 	{
5946 	  warning (0, "unknown register name: %s", str);
5947 	  return;
5948 	}
5949 
5950       last = decode_reg_name (dash + 1);
5951       if (last < 0)
5952 	{
5953 	  warning (0, "unknown register name: %s", dash + 1);
5954 	  return;
5955 	}
5956 
5957       *dash = '-';
5958 
5959       if (first > last)
5960 	{
5961 	  warning (0, "%s-%s is an empty range", str, dash + 1);
5962 	  return;
5963 	}
5964 
5965       for (i = first; i <= last; ++i)
5966 	fixed_regs[i] = call_used_regs[i] = 1;
5967 
5968       if (!comma)
5969 	break;
5970 
5971       *comma = ',';
5972       str = comma + 1;
5973     }
5974 }
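
/* A typical use is -mfixed-range=f32-f127, which keeps the compiler off
   the upper FP registers (useful for kernel code, as noted above); several
   ranges may be given separated by commas, e.g.
   -mfixed-range=f12-f15,f32-f127.  */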
5975 
5976 /* Implement TARGET_OPTION_OVERRIDE.  */
5977 
5978 static void
5979 ia64_option_override (void)
5980 {
5981   unsigned int i;
5982   cl_deferred_option *opt;
5983   vec<cl_deferred_option> *v
5984     = (vec<cl_deferred_option> *) ia64_deferred_options;
5985 
5986   if (v)
5987     FOR_EACH_VEC_ELT (*v, i, opt)
5988       {
5989 	switch (opt->opt_index)
5990 	  {
5991 	  case OPT_mfixed_range_:
5992 	    fix_range (opt->arg);
5993 	    break;
5994 
5995 	  default:
5996 	    gcc_unreachable ();
5997 	  }
5998       }
5999 
6000   if (TARGET_AUTO_PIC)
6001     target_flags |= MASK_CONST_GP;
6002 
6003   /* Numerous experiments show that IRA-based loop pressure
6004      calculation works better for RTL loop invariant motion on targets
6005      with enough (>= 32) registers.  It is an expensive optimization,
6006      so it is enabled only when optimizing for peak performance.  */
6007   if (optimize >= 3)
6008     flag_ira_loop_pressure = 1;
6009 
6010 
6011   ia64_section_threshold = (global_options_set.x_g_switch_value
6012 			    ? g_switch_value
6013 			    : IA64_DEFAULT_GVALUE);
6014 
6015   init_machine_status = ia64_init_machine_status;
6016 
6017   if (align_functions <= 0)
6018     align_functions = 64;
6019   if (align_loops <= 0)
6020     align_loops = 32;
6021   if (TARGET_ABI_OPEN_VMS)
6022     flag_no_common = 1;
6023 
6024   ia64_override_options_after_change();
6025 }
6026 
6027 /* Implement targetm.override_options_after_change.  */
6028 
6029 static void
6030 ia64_override_options_after_change (void)
6031 {
6032   if (optimize >= 3
6033       && !global_options_set.x_flag_selective_scheduling
6034       && !global_options_set.x_flag_selective_scheduling2)
6035     {
6036       flag_selective_scheduling2 = 1;
6037       flag_sel_sched_pipelining = 1;
6038     }
6039   if (mflag_sched_control_spec == 2)
6040     {
6041       /* Control speculation is on by default for the selective scheduler,
6042          but not for the Haifa scheduler.  */
6043       mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6044     }
6045   if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6046     {
6047       /* FIXME: remove this once breaking autoinsns is implemented as
6048          a transformation.  */
6049       flag_auto_inc_dec = 0;
6050     }
6051 }
6052 
6053 /* Initialize the record of emitted frame related registers.  */
6054 
6055 void ia64_init_expanders (void)
6056 {
6057   memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6058 }
6059 
6060 static struct machine_function *
6061 ia64_init_machine_status (void)
6062 {
6063   return ggc_cleared_alloc<machine_function> ();
6064 }
6065 
6066 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6067 static enum attr_type ia64_safe_type (rtx_insn *);
6068 
6069 static enum attr_itanium_class
6070 ia64_safe_itanium_class (rtx_insn *insn)
6071 {
6072   if (recog_memoized (insn) >= 0)
6073     return get_attr_itanium_class (insn);
6074   else if (DEBUG_INSN_P (insn))
6075     return ITANIUM_CLASS_IGNORE;
6076   else
6077     return ITANIUM_CLASS_UNKNOWN;
6078 }
6079 
6080 static enum attr_type
6081 ia64_safe_type (rtx_insn *insn)
6082 {
6083   if (recog_memoized (insn) >= 0)
6084     return get_attr_type (insn);
6085   else
6086     return TYPE_UNKNOWN;
6087 }
6088 
6089 /* The following collection of routines emit instruction group stop bits as
6090    necessary to avoid dependencies.  */
6091 
6092 /* Need to track some additional registers as far as serialization is
6093    concerned so we can properly handle br.call and br.ret.  We could
6094    make these registers visible to gcc, but since these registers are
6095    never explicitly used in gcc generated code, it seems wasteful to
6096    do so (plus it would make the call and return patterns needlessly
6097    complex).  */
6098 #define REG_RP		(BR_REG (0))
6099 #define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
6100 /* This is used for volatile asms which may require a stop bit immediately
6101    before and after them.  */
6102 #define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
6103 #define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
6104 #define NUM_REGS	(AR_UNAT_BIT_0 + 64)
6105 
6106 /* For each register, we keep track of how it has been written in the
6107    current instruction group.
6108 
6109    If a register is written unconditionally (no qualifying predicate),
6110    WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6111 
6112    If a register is written if its qualifying predicate P is true, we
6113    set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
6114    may be written again by the complement of P (P^1) and when this happens,
6115    WRITE_COUNT gets set to 2.
6116 
6117    The result of this is that whenever an insn attempts to write a register
6118    whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6119 
6120    If a predicate register is written by a floating-point insn, we set
6121    WRITTEN_BY_FP to true.
6122 
6123    If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6124    to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
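
/* For example, an unconditional write such as "mov r14 = r15" sets
   WRITE_COUNT for r14 straight to 2, so a later read or write of r14 in
   the same instruction group generally forces a stop bit; the and.orcm /
   or.andcm cases are the exception that lets parallel compares keep
   writing the same predicate register without one.  */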
6125 
6126 #if GCC_VERSION >= 4000
6127 #define RWS_FIELD_TYPE __extension__ unsigned short
6128 #else
6129 #define RWS_FIELD_TYPE unsigned int
6130 #endif
6131 struct reg_write_state
6132 {
6133   RWS_FIELD_TYPE write_count : 2;
6134   RWS_FIELD_TYPE first_pred : 10;
6135   RWS_FIELD_TYPE written_by_fp : 1;
6136   RWS_FIELD_TYPE written_by_and : 1;
6137   RWS_FIELD_TYPE written_by_or : 1;
6138 };
6139 
6140 /* Cumulative info for the current instruction group.  */
6141 struct reg_write_state rws_sum[NUM_REGS];
6142 #if CHECKING_P
6143 /* Bitmap whether a register has been written in the current insn.  */
6144 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6145 			   / HOST_BITS_PER_WIDEST_FAST_INT];
6146 
6147 static inline void
6148 rws_insn_set (int regno)
6149 {
6150   gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6151   SET_HARD_REG_BIT (rws_insn, regno);
6152 }
6153 
6154 static inline int
6155 rws_insn_test (int regno)
6156 {
6157   return TEST_HARD_REG_BIT (rws_insn, regno);
6158 }
6159 #else
6160 /* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
6161 unsigned char rws_insn[2];
6162 
6163 static inline void
6164 rws_insn_set (int regno)
6165 {
6166   if (regno == REG_AR_CFM)
6167     rws_insn[0] = 1;
6168   else if (regno == REG_VOLATILE)
6169     rws_insn[1] = 1;
6170 }
6171 
6172 static inline int
6173 rws_insn_test (int regno)
6174 {
6175   if (regno == REG_AR_CFM)
6176     return rws_insn[0];
6177   if (regno == REG_VOLATILE)
6178     return rws_insn[1];
6179   return 0;
6180 }
6181 #endif
6182 
6183 /* Indicates whether this is the first instruction after a stop bit,
6184    in which case we don't need another stop bit.  Without this,
6185    ia64_variable_issue will die when scheduling an alloc.  */
6186 static int first_instruction;
6187 
6188 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6189    RTL for one instruction.  */
6190 struct reg_flags
6191 {
6192   unsigned int is_write : 1;	/* Is register being written?  */
6193   unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
6194   unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
6195   unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
6196   unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
6197   unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
6198 };
6199 
6200 static void rws_update (int, struct reg_flags, int);
6201 static int rws_access_regno (int, struct reg_flags, int);
6202 static int rws_access_reg (rtx, struct reg_flags, int);
6203 static void update_set_flags (rtx, struct reg_flags *);
6204 static int set_src_needs_barrier (rtx, struct reg_flags, int);
6205 static int rtx_needs_barrier (rtx, struct reg_flags, int);
6206 static void init_insn_group_barriers (void);
6207 static int group_barrier_needed (rtx_insn *);
6208 static int safe_group_barrier_needed (rtx_insn *);
6209 static int in_safe_group_barrier;
6210 
6211 /* Update *RWS for REGNO, which is being written by the current instruction,
6212    with predicate PRED, and associated register flags in FLAGS.  */
6213 
6214 static void
6215 rws_update (int regno, struct reg_flags flags, int pred)
6216 {
6217   if (pred)
6218     rws_sum[regno].write_count++;
6219   else
6220     rws_sum[regno].write_count = 2;
6221   rws_sum[regno].written_by_fp |= flags.is_fp;
6222   /* ??? Not tracking and/or across differing predicates.  */
6223   rws_sum[regno].written_by_and = flags.is_and;
6224   rws_sum[regno].written_by_or = flags.is_or;
6225   rws_sum[regno].first_pred = pred;
6226 }
6227 
6228 /* Handle an access to register REGNO of type FLAGS using predicate register
6229    PRED.  Update rws_sum array.  Return 1 if this access creates
6230    a dependency with an earlier instruction in the same group.  */
6231 
6232 static int
6233 rws_access_regno (int regno, struct reg_flags flags, int pred)
6234 {
6235   int need_barrier = 0;
6236 
6237   gcc_assert (regno < NUM_REGS);
6238 
6239   if (! PR_REGNO_P (regno))
6240     flags.is_and = flags.is_or = 0;
6241 
6242   if (flags.is_write)
6243     {
6244       int write_count;
6245 
6246       rws_insn_set (regno);
6247       write_count = rws_sum[regno].write_count;
6248 
6249       switch (write_count)
6250 	{
6251 	case 0:
6252 	  /* The register has not been written yet.  */
6253 	  if (!in_safe_group_barrier)
6254 	    rws_update (regno, flags, pred);
6255 	  break;
6256 
6257 	case 1:
6258 	  /* The register has been written via a predicate.  Treat
6259 	     it like an unconditional write and do not try to check
6260 	     for complementary pred reg in earlier write.  */
6261 	  if (flags.is_and && rws_sum[regno].written_by_and)
6262 	    ;
6263 	  else if (flags.is_or && rws_sum[regno].written_by_or)
6264 	    ;
6265 	  else
6266 	    need_barrier = 1;
6267 	  if (!in_safe_group_barrier)
6268 	    rws_update (regno, flags, pred);
6269 	  break;
6270 
6271 	case 2:
6272 	  /* The register has been unconditionally written already.  We
6273 	     need a barrier.  */
6274 	  if (flags.is_and && rws_sum[regno].written_by_and)
6275 	    ;
6276 	  else if (flags.is_or && rws_sum[regno].written_by_or)
6277 	    ;
6278 	  else
6279 	    need_barrier = 1;
6280 	  if (!in_safe_group_barrier)
6281 	    {
6282 	      rws_sum[regno].written_by_and = flags.is_and;
6283 	      rws_sum[regno].written_by_or = flags.is_or;
6284 	    }
6285 	  break;
6286 
6287 	default:
6288 	  gcc_unreachable ();
6289 	}
6290     }
6291   else
6292     {
6293       if (flags.is_branch)
6294 	{
6295 	  /* Branches have several RAW exceptions that allow us to avoid
6296 	     barriers.  */
6297 
6298 	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6299 	    /* RAW dependencies on branch regs are permissible as long
6300 	       as the writer is a non-branch instruction.  Since we
6301 	       never generate code that uses a branch register written
6302 	       by a branch instruction, handling this case is
6303 	       easy.  */
6304 	    return 0;
6305 
6306 	  if (REGNO_REG_CLASS (regno) == PR_REGS
6307 	      && ! rws_sum[regno].written_by_fp)
6308 	    /* The predicates of a branch are available within the
6309 	       same insn group as long as the predicate was written by
6310 	       something other than a floating-point instruction.  */
6311 	    return 0;
6312 	}
6313 
6314       if (flags.is_and && rws_sum[regno].written_by_and)
6315 	return 0;
6316       if (flags.is_or && rws_sum[regno].written_by_or)
6317 	return 0;
6318 
6319       switch (rws_sum[regno].write_count)
6320 	{
6321 	case 0:
6322 	  /* The register has not been written yet.  */
6323 	  break;
6324 
6325 	case 1:
6326 	  /* The register has been written via a predicate, assume we
6327 	     need a barrier (don't check for complementary regs).  */
6328 	  need_barrier = 1;
6329 	  break;
6330 
6331 	case 2:
6332 	  /* The register has been unconditionally written already.  We
6333 	     need a barrier.  */
6334 	  need_barrier = 1;
6335 	  break;
6336 
6337 	default:
6338 	  gcc_unreachable ();
6339 	}
6340     }
6341 
6342   return need_barrier;
6343 }
6344 
6345 static int
6346 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6347 {
6348   int regno = REGNO (reg);
6349   int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6350 
6351   if (n == 1)
6352     return rws_access_regno (regno, flags, pred);
6353   else
6354     {
6355       int need_barrier = 0;
6356       while (--n >= 0)
6357 	need_barrier |= rws_access_regno (regno + n, flags, pred);
6358       return need_barrier;
6359     }
6360 }
6361 
6362 /* Examine X, which is a SET rtx, and update the register flags it implies,
6363    stored in *PFLAGS.  */
6364 
6365 static void
6366 update_set_flags (rtx x, struct reg_flags *pflags)
6367 {
6368   rtx src = SET_SRC (x);
6369 
6370   switch (GET_CODE (src))
6371     {
6372     case CALL:
6373       return;
6374 
6375     case IF_THEN_ELSE:
6376       /* There are four cases here:
6377 	 (1) The destination is (pc), in which case this is a branch,
6378 	 nothing here applies.
6379 	 (2) The destination is ar.lc, in which case this is a
6380 	 doloop_end_internal,
6381 	 (3) The destination is an fp register, in which case this is
6382 	 an fselect instruction.
6383 	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6384 	 this is a check load.
6385 	 In all cases, nothing we do in this function applies.  */
6386       return;
6387 
6388     default:
6389       if (COMPARISON_P (src)
6390 	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6391 	/* Set pflags->is_fp to 1 so that we know we're dealing
6392 	   with a floating point comparison when processing the
6393 	   destination of the SET.  */
6394 	pflags->is_fp = 1;
6395 
6396       /* Discover if this is a parallel comparison.  We only handle
6397 	 and.orcm and or.andcm at present, since we must retain a
6398 	 strict inverse on the predicate pair.  */
6399       else if (GET_CODE (src) == AND)
6400 	pflags->is_and = 1;
6401       else if (GET_CODE (src) == IOR)
6402 	pflags->is_or = 1;
6403 
6404       break;
6405     }
6406 }
6407 
6408 /* Subroutine of rtx_needs_barrier; this function determines whether the
6409    source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
6410    are as in rtx_needs_barrier.  */
6412 
6413 static int
6414 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6415 {
6416   int need_barrier = 0;
6417   rtx dst;
6418   rtx src = SET_SRC (x);
6419 
6420   if (GET_CODE (src) == CALL)
6421     /* We don't need to worry about the result registers that
6422        get written by subroutine call.  */
6423     return rtx_needs_barrier (src, flags, pred);
6424   else if (SET_DEST (x) == pc_rtx)
6425     {
6426       /* X is a conditional branch.  */
6427       /* ??? This seems redundant, as the caller sets this bit for
6428 	 all JUMP_INSNs.  */
6429       if (!ia64_spec_check_src_p (src))
6430 	flags.is_branch = 1;
6431       return rtx_needs_barrier (src, flags, pred);
6432     }
6433 
6434   if (ia64_spec_check_src_p (src))
6435     /* Avoid checking one register twice (in condition
6436        and in 'then' section) for ldc pattern.  */
6437     {
6438       gcc_assert (REG_P (XEXP (src, 2)));
6439       need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6440 
6441       /* We process MEM below.  */
6442       src = XEXP (src, 1);
6443     }
6444 
6445   need_barrier |= rtx_needs_barrier (src, flags, pred);
6446 
6447   dst = SET_DEST (x);
6448   if (GET_CODE (dst) == ZERO_EXTRACT)
6449     {
6450       need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6451       need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6452     }
6453   return need_barrier;
6454 }
6455 
6456 /* Handle an access to rtx X of type FLAGS using predicate register
6457    PRED.  Return 1 if this access creates a dependency with an earlier
6458    instruction in the same group.  */
6459 
6460 static int
6461 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6462 {
6463   int i, j;
6464   int is_complemented = 0;
6465   int need_barrier = 0;
6466   const char *format_ptr;
6467   struct reg_flags new_flags;
6468   rtx cond;
6469 
6470   if (! x)
6471     return 0;
6472 
6473   new_flags = flags;
6474 
6475   switch (GET_CODE (x))
6476     {
6477     case SET:
6478       update_set_flags (x, &new_flags);
6479       need_barrier = set_src_needs_barrier (x, new_flags, pred);
6480       if (GET_CODE (SET_SRC (x)) != CALL)
6481 	{
6482 	  new_flags.is_write = 1;
6483 	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6484 	}
6485       break;
6486 
6487     case CALL:
6488       new_flags.is_write = 0;
6489       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6490 
6491       /* Avoid multiple register writes, in case this is a pattern with
6492 	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
6493       if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6494 	{
6495 	  new_flags.is_write = 1;
6496 	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6497 	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6498 	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6499 	}
6500       break;
6501 
6502     case COND_EXEC:
6503       /* X is a predicated instruction.  */
6504 
6505       cond = COND_EXEC_TEST (x);
6506       gcc_assert (!pred);
6507       need_barrier = rtx_needs_barrier (cond, flags, 0);
6508 
6509       if (GET_CODE (cond) == EQ)
6510 	is_complemented = 1;
6511       cond = XEXP (cond, 0);
6512       gcc_assert (GET_CODE (cond) == REG
6513 		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6514       pred = REGNO (cond);
6515       if (is_complemented)
6516 	++pred;
6517 
6518       need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6519       return need_barrier;
6520 
6521     case CLOBBER:
6522     case USE:
6523       /* Clobber & use are for earlier compiler-phases only.  */
6524       break;
6525 
6526     case ASM_OPERANDS:
6527     case ASM_INPUT:
6528       /* We always emit stop bits for traditional asms.  We emit stop bits
6529 	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
6530       if (GET_CODE (x) != ASM_OPERANDS
6531 	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6532 	{
6533 	  /* Avoid writing the register multiple times if we have multiple
6534 	     asm outputs.  This avoids a failure in rws_access_reg.  */
6535 	  if (! rws_insn_test (REG_VOLATILE))
6536 	    {
6537 	      new_flags.is_write = 1;
6538 	      rws_access_regno (REG_VOLATILE, new_flags, pred);
6539 	    }
6540 	  return 1;
6541 	}
6542 
6543       /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6544 	 We cannot simply fall through here, since we would then be confused
6545 	 by the ASM_INPUT rtxes inside ASM_OPERANDS, which, unlike in their
6546 	 normal usage, do not indicate traditional asms.  */
6547 
6548       for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6549 	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6550 	  need_barrier = 1;
6551       break;
6552 
6553     case PARALLEL:
6554       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6555 	{
6556 	  rtx pat = XVECEXP (x, 0, i);
6557 	  switch (GET_CODE (pat))
6558 	    {
6559 	    case SET:
6560 	      update_set_flags (pat, &new_flags);
6561 	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6562 	      break;
6563 
6564 	    case USE:
6565 	    case CALL:
6566 	    case ASM_OPERANDS:
6567 	    case ASM_INPUT:
6568 	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
6569 	      break;
6570 
6571 	    case CLOBBER:
6572 	      if (REG_P (XEXP (pat, 0))
6573 		  && extract_asm_operands (x) != NULL_RTX
6574 		  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6575 		{
6576 		  new_flags.is_write = 1;
6577 		  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6578 						     new_flags, pred);
6579 		  new_flags = flags;
6580 		}
6581 	      break;
6582 
6583 	    case RETURN:
6584 	      break;
6585 
6586 	    default:
6587 	      gcc_unreachable ();
6588 	    }
6589 	}
6590       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6591 	{
6592 	  rtx pat = XVECEXP (x, 0, i);
6593 	  if (GET_CODE (pat) == SET)
6594 	    {
6595 	      if (GET_CODE (SET_SRC (pat)) != CALL)
6596 		{
6597 		  new_flags.is_write = 1;
6598 		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6599 						     pred);
6600 		}
6601 	    }
6602 	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6603 	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
6604 	}
6605       break;
6606 
6607     case SUBREG:
6608       need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6609       break;
6610     case REG:
6611       if (REGNO (x) == AR_UNAT_REGNUM)
6612 	{
6613 	  for (i = 0; i < 64; ++i)
6614 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6615 	}
6616       else
6617 	need_barrier = rws_access_reg (x, flags, pred);
6618       break;
6619 
6620     case MEM:
6621       /* Find the regs used in memory address computation.  */
6622       new_flags.is_write = 0;
6623       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6624       break;
6625 
6626     case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
6627     case SYMBOL_REF:  case LABEL_REF:     case CONST:
6628       break;
6629 
6630       /* Operators with side-effects.  */
6631     case POST_INC:    case POST_DEC:
6632       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6633 
6634       new_flags.is_write = 0;
6635       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6636       new_flags.is_write = 1;
6637       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6638       break;
6639 
6640     case POST_MODIFY:
6641       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6642 
6643       new_flags.is_write = 0;
6644       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6645       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6646       new_flags.is_write = 1;
6647       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6648       break;
6649 
6650       /* Handle common unary and binary ops for efficiency.  */
6651     case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
6652     case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
6653     case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
6654     case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
6655     case NE:       case EQ:      case GE:      case GT:        case LE:
6656     case LT:       case GEU:     case GTU:     case LEU:       case LTU:
6657       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6658       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6659       break;
6660 
6661     case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
6662     case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
6663     case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
6664     case SQRT:     case FFS:		case POPCOUNT:
6665       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6666       break;
6667 
6668     case VEC_SELECT:
6669       /* VEC_SELECT's second argument is a PARALLEL with integers that
6670 	 describe the elements selected.  On ia64, those integers are
6671 	 always constants.  Avoid walking the PARALLEL so that we don't
6672 	 get confused with "normal" parallels and then die.  */
6673       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6674       break;
6675 
6676     case UNSPEC:
6677       switch (XINT (x, 1))
6678 	{
6679 	case UNSPEC_LTOFF_DTPMOD:
6680 	case UNSPEC_LTOFF_DTPREL:
6681 	case UNSPEC_DTPREL:
6682 	case UNSPEC_LTOFF_TPREL:
6683 	case UNSPEC_TPREL:
6684 	case UNSPEC_PRED_REL_MUTEX:
6685 	case UNSPEC_PIC_CALL:
6686         case UNSPEC_MF:
6687         case UNSPEC_FETCHADD_ACQ:
6688         case UNSPEC_FETCHADD_REL:
6689 	case UNSPEC_BSP_VALUE:
6690 	case UNSPEC_FLUSHRS:
6691 	case UNSPEC_BUNDLE_SELECTOR:
6692           break;
6693 
6694 	case UNSPEC_GR_SPILL:
6695 	case UNSPEC_GR_RESTORE:
6696 	  {
6697 	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6698 	    HOST_WIDE_INT bit = (offset >> 3) & 63;
6699 
6700 	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6701 	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6702 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6703 					      new_flags, pred);
6704 	    break;
6705 	  }
6706 
6707 	case UNSPEC_FR_SPILL:
6708 	case UNSPEC_FR_RESTORE:
6709 	case UNSPEC_GETF_EXP:
6710 	case UNSPEC_SETF_EXP:
6711         case UNSPEC_ADDP4:
6712 	case UNSPEC_FR_SQRT_RECIP_APPROX:
6713 	case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6714 	case UNSPEC_LDA:
6715 	case UNSPEC_LDS:
6716 	case UNSPEC_LDS_A:
6717 	case UNSPEC_LDSA:
6718 	case UNSPEC_CHKACLR:
6719         case UNSPEC_CHKS:
6720 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6721 	  break;
6722 
6723 	case UNSPEC_FR_RECIP_APPROX:
6724 	case UNSPEC_SHRP:
6725 	case UNSPEC_COPYSIGN:
6726 	case UNSPEC_FR_RECIP_APPROX_RES:
6727 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6728 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6729 	  break;
6730 
6731         case UNSPEC_CMPXCHG_ACQ:
6732         case UNSPEC_CMPXCHG_REL:
6733 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6734 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6735 	  break;
6736 
6737 	default:
6738 	  gcc_unreachable ();
6739 	}
6740       break;
6741 
6742     case UNSPEC_VOLATILE:
6743       switch (XINT (x, 1))
6744 	{
6745 	case UNSPECV_ALLOC:
6746 	  /* Alloc must always be the first instruction of a group.
6747 	     We force this by always returning true.  */
6748 	  /* ??? We might get better scheduling if we explicitly check for
6749 	     input/local/output register dependencies, and modify the
6750 	     scheduler so that alloc is always reordered to the start of
6751 	     the current group.  We could then eliminate all of the
6752 	     first_instruction code.  */
6753 	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
6754 
6755 	  new_flags.is_write = 1;
6756 	  rws_access_regno (REG_AR_CFM, new_flags, pred);
6757 	  return 1;
6758 
6759 	case UNSPECV_SET_BSP:
6760 	case UNSPECV_PROBE_STACK_RANGE:
6761 	  need_barrier = 1;
6762           break;
6763 
6764 	case UNSPECV_BLOCKAGE:
6765 	case UNSPECV_INSN_GROUP_BARRIER:
6766 	case UNSPECV_BREAK:
6767 	case UNSPECV_PSAC_ALL:
6768 	case UNSPECV_PSAC_NORMAL:
6769 	  return 0;
6770 
6771 	case UNSPECV_PROBE_STACK_ADDRESS:
6772 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6773 	  break;
6774 
6775 	default:
6776 	  gcc_unreachable ();
6777 	}
6778       break;
6779 
6780     case RETURN:
6781       new_flags.is_write = 0;
6782       need_barrier  = rws_access_regno (REG_RP, flags, pred);
6783       need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6784 
6785       new_flags.is_write = 1;
6786       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6787       need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6788       break;
6789 
6790     default:
6791       format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6792       for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6793 	switch (format_ptr[i])
6794 	  {
6795 	  case '0':	/* unused field */
6796 	  case 'i':	/* integer */
6797 	  case 'n':	/* note */
6798 	  case 'w':	/* wide integer */
6799 	  case 's':	/* pointer to string */
6800 	  case 'S':	/* optional pointer to string */
6801 	    break;
6802 
6803 	  case 'e':
6804 	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6805 	      need_barrier = 1;
6806 	    break;
6807 
6808 	  case 'E':
6809 	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6810 	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6811 		need_barrier = 1;
6812 	    break;
6813 
6814 	  default:
6815 	    gcc_unreachable ();
6816 	  }
6817       break;
6818     }
6819   return need_barrier;
6820 }
6821 
6822 /* Clear out the state for group_barrier_needed at the start of a
6823    sequence of insns.  */
6824 
6825 static void
6826 init_insn_group_barriers (void)
6827 {
6828   memset (rws_sum, 0, sizeof (rws_sum));
6829   first_instruction = 1;
6830 }
6831 
6832 /* Given the current state, determine whether a group barrier (a stop bit) is
6833    necessary before INSN.  Return nonzero if so.  This modifies the state to
6834    include the effects of INSN as a side-effect.  */
6835 
6836 static int
6837 group_barrier_needed (rtx_insn *insn)
6838 {
6839   rtx pat;
6840   int need_barrier = 0;
6841   struct reg_flags flags;
6842 
6843   memset (&flags, 0, sizeof (flags));
6844   switch (GET_CODE (insn))
6845     {
6846     case NOTE:
6847     case DEBUG_INSN:
6848       break;
6849 
6850     case BARRIER:
6851       /* A barrier doesn't imply an instruction group boundary.  */
6852       break;
6853 
6854     case CODE_LABEL:
6855       memset (rws_insn, 0, sizeof (rws_insn));
6856       return 1;
6857 
6858     case CALL_INSN:
6859       flags.is_branch = 1;
6860       flags.is_sibcall = SIBLING_CALL_P (insn);
6861       memset (rws_insn, 0, sizeof (rws_insn));
6862 
6863       /* Don't bundle a call following another call.  */
6864       if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6865 	{
6866 	  need_barrier = 1;
6867 	  break;
6868 	}
6869 
6870       need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6871       break;
6872 
6873     case JUMP_INSN:
6874       if (!ia64_spec_check_p (insn))
6875 	flags.is_branch = 1;
6876 
6877       /* Don't bundle a jump following a call.  */
6878       if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6879 	{
6880 	  need_barrier = 1;
6881 	  break;
6882 	}
6883       /* FALLTHRU */
6884 
6885     case INSN:
6886       if (GET_CODE (PATTERN (insn)) == USE
6887 	  || GET_CODE (PATTERN (insn)) == CLOBBER)
6888 	/* Don't care about USE and CLOBBER "insns"---those are used to
6889 	   indicate to the optimizer that it shouldn't get rid of
6890 	   certain operations.  */
6891 	break;
6892 
6893       pat = PATTERN (insn);
6894 
6895       /* Ug.  Hack hacks hacked elsewhere.  */
6896       switch (recog_memoized (insn))
6897 	{
6898 	  /* We play dependency tricks with the epilogue in order
6899 	     to get proper schedules.  Undo this for dv analysis.  */
6900 	case CODE_FOR_epilogue_deallocate_stack:
6901 	case CODE_FOR_prologue_allocate_stack:
6902 	  pat = XVECEXP (pat, 0, 0);
6903 	  break;
6904 
6905 	  /* The pattern we use for br.cloop confuses the code above.
6906 	     The second element of the vector is representative.  */
6907 	case CODE_FOR_doloop_end_internal:
6908 	  pat = XVECEXP (pat, 0, 1);
6909 	  break;
6910 
6911 	  /* Doesn't generate code.  */
6912 	case CODE_FOR_pred_rel_mutex:
6913 	case CODE_FOR_prologue_use:
6914 	  return 0;
6915 
6916 	default:
6917 	  break;
6918 	}
6919 
6920       memset (rws_insn, 0, sizeof (rws_insn));
6921       need_barrier = rtx_needs_barrier (pat, flags, 0);
6922 
6923       /* Check to see if the previous instruction was a volatile
6924 	 asm.  */
6925       if (! need_barrier)
6926 	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6927 
6928       break;
6929 
6930     default:
6931       gcc_unreachable ();
6932     }
6933 
6934   if (first_instruction && important_for_bundling_p (insn))
6935     {
6936       need_barrier = 0;
6937       first_instruction = 0;
6938     }
6939 
6940   return need_barrier;
6941 }
6942 
6943 /* Like group_barrier_needed, but do not clobber the current state.  */
6944 
6945 static int
6946 safe_group_barrier_needed (rtx_insn *insn)
6947 {
6948   int saved_first_instruction;
6949   int t;
6950 
6951   saved_first_instruction = first_instruction;
6952   in_safe_group_barrier = 1;
6953 
6954   t = group_barrier_needed (insn);
6955 
6956   first_instruction = saved_first_instruction;
6957   in_safe_group_barrier = 0;
6958 
6959   return t;
6960 }
6961 
6962 /* Scan the current function and insert stop bits as necessary to
6963    eliminate dependencies.  This function assumes that a final
6964    instruction scheduling pass has been run which has already
6965    inserted most of the necessary stop bits.  This function only
6966    inserts new ones at basic block boundaries, since these are
6967    invisible to the scheduler.  */
6968 
6969 static void
6970 emit_insn_group_barriers (FILE *dump)
6971 {
6972   rtx_insn *insn;
6973   rtx_insn *last_label = 0;
6974   int insns_since_last_label = 0;
6975 
6976   init_insn_group_barriers ();
6977 
6978   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6979     {
6980       if (LABEL_P (insn))
6981 	{
6982 	  if (insns_since_last_label)
6983 	    last_label = insn;
6984 	  insns_since_last_label = 0;
6985 	}
6986       else if (NOTE_P (insn)
6987 	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6988 	{
6989 	  if (insns_since_last_label)
6990 	    last_label = insn;
6991 	  insns_since_last_label = 0;
6992 	}
6993       else if (NONJUMP_INSN_P (insn)
6994 	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6995 	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6996 	{
6997 	  init_insn_group_barriers ();
6998 	  last_label = 0;
6999 	}
7000       else if (NONDEBUG_INSN_P (insn))
7001 	{
7002 	  insns_since_last_label = 1;
7003 
7004 	  if (group_barrier_needed (insn))
7005 	    {
7006 	      if (last_label)
7007 		{
7008 		  if (dump)
7009 		    fprintf (dump, "Emitting stop before label %d\n",
7010 			     INSN_UID (last_label));
7011 		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7012 		  insn = last_label;
7013 
7014 		  init_insn_group_barriers ();
7015 		  last_label = 0;
7016 		}
7017 	    }
7018 	}
7019     }
7020 }
7021 
7022 /* Like emit_insn_group_barriers, but used when no final scheduling pass
7023    has been run.  This function has to emit all necessary group barriers.  */
7024 
7025 static void
7026 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7027 {
7028   rtx_insn *insn;
7029 
7030   init_insn_group_barriers ();
7031 
7032   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7033     {
7034       if (BARRIER_P (insn))
7035 	{
7036 	  rtx_insn *last = prev_active_insn (insn);
7037 
7038 	  if (! last)
7039 	    continue;
7040 	  if (JUMP_TABLE_DATA_P (last))
7041 	    last = prev_active_insn (last);
7042 	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7043 	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7044 
7045 	  init_insn_group_barriers ();
7046 	}
7047       else if (NONDEBUG_INSN_P (insn))
7048 	{
7049 	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7050 	    init_insn_group_barriers ();
7051 	  else if (group_barrier_needed (insn))
7052 	    {
7053 	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7054 	      init_insn_group_barriers ();
7055 	      group_barrier_needed (insn);
7056 	    }
7057 	}
7058     }
7059 }
7060 
7061 
7062 
7063 /* Instruction scheduling support.  */
7064 
7065 #define NR_BUNDLES 10
7066 
7067 /* A list of names of all available bundles.  */
7068 
7069 static const char *bundle_name [NR_BUNDLES] =
7070 {
7071   ".mii",
7072   ".mmi",
7073   ".mfi",
7074   ".mmf",
7075 #if NR_BUNDLES == 10
7076   ".bbb",
7077   ".mbb",
7078 #endif
7079   ".mib",
7080   ".mmb",
7081   ".mfb",
7082   ".mlx"
7083 };
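/* Each letter in a template name denotes a slot type: M = memory,
   I = integer, F = floating-point, B = branch; ".mlx" is the template
   whose last two slots form an L+X unit for long-immediate insns such
   as movl.  */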
7084 
7085 /* Nonzero if we should insert stop bits into the schedule.  */
7086 
7087 int ia64_final_schedule = 0;
7088 
7089 /* Codes of the corresponding queried units: */
7090 
7091 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7092 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7093 
7094 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7095 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7096 
7097 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7098 
7099 /* The following variable value is an insn group barrier.  */
7100 
7101 static rtx_insn *dfa_stop_insn;
7102 
7103 /* The following variable value is the last issued insn.  */
7104 
7105 static rtx_insn *last_scheduled_insn;
7106 
7107 /* The following variable value is a pointer to a DFA state used as
7108    a temporary variable.  */
7109 
7110 static state_t temp_dfa_state = NULL;
7111 
7112 /* The following variable value is the DFA state after issuing the last
7113    insn.  */
7114 
7115 static state_t prev_cycle_state = NULL;
7116 
7117 /* An element of the following array is TRUE if the corresponding
7118    insn requires stop bits to be added before it.  */
7119 
7120 static char *stops_p = NULL;
7121 
7122 /* The following variable is used to set up the array mentioned above.  */
7123 
7124 static int stop_before_p = 0;
7125 
7126 /* The following variable value is the allocated length of the array
7127    `stops_p'.  */
7128 
7129 static int clocks_length;
7130 
7131 /* The following variable value is number of data speculations in progress.  */
7132 static int pending_data_specs = 0;
7133 
7134 /* Number of memory references on the current and three future processor cycles.  */
7135 static char mem_ops_in_group[4];
7136 
7137 /* Number of the current processor cycle (from the scheduler's point of view).  */
7138 static int current_cycle;
7139 
7140 static rtx ia64_single_set (rtx_insn *);
7141 static void ia64_emit_insn_before (rtx, rtx_insn *);
7142 
7143 /* Map a bundle number to its pseudo-op.  */
7144 
7145 const char *
7146 get_bundle_name (int b)
7147 {
7148   return bundle_name[b];
7149 }
7150 
7151 
7152 /* Return the maximum number of instructions a cpu can issue.  */
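/* The value 6 below corresponds to dispersing two three-slot bundles
   per clock, the issue width assumed for the Itanium family here.  */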
7153 
7154 static int
7155 ia64_issue_rate (void)
7156 {
7157   return 6;
7158 }
7159 
7160 /* Helper function - like single_set, but look inside COND_EXEC.  */
7161 
7162 static rtx
7163 ia64_single_set (rtx_insn *insn)
7164 {
7165   rtx x = PATTERN (insn), ret;
7166   if (GET_CODE (x) == COND_EXEC)
7167     x = COND_EXEC_CODE (x);
7168   if (GET_CODE (x) == SET)
7169     return x;
7170 
7171   /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
7172      Although they are not classical single sets, the second set is there just
7173      to protect the first one from being moved past FP-relative stack accesses.  */
7174   switch (recog_memoized (insn))
7175     {
7176     case CODE_FOR_prologue_allocate_stack:
7177     case CODE_FOR_prologue_allocate_stack_pr:
7178     case CODE_FOR_epilogue_deallocate_stack:
7179     case CODE_FOR_epilogue_deallocate_stack_pr:
7180       ret = XVECEXP (x, 0, 0);
7181       break;
7182 
7183     default:
7184       ret = single_set_2 (insn, x);
7185       break;
7186     }
7187 
7188   return ret;
7189 }
7190 
7191 /* Adjust the cost of a scheduling dependency.
7192    Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
7193    COST is the current cost and DW is the dependency weakness.  */
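/* For example, a load truly depending on a store that the scheduler
   considers likely to alias with it (DW == MIN_DEP_WEAK) is charged
   PARAM_SCHED_MEM_TRUE_DEP_COST cycles, while an output dependence in
   which either insn is a store ends up with cost 0.  */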
7194 static int
7195 ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7196 		  int cost, dw_t dw)
7197 {
7198   enum reg_note dep_type = (enum reg_note) dep_type1;
7199   enum attr_itanium_class dep_class;
7200   enum attr_itanium_class insn_class;
7201 
7202   insn_class = ia64_safe_itanium_class (insn);
7203   dep_class = ia64_safe_itanium_class (dep_insn);
7204 
7205   /* Treat true memory dependencies separately.  Ignore apparent true
7206      dependence between a store and a call (a call has a SYMBOL_REF inside a MEM).  */
7207   if (dep_type == REG_DEP_TRUE
7208       && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7209       && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7210     return 0;
7211 
7212   if (dw == MIN_DEP_WEAK)
7213     /* Store and load are likely to alias, use higher cost to avoid stall.  */
7214     return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7215   else if (dw > MIN_DEP_WEAK)
7216     {
7217       /* Store and load are less likely to alias.  */
7218       if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7219 	/* Assume there will be no cache conflict for floating-point data.
7220 	   For integer data, L1 conflict penalty is huge (17 cycles), so we
7221 	   never assume it will not cause a conflict.  */
7222 	return 0;
7223       else
7224 	return cost;
7225     }
7226 
7227   if (dep_type != REG_DEP_OUTPUT)
7228     return cost;
7229 
7230   if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7231       || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7232     return 0;
7233 
7234   return cost;
7235 }
7236 
7237 /* Like emit_insn_before, but skip cycle_display notes.
7238    ??? When cycle display notes are implemented, update this.  */
7239 
7240 static void
7241 ia64_emit_insn_before (rtx insn, rtx_insn *before)
7242 {
7243   emit_insn_before (insn, before);
7244 }
7245 
7246 /* The following function marks insns that produce addresses for load
7247    and store insns.  Such insns will be placed into M slots because this
7248    decreases the latency on Itanium 1 (see the function
7249    `ia64_produce_address_p' and the DFA descriptions).  */
7250 
7251 static void
7252 ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7253 {
7254   rtx_insn *insn, *next, *next_tail;
7255 
7256   /* Before reload, which_alternative is not set, which means that
7257      ia64_safe_itanium_class will produce wrong results for (at least)
7258      move instructions.  */
7259   if (!reload_completed)
7260     return;
7261 
7262   next_tail = NEXT_INSN (tail);
7263   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7264     if (INSN_P (insn))
7265       insn->call = 0;
7266   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7267     if (INSN_P (insn)
7268 	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7269       {
7270 	sd_iterator_def sd_it;
7271 	dep_t dep;
7272 	bool has_mem_op_consumer_p = false;
7273 
7274 	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7275 	  {
7276 	    enum attr_itanium_class c;
7277 
7278 	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
7279 	      continue;
7280 
7281 	    next = DEP_CON (dep);
7282 	    c = ia64_safe_itanium_class (next);
7283 	    if ((c == ITANIUM_CLASS_ST
7284 		 || c == ITANIUM_CLASS_STF)
7285 		&& ia64_st_address_bypass_p (insn, next))
7286 	      {
7287 		has_mem_op_consumer_p = true;
7288 		break;
7289 	      }
7290 	    else if ((c == ITANIUM_CLASS_LD
7291 		      || c == ITANIUM_CLASS_FLD
7292 		      || c == ITANIUM_CLASS_FLDP)
7293 		     && ia64_ld_address_bypass_p (insn, next))
7294 	      {
7295 		has_mem_op_consumer_p = true;
7296 		break;
7297 	      }
7298 	  }
7299 
7300 	insn->call = has_mem_op_consumer_p;
7301       }
7302 }
7303 
7304 /* We're beginning a new block.  Initialize data structures as necessary.  */
7305 
7306 static void
7307 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7308 		 int sched_verbose ATTRIBUTE_UNUSED,
7309 		 int max_ready ATTRIBUTE_UNUSED)
7310 {
7311   if (flag_checking && !sel_sched_p () && reload_completed)
7312     {
7313       for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
7314 	   insn != current_sched_info->next_tail;
7315 	   insn = NEXT_INSN (insn))
7316 	gcc_assert (!SCHED_GROUP_P (insn));
7317     }
7318   last_scheduled_insn = NULL;
7319   init_insn_group_barriers ();
7320 
7321   current_cycle = 0;
7322   memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7323 }
7324 
7325 /* We're beginning a scheduling pass.  Check assertion.  */
7326 
7327 static void
7328 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7329                         int sched_verbose ATTRIBUTE_UNUSED,
7330                         int max_ready ATTRIBUTE_UNUSED)
7331 {
7332   gcc_assert (pending_data_specs == 0);
7333 }
7334 
7335 /* Scheduling pass is now finished.  Free/reset static variable.  */
7336 static void
7337 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7338 			  int sched_verbose ATTRIBUTE_UNUSED)
7339 {
7340   gcc_assert (pending_data_specs == 0);
7341 }
7342 
7343 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7344    speculation check), FALSE otherwise.  */
7345 static bool
7346 is_load_p (rtx_insn *insn)
7347 {
7348   enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7349 
7350   return
7351    ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7352     && get_attr_check_load (insn) == CHECK_LOAD_NO);
7353 }
7354 
7355 /* If INSN is a memory reference, record it in the MEM_OPS_IN_GROUP global
7356    array (taking into account the 3-cycle cache reference postponing for
7357    stores; see the Intel Itanium 2 Reference Manual for Software Development
7358    and Optimization, section 6.7.3.1).  */
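/* For example, a store issued on cycle 5 is counted in
   mem_ops_in_group[(5 + 3) % 4], i.e. in the slot that next comes up
   for cycle 8, modelling the delayed cache access of stores.  */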
7359 static void
7360 record_memory_reference (rtx_insn *insn)
7361 {
7362   enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7363 
7364   switch (insn_class) {
7365     case ITANIUM_CLASS_FLD:
7366     case ITANIUM_CLASS_LD:
7367       mem_ops_in_group[current_cycle % 4]++;
7368       break;
7369     case ITANIUM_CLASS_STF:
7370     case ITANIUM_CLASS_ST:
7371       mem_ops_in_group[(current_cycle + 3) % 4]++;
7372       break;
7373     default:;
7374   }
7375 }
7376 
7377 /* We are about to begin issuing insns for this clock cycle.
7378    Override the default sort algorithm to better slot instructions.  */
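/* Roughly: USEs, CLOBBERs and asm statements (TYPE_UNKNOWN) are
   separated out first; then, during the final schedule, insns that
   would require a stop bit are moved toward the low-priority end of
   the ready list, and once ia64_max_memory_insns memory operations
   have been seen for this cycle, loads are moved down as well, in
   both cases preserving their relative order.  */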
7379 
7380 static int
7381 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7382 			int *pn_ready, int clock_var,
7383 			int reorder_type)
7384 {
7385   int n_asms;
7386   int n_ready = *pn_ready;
7387   rtx_insn **e_ready = ready + n_ready;
7388   rtx_insn **insnp;
7389 
7390   if (sched_verbose)
7391     fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7392 
7393   if (reorder_type == 0)
7394     {
7395       /* First, move all USEs, CLOBBERs and other crud out of the way.  */
7396       n_asms = 0;
7397       for (insnp = ready; insnp < e_ready; insnp++)
7398 	if (insnp < e_ready)
7399 	  {
7400 	    rtx_insn *insn = *insnp;
7401 	    enum attr_type t = ia64_safe_type (insn);
7402 	    if (t == TYPE_UNKNOWN)
7403 	      {
7404 		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7405 		    || asm_noperands (PATTERN (insn)) >= 0)
7406 		  {
7407 		    rtx_insn *lowest = ready[n_asms];
7408 		    ready[n_asms] = insn;
7409 		    *insnp = lowest;
7410 		    n_asms++;
7411 		  }
7412 		else
7413 		  {
7414 		    rtx_insn *highest = ready[n_ready - 1];
7415 		    ready[n_ready - 1] = insn;
7416 		    *insnp = highest;
7417 		    return 1;
7418 		  }
7419 	      }
7420 	  }
7421 
7422       if (n_asms < n_ready)
7423 	{
7424 	  /* Some normal insns to process.  Skip the asms.  */
7425 	  ready += n_asms;
7426 	  n_ready -= n_asms;
7427 	}
7428       else if (n_ready > 0)
7429 	return 1;
7430     }
7431 
7432   if (ia64_final_schedule)
7433     {
7434       int deleted = 0;
7435       int nr_need_stop = 0;
7436 
7437       for (insnp = ready; insnp < e_ready; insnp++)
7438 	if (safe_group_barrier_needed (*insnp))
7439 	  nr_need_stop++;
7440 
7441       if (reorder_type == 1 && n_ready == nr_need_stop)
7442 	return 0;
7443       if (reorder_type == 0)
7444 	return 1;
7445       insnp = e_ready;
7446       /* Move down everything that needs a stop bit, preserving
7447 	 relative order.  */
7448       while (insnp-- > ready + deleted)
7449 	while (insnp >= ready + deleted)
7450 	  {
7451 	    rtx_insn *insn = *insnp;
7452 	    if (! safe_group_barrier_needed (insn))
7453 	      break;
7454 	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7455 	    *ready = insn;
7456 	    deleted++;
7457 	  }
7458       n_ready -= deleted;
7459       ready += deleted;
7460     }
7461 
7462   current_cycle = clock_var;
7463   if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7464     {
7465       int moved = 0;
7466 
7467       insnp = e_ready;
7468       /* Move down loads/stores, preserving relative order.  */
7469       while (insnp-- > ready + moved)
7470 	while (insnp >= ready + moved)
7471 	  {
7472 	    rtx_insn *insn = *insnp;
7473 	    if (! is_load_p (insn))
7474 	      break;
7475 	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7476 	    *ready = insn;
7477 	    moved++;
7478 	  }
7479       n_ready -= moved;
7480       ready += moved;
7481     }
7482 
7483   return 1;
7484 }
7485 
7486 /* We are about to begin issuing insns for this clock cycle.  Override
7487    the default sort algorithm to better slot instructions.  */
7488 
7489 static int
7490 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7491 		    int *pn_ready, int clock_var)
7492 {
7493   return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7494 				 pn_ready, clock_var, 0);
7495 }
7496 
7497 /* Like ia64_sched_reorder, but called after issuing each insn.
7498    Override the default sort algorithm to better slot instructions.  */
7499 
7500 static int
7501 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7502 		     int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7503 		     int *pn_ready, int clock_var)
7504 {
7505   return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7506 				 clock_var, 1);
7507 }
7508 
7509 /* We are about to issue INSN.  Return the number of insns left on the
7510    ready queue that can be issued this cycle.  */
7511 
7512 static int
7513 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7514 		     int sched_verbose ATTRIBUTE_UNUSED,
7515 		     rtx_insn *insn,
7516 		     int can_issue_more ATTRIBUTE_UNUSED)
7517 {
7518   if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7519     /* Modulo scheduling does not extend h_i_d when emitting
7520        new instructions.  Don't use h_i_d if we don't have to.  */
7521     {
7522       if (DONE_SPEC (insn) & BEGIN_DATA)
7523 	pending_data_specs++;
7524       if (CHECK_SPEC (insn) & BEGIN_DATA)
7525 	pending_data_specs--;
7526     }
7527 
7528   if (DEBUG_INSN_P (insn))
7529     return 1;
7530 
7531   last_scheduled_insn = insn;
7532   memcpy (prev_cycle_state, curr_state, dfa_state_size);
7533   if (reload_completed)
7534     {
7535       int needed = group_barrier_needed (insn);
7536 
7537       gcc_assert (!needed);
7538       if (CALL_P (insn))
7539 	init_insn_group_barriers ();
7540       stops_p [INSN_UID (insn)] = stop_before_p;
7541       stop_before_p = 0;
7542 
7543       record_memory_reference (insn);
7544     }
7545   return 1;
7546 }
7547 
7548 /* We are choosing an insn from the ready queue.  Return zero if INSN
7549    can be chosen.  */
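/* For example, when 16 or more data speculations are already pending,
   an insn that still needs BEGIN_DATA speculation is not chosen: the
   function returns -1 if that insn heads the ready list and 1
   otherwise.  */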
7550 
7551 static int
7552 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7553 {
7554   gcc_assert (insn && INSN_P (insn));
7555 
7556   /* The size of the ALAT is 32.  Since we perform conservative
7557      data speculation, we keep the ALAT half-empty.  */
7558   if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7559     return ready_index == 0 ? -1 : 1;
7560 
7561   if (ready_index == 0)
7562     return 0;
7563 
7564   if ((!reload_completed
7565        || !safe_group_barrier_needed (insn))
7566       && (!mflag_sched_mem_insns_hard_limit
7567 	  || !is_load_p (insn)
7568 	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7569     return 0;
7570 
7571   return 1;
7572 }
7573 
7574 /* The following variable value is a pseudo-insn used by the DFA insn
7575    scheduler to change the DFA state when the simulated clock is
7576    increased.  */
7577 
7578 static rtx_insn *dfa_pre_cycle_insn;
7579 
7580 /* Returns 1 when a meaningful insn was scheduled between the last group
7581    barrier and LAST.  */
7582 static int
7583 scheduled_good_insn (rtx_insn *last)
7584 {
7585   if (last && recog_memoized (last) >= 0)
7586     return 1;
7587 
7588   for ( ;
7589        last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7590        && !stops_p[INSN_UID (last)];
7591        last = PREV_INSN (last))
7592     /* We could hit a NOTE_INSN_DELETED here which is actually outside
7593        the ebb we're scheduling.  */
7594     if (INSN_P (last) && recog_memoized (last) >= 0)
7595       return 1;
7596 
7597   return 0;
7598 }
7599 
7600 /* We are about to begin issuing INSN.  Return nonzero if we cannot
7601    issue it on the given cycle CLOCK, and set *SORT_P to zero if we
7602    should not sort the ready queue on the next clock start.  */
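/* A stop is requested here either because INSN itself needs a group
   barrier, or, with mflag_sched_stop_bits_after_every_cycle, because
   the clock advanced after a real insn was scheduled, or because the
   previous insn was a call or is unknown for bundling.  When the clock
   has advanced, the DFA state is rebuilt from prev_cycle_state by
   replaying the stop and pre-cycle pseudo-insns (or simply reset after
   an insn unknown for bundling).  */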
7603 
7604 static int
7605 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7606 		    int clock, int *sort_p)
7607 {
7608   gcc_assert (insn && INSN_P (insn));
7609 
7610   if (DEBUG_INSN_P (insn))
7611     return 0;
7612 
7613   /* When a group barrier is needed for insn, last_scheduled_insn
7614      should be set.  */
7615   gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7616               || last_scheduled_insn);
7617 
7618   if ((reload_completed
7619        && (safe_group_barrier_needed (insn)
7620 	   || (mflag_sched_stop_bits_after_every_cycle
7621 	       && last_clock != clock
7622 	       && last_scheduled_insn
7623 	       && scheduled_good_insn (last_scheduled_insn))))
7624       || (last_scheduled_insn
7625 	  && (CALL_P (last_scheduled_insn)
7626 	      || unknown_for_bundling_p (last_scheduled_insn))))
7627     {
7628       init_insn_group_barriers ();
7629 
7630       if (verbose && dump)
7631 	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
7632 		 last_clock == clock ? " + cycle advance" : "");
7633 
7634       stop_before_p = 1;
7635       current_cycle = clock;
7636       mem_ops_in_group[current_cycle % 4] = 0;
7637 
7638       if (last_clock == clock)
7639 	{
7640 	  state_transition (curr_state, dfa_stop_insn);
7641 	  if (TARGET_EARLY_STOP_BITS)
7642 	    *sort_p = (last_scheduled_insn == NULL_RTX
7643 		       || ! CALL_P (last_scheduled_insn));
7644 	  else
7645 	    *sort_p = 0;
7646 	  return 1;
7647 	}
7648 
7649       if (last_scheduled_insn)
7650 	{
7651 	  if (unknown_for_bundling_p (last_scheduled_insn))
7652 	    state_reset (curr_state);
7653 	  else
7654 	    {
7655 	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
7656 	      state_transition (curr_state, dfa_stop_insn);
7657 	      state_transition (curr_state, dfa_pre_cycle_insn);
7658 	      state_transition (curr_state, NULL);
7659 	    }
7660 	}
7661     }
7662   return 0;
7663 }
7664 
7665 /* Implement targetm.sched.h_i_d_extended hook.
7666    Extend internal data structures.  */
7667 static void
7668 ia64_h_i_d_extended (void)
7669 {
7670   if (stops_p != NULL)
7671     {
7672       int new_clocks_length = get_max_uid () * 3 / 2;
7673       stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7674       clocks_length = new_clocks_length;
7675     }
7676 }
7677 
7678 
7679 /* This structure describes the data used by the backend to guide scheduling.
7680    When the current scheduling point is switched, this data should be saved
7681    and restored later, if the scheduler returns to this point.  */
7682 struct _ia64_sched_context
7683 {
7684   state_t prev_cycle_state;
7685   rtx_insn *last_scheduled_insn;
7686   struct reg_write_state rws_sum[NUM_REGS];
7687   struct reg_write_state rws_insn[NUM_REGS];
7688   int first_instruction;
7689   int pending_data_specs;
7690   int current_cycle;
7691   char mem_ops_in_group[4];
7692 };
7693 typedef struct _ia64_sched_context *ia64_sched_context_t;
7694 
7695 /* Allocates a scheduling context.  */
7696 static void *
7697 ia64_alloc_sched_context (void)
7698 {
7699   return xmalloc (sizeof (struct _ia64_sched_context));
7700 }
7701 
7702 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7703    the global context otherwise.  */
7704 static void
7705 ia64_init_sched_context (void *_sc, bool clean_p)
7706 {
7707   ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7708 
7709   sc->prev_cycle_state = xmalloc (dfa_state_size);
7710   if (clean_p)
7711     {
7712       state_reset (sc->prev_cycle_state);
7713       sc->last_scheduled_insn = NULL;
7714       memset (sc->rws_sum, 0, sizeof (rws_sum));
7715       memset (sc->rws_insn, 0, sizeof (rws_insn));
7716       sc->first_instruction = 1;
7717       sc->pending_data_specs = 0;
7718       sc->current_cycle = 0;
7719       memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7720     }
7721   else
7722     {
7723       memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7724       sc->last_scheduled_insn = last_scheduled_insn;
7725       memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7726       memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7727       sc->first_instruction = first_instruction;
7728       sc->pending_data_specs = pending_data_specs;
7729       sc->current_cycle = current_cycle;
7730       memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7731     }
7732 }
7733 
7734 /* Sets the global scheduling context to the one pointed to by _SC.  */
7735 static void
7736 ia64_set_sched_context (void *_sc)
7737 {
7738   ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7739 
7740   gcc_assert (sc != NULL);
7741 
7742   memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7743   last_scheduled_insn = sc->last_scheduled_insn;
7744   memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7745   memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7746   first_instruction = sc->first_instruction;
7747   pending_data_specs = sc->pending_data_specs;
7748   current_cycle = sc->current_cycle;
7749   memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7750 }
7751 
7752 /* Clears the data in the _SC scheduling context.  */
7753 static void
7754 ia64_clear_sched_context (void *_sc)
7755 {
7756   ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7757 
7758   free (sc->prev_cycle_state);
7759   sc->prev_cycle_state = NULL;
7760 }
7761 
7762 /* Frees the _SC scheduling context.  */
7763 static void
7764 ia64_free_sched_context (void *_sc)
7765 {
7766   gcc_assert (_sc != NULL);
7767 
7768   free (_sc);
7769 }
7770 
7771 typedef rtx (* gen_func_t) (rtx, rtx);
7772 
7773 /* Return a function that will generate a load with mode index MODE_NO
7774    and speculation types TS.  */
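/* For instance, TS == BEGIN_DATA selects the "advanced" (ld.a-style)
   patterns, TS == BEGIN_CONTROL the "speculative" (ld.s-style) ones,
   and TS == (BEGIN_DATA | BEGIN_CONTROL) the combined
   "speculative_advanced" (ld.sa-style) patterns.  */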
7775 static gen_func_t
7776 get_spec_load_gen_function (ds_t ts, int mode_no)
7777 {
7778   static gen_func_t gen_ld_[] = {
7779     gen_movbi,
7780     gen_movqi_internal,
7781     gen_movhi_internal,
7782     gen_movsi_internal,
7783     gen_movdi_internal,
7784     gen_movsf_internal,
7785     gen_movdf_internal,
7786     gen_movxf_internal,
7787     gen_movti_internal,
7788     gen_zero_extendqidi2,
7789     gen_zero_extendhidi2,
7790     gen_zero_extendsidi2,
7791   };
7792 
7793   static gen_func_t gen_ld_a[] = {
7794     gen_movbi_advanced,
7795     gen_movqi_advanced,
7796     gen_movhi_advanced,
7797     gen_movsi_advanced,
7798     gen_movdi_advanced,
7799     gen_movsf_advanced,
7800     gen_movdf_advanced,
7801     gen_movxf_advanced,
7802     gen_movti_advanced,
7803     gen_zero_extendqidi2_advanced,
7804     gen_zero_extendhidi2_advanced,
7805     gen_zero_extendsidi2_advanced,
7806   };
7807   static gen_func_t gen_ld_s[] = {
7808     gen_movbi_speculative,
7809     gen_movqi_speculative,
7810     gen_movhi_speculative,
7811     gen_movsi_speculative,
7812     gen_movdi_speculative,
7813     gen_movsf_speculative,
7814     gen_movdf_speculative,
7815     gen_movxf_speculative,
7816     gen_movti_speculative,
7817     gen_zero_extendqidi2_speculative,
7818     gen_zero_extendhidi2_speculative,
7819     gen_zero_extendsidi2_speculative,
7820   };
7821   static gen_func_t gen_ld_sa[] = {
7822     gen_movbi_speculative_advanced,
7823     gen_movqi_speculative_advanced,
7824     gen_movhi_speculative_advanced,
7825     gen_movsi_speculative_advanced,
7826     gen_movdi_speculative_advanced,
7827     gen_movsf_speculative_advanced,
7828     gen_movdf_speculative_advanced,
7829     gen_movxf_speculative_advanced,
7830     gen_movti_speculative_advanced,
7831     gen_zero_extendqidi2_speculative_advanced,
7832     gen_zero_extendhidi2_speculative_advanced,
7833     gen_zero_extendsidi2_speculative_advanced,
7834   };
7835   static gen_func_t gen_ld_s_a[] = {
7836     gen_movbi_speculative_a,
7837     gen_movqi_speculative_a,
7838     gen_movhi_speculative_a,
7839     gen_movsi_speculative_a,
7840     gen_movdi_speculative_a,
7841     gen_movsf_speculative_a,
7842     gen_movdf_speculative_a,
7843     gen_movxf_speculative_a,
7844     gen_movti_speculative_a,
7845     gen_zero_extendqidi2_speculative_a,
7846     gen_zero_extendhidi2_speculative_a,
7847     gen_zero_extendsidi2_speculative_a,
7848   };
7849 
7850   gen_func_t *gen_ld;
7851 
7852   if (ts & BEGIN_DATA)
7853     {
7854       if (ts & BEGIN_CONTROL)
7855 	gen_ld = gen_ld_sa;
7856       else
7857 	gen_ld = gen_ld_a;
7858     }
7859   else if (ts & BEGIN_CONTROL)
7860     {
7861       if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7862 	  || ia64_needs_block_p (ts))
7863 	gen_ld = gen_ld_s;
7864       else
7865 	gen_ld = gen_ld_s_a;
7866     }
7867   else if (ts == 0)
7868     gen_ld = gen_ld_;
7869   else
7870     gcc_unreachable ();
7871 
7872   return gen_ld[mode_no];
7873 }
7874 
7875 /* Constants that help map 'machine_mode' to int.  */
7876 enum SPEC_MODES
7877   {
7878     SPEC_MODE_INVALID = -1,
7879     SPEC_MODE_FIRST = 0,
7880     SPEC_MODE_FOR_EXTEND_FIRST = 1,
7881     SPEC_MODE_FOR_EXTEND_LAST = 3,
7882     SPEC_MODE_LAST = 8
7883   };
7884 
7885 enum
7886   {
7887     /* Offset to reach ZERO_EXTEND patterns.  */
7888     SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7889   };
7890 
7891 /* Return the index of MODE.  */
7892 static int
7893 ia64_mode_to_int (machine_mode mode)
7894 {
7895   switch (mode)
7896     {
7897     case BImode: return 0; /* SPEC_MODE_FIRST  */
7898     case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
7899     case HImode: return 2;
7900     case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
7901     case DImode: return 4;
7902     case SFmode: return 5;
7903     case DFmode: return 6;
7904     case XFmode: return 7;
7905     case TImode:
7906       /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
7907 	 mentioned in itanium[12].md.  Predicate fp_register_operand also
7908 	 needs to be defined.  Bottom line: better disable for now.  */
7909       return SPEC_MODE_INVALID;
7910     default:     return SPEC_MODE_INVALID;
7911     }
7912 }
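/* Thus a QImode load whose result is zero-extended to DImode gets index
   1 + SPEC_GEN_EXTEND_OFFSET == 9 (see get_mode_no_for_insn), which
   selects the gen_zero_extendqidi2* entries of the gen_ld_* tables
   above.  */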
7913 
7914 /* Provide information about speculation capabilities.  */
7915 static void
7916 ia64_set_sched_flags (spec_info_t spec_info)
7917 {
7918   unsigned int *flags = &(current_sched_info->flags);
7919 
7920   if (*flags & SCHED_RGN
7921       || *flags & SCHED_EBB
7922       || *flags & SEL_SCHED)
7923     {
7924       int mask = 0;
7925 
7926       if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7927           || (mflag_sched_ar_data_spec && reload_completed))
7928 	{
7929 	  mask |= BEGIN_DATA;
7930 
7931 	  if (!sel_sched_p ()
7932 	      && ((mflag_sched_br_in_data_spec && !reload_completed)
7933 		  || (mflag_sched_ar_in_data_spec && reload_completed)))
7934 	    mask |= BE_IN_DATA;
7935 	}
7936 
7937       if (mflag_sched_control_spec
7938           && (!sel_sched_p ()
7939 	      || reload_completed))
7940 	{
7941 	  mask |= BEGIN_CONTROL;
7942 
7943 	  if (!sel_sched_p () && mflag_sched_in_control_spec)
7944 	    mask |= BE_IN_CONTROL;
7945 	}
7946 
7947       spec_info->mask = mask;
7948 
7949       if (mask)
7950 	{
7951 	  *flags |= USE_DEPS_LIST | DO_SPECULATION;
7952 
7953 	  if (mask & BE_IN_SPEC)
7954 	    *flags |= NEW_BBS;
7955 
7956 	  spec_info->flags = 0;
7957 
7958 	  if ((mask & CONTROL_SPEC)
7959 	      && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7960 	    spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7961 
7962 	  if (sched_verbose >= 1)
7963 	    spec_info->dump = sched_dump;
7964 	  else
7965 	    spec_info->dump = 0;
7966 
7967 	  if (mflag_sched_count_spec_in_critical_path)
7968 	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7969 	}
7970     }
7971   else
7972     spec_info->mask = 0;
7973 }
7974 
7975 /* If INSN is a load appropriate for speculation, return its mode index.
7976    Return -1 otherwise.  */
7977 static int
7978 get_mode_no_for_insn (rtx_insn *insn)
7979 {
7980   rtx reg, mem, mode_rtx;
7981   int mode_no;
7982   bool extend_p;
7983 
7984   extract_insn_cached (insn);
7985 
7986   /* We use WHICH_ALTERNATIVE only after reload.  This will
7987      guarantee that reload won't touch a speculative insn.  */
7988 
7989   if (recog_data.n_operands != 2)
7990     return -1;
7991 
7992   reg = recog_data.operand[0];
7993   mem = recog_data.operand[1];
7994 
7995   /* We should use MEM's mode since REG's mode in presence of
7996      ZERO_EXTEND will always be DImode.  */
7997   if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7998     /* Process non-speculative ld.  */
7999     {
8000       if (!reload_completed)
8001 	{
8002 	  /* Do not speculate into regs like ar.lc.  */
8003 	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8004 	    return -1;
8005 
8006 	  if (!MEM_P (mem))
8007 	    return -1;
8008 
8009 	  {
8010 	    rtx mem_reg = XEXP (mem, 0);
8011 
8012 	    if (!REG_P (mem_reg))
8013 	      return -1;
8014 	  }
8015 
8016 	  mode_rtx = mem;
8017 	}
8018       else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8019 	{
8020 	  gcc_assert (REG_P (reg) && MEM_P (mem));
8021 	  mode_rtx = mem;
8022 	}
8023       else
8024 	return -1;
8025     }
8026   else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8027 	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8028 	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
8029     /* Process speculative ld or ld.c.  */
8030     {
8031       gcc_assert (REG_P (reg) && MEM_P (mem));
8032       mode_rtx = mem;
8033     }
8034   else
8035     {
8036       enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8037 
8038       if (attr_class == ITANIUM_CLASS_CHK_A
8039 	  || attr_class == ITANIUM_CLASS_CHK_S_I
8040 	  || attr_class == ITANIUM_CLASS_CHK_S_F)
8041 	/* Process chk.  */
8042 	mode_rtx = reg;
8043       else
8044 	return -1;
8045     }
8046 
8047   mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8048 
8049   if (mode_no == SPEC_MODE_INVALID)
8050     return -1;
8051 
8052   extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8053 
8054   if (extend_p)
8055     {
8056       if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8057 	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8058 	return -1;
8059 
8060       mode_no += SPEC_GEN_EXTEND_OFFSET;
8061     }
8062 
8063   return mode_no;
8064 }
8065 
8066 /* If X is an unspec part of a speculative load, return its code.
8067    Return -1 otherwise.  */
8068 static int
8069 get_spec_unspec_code (const_rtx x)
8070 {
8071   if (GET_CODE (x) != UNSPEC)
8072     return -1;
8073 
8074   {
8075     int code;
8076 
8077     code = XINT (x, 1);
8078 
8079     switch (code)
8080       {
8081       case UNSPEC_LDA:
8082       case UNSPEC_LDS:
8083       case UNSPEC_LDS_A:
8084       case UNSPEC_LDSA:
8085 	return code;
8086 
8087       default:
8088 	return -1;
8089       }
8090   }
8091 }
8092 
8093 /* Implement skip_rtx_p hook.  */
8094 static bool
8095 ia64_skip_rtx_p (const_rtx x)
8096 {
8097   return get_spec_unspec_code (x) != -1;
8098 }
8099 
8100 /* If INSN is a speculative load, return its UNSPEC code.
8101    Return -1 otherwise.  */
8102 static int
8103 get_insn_spec_code (const_rtx insn)
8104 {
8105   rtx pat, reg, mem;
8106 
8107   pat = PATTERN (insn);
8108 
8109   if (GET_CODE (pat) == COND_EXEC)
8110     pat = COND_EXEC_CODE (pat);
8111 
8112   if (GET_CODE (pat) != SET)
8113     return -1;
8114 
8115   reg = SET_DEST (pat);
8116   if (!REG_P (reg))
8117     return -1;
8118 
8119   mem = SET_SRC (pat);
8120   if (GET_CODE (mem) == ZERO_EXTEND)
8121     mem = XEXP (mem, 0);
8122 
8123   return get_spec_unspec_code (mem);
8124 }
8125 
8126 /* If INSN is a speculative load, return a ds with the speculation types.
8127    Otherwise [if INSN is a normal instruction] return 0.  */
8128 static ds_t
8129 ia64_get_insn_spec_ds (rtx_insn *insn)
8130 {
8131   int code = get_insn_spec_code (insn);
8132 
8133   switch (code)
8134     {
8135     case UNSPEC_LDA:
8136       return BEGIN_DATA;
8137 
8138     case UNSPEC_LDS:
8139     case UNSPEC_LDS_A:
8140       return BEGIN_CONTROL;
8141 
8142     case UNSPEC_LDSA:
8143       return BEGIN_DATA | BEGIN_CONTROL;
8144 
8145     default:
8146       return 0;
8147     }
8148 }
8149 
8150 /* If INSN is a speculative load, return a ds with the speculation types
8151    that will be checked.
8152    Otherwise (if INSN is a normal instruction) return 0.  */
8153 static ds_t
8154 ia64_get_insn_checked_ds (rtx_insn *insn)
8155 {
8156   int code = get_insn_spec_code (insn);
8157 
8158   switch (code)
8159     {
8160     case UNSPEC_LDA:
8161       return BEGIN_DATA | BEGIN_CONTROL;
8162 
8163     case UNSPEC_LDS:
8164       return BEGIN_CONTROL;
8165 
8166     case UNSPEC_LDS_A:
8167     case UNSPEC_LDSA:
8168       return BEGIN_DATA | BEGIN_CONTROL;
8169 
8170     default:
8171       return 0;
8172     }
8173 }
8174 
8175 /* Return a speculative load pattern for INSN, built for the speculation
8176    types TS and machine mode index MODE_NO from the operands recorded in
8177    recog_data by the preceding insn extraction.  If the original pattern
8178    is predicated, the result is wrapped in the same COND_EXEC.  */
8179 static rtx
8180 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8181 {
8182   rtx pat, new_pat;
8183   gen_func_t gen_load;
8184 
8185   gen_load = get_spec_load_gen_function (ts, mode_no);
8186 
8187   new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8188 		      copy_rtx (recog_data.operand[1]));
8189 
8190   pat = PATTERN (insn);
8191   if (GET_CODE (pat) == COND_EXEC)
8192     new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8193 				 new_pat);
8194 
8195   return new_pat;
8196 }
8197 
8198 static bool
8199 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8200 			      ds_t ds ATTRIBUTE_UNUSED)
8201 {
8202   return false;
8203 }
8204 
8205 /* Implement targetm.sched.speculate_insn hook.
8206    Check whether INSN can be made TS-speculative.
8207    If it cannot, return -1.
8208    If it can, generate the speculative pattern in *NEW_PAT and return 1.
8209    If the current pattern of INSN already provides TS speculation,
8210    return 0.  */
8211 static int
8212 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8213 {
8214   int mode_no;
8215   int res;
8216 
8217   gcc_assert (!(ts & ~SPECULATIVE));
8218 
8219   if (ia64_spec_check_p (insn))
8220     return -1;
8221 
8222   if ((ts & BE_IN_SPEC)
8223       && !insn_can_be_in_speculative_p (insn, ts))
8224     return -1;
8225 
8226   mode_no = get_mode_no_for_insn (insn);
8227 
8228   if (mode_no != SPEC_MODE_INVALID)
8229     {
8230       if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8231 	res = 0;
8232       else
8233 	{
8234 	  res = 1;
8235 	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8236 	}
8237     }
8238   else
8239     res = -1;
8240 
8241   return res;
8242 }
8243 
8244 /* Return a function that will generate a check for speculation TS with mode
8245    index MODE_NO.
8246    If a simple check is needed, pass true for SIMPLE_CHECK_P.
8247    If a clearing check is needed, pass true for CLEARING_CHECK_P.  */
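/* For example, a data-speculative load with a simple check uses the
   ld.c-style patterns (clearing or non-clearing), a non-simple data
   check uses the chk.a patterns, and a non-simple control check uses
   chk.s.  */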
8248 static gen_func_t
8249 get_spec_check_gen_function (ds_t ts, int mode_no,
8250 			     bool simple_check_p, bool clearing_check_p)
8251 {
8252   static gen_func_t gen_ld_c_clr[] = {
8253     gen_movbi_clr,
8254     gen_movqi_clr,
8255     gen_movhi_clr,
8256     gen_movsi_clr,
8257     gen_movdi_clr,
8258     gen_movsf_clr,
8259     gen_movdf_clr,
8260     gen_movxf_clr,
8261     gen_movti_clr,
8262     gen_zero_extendqidi2_clr,
8263     gen_zero_extendhidi2_clr,
8264     gen_zero_extendsidi2_clr,
8265   };
8266   static gen_func_t gen_ld_c_nc[] = {
8267     gen_movbi_nc,
8268     gen_movqi_nc,
8269     gen_movhi_nc,
8270     gen_movsi_nc,
8271     gen_movdi_nc,
8272     gen_movsf_nc,
8273     gen_movdf_nc,
8274     gen_movxf_nc,
8275     gen_movti_nc,
8276     gen_zero_extendqidi2_nc,
8277     gen_zero_extendhidi2_nc,
8278     gen_zero_extendsidi2_nc,
8279   };
8280   static gen_func_t gen_chk_a_clr[] = {
8281     gen_advanced_load_check_clr_bi,
8282     gen_advanced_load_check_clr_qi,
8283     gen_advanced_load_check_clr_hi,
8284     gen_advanced_load_check_clr_si,
8285     gen_advanced_load_check_clr_di,
8286     gen_advanced_load_check_clr_sf,
8287     gen_advanced_load_check_clr_df,
8288     gen_advanced_load_check_clr_xf,
8289     gen_advanced_load_check_clr_ti,
8290     gen_advanced_load_check_clr_di,
8291     gen_advanced_load_check_clr_di,
8292     gen_advanced_load_check_clr_di,
8293   };
8294   static gen_func_t gen_chk_a_nc[] = {
8295     gen_advanced_load_check_nc_bi,
8296     gen_advanced_load_check_nc_qi,
8297     gen_advanced_load_check_nc_hi,
8298     gen_advanced_load_check_nc_si,
8299     gen_advanced_load_check_nc_di,
8300     gen_advanced_load_check_nc_sf,
8301     gen_advanced_load_check_nc_df,
8302     gen_advanced_load_check_nc_xf,
8303     gen_advanced_load_check_nc_ti,
8304     gen_advanced_load_check_nc_di,
8305     gen_advanced_load_check_nc_di,
8306     gen_advanced_load_check_nc_di,
8307   };
8308   static gen_func_t gen_chk_s[] = {
8309     gen_speculation_check_bi,
8310     gen_speculation_check_qi,
8311     gen_speculation_check_hi,
8312     gen_speculation_check_si,
8313     gen_speculation_check_di,
8314     gen_speculation_check_sf,
8315     gen_speculation_check_df,
8316     gen_speculation_check_xf,
8317     gen_speculation_check_ti,
8318     gen_speculation_check_di,
8319     gen_speculation_check_di,
8320     gen_speculation_check_di,
8321   };
8322 
8323   gen_func_t *gen_check;
8324 
8325   if (ts & BEGIN_DATA)
8326     {
8327       /* We don't need recovery because, even if this is ld.sa, the
8328 	 ALAT entry will be allocated only if the NAT bit is set to zero.
8329 	 So it is enough to use ld.c here.  */
8330 
8331       if (simple_check_p)
8332 	{
8333 	  gcc_assert (mflag_sched_spec_ldc);
8334 
8335 	  if (clearing_check_p)
8336 	    gen_check = gen_ld_c_clr;
8337 	  else
8338 	    gen_check = gen_ld_c_nc;
8339 	}
8340       else
8341 	{
8342 	  if (clearing_check_p)
8343 	    gen_check = gen_chk_a_clr;
8344 	  else
8345 	    gen_check = gen_chk_a_nc;
8346 	}
8347     }
8348   else if (ts & BEGIN_CONTROL)
8349     {
8350       if (simple_check_p)
8351 	/* We might want to use ld.sa -> ld.c instead of
8352 	   ld.s -> chk.s.  */
8353 	{
8354 	  gcc_assert (!ia64_needs_block_p (ts));
8355 
8356 	  if (clearing_check_p)
8357 	    gen_check = gen_ld_c_clr;
8358 	  else
8359 	    gen_check = gen_ld_c_nc;
8360 	}
8361       else
8362 	{
8363 	  gen_check = gen_chk_s;
8364 	}
8365     }
8366   else
8367     gcc_unreachable ();
8368 
8369   gcc_assert (mode_no >= 0);
8370   return gen_check[mode_no];
8371 }
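
/* For illustration (this mapping is implied by the arrays above rather
   than spelled out anywhere): MODE_NO indexes the gen_* tables in the
   order BI, QI, HI, SI, DI, SF, DF, XF, TI, followed by the three
   zero-extend variants.  E.g. for a DImode access (MODE_NO == 4), a
   simple clearing data-speculation check selects gen_ld_c_clr[4]
   == gen_movdi_clr, while the branchy variant selects
   gen_chk_a_clr[4] == gen_advanced_load_check_clr_di.  */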
8372 
8373 /* Return nonzero if a speculation check of type TS must be branchy.  */
8374 static bool
8375 ia64_needs_block_p (ds_t ts)
8376 {
8377   if (ts & BEGIN_DATA)
8378     return !mflag_sched_spec_ldc;
8379 
8380   gcc_assert ((ts & BEGIN_CONTROL) != 0);
8381 
8382   return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8383 }
8384 
8385 /* Generate (or regenerate) a recovery check for INSN.  */
8386 static rtx
8387 ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8388 {
8389   rtx op1, pat, check_pat;
8390   gen_func_t gen_check;
8391   int mode_no;
8392 
8393   mode_no = get_mode_no_for_insn (insn);
8394   gcc_assert (mode_no >= 0);
8395 
8396   if (label)
8397     op1 = label;
8398   else
8399     {
8400       gcc_assert (!ia64_needs_block_p (ds));
8401       op1 = copy_rtx (recog_data.operand[1]);
8402     }
8403 
8404   gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8405 					   true);
8406 
8407   check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8408 
8409   pat = PATTERN (insn);
8410   if (GET_CODE (pat) == COND_EXEC)
8411     check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8412 				   check_pat);
8413 
8414   return check_pat;
8415 }
8416 
8417 /* Return nonzero if X is a recovery check.  */
8418 static int
8419 ia64_spec_check_p (rtx x)
8420 {
8421   x = PATTERN (x);
8422   if (GET_CODE (x) == COND_EXEC)
8423     x = COND_EXEC_CODE (x);
8424   if (GET_CODE (x) == SET)
8425     return ia64_spec_check_src_p (SET_SRC (x));
8426   return 0;
8427 }
8428 
8429 /* Return nonzero if SRC belongs to a recovery check.  */
8430 static int
8431 ia64_spec_check_src_p (rtx src)
8432 {
8433   if (GET_CODE (src) == IF_THEN_ELSE)
8434     {
8435       rtx t;
8436 
8437       t = XEXP (src, 0);
8438       if (GET_CODE (t) == NE)
8439 	{
8440 	  t = XEXP (t, 0);
8441 
8442 	  if (GET_CODE (t) == UNSPEC)
8443 	    {
8444 	      int code;
8445 
8446 	      code = XINT (t, 1);
8447 
8448 	      if (code == UNSPEC_LDCCLR
8449 		  || code == UNSPEC_LDCNC
8450 		  || code == UNSPEC_CHKACLR
8451 		  || code == UNSPEC_CHKANC
8452 		  || code == UNSPEC_CHKS)
8453 		{
8454 		  gcc_assert (code != 0);
8455 		  return code;
8456 		}
8457 	    }
8458 	}
8459     }
8460   return 0;
8461 }
8462 
8463 
8464 /* The following page contains abstract data `bundle states' which are
8465    used for bundling insns (inserting nops and template generation).  */
8466 
8467 /* The following describes state of insn bundling.  */
8468 
8469 struct bundle_state
8470 {
8471   /* Unique bundle state number to identify the state in the debugging
8472      output.  */
8473   int unique_num;
8474   rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state  */
8475   /* number of nops before and after the insn  */
8476   short before_nops_num, after_nops_num;
8477   int insn_num; /* insn number (0 for the initial state, 1 for the 1st
8478                    insn)  */
8479   int cost;     /* cost of the state in cycles */
8480   int accumulated_insns_num; /* number of all previous insns including
8481 				nops.  L is considered as 2 insns */
8482   int branch_deviation; /* deviation of previous branches from 3rd slots  */
8483   int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8484   struct bundle_state *next;  /* next state with the same insn_num  */
8485   struct bundle_state *originator; /* originator (previous insn state)  */
8486   /* All bundle states are in the following chain.  */
8487   struct bundle_state *allocated_states_chain;
8488   /* The DFA State after issuing the insn and the nops.  */
8489   state_t dfa_state;
8490 };
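
/* Illustrative sketch (not part of the original code): a chain of
   originator links records one candidate bundling of an EBB, and the
   backward pass of bundling () below recovers the decisions roughly as

     for (s = best_state; s->originator != NULL; s = s->originator)
       {
         emit s->after_nops_num nops after s->insn;
         emit s->before_nops_num nops before s->insn;
       }

   interleaved with bundle selector templates.  */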
8491 
8492 /* The following maps an insn number to the corresponding bundle state.  */
8493 
8494 static struct bundle_state **index_to_bundle_states;
8495 
8496 /* The unique number of next bundle state.  */
8497 
8498 static int bundle_states_num;
8499 
8500 /* All allocated bundle states are in the following chain.  */
8501 
8502 static struct bundle_state *allocated_bundle_states_chain;
8503 
8504 /* All allocated but not used bundle states are in the following
8505    chain.  */
8506 
8507 static struct bundle_state *free_bundle_state_chain;
8508 
8509 
8510 /* The following function returns a free bundle state.  */
8511 
8512 static struct bundle_state *
8513 get_free_bundle_state (void)
8514 {
8515   struct bundle_state *result;
8516 
8517   if (free_bundle_state_chain != NULL)
8518     {
8519       result = free_bundle_state_chain;
8520       free_bundle_state_chain = result->next;
8521     }
8522   else
8523     {
8524       result = XNEW (struct bundle_state);
8525       result->dfa_state = xmalloc (dfa_state_size);
8526       result->allocated_states_chain = allocated_bundle_states_chain;
8527       allocated_bundle_states_chain = result;
8528     }
8529   result->unique_num = bundle_states_num++;
8530   return result;
8532 }
8533 
8534 /* The following function frees the given bundle state.  */
8535 
8536 static void
8537 free_bundle_state (struct bundle_state *state)
8538 {
8539   state->next = free_bundle_state_chain;
8540   free_bundle_state_chain = state;
8541 }
8542 
8543 /* Start work with abstract data `bundle states'.  */
8544 
8545 static void
8546 initiate_bundle_states (void)
8547 {
8548   bundle_states_num = 0;
8549   free_bundle_state_chain = NULL;
8550   allocated_bundle_states_chain = NULL;
8551 }
8552 
8553 /* Finish work with abstract data `bundle states'.  */
8554 
8555 static void
8556 finish_bundle_states (void)
8557 {
8558   struct bundle_state *curr_state, *next_state;
8559 
8560   for (curr_state = allocated_bundle_states_chain;
8561        curr_state != NULL;
8562        curr_state = next_state)
8563     {
8564       next_state = curr_state->allocated_states_chain;
8565       free (curr_state->dfa_state);
8566       free (curr_state);
8567     }
8568 }
8569 
8570 /* Hashtable helpers.  */
8571 
8572 struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
8573 {
8574   static inline hashval_t hash (const bundle_state *);
8575   static inline bool equal (const bundle_state *, const bundle_state *);
8576 };
8577 
8578 /* The function returns hash of BUNDLE_STATE.  */
8579 /* The function returns the hash of STATE.  */
8580 inline hashval_t
8581 bundle_state_hasher::hash (const bundle_state *state)
8582 {
8583   unsigned result, i;
8584 
8585   for (result = i = 0; i < dfa_state_size; i++)
8586     result += (((unsigned char *) state->dfa_state) [i]
8587 	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8588   return result + state->insn_num;
8589 }
8590 
8591 /* The function returns nonzero if the bundle state keys are equal.  */
8592 
8593 inline bool
8594 bundle_state_hasher::equal (const bundle_state *state1,
8595 			    const bundle_state *state2)
8596 {
8597   return (state1->insn_num == state2->insn_num
8598 	  && memcmp (state1->dfa_state, state2->dfa_state,
8599 		     dfa_state_size) == 0);
8600 }
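
/* In other words, two bundle states are considered equal exactly when
   they describe the same number of issued insns and the same DFA state;
   insert_bundle_state below then keeps only the best of them.  */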
8601 
8602 /* Hash table of the bundle states.  The key is dfa_state and insn_num
8603    of the bundle states.  */
8604 
8605 static hash_table<bundle_state_hasher> *bundle_state_table;
8606 
8607 /* The function inserts the BUNDLE_STATE into the hash table.  The
8608    function returns nonzero if the bundle state has been inserted into
8609    the table.  The table contains the best bundle state with a given key.  */
8610 
8611 static int
8612 insert_bundle_state (struct bundle_state *bundle_state)
8613 {
8614   struct bundle_state **entry_ptr;
8615 
8616   entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8617   if (*entry_ptr == NULL)
8618     {
8619       bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8620       index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8621       *entry_ptr = bundle_state;
8622       return TRUE;
8623     }
8624   else if (bundle_state->cost < (*entry_ptr)->cost
8625 	   || (bundle_state->cost == (*entry_ptr)->cost
8626 	       && ((*entry_ptr)->accumulated_insns_num
8627 		   > bundle_state->accumulated_insns_num
8628 		   || ((*entry_ptr)->accumulated_insns_num
8629 		       == bundle_state->accumulated_insns_num
8630 		       && ((*entry_ptr)->branch_deviation
8631 			   > bundle_state->branch_deviation
8632 			   || ((*entry_ptr)->branch_deviation
8633 			       == bundle_state->branch_deviation
8634 			       && (*entry_ptr)->middle_bundle_stops
8635 			       > bundle_state->middle_bundle_stops))))))
8636 
8637     {
8638       struct bundle_state temp;
8639 
8640       temp = **entry_ptr;
8641       **entry_ptr = *bundle_state;
8642       (*entry_ptr)->next = temp.next;
8643       *bundle_state = temp;
8644     }
8645   return FALSE;
8646 }
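
/* The nested condition above is just a lexicographic comparison.  A
   minimal equivalent sketch (hypothetical helper, not in the original
   source):

     static bool
     bundle_state_better_p (const struct bundle_state *a,
                            const struct bundle_state *b)
     {
       if (a->cost != b->cost)
         return a->cost < b->cost;
       if (a->accumulated_insns_num != b->accumulated_insns_num)
         return a->accumulated_insns_num < b->accumulated_insns_num;
       if (a->branch_deviation != b->branch_deviation)
         return a->branch_deviation < b->branch_deviation;
       return a->middle_bundle_stops < b->middle_bundle_stops;
     }

   i.e. prefer lower cost, then fewer insns plus nops, then smaller
   branch deviation, then fewer mid-bundle stop bits.  */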
8647 
8648 /* Start work with the hash table.  */
8649 
8650 static void
8651 initiate_bundle_state_table (void)
8652 {
8653   bundle_state_table = new hash_table<bundle_state_hasher> (50);
8654 }
8655 
8656 /* Finish work with the hash table.  */
8657 
8658 static void
8659 finish_bundle_state_table (void)
8660 {
8661   delete bundle_state_table;
8662   bundle_state_table = NULL;
8663 }
8664 
8665 
8666 
8667 /* The following variable is an insn `nop' used to check bundle states
8668    with different numbers of inserted nops.  */
8669 
8670 static rtx_insn *ia64_nop;
8671 
8672 /* The following function tries to issue NOPS_NUM nops for the current
8673    state without advancing the processor cycle.  If this fails, the
8674    function returns FALSE and frees the current state.  */
8675 
8676 static int
8677 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8678 {
8679   int i;
8680 
8681   for (i = 0; i < nops_num; i++)
8682     if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8683       {
8684 	free_bundle_state (curr_state);
8685 	return FALSE;
8686       }
8687   return TRUE;
8688 }
8689 
8690 /* The following function tries to issue INSN for the current
8691    state without advancing the processor cycle.  If this fails, the
8692    function returns FALSE and frees the current state.  */
8693 
8694 static int
8695 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8696 {
8697   if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8698     {
8699       free_bundle_state (curr_state);
8700       return FALSE;
8701     }
8702   return TRUE;
8703 }
8704 
8705 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8706    starting from ORIGINATOR without advancing the processor cycle.  If
8707    TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8708    ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
8709    bundle.  If successful, the function creates a new bundle state and
8710    inserts it into the hash table and into `index_to_bundle_states'.  */
8711 
8712 static void
8713 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8714 		     rtx_insn *insn, int try_bundle_end_p,
8715 		     int only_bundle_end_p)
8716 {
8717   struct bundle_state *curr_state;
8718 
8719   curr_state = get_free_bundle_state ();
8720   memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8721   curr_state->insn = insn;
8722   curr_state->insn_num = originator->insn_num + 1;
8723   curr_state->cost = originator->cost;
8724   curr_state->originator = originator;
8725   curr_state->before_nops_num = before_nops_num;
8726   curr_state->after_nops_num = 0;
8727   curr_state->accumulated_insns_num
8728     = originator->accumulated_insns_num + before_nops_num;
8729   curr_state->branch_deviation = originator->branch_deviation;
8730   curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8731   gcc_assert (insn);
8732   if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8733     {
8734       gcc_assert (GET_MODE (insn) != TImode);
8735       if (!try_issue_nops (curr_state, before_nops_num))
8736 	return;
8737       if (!try_issue_insn (curr_state, insn))
8738 	return;
8739       memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8740       if (curr_state->accumulated_insns_num % 3 != 0)
8741 	curr_state->middle_bundle_stops++;
8742       if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8743 	  && curr_state->accumulated_insns_num % 3 != 0)
8744 	{
8745 	  free_bundle_state (curr_state);
8746 	  return;
8747 	}
8748     }
8749   else if (GET_MODE (insn) != TImode)
8750     {
8751       if (!try_issue_nops (curr_state, before_nops_num))
8752 	return;
8753       if (!try_issue_insn (curr_state, insn))
8754 	return;
8755       curr_state->accumulated_insns_num++;
8756       gcc_assert (!unknown_for_bundling_p (insn));
8757 
8758       if (ia64_safe_type (insn) == TYPE_L)
8759 	curr_state->accumulated_insns_num++;
8760     }
8761   else
8762     {
8763       /* If this is an insn that must be first in a group, then don't allow
8764 	 nops to be emitted before it.  Currently, alloc is the only such
8765 	 supported instruction.  */
8766       /* ??? The bundling automatons should handle this for us, but they do
8767 	 not yet have support for the first_insn attribute.  */
8768       if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8769 	{
8770 	  free_bundle_state (curr_state);
8771 	  return;
8772 	}
8773 
8774       state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8775       state_transition (curr_state->dfa_state, NULL);
8776       curr_state->cost++;
8777       if (!try_issue_nops (curr_state, before_nops_num))
8778 	return;
8779       if (!try_issue_insn (curr_state, insn))
8780 	return;
8781       curr_state->accumulated_insns_num++;
8782       if (unknown_for_bundling_p (insn))
8783 	{
8784 	  /* Finish bundle containing asm insn.  */
8785 	  curr_state->after_nops_num
8786 	    = 3 - curr_state->accumulated_insns_num % 3;
8787 	  curr_state->accumulated_insns_num
8788 	    += 3 - curr_state->accumulated_insns_num % 3;
8789 	}
8790       else if (ia64_safe_type (insn) == TYPE_L)
8791 	curr_state->accumulated_insns_num++;
8792     }
8793   if (ia64_safe_type (insn) == TYPE_B)
8794     curr_state->branch_deviation
8795       += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8796   if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8797     {
8798       if (!only_bundle_end_p && insert_bundle_state (curr_state))
8799 	{
8800 	  state_t dfa_state;
8801 	  struct bundle_state *curr_state1;
8802 	  struct bundle_state *allocated_states_chain;
8803 
8804 	  curr_state1 = get_free_bundle_state ();
8805 	  dfa_state = curr_state1->dfa_state;
8806 	  allocated_states_chain = curr_state1->allocated_states_chain;
8807 	  *curr_state1 = *curr_state;
8808 	  curr_state1->dfa_state = dfa_state;
8809 	  curr_state1->allocated_states_chain = allocated_states_chain;
8810 	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8811 		  dfa_state_size);
8812 	  curr_state = curr_state1;
8813 	}
8814       if (!try_issue_nops (curr_state,
8815 			   3 - curr_state->accumulated_insns_num % 3))
8816 	return;
8817       curr_state->after_nops_num
8818 	= 3 - curr_state->accumulated_insns_num % 3;
8819       curr_state->accumulated_insns_num
8820 	+= 3 - curr_state->accumulated_insns_num % 3;
8821     }
8822   if (!insert_bundle_state (curr_state))
8823     free_bundle_state (curr_state);
8824   return;
8825 }
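
/* A note on the arithmetic above (it follows directly from the code):
   accumulated_insns_num counts occupied slots, so
   accumulated_insns_num % 3 is the position within the current 3-slot
   bundle and

     3 - accumulated_insns_num % 3

   is the number of nops needed to pad that bundle to its end.  For
   example, with 7 slots already used the current bundle holds one insn
   and two nops are added to close it.  */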
8826 
8827 /* The following function returns the position in the two-bundle window
8828    for the given STATE.  */
8829 
8830 static int
8831 get_max_pos (state_t state)
8832 {
8833   if (cpu_unit_reservation_p (state, pos_6))
8834     return 6;
8835   else if (cpu_unit_reservation_p (state, pos_5))
8836     return 5;
8837   else if (cpu_unit_reservation_p (state, pos_4))
8838     return 4;
8839   else if (cpu_unit_reservation_p (state, pos_3))
8840     return 3;
8841   else if (cpu_unit_reservation_p (state, pos_2))
8842     return 2;
8843   else if (cpu_unit_reservation_p (state, pos_1))
8844     return 1;
8845   else
8846     return 0;
8847 }
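
/* Interpretation (an assumption drawn from how the result is used below,
   not stated in the original comments): pos_1 .. pos_6 are automaton
   units that model how many of the six slots of the two-bundle window
   are occupied, so the value returned above is the index of the last
   occupied slot (6 when both bundles are full, 3 when only the first
   bundle is full, 0 for an empty window).  */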
8848 
8849 /* The function returns the code of a possible template for the given
8850    position and state.  It should be called only with position values of
8851    3 or 6.  We avoid generating F NOPs by putting templates containing F
8852    insns at the end of the template search, because of an undocumented
8853    anomaly in McKinley-derived cores which can cause stalls if an F-unit
8854    insn (including a NOP) is issued within a six-cycle window after
8855    reading certain application registers (such as ar.bsp).  Furthermore,
8856    power considerations also argue against the use of F-unit instructions
8857    unless they're really needed.  */
8858 
8859 static int
8860 get_template (state_t state, int pos)
8861 {
8862   switch (pos)
8863     {
8864     case 3:
8865       if (cpu_unit_reservation_p (state, _0mmi_))
8866 	return 1;
8867       else if (cpu_unit_reservation_p (state, _0mii_))
8868 	return 0;
8869       else if (cpu_unit_reservation_p (state, _0mmb_))
8870 	return 7;
8871       else if (cpu_unit_reservation_p (state, _0mib_))
8872 	return 6;
8873       else if (cpu_unit_reservation_p (state, _0mbb_))
8874 	return 5;
8875       else if (cpu_unit_reservation_p (state, _0bbb_))
8876 	return 4;
8877       else if (cpu_unit_reservation_p (state, _0mmf_))
8878 	return 3;
8879       else if (cpu_unit_reservation_p (state, _0mfi_))
8880 	return 2;
8881       else if (cpu_unit_reservation_p (state, _0mfb_))
8882 	return 8;
8883       else if (cpu_unit_reservation_p (state, _0mlx_))
8884 	return 9;
8885       else
8886 	gcc_unreachable ();
8887     case 6:
8888       if (cpu_unit_reservation_p (state, _1mmi_))
8889 	return 1;
8890       else if (cpu_unit_reservation_p (state, _1mii_))
8891 	return 0;
8892       else if (cpu_unit_reservation_p (state, _1mmb_))
8893 	return 7;
8894       else if (cpu_unit_reservation_p (state, _1mib_))
8895 	return 6;
8896       else if (cpu_unit_reservation_p (state, _1mbb_))
8897 	return 5;
8898       else if (cpu_unit_reservation_p (state, _1bbb_))
8899 	return 4;
8900       else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8901 	return 3;
8902       else if (cpu_unit_reservation_p (state, _1mfi_))
8903 	return 2;
8904       else if (cpu_unit_reservation_p (state, _1mfb_))
8905 	return 8;
8906       else if (cpu_unit_reservation_p (state, _1mlx_))
8907 	return 9;
8908       else
8909 	gcc_unreachable ();
8910     default:
8911       gcc_unreachable ();
8912     }
8913 }
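
/* For reference, the template codes produced above (as implied by the
   unit names checked in get_template and used as bundle_selector
   operands elsewhere in this file):

     0 .mii   1 .mmi   2 .mfi   3 .mmf   4 .bbb
     5 .mbb   6 .mib   7 .mmb   8 .mfb   9 .mlx

   Note that ia64_add_bundle_selector_before below special-cases codes 4
   and 5 (.bbb and .mbb) when target unwind info is in use.  */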
8914 
8915 /* True when INSN is important for bundling.  */
8916 
8917 static bool
8918 important_for_bundling_p (rtx_insn *insn)
8919 {
8920   return (INSN_P (insn)
8921 	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8922 	  && GET_CODE (PATTERN (insn)) != USE
8923 	  && GET_CODE (PATTERN (insn)) != CLOBBER);
8924 }
8925 
8926 /* The following function returns the first insn important for insn
8927    bundling, starting at INSN and before TAIL.  */
8928 
8929 static rtx_insn *
8930 get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
8931 {
8932   for (; insn && insn != tail; insn = NEXT_INSN (insn))
8933     if (important_for_bundling_p (insn))
8934       return insn;
8935   return NULL;
8936 }
8937 
8938 /* True when INSN is unknown, but important, for bundling.  */
8939 
8940 static bool
8941 unknown_for_bundling_p (rtx_insn *insn)
8942 {
8943   return (INSN_P (insn)
8944 	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
8945 	  && GET_CODE (PATTERN (insn)) != USE
8946 	  && GET_CODE (PATTERN (insn)) != CLOBBER);
8947 }
8948 
8949 /* Add a bundle selector TEMPLATE0 before INSN.  */
8950 
8951 static void
8952 ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
8953 {
8954   rtx b = gen_bundle_selector (GEN_INT (template0));
8955 
8956   ia64_emit_insn_before (b, insn);
8957 #if NR_BUNDLES == 10
8958   if ((template0 == 4 || template0 == 5)
8959       && ia64_except_unwind_info (&global_options) == UI_TARGET)
8960     {
8961       int i;
8962       rtx note = NULL_RTX;
8963 
8964       /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
8965          first or second slot.  If it is and has a REG_EH_REGION note, copy
8966          the note to the following nops, as br.call sets rp to the address
8967          of the following bundle and therefore an EH region end must be on
8968          a bundle boundary.  */
8969       insn = PREV_INSN (insn);
8970       for (i = 0; i < 3; i++)
8971 	{
8972 	  do
8973 	    insn = next_active_insn (insn);
8974 	  while (NONJUMP_INSN_P (insn)
8975 		 && get_attr_empty (insn) == EMPTY_YES);
8976 	  if (CALL_P (insn))
8977 	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8978 	  else if (note)
8979 	    {
8980 	      int code;
8981 
8982 	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8983 			  || code == CODE_FOR_nop_b);
8984 	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8985 		note = NULL_RTX;
8986 	      else
8987 		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8988 	    }
8989 	}
8990     }
8991 #endif
8992 }
8993 
8994 /* The following function does insn bundling.  Bundling means
8995    inserting templates and nop insns to fit insn groups into permitted
8996    templates.  Instruction scheduling uses an NDFA (non-deterministic
8997    finite automaton) encoding information about the templates and the
8998    inserted nops.  Nondeterminism of the automaton permits following
8999    all possible insn sequences very fast.
9000 
9001    Unfortunately it is not possible to get information about inserting
9002    nop insns and used templates from the automaton states.  The
9003    automaton only says that we can issue an insn, possibly inserting
9004    some nops before it and using some template.  Therefore insn
9005    bundling in this function is implemented by using DFA
9006    (deterministic finite automata).  We follow all possible insn
9007    sequences by inserting 0-2 nops (that is what the NDFA describes for
9008    insn scheduling) before/after each insn being bundled.  We know the
9009    start of simulated processor cycle from insn scheduling (insn
9010    starting a new cycle has TImode).
9011 
9012    A simple implementation of insn bundling would create an enormous
9013    number of possible insn sequences satisfying the information about new
9014    cycle ticks taken from the insn scheduling.  To make the algorithm
9015    practical we use dynamic programming.  Each decision (about
9016    inserting nops and implicitly about previous decisions) is described
9017    by the structure bundle_state (see above).  If we generate the same
9018    bundle state (the key is the automaton state after issuing the insns
9019    and nops for it), we reuse the already generated one.  As a consequence
9020    we reject some decisions which cannot improve the solution and
9021    reduce the memory used by the algorithm.
9022 
9023    When we reach the end of EBB (extended basic block), we choose the
9024    best sequence and then, moving back in EBB, insert templates for
9025    best alternative.  The templates are taken by querying the
9026    automaton state for each insn in the chosen bundle states.
9027 
9028    So the algorithm makes two (forward and backward) passes through
9029    EBB.  */
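
/* A compressed sketch of the two passes below (illustrative only; names
   match the real code, the control flow is simplified):

     index_to_bundle_states[0] = empty initial state;
     forward pass: for each important insn I (the N-th one)
       for each state S in index_to_bundle_states[N - 1]
         try issue_nops_and_insn (S, 2/1/0 nops, I, ...);
     best = state in index_to_bundle_states[last] with
            accumulated_insns_num % 3 == 0 and the best cost criteria;
     backward pass: walk best->originator chain, emitting nops and
            bundle_selector templates.

   Dominated states are pruned by insert_bundle_state (), which keeps one
   best state per (dfa_state, insn_num) key.  */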
9030 
9031 static void
9032 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9033 {
9034   struct bundle_state *curr_state, *next_state, *best_state;
9035   rtx_insn *insn, *next_insn;
9036   int insn_num;
9037   int i, bundle_end_p, only_bundle_end_p, asm_p;
9038   int pos = 0, max_pos, template0, template1;
9039   rtx_insn *b;
9040   enum attr_type type;
9041 
9042   insn_num = 0;
9043   /* Count insns in the EBB.  */
9044   for (insn = NEXT_INSN (prev_head_insn);
9045        insn && insn != tail;
9046        insn = NEXT_INSN (insn))
9047     if (INSN_P (insn))
9048       insn_num++;
9049   if (insn_num == 0)
9050     return;
9051   bundling_p = 1;
9052   dfa_clean_insn_cache ();
9053   initiate_bundle_state_table ();
9054   index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9055   /* First (forward) pass -- generation of bundle states.  */
9056   curr_state = get_free_bundle_state ();
9057   curr_state->insn = NULL;
9058   curr_state->before_nops_num = 0;
9059   curr_state->after_nops_num = 0;
9060   curr_state->insn_num = 0;
9061   curr_state->cost = 0;
9062   curr_state->accumulated_insns_num = 0;
9063   curr_state->branch_deviation = 0;
9064   curr_state->middle_bundle_stops = 0;
9065   curr_state->next = NULL;
9066   curr_state->originator = NULL;
9067   state_reset (curr_state->dfa_state);
9068   index_to_bundle_states [0] = curr_state;
9069   insn_num = 0;
9070   /* Shift the cycle mark if it is put on an insn which could be ignored.  */
9071   for (insn = NEXT_INSN (prev_head_insn);
9072        insn != tail;
9073        insn = NEXT_INSN (insn))
9074     if (INSN_P (insn)
9075 	&& !important_for_bundling_p (insn)
9076 	&& GET_MODE (insn) == TImode)
9077       {
9078 	PUT_MODE (insn, VOIDmode);
9079 	for (next_insn = NEXT_INSN (insn);
9080 	     next_insn != tail;
9081 	     next_insn = NEXT_INSN (next_insn))
9082 	  if (important_for_bundling_p (next_insn)
9083 	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9084 	    {
9085 	      PUT_MODE (next_insn, TImode);
9086 	      break;
9087 	    }
9088       }
9089   /* Forward pass: generation of bundle states.  */
9090   for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9091        insn != NULL_RTX;
9092        insn = next_insn)
9093     {
9094       gcc_assert (important_for_bundling_p (insn));
9095       type = ia64_safe_type (insn);
9096       next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9097       insn_num++;
9098       index_to_bundle_states [insn_num] = NULL;
9099       for (curr_state = index_to_bundle_states [insn_num - 1];
9100 	   curr_state != NULL;
9101 	   curr_state = next_state)
9102 	{
9103 	  pos = curr_state->accumulated_insns_num % 3;
9104 	  next_state = curr_state->next;
9105 	  /* We must fill up the current bundle in order to start a
9106 	     subsequent asm insn in a new bundle.  An asm insn is always
9107 	     placed in a separate bundle.  */
9108 	  only_bundle_end_p
9109 	    = (next_insn != NULL_RTX
9110 	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9111 	       && unknown_for_bundling_p (next_insn));
9112 	  /* We may fill up the current bundle if it is the cycle end
9113 	     without a group barrier.  */
9114 	  bundle_end_p
9115 	    = (only_bundle_end_p || next_insn == NULL_RTX
9116 	       || (GET_MODE (next_insn) == TImode
9117 		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9118 	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9119 	      || type == TYPE_S)
9120 	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9121 				 only_bundle_end_p);
9122 	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9123 			       only_bundle_end_p);
9124 	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9125 			       only_bundle_end_p);
9126 	}
9127       gcc_assert (index_to_bundle_states [insn_num]);
9128       for (curr_state = index_to_bundle_states [insn_num];
9129 	   curr_state != NULL;
9130 	   curr_state = curr_state->next)
9131 	if (verbose >= 2 && dump)
9132 	  {
9133 	    /* This structure is taken from generated code of the
9134 	       pipeline hazard recognizer (see file insn-attrtab.c).
9135 	       Please don't forget to change the structure if a new
9136 	       automaton is added to .md file.  */
9137 	    struct DFA_chip
9138 	    {
9139 	      unsigned short one_automaton_state;
9140 	      unsigned short oneb_automaton_state;
9141 	      unsigned short two_automaton_state;
9142 	      unsigned short twob_automaton_state;
9143 	    };
9144 
9145 	    fprintf
9146 	      (dump,
9147 	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9148 	       curr_state->unique_num,
9149 	       (curr_state->originator == NULL
9150 		? -1 : curr_state->originator->unique_num),
9151 	       curr_state->cost,
9152 	       curr_state->before_nops_num, curr_state->after_nops_num,
9153 	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
9154 	       curr_state->middle_bundle_stops,
9155 	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9156 	       INSN_UID (insn));
9157 	  }
9158     }
9159 
9160   /* We should find a solution because the 2nd insn scheduling has
9161      found one.  */
9162   gcc_assert (index_to_bundle_states [insn_num]);
9163   /* Find a state corresponding to the best insn sequence.  */
9164   best_state = NULL;
9165   for (curr_state = index_to_bundle_states [insn_num];
9166        curr_state != NULL;
9167        curr_state = curr_state->next)
9168     /* We only look at states whose last bundle is fully filled up.
9169        First we prefer insn sequences with minimal cost, then with
9170        minimal inserted nops, and finally with branch insns placed in
9171        the 3rd slots.  */
9172     if (curr_state->accumulated_insns_num % 3 == 0
9173 	&& (best_state == NULL || best_state->cost > curr_state->cost
9174 	    || (best_state->cost == curr_state->cost
9175 		&& (curr_state->accumulated_insns_num
9176 		    < best_state->accumulated_insns_num
9177 		    || (curr_state->accumulated_insns_num
9178 			== best_state->accumulated_insns_num
9179 			&& (curr_state->branch_deviation
9180 			    < best_state->branch_deviation
9181 			    || (curr_state->branch_deviation
9182 				== best_state->branch_deviation
9183 				&& curr_state->middle_bundle_stops
9184 				< best_state->middle_bundle_stops)))))))
9185       best_state = curr_state;
9186   /* Second (backward) pass: adding nops and templates.  */
9187   gcc_assert (best_state);
9188   insn_num = best_state->before_nops_num;
9189   template0 = template1 = -1;
9190   for (curr_state = best_state;
9191        curr_state->originator != NULL;
9192        curr_state = curr_state->originator)
9193     {
9194       insn = curr_state->insn;
9195       asm_p = unknown_for_bundling_p (insn);
9196       insn_num++;
9197       if (verbose >= 2 && dump)
9198 	{
9199 	  struct DFA_chip
9200 	  {
9201 	    unsigned short one_automaton_state;
9202 	    unsigned short oneb_automaton_state;
9203 	    unsigned short two_automaton_state;
9204 	    unsigned short twob_automaton_state;
9205 	  };
9206 
9207 	  fprintf
9208 	    (dump,
9209 	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9210 	     curr_state->unique_num,
9211 	     (curr_state->originator == NULL
9212 	      ? -1 : curr_state->originator->unique_num),
9213 	     curr_state->cost,
9214 	     curr_state->before_nops_num, curr_state->after_nops_num,
9215 	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
9216 	     curr_state->middle_bundle_stops,
9217 	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9218 	     INSN_UID (insn));
9219 	}
9220       /* Find the position in the current bundle window.  The window can
9221 	 contain at most two bundles.  A two-bundle window means that
9222 	 the processor will make two bundle rotations.  */
9223       max_pos = get_max_pos (curr_state->dfa_state);
9224       if (max_pos == 6
9225 	  /* The following (negative template number) means that the
9226 	     processor did one bundle rotation.  */
9227 	  || (max_pos == 3 && template0 < 0))
9228 	{
9229 	  /* We are at the end of the window -- find template(s) for
9230 	     its bundle(s).  */
9231 	  pos = max_pos;
9232 	  if (max_pos == 3)
9233 	    template0 = get_template (curr_state->dfa_state, 3);
9234 	  else
9235 	    {
9236 	      template1 = get_template (curr_state->dfa_state, 3);
9237 	      template0 = get_template (curr_state->dfa_state, 6);
9238 	    }
9239 	}
9240       if (max_pos > 3 && template1 < 0)
9241 	/* This may happen when we have a stop inside a bundle.  */
9242 	{
9243 	  gcc_assert (pos <= 3);
9244 	  template1 = get_template (curr_state->dfa_state, 3);
9245 	  pos += 3;
9246 	}
9247       if (!asm_p)
9248 	/* Emit nops after the current insn.  */
9249 	for (i = 0; i < curr_state->after_nops_num; i++)
9250 	  {
9251 	    rtx nop_pat = gen_nop ();
9252 	    rtx_insn *nop = emit_insn_after (nop_pat, insn);
9253 	    pos--;
9254 	    gcc_assert (pos >= 0);
9255 	    if (pos % 3 == 0)
9256 	      {
9257 		/* We are at the start of a bundle: emit the template
9258 		   (it should be defined).  */
9259 		gcc_assert (template0 >= 0);
9260 		ia64_add_bundle_selector_before (template0, nop);
9261 		/* If we have a two-bundle window, we make one bundle
9262 		   rotation.  Otherwise template0 will be undefined
9263 		   (negative value).  */
9264 		template0 = template1;
9265 		template1 = -1;
9266 	      }
9267 	  }
9268       /* Move the position backward in the window.  A group barrier has
9269 	 no slot.  An asm insn takes the whole bundle.  */
9270       if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9271 	  && !unknown_for_bundling_p (insn))
9272 	pos--;
9273       /* Long insn takes 2 slots.  */
9274       if (ia64_safe_type (insn) == TYPE_L)
9275 	pos--;
9276       gcc_assert (pos >= 0);
9277       if (pos % 3 == 0
9278 	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9279 	  && !unknown_for_bundling_p (insn))
9280 	{
9281 	  /* The current insn is at the bundle start: emit the
9282 	     template.  */
9283 	  gcc_assert (template0 >= 0);
9284 	  ia64_add_bundle_selector_before (template0, insn);
9285 	  b = PREV_INSN (insn);
9286 	  insn = b;
9287 	  /* See comment above in analogous place for emitting nops
9288 	     after the insn.  */
9289 	  template0 = template1;
9290 	  template1 = -1;
9291 	}
9292       /* Emit nops before the current insn.  */
9293       for (i = 0; i < curr_state->before_nops_num; i++)
9294 	{
9295 	  rtx nop_pat = gen_nop ();
9296 	  ia64_emit_insn_before (nop_pat, insn);
9297 	  rtx_insn *nop = PREV_INSN (insn);
9298 	  insn = nop;
9299 	  pos--;
9300 	  gcc_assert (pos >= 0);
9301 	  if (pos % 3 == 0)
9302 	    {
9303 	      /* See comment above in analogous place for emitting nops
9304 		 after the insn.  */
9305 	      gcc_assert (template0 >= 0);
9306 	      ia64_add_bundle_selector_before (template0, insn);
9307 	      b = PREV_INSN (insn);
9308 	      insn = b;
9309 	      template0 = template1;
9310 	      template1 = -1;
9311 	    }
9312 	}
9313     }
9314 
9315   if (flag_checking)
9316     {
9317       /* Assert right calculation of middle_bundle_stops.  */
9318       int num = best_state->middle_bundle_stops;
9319       bool start_bundle = true, end_bundle = false;
9320 
9321       for (insn = NEXT_INSN (prev_head_insn);
9322 	   insn && insn != tail;
9323 	   insn = NEXT_INSN (insn))
9324 	{
9325 	  if (!INSN_P (insn))
9326 	    continue;
9327 	  if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9328 	    start_bundle = true;
9329 	  else
9330 	    {
9331 	      rtx_insn *next_insn;
9332 
9333 	      for (next_insn = NEXT_INSN (insn);
9334 		   next_insn && next_insn != tail;
9335 		   next_insn = NEXT_INSN (next_insn))
9336 		if (INSN_P (next_insn)
9337 		    && (ia64_safe_itanium_class (next_insn)
9338 			!= ITANIUM_CLASS_IGNORE
9339 			|| recog_memoized (next_insn)
9340 			== CODE_FOR_bundle_selector)
9341 		    && GET_CODE (PATTERN (next_insn)) != USE
9342 		    && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9343 		  break;
9344 
9345 	      end_bundle = next_insn == NULL_RTX
9346 		|| next_insn == tail
9347 		|| (INSN_P (next_insn)
9348 		    && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
9349 	      if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9350 		  && !start_bundle && !end_bundle
9351 		  && next_insn
9352 		  && !unknown_for_bundling_p (next_insn))
9353 		num--;
9354 
9355 	      start_bundle = false;
9356 	    }
9357 	}
9358 
9359       gcc_assert (num == 0);
9360     }
9361 
9362   free (index_to_bundle_states);
9363   finish_bundle_state_table ();
9364   bundling_p = 0;
9365   dfa_clean_insn_cache ();
9366 }
9367 
9368 /* The following function is called at the end of scheduling BB or
9369    EBB.  After reload, it inserts stop bits and does insn bundling.  */
9370 
9371 static void
9372 ia64_sched_finish (FILE *dump, int sched_verbose)
9373 {
9374   if (sched_verbose)
9375     fprintf (dump, "// Finishing schedule.\n");
9376   if (!reload_completed)
9377     return;
9378   if (reload_completed)
9379     {
9380       final_emit_insn_group_barriers (dump);
9381       bundling (dump, sched_verbose, current_sched_info->prev_head,
9382 		current_sched_info->next_tail);
9383       if (sched_verbose && dump)
9384 	fprintf (dump, "//    finishing %d-%d\n",
9385 		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9386 		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9387 
9388       return;
9389     }
9390 }
9391 
9392 /* The following function inserts stop bits in scheduled BB or EBB.  */
9393 
9394 static void
9395 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9396 {
9397   rtx_insn *insn;
9398   int need_barrier_p = 0;
9399   int seen_good_insn = 0;
9400 
9401   init_insn_group_barriers ();
9402 
9403   for (insn = NEXT_INSN (current_sched_info->prev_head);
9404        insn != current_sched_info->next_tail;
9405        insn = NEXT_INSN (insn))
9406     {
9407       if (BARRIER_P (insn))
9408 	{
9409 	  rtx_insn *last = prev_active_insn (insn);
9410 
9411 	  if (! last)
9412 	    continue;
9413 	  if (JUMP_TABLE_DATA_P (last))
9414 	    last = prev_active_insn (last);
9415 	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9416 	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9417 
9418 	  init_insn_group_barriers ();
9419 	  seen_good_insn = 0;
9420 	  need_barrier_p = 0;
9421 	}
9422       else if (NONDEBUG_INSN_P (insn))
9423 	{
9424 	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9425 	    {
9426 	      init_insn_group_barriers ();
9427 	      seen_good_insn = 0;
9428 	      need_barrier_p = 0;
9429 	    }
9430 	  else if (need_barrier_p || group_barrier_needed (insn)
9431 		   || (mflag_sched_stop_bits_after_every_cycle
9432 		       && GET_MODE (insn) == TImode
9433 		       && seen_good_insn))
9434 	    {
9435 	      if (TARGET_EARLY_STOP_BITS)
9436 		{
9437 		  rtx_insn *last;
9438 
9439 		  for (last = insn;
9440 		       last != current_sched_info->prev_head;
9441 		       last = PREV_INSN (last))
9442 		    if (INSN_P (last) && GET_MODE (last) == TImode
9443 			&& stops_p [INSN_UID (last)])
9444 		      break;
9445 		  if (last == current_sched_info->prev_head)
9446 		    last = insn;
9447 		  last = prev_active_insn (last);
9448 		  if (last
9449 		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9450 		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9451 				     last);
9452 		  init_insn_group_barriers ();
9453 		  for (last = NEXT_INSN (last);
9454 		       last != insn;
9455 		       last = NEXT_INSN (last))
9456 		    if (INSN_P (last))
9457 		      {
9458 			group_barrier_needed (last);
9459 			if (recog_memoized (last) >= 0
9460 			    && important_for_bundling_p (last))
9461 			  seen_good_insn = 1;
9462 		      }
9463 		}
9464 	      else
9465 		{
9466 		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9467 				    insn);
9468 		  init_insn_group_barriers ();
9469 		  seen_good_insn = 0;
9470 		}
9471 	      group_barrier_needed (insn);
9472 	      if (recog_memoized (insn) >= 0
9473 		  && important_for_bundling_p (insn))
9474 		seen_good_insn = 1;
9475 	    }
9476 	  else if (recog_memoized (insn) >= 0
9477 		   && important_for_bundling_p (insn))
9478 	    seen_good_insn = 1;
9479 	  need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
9480 	}
9481     }
9482 }
9483 
9484 
9485 
9486 /* The following function returns the lookahead depth used by the
9487    first cycle multipass DFA insn scheduler.  */
9488 
9489 static int
9490 ia64_first_cycle_multipass_dfa_lookahead (void)
9491 {
9492   return (reload_completed ? 6 : 4);
9493 }
9494 
9495 /* The following function initiates variable `dfa_pre_cycle_insn'.  */
9496 
9497 static void
9498 ia64_init_dfa_pre_cycle_insn (void)
9499 {
9500   if (temp_dfa_state == NULL)
9501     {
9502       dfa_state_size = state_size ();
9503       temp_dfa_state = xmalloc (dfa_state_size);
9504       prev_cycle_state = xmalloc (dfa_state_size);
9505     }
9506   dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9507   SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9508   recog_memoized (dfa_pre_cycle_insn);
9509   dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9510   SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9511   recog_memoized (dfa_stop_insn);
9512 }
9513 
9514 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9515    used by the DFA insn scheduler.  */
9516 
9517 static rtx
9518 ia64_dfa_pre_cycle_insn (void)
9519 {
9520   return dfa_pre_cycle_insn;
9521 }
9522 
9523 /* The following function returns TRUE if PRODUCER (of type ilog or
9524    ld) produces an address for CONSUMER (of type st or stf).  */
9525 
9526 int
9527 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9528 {
9529   rtx dest, reg, mem;
9530 
9531   gcc_assert (producer && consumer);
9532   dest = ia64_single_set (producer);
9533   gcc_assert (dest);
9534   reg = SET_DEST (dest);
9535   gcc_assert (reg);
9536   if (GET_CODE (reg) == SUBREG)
9537     reg = SUBREG_REG (reg);
9538   gcc_assert (GET_CODE (reg) == REG);
9539 
9540   dest = ia64_single_set (consumer);
9541   gcc_assert (dest);
9542   mem = SET_DEST (dest);
9543   gcc_assert (mem && GET_CODE (mem) == MEM);
9544   return reg_mentioned_p (reg, mem);
9545 }
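
/* Illustrative example (simplified RTL, not taken from the original
   comments): the bypass above matches pairs such as

     producer:  (set (reg:DI r14) (plus:DI (reg:DI r32) (const_int 8)))
     consumer:  (set (mem:DI (reg:DI r14)) (reg:DI r15))

   where the register defined by the producer appears in the consumer's
   store address, so the scheduler can apply the st-address bypass
   latency between the two insns.  */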
9546 
9547 /* The following function returns TRUE if PRODUCER (of type ilog or
9548    ld) produces an address for CONSUMER (of type ld or fld).  */
9549 
9550 int
9551 ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9552 {
9553   rtx dest, src, reg, mem;
9554 
9555   gcc_assert (producer && consumer);
9556   dest = ia64_single_set (producer);
9557   gcc_assert (dest);
9558   reg = SET_DEST (dest);
9559   gcc_assert (reg);
9560   if (GET_CODE (reg) == SUBREG)
9561     reg = SUBREG_REG (reg);
9562   gcc_assert (GET_CODE (reg) == REG);
9563 
9564   src = ia64_single_set (consumer);
9565   gcc_assert (src);
9566   mem = SET_SRC (src);
9567   gcc_assert (mem);
9568 
9569   if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9570     mem = XVECEXP (mem, 0, 0);
9571   else if (GET_CODE (mem) == IF_THEN_ELSE)
9572     /* ??? Is this bypass necessary for ld.c?  */
9573     {
9574       gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9575       mem = XEXP (mem, 1);
9576     }
9577 
9578   while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9579     mem = XEXP (mem, 0);
9580 
9581   if (GET_CODE (mem) == UNSPEC)
9582     {
9583       int c = XINT (mem, 1);
9584 
9585       gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9586 		  || c == UNSPEC_LDSA);
9587       mem = XVECEXP (mem, 0, 0);
9588     }
9589 
9590   /* Note that LO_SUM is used for GOT loads.  */
9591   gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9592 
9593   return reg_mentioned_p (reg, mem);
9594 }
9595 
9596 /* The following function returns TRUE if INSN produces an address for a
9597    load/store insn.  We will place such insns into an M slot because that
9598    decreases their latency.  */
9599 
9600 int
9601 ia64_produce_address_p (rtx insn)
9602 {
9603   return insn->call;
9604 }
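
/* The `call' bit tested above is reused as a scratch flag marking
   address producers rather than indicating a call insn; it is set
   during dependency analysis elsewhere in this file.  */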
9605 
9606 
9607 /* Emit pseudo-ops for the assembler to describe predicate relations.
9608    At present this assumes that we only consider predicate pairs to
9609    be mutex, and that the assembler can deduce proper values from
9610    straight-line code.  */
9611 
9612 static void
9613 emit_predicate_relation_info (void)
9614 {
9615   basic_block bb;
9616 
9617   FOR_EACH_BB_REVERSE_FN (bb, cfun)
9618     {
9619       int r;
9620       rtx_insn *head = BB_HEAD (bb);
9621 
9622       /* We only need such notes at code labels.  */
9623       if (! LABEL_P (head))
9624 	continue;
9625       if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9626 	head = NEXT_INSN (head);
9627 
9628       /* Skip p0, which may be thought to be live due to (reg:DI p0)
9629 	 grabbing the entire block of predicate registers.  */
9630       for (r = PR_REG (2); r < PR_REG (64); r += 2)
9631 	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9632 	  {
9633 	    rtx p = gen_rtx_REG (BImode, r);
9634 	    rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9635 	    if (head == BB_END (bb))
9636 	      BB_END (bb) = n;
9637 	    head = n;
9638 	  }
9639     }
9640 
9641   /* Look for conditional calls that do not return, and protect predicate
9642      relations around them.  Otherwise the assembler will assume the call
9643      returns, and complain about uses of call-clobbered predicates after
9644      the call.  */
9645   FOR_EACH_BB_REVERSE_FN (bb, cfun)
9646     {
9647       rtx_insn *insn = BB_HEAD (bb);
9648 
9649       while (1)
9650 	{
9651 	  if (CALL_P (insn)
9652 	      && GET_CODE (PATTERN (insn)) == COND_EXEC
9653 	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9654 	    {
9655 	      rtx_insn *b =
9656 		emit_insn_before (gen_safe_across_calls_all (), insn);
9657 	      rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9658 	      if (BB_HEAD (bb) == insn)
9659 		BB_HEAD (bb) = b;
9660 	      if (BB_END (bb) == insn)
9661 		BB_END (bb) = a;
9662 	    }
9663 
9664 	  if (insn == BB_END (bb))
9665 	    break;
9666 	  insn = NEXT_INSN (insn);
9667 	}
9668     }
9669 }
9670 
9671 /* Perform machine dependent operations on the rtl chain INSNS.  */
9672 
9673 static void
9674 ia64_reorg (void)
9675 {
9676   /* We are freeing block_for_insn in the toplev to keep compatibility
9677      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
9678   compute_bb_for_insn ();
9679 
9680   /* If optimizing, we'll have split before scheduling.  */
9681   if (optimize == 0)
9682     split_all_insns ();
9683 
9684   if (optimize && flag_schedule_insns_after_reload
9685       && dbg_cnt (ia64_sched2))
9686     {
9687       basic_block bb;
9688       timevar_push (TV_SCHED2);
9689       ia64_final_schedule = 1;
9690 
9691       /* We can't let modulo-sched prevent us from scheduling any bbs,
9692 	 since we need the final schedule to produce bundle information.  */
9693       FOR_EACH_BB_FN (bb, cfun)
9694 	bb->flags &= ~BB_DISABLE_SCHEDULE;
9695 
9696       initiate_bundle_states ();
9697       ia64_nop = make_insn_raw (gen_nop ());
9698       SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9699       recog_memoized (ia64_nop);
9700       clocks_length = get_max_uid () + 1;
9701       stops_p = XCNEWVEC (char, clocks_length);
9702 
9703       if (ia64_tune == PROCESSOR_ITANIUM2)
9704 	{
9705 	  pos_1 = get_cpu_unit_code ("2_1");
9706 	  pos_2 = get_cpu_unit_code ("2_2");
9707 	  pos_3 = get_cpu_unit_code ("2_3");
9708 	  pos_4 = get_cpu_unit_code ("2_4");
9709 	  pos_5 = get_cpu_unit_code ("2_5");
9710 	  pos_6 = get_cpu_unit_code ("2_6");
9711 	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
9712 	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9713 	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9714 	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9715 	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9716 	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9717 	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
9718 	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9719 	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9720 	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9721 	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
9722 	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9723 	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9724 	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9725 	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9726 	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9727 	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
9728 	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9729 	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9730 	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9731 	}
9732       else
9733 	{
9734 	  pos_1 = get_cpu_unit_code ("1_1");
9735 	  pos_2 = get_cpu_unit_code ("1_2");
9736 	  pos_3 = get_cpu_unit_code ("1_3");
9737 	  pos_4 = get_cpu_unit_code ("1_4");
9738 	  pos_5 = get_cpu_unit_code ("1_5");
9739 	  pos_6 = get_cpu_unit_code ("1_6");
9740 	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
9741 	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9742 	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9743 	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9744 	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9745 	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9746 	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
9747 	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9748 	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9749 	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9750 	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
9751 	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9752 	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9753 	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9754 	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9755 	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9756 	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
9757 	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9758 	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9759 	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9760 	}
9761 
9762       if (flag_selective_scheduling2
9763 	  && !maybe_skip_selective_scheduling ())
9764         run_selective_scheduling ();
9765       else
9766 	schedule_ebbs ();
9767 
9768       /* Redo the alignment computation, as it might have gone wrong.  */
9769       compute_alignments ();
9770 
9771       /* We cannot reuse this one because it has been corrupted by the
9772 	 evil glat.  */
9773       finish_bundle_states ();
9774       free (stops_p);
9775       stops_p = NULL;
9776       emit_insn_group_barriers (dump_file);
9777 
9778       ia64_final_schedule = 0;
9779       timevar_pop (TV_SCHED2);
9780     }
9781   else
9782     emit_all_insn_group_barriers (dump_file);
9783 
9784   df_analyze ();
9785 
9786   /* A call must not be the last instruction in a function, so that the
9787      return address is still within the function, so that unwinding works
9788      properly.  Note that IA-64 differs from dwarf2 on this point.  */
9789   if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9790     {
9791       rtx_insn *insn;
9792       int saw_stop = 0;
9793 
9794       insn = get_last_insn ();
9795       if (! INSN_P (insn))
9796         insn = prev_active_insn (insn);
9797       if (insn)
9798 	{
9799 	  /* Skip over insns that expand to nothing.  */
9800 	  while (NONJUMP_INSN_P (insn)
9801 		 && get_attr_empty (insn) == EMPTY_YES)
9802 	    {
9803 	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9804 		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9805 		saw_stop = 1;
9806 	      insn = prev_active_insn (insn);
9807 	    }
9808 	  if (CALL_P (insn))
9809 	    {
9810 	      if (! saw_stop)
9811 		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9812 	      emit_insn (gen_break_f ());
9813 	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9814 	    }
9815 	}
9816     }
9817 
9818   emit_predicate_relation_info ();
9819 
9820   if (flag_var_tracking)
9821     {
9822       timevar_push (TV_VAR_TRACKING);
9823       variable_tracking_main ();
9824       timevar_pop (TV_VAR_TRACKING);
9825     }
9826   df_finish_pass (false);
9827 }
9828 
9829 /* Return true if REGNO is used by the epilogue.  */
9830 
9831 int
9832 ia64_epilogue_uses (int regno)
9833 {
9834   switch (regno)
9835     {
9836     case R_GR (1):
9837       /* With a call to a function in another module, we will write a new
9838 	 value to "gp".  After returning from such a call, we need to make
9839 	 sure the function restores the original gp-value, even if the
9840 	 function itself does not use the gp anymore.  */
9841       return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9842 
9843     case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9844     case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9845       /* For functions defined with the syscall_linkage attribute, all
9846 	 input registers are marked as live at all function exits.  This
9847 	 prevents the register allocator from using the input registers,
9848 	 which in turn makes it possible to restart a system call after
9849 	 an interrupt without having to save/restore the input registers.
9850 	 This also prevents kernel data from leaking to application code.  */
9851       return lookup_attribute ("syscall_linkage",
9852 	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9853 
9854     case R_BR (0):
9855       /* Conditional return patterns can't represent the use of `b0' as
9856          the return address, so we force the value live this way.  */
9857       return 1;
9858 
9859     case AR_PFS_REGNUM:
9860       /* Likewise for ar.pfs, which is used by br.ret.  */
9861       return 1;
9862 
9863     default:
9864       return 0;
9865     }
9866 }
9867 
9868 /* Return true if REGNO is used by the frame unwinder.  */
9869 
9870 int
9871 ia64_eh_uses (int regno)
9872 {
9873   unsigned int r;
9874 
9875   if (! reload_completed)
9876     return 0;
9877 
9878   if (regno == 0)
9879     return 0;
9880 
9881   for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9882     if (regno == current_frame_info.r[r]
9883        || regno == emitted_frame_related_regs[r])
9884       return 1;
9885 
9886   return 0;
9887 }
9888 
9889 /* Return true if this goes in small data/bss.  */
9890 
9891 /* ??? We could also support our own long data here, generating movl/add/ld8
9892    instead of addl,ld8/ld8.  This makes the code bigger, but should make the
9893    code faster because there is one less load.  This also includes incomplete
9894    types which can't go in sdata/sbss.  */
9895 
9896 static bool
9897 ia64_in_small_data_p (const_tree exp)
9898 {
9899   if (TARGET_NO_SDATA)
9900     return false;
9901 
9902   /* We want to merge strings, so we never consider them small data.  */
9903   if (TREE_CODE (exp) == STRING_CST)
9904     return false;
9905 
9906   /* Functions are never small data.  */
9907   if (TREE_CODE (exp) == FUNCTION_DECL)
9908     return false;
9909 
9910   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9911     {
9912       const char *section = DECL_SECTION_NAME (exp);
9913 
9914       if (strcmp (section, ".sdata") == 0
9915 	  || strncmp (section, ".sdata.", 7) == 0
9916 	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9917 	  || strcmp (section, ".sbss") == 0
9918 	  || strncmp (section, ".sbss.", 6) == 0
9919 	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9920 	return true;
9921     }
9922   else
9923     {
9924       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9925 
9926       /* If this is an incomplete type with size 0, then we can't put it
9927 	 in sdata because it might be too big when completed.  */
9928       if (size > 0 && size <= ia64_section_threshold)
9929 	return true;
9930     }
9931 
9932   return false;
9933 }
9934 
9935 /* Output assembly directives for prologue regions.  */
9936 
9937 /* True when the current block is the last block of the function.  */
9938 
9939 static bool last_block;
9940 
9941 /* True if we need a copy_state command at the start of the next block.  */
9942 
9943 static bool need_copy_state;
9944 
9945 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9946 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9947 #endif
9948 
9949 /* The function emits unwind directives for the start of an epilogue.  */
9950 
9951 static void
9952 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
9953 		  bool unwind, bool frame ATTRIBUTE_UNUSED)
9954 {
9955   /* If this isn't the last block of the function, then we need to label the
9956      current state, and copy it back in at the start of the next block.  */
9957 
9958   if (!last_block)
9959     {
9960       if (unwind)
9961 	fprintf (asm_out_file, "\t.label_state %d\n",
9962 		 ++cfun->machine->state_num);
9963       need_copy_state = true;
9964     }
9965 
9966   if (unwind)
9967     fprintf (asm_out_file, "\t.restore sp\n");
9968 }
9969 
9970 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */
9971 
9972 static void
9973 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9974 			bool unwind, bool frame)
9975 {
9976   rtx dest = SET_DEST (pat);
9977   rtx src = SET_SRC (pat);
9978 
9979   if (dest == stack_pointer_rtx)
9980     {
9981       if (GET_CODE (src) == PLUS)
9982 	{
9983 	  rtx op0 = XEXP (src, 0);
9984 	  rtx op1 = XEXP (src, 1);
9985 
9986 	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9987 
9988 	  if (INTVAL (op1) < 0)
9989 	    {
9990 	      gcc_assert (!frame_pointer_needed);
9991 	      if (unwind)
9992 		fprintf (asm_out_file,
9993 			 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
9994 			 -INTVAL (op1));
9995 	    }
9996 	  else
9997 	    process_epilogue (asm_out_file, insn, unwind, frame);
9998 	}
9999       else
10000 	{
10001 	  gcc_assert (src == hard_frame_pointer_rtx);
10002 	  process_epilogue (asm_out_file, insn, unwind, frame);
10003 	}
10004     }
10005   else if (dest == hard_frame_pointer_rtx)
10006     {
10007       gcc_assert (src == stack_pointer_rtx);
10008       gcc_assert (frame_pointer_needed);
10009 
10010       if (unwind)
10011 	fprintf (asm_out_file, "\t.vframe r%d\n",
10012 		 ia64_dbx_register_number (REGNO (dest)));
10013     }
10014   else
10015     gcc_unreachable ();
10016 }
10017 
10018 /* This function processes a SET pattern for REG_CFA_REGISTER.  */
10019 
10020 static void
10021 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10022 {
10023   rtx dest = SET_DEST (pat);
10024   rtx src = SET_SRC (pat);
10025   int dest_regno = REGNO (dest);
10026   int src_regno;
10027 
10028   if (src == pc_rtx)
10029     {
10030       /* Saving return address pointer.  */
10031       if (unwind)
10032 	fprintf (asm_out_file, "\t.save rp, r%d\n",
10033 		 ia64_dbx_register_number (dest_regno));
10034       return;
10035     }
10036 
10037   src_regno = REGNO (src);
10038 
10039   switch (src_regno)
10040     {
10041     case PR_REG (0):
10042       gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10043       if (unwind)
10044 	fprintf (asm_out_file, "\t.save pr, r%d\n",
10045 		 ia64_dbx_register_number (dest_regno));
10046       break;
10047 
10048     case AR_UNAT_REGNUM:
10049       gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10050       if (unwind)
10051 	fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10052 		 ia64_dbx_register_number (dest_regno));
10053       break;
10054 
10055     case AR_LC_REGNUM:
10056       gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10057       if (unwind)
10058 	fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10059 		 ia64_dbx_register_number (dest_regno));
10060       break;
10061 
10062     default:
10063       /* Everything else should indicate being stored to memory.  */
10064       gcc_unreachable ();
10065     }
10066 }
10067 
10068 /* This function processes a SET pattern for REG_CFA_OFFSET.  */
10069 
10070 static void
10071 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10072 {
10073   rtx dest = SET_DEST (pat);
10074   rtx src = SET_SRC (pat);
10075   int src_regno = REGNO (src);
10076   const char *saveop;
10077   HOST_WIDE_INT off;
10078   rtx base;
10079 
10080   gcc_assert (MEM_P (dest));
10081   if (GET_CODE (XEXP (dest, 0)) == REG)
10082     {
10083       base = XEXP (dest, 0);
10084       off = 0;
10085     }
10086   else
10087     {
10088       gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10089 		  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10090       base = XEXP (XEXP (dest, 0), 0);
10091       off = INTVAL (XEXP (XEXP (dest, 0), 1));
10092     }
10093 
10094   if (base == hard_frame_pointer_rtx)
10095     {
10096       saveop = ".savepsp";
10097       off = - off;
10098     }
10099   else
10100     {
10101       gcc_assert (base == stack_pointer_rtx);
10102       saveop = ".savesp";
10103     }
10104 
10105   src_regno = REGNO (src);
10106   switch (src_regno)
10107     {
10108     case BR_REG (0):
10109       gcc_assert (!current_frame_info.r[reg_save_b0]);
10110       if (unwind)
10111 	fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10112 		 saveop, off);
10113       break;
10114 
10115     case PR_REG (0):
10116       gcc_assert (!current_frame_info.r[reg_save_pr]);
10117       if (unwind)
10118 	fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10119 		 saveop, off);
10120       break;
10121 
10122     case AR_LC_REGNUM:
10123       gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10124       if (unwind)
10125 	fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10126 		 saveop, off);
10127       break;
10128 
10129     case AR_PFS_REGNUM:
10130       gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10131       if (unwind)
10132 	fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10133 		 saveop, off);
10134       break;
10135 
10136     case AR_UNAT_REGNUM:
10137       gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10138       if (unwind)
10139 	fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10140 		 saveop, off);
10141       break;
10142 
10143     case GR_REG (4):
10144     case GR_REG (5):
10145     case GR_REG (6):
10146     case GR_REG (7):
10147       if (unwind)
10148 	fprintf (asm_out_file, "\t.save.g 0x%x\n",
10149 		 1 << (src_regno - GR_REG (4)));
10150       break;
10151 
10152     case BR_REG (1):
10153     case BR_REG (2):
10154     case BR_REG (3):
10155     case BR_REG (4):
10156     case BR_REG (5):
10157       if (unwind)
10158 	fprintf (asm_out_file, "\t.save.b 0x%x\n",
10159 		 1 << (src_regno - BR_REG (1)));
10160       break;
10161 
10162     case FR_REG (2):
10163     case FR_REG (3):
10164     case FR_REG (4):
10165     case FR_REG (5):
10166       if (unwind)
10167 	fprintf (asm_out_file, "\t.save.f 0x%x\n",
10168 		 1 << (src_regno - FR_REG (2)));
10169       break;
10170 
10171     case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10172     case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10173     case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10174     case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10175       if (unwind)
10176 	fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10177 		 1 << (src_regno - FR_REG (12)));
10178       break;
10179 
10180     default:
10181       /* ??? For some reason we mark other general registers, even those
10182 	 we can't represent in the unwind info.  Ignore them.  */
10183       break;
10184     }
10185 }
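
/* For illustration (offsets assumed): if b0 has been spilled at sp+16
   and pr at sp+24, the code above emits

       .savesp rp, 16
       .savesp pr, 24

   and the same spills addressed through the frame pointer would use
   .savepsp with the offset negated, as computed above.  */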
10186 
10187 /* This function looks at a single insn and emits any directives
10188    required to unwind this insn.  */
10189 
10190 static void
10191 ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
10192 {
10193   bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10194   bool frame = dwarf2out_do_frame ();
10195   rtx note, pat;
10196   bool handled_one;
10197 
10198   if (!unwind && !frame)
10199     return;
10200 
10201   if (NOTE_INSN_BASIC_BLOCK_P (insn))
10202     {
10203       last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10204 		   == EXIT_BLOCK_PTR_FOR_FN (cfun);
10205 
10206       /* Restore unwind state from immediately before the epilogue.  */
10207       if (need_copy_state)
10208 	{
10209 	  if (unwind)
10210 	    {
10211 	      fprintf (asm_out_file, "\t.body\n");
10212 	      fprintf (asm_out_file, "\t.copy_state %d\n",
10213 		       cfun->machine->state_num);
10214 	    }
10215 	  need_copy_state = false;
10216 	}
10217     }
10218 
10219   if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10220     return;
10221 
10222   /* Look for the ALLOC insn.  */
10223   if (INSN_CODE (insn) == CODE_FOR_alloc)
10224     {
10225       rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10226       int dest_regno = REGNO (dest);
10227 
10228       /* If this is the final destination for ar.pfs, then this must
10229 	 be the alloc in the prologue.  */
10230       if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10231 	{
10232 	  if (unwind)
10233 	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10234 		     ia64_dbx_register_number (dest_regno));
10235 	}
10236       else
10237 	{
10238 	  /* This must be an alloc before a sibcall.  We must drop the
10239 	     old frame info.  The easiest way to drop the old frame
10240 	     info is to ensure we had a ".restore sp" directive
10241 	     followed by a new prologue.  If the procedure doesn't
10242 	     have a memory-stack frame, we'll issue a dummy ".restore
10243 	     sp" now.  */
10244 	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10245 	    /* If we haven't done process_epilogue () yet, do it now.  */
10246 	    process_epilogue (asm_out_file, insn, unwind, frame);
10247 	  if (unwind)
10248 	    fprintf (asm_out_file, "\t.prologue\n");
10249 	}
10250       return;
10251     }
10252 
10253   handled_one = false;
10254   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10255     switch (REG_NOTE_KIND (note))
10256       {
10257       case REG_CFA_ADJUST_CFA:
10258 	pat = XEXP (note, 0);
10259 	if (pat == NULL)
10260 	  pat = PATTERN (insn);
10261 	process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10262 	handled_one = true;
10263 	break;
10264 
10265       case REG_CFA_OFFSET:
10266 	pat = XEXP (note, 0);
10267 	if (pat == NULL)
10268 	  pat = PATTERN (insn);
10269 	process_cfa_offset (asm_out_file, pat, unwind);
10270 	handled_one = true;
10271 	break;
10272 
10273       case REG_CFA_REGISTER:
10274 	pat = XEXP (note, 0);
10275 	if (pat == NULL)
10276 	  pat = PATTERN (insn);
10277 	process_cfa_register (asm_out_file, pat, unwind);
10278 	handled_one = true;
10279 	break;
10280 
10281       case REG_FRAME_RELATED_EXPR:
10282       case REG_CFA_DEF_CFA:
10283       case REG_CFA_EXPRESSION:
10284       case REG_CFA_RESTORE:
10285       case REG_CFA_SET_VDRAP:
10286 	/* Not used in the ia64 port.  */
10287 	gcc_unreachable ();
10288 
10289       default:
10290 	/* Not a frame-related note.  */
10291 	break;
10292       }
10293 
10294   /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10295      explicit action to take.  No guessing required.  */
10296   gcc_assert (handled_one);
10297 }
10298 
10299 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
10300 
10301 static void
10302 ia64_asm_emit_except_personality (rtx personality)
10303 {
10304   fputs ("\t.personality\t", asm_out_file);
10305   output_addr_const (asm_out_file, personality);
10306   fputc ('\n', asm_out_file);
10307 }
10308 
10309 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
10310 
10311 static void
10312 ia64_asm_init_sections (void)
10313 {
10314   exception_section = get_unnamed_section (0, output_section_asm_op,
10315 					   "\t.handlerdata");
10316 }
10317 
10318 /* Implement TARGET_DEBUG_UNWIND_INFO.  */
10319 
10320 static enum unwind_info_type
10321 ia64_debug_unwind_info (void)
10322 {
10323   return UI_TARGET;
10324 }
10325 
10326 enum ia64_builtins
10327 {
10328   IA64_BUILTIN_BSP,
10329   IA64_BUILTIN_COPYSIGNQ,
10330   IA64_BUILTIN_FABSQ,
10331   IA64_BUILTIN_FLUSHRS,
10332   IA64_BUILTIN_INFQ,
10333   IA64_BUILTIN_HUGE_VALQ,
10334   IA64_BUILTIN_NANQ,
10335   IA64_BUILTIN_NANSQ,
10336   IA64_BUILTIN_max
10337 };
10338 
10339 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10340 
10341 void
10342 ia64_init_builtins (void)
10343 {
10344   tree fpreg_type;
10345   tree float80_type;
10346   tree decl;
10347 
10348   /* The __fpreg type.  */
10349   fpreg_type = make_node (REAL_TYPE);
10350   TYPE_PRECISION (fpreg_type) = 82;
10351   layout_type (fpreg_type);
10352   (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10353 
10354   /* The __float80 type.  */
10355   if (float64x_type_node != NULL_TREE
10356       && TYPE_MODE (float64x_type_node) == XFmode)
10357     float80_type = float64x_type_node;
10358   else
10359     {
10360       float80_type = make_node (REAL_TYPE);
10361       TYPE_PRECISION (float80_type) = 80;
10362       layout_type (float80_type);
10363     }
10364   (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10365 
10366   /* The __float128 type.  */
10367   if (!TARGET_HPUX)
10368     {
10369       tree ftype;
10370       tree const_string_type
10371 	= build_pointer_type (build_qualified_type
10372 			      (char_type_node, TYPE_QUAL_CONST));
10373 
10374       (*lang_hooks.types.register_builtin_type) (float128_type_node,
10375 						 "__float128");
10376 
10377       /* TFmode support builtins.  */
10378       ftype = build_function_type_list (float128_type_node, NULL_TREE);
10379       decl = add_builtin_function ("__builtin_infq", ftype,
10380 				   IA64_BUILTIN_INFQ, BUILT_IN_MD,
10381 				   NULL, NULL_TREE);
10382       ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10383 
10384       decl = add_builtin_function ("__builtin_huge_valq", ftype,
10385 				   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10386 				   NULL, NULL_TREE);
10387       ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10388 
10389       ftype = build_function_type_list (float128_type_node,
10390 					const_string_type,
10391 					NULL_TREE);
10392       decl = add_builtin_function ("__builtin_nanq", ftype,
10393 				   IA64_BUILTIN_NANQ, BUILT_IN_MD,
10394 				   "nanq", NULL_TREE);
10395       TREE_READONLY (decl) = 1;
10396       ia64_builtins[IA64_BUILTIN_NANQ] = decl;
10397 
10398       decl = add_builtin_function ("__builtin_nansq", ftype,
10399 				   IA64_BUILTIN_NANSQ, BUILT_IN_MD,
10400 				   "nansq", NULL_TREE);
10401       TREE_READONLY (decl) = 1;
10402       ia64_builtins[IA64_BUILTIN_NANSQ] = decl;
10403 
10404       ftype = build_function_type_list (float128_type_node,
10405 					float128_type_node,
10406 					NULL_TREE);
10407       decl = add_builtin_function ("__builtin_fabsq", ftype,
10408 				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10409 				   "__fabstf2", NULL_TREE);
10410       TREE_READONLY (decl) = 1;
10411       ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10412 
10413       ftype = build_function_type_list (float128_type_node,
10414 					float128_type_node,
10415 					float128_type_node,
10416 					NULL_TREE);
10417       decl = add_builtin_function ("__builtin_copysignq", ftype,
10418 				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10419 				   "__copysigntf3", NULL_TREE);
10420       TREE_READONLY (decl) = 1;
10421       ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10422     }
10423   else
10424     /* Under HPUX, this is a synonym for "long double".  */
10425     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10426 					       "__float128");
10427 
10428   /* Fwrite on VMS is non-standard.  */
10429 #if TARGET_ABI_OPEN_VMS
10430   vms_patch_builtins ();
10431 #endif
10432 
10433 #define def_builtin(name, type, code)					\
10434   add_builtin_function ((name), (type), (code), BUILT_IN_MD,	\
10435 		       NULL, NULL_TREE)
10436 
10437   decl = def_builtin ("__builtin_ia64_bsp",
10438 		      build_function_type_list (ptr_type_node, NULL_TREE),
10439 		      IA64_BUILTIN_BSP);
10440   ia64_builtins[IA64_BUILTIN_BSP] = decl;
10441 
10442   decl = def_builtin ("__builtin_ia64_flushrs",
10443 		      build_function_type_list (void_type_node, NULL_TREE),
10444 		      IA64_BUILTIN_FLUSHRS);
10445   ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10446 
10447 #undef def_builtin
10448 
10449   if (TARGET_HPUX)
10450     {
10451       if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10452 	set_user_assembler_name (decl, "_Isfinite");
10453       if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10454 	set_user_assembler_name (decl, "_Isfinitef");
10455       if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10456 	set_user_assembler_name (decl, "_Isfinitef128");
10457     }
10458 }
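
/* Illustrative uses of the builtins registered above (the __float128
   ones are only present when !TARGET_HPUX):

       void *bs = __builtin_ia64_bsp ();     // value of the RSE backing store pointer
       __builtin_ia64_flushrs ();            // flush dirty stacked registers
       __float128 inf = __builtin_infq ();

   __builtin_fabsq and __builtin_copysignq expand to calls to __fabstf2
   and __copysigntf3 respectively, as set up above.  */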
10459 
10460 static tree
10461 ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10462 		   tree *args, bool ignore ATTRIBUTE_UNUSED)
10463 {
10464   if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
10465     {
10466       enum ia64_builtins fn_code = (enum ia64_builtins)
10467 				   DECL_FUNCTION_CODE (fndecl);
10468       switch (fn_code)
10469 	{
10470 	case IA64_BUILTIN_NANQ:
10471 	case IA64_BUILTIN_NANSQ:
10472 	  {
10473 	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
10474 	    const char *str = c_getstr (*args);
10475 	    int quiet = fn_code == IA64_BUILTIN_NANQ;
10476 	    REAL_VALUE_TYPE real;
10477 
10478 	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
10479 	      return build_real (type, real);
10480 	    return NULL_TREE;
10481 	  }
10482 
10483 	default:
10484 	  break;
10485 	}
10486     }
10487 
10488 #ifdef SUBTARGET_FOLD_BUILTIN
10489   return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
10490 #endif
10491 
10492   return NULL_TREE;
10493 }
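
/* For example, __builtin_nanq ("") folds here to a quiet-NaN TFmode
   constant and __builtin_nansq ("") to a signalling one; a call whose
   string argument is not a constant is left alone and later expanded
   as an ordinary call by ia64_expand_builtin.  */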
10494 
10495 rtx
10496 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10497 		     machine_mode mode ATTRIBUTE_UNUSED,
10498 		     int ignore ATTRIBUTE_UNUSED)
10499 {
10500   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10501   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10502 
10503   switch (fcode)
10504     {
10505     case IA64_BUILTIN_BSP:
10506       if (! target || ! register_operand (target, DImode))
10507 	target = gen_reg_rtx (DImode);
10508       emit_insn (gen_bsp_value (target));
10509 #ifdef POINTERS_EXTEND_UNSIGNED
10510       target = convert_memory_address (ptr_mode, target);
10511 #endif
10512       return target;
10513 
10514     case IA64_BUILTIN_FLUSHRS:
10515       emit_insn (gen_flushrs ());
10516       return const0_rtx;
10517 
10518     case IA64_BUILTIN_INFQ:
10519     case IA64_BUILTIN_HUGE_VALQ:
10520       {
10521         machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10522 	REAL_VALUE_TYPE inf;
10523 	rtx tmp;
10524 
10525 	real_inf (&inf);
10526 	tmp = const_double_from_real_value (inf, target_mode);
10527 
10528 	tmp = validize_mem (force_const_mem (target_mode, tmp));
10529 
10530 	if (target == 0)
10531 	  target = gen_reg_rtx (target_mode);
10532 
10533 	emit_move_insn (target, tmp);
10534 	return target;
10535       }
10536 
10537     case IA64_BUILTIN_NANQ:
10538     case IA64_BUILTIN_NANSQ:
10539     case IA64_BUILTIN_FABSQ:
10540     case IA64_BUILTIN_COPYSIGNQ:
10541       return expand_call (exp, target, ignore);
10542 
10543     default:
10544       gcc_unreachable ();
10545     }
10546 
10547   return NULL_RTX;
10548 }
10549 
10550 /* Return the ia64 builtin for CODE.  */
10551 
10552 static tree
10553 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10554 {
10555   if (code >= IA64_BUILTIN_max)
10556     return error_mark_node;
10557 
10558   return ia64_builtins[code];
10559 }
10560 
10561 /* On HP-UX IA64, aggregate parameters are passed in the most
10562    significant bits of the stack slot.  */
10563 
10564 enum direction
10565 ia64_hpux_function_arg_padding (machine_mode mode, const_tree type)
10566 {
10567    /* Exception to normal case for structures/unions/etc.  */
10568 
10569    if (type && AGGREGATE_TYPE_P (type)
10570        && int_size_in_bytes (type) < UNITS_PER_WORD)
10571      return upward;
10572 
10573    /* Fall back to the default.  */
10574    return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10575 }
10576 
10577 /* Emit text to declare externally defined variables and functions, because
10578    the Intel assembler does not support undefined externals.  */
10579 
10580 void
10581 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10582 {
10583   /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10584      set in order to avoid putting out names that are never really
10585      used. */
10586   if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10587     {
10588       /* maybe_assemble_visibility will return 1 if the assembler
10589 	 visibility directive is output.  */
10590       int need_visibility = ((*targetm.binds_local_p) (decl)
10591 			     && maybe_assemble_visibility (decl));
10592 
10593       /* GNU as does not need anything here, but the HP linker does
10594 	 need something for external functions.  */
10595       if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10596 	  && TREE_CODE (decl) == FUNCTION_DECL)
10597 	  (*targetm.asm_out.globalize_decl_name) (file, decl);
10598       else if (need_visibility && !TARGET_GNU_AS)
10599 	(*targetm.asm_out.globalize_label) (file, name);
10600     }
10601 }
10602 
10603 /* Set SImode div/mod functions, since init_integral_libfuncs only
10604    initializes modes of word_mode and larger.  Rename the TFmode libfuncs
10605    using the HPUX conventions.  __divtf3 is used for XFmode; we need to
10606    keep it for backward compatibility.  */
10607 
10608 static void
10609 ia64_init_libfuncs (void)
10610 {
10611   set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10612   set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10613   set_optab_libfunc (smod_optab, SImode, "__modsi3");
10614   set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10615 
10616   set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10617   set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10618   set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10619   set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10620   set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10621 
10622   set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10623   set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10624   set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10625   set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10626   set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10627   set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10628 
10629   set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10630   set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10631   set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10632   set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10633   set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10634 
10635   set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10636   set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10637   set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10638   /* HP-UX 11.23 libc does not have a function for unsigned
10639      SImode-to-TFmode conversion.  */
10640   set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10641 }
10642 
10643 /* Rename all the TFmode libfuncs using the HPUX conventions.  */
10644 
10645 static void
10646 ia64_hpux_init_libfuncs (void)
10647 {
10648   ia64_init_libfuncs ();
10649 
10650   /* The HP SI millicode division and mod functions expect DI arguments.
10651      By turning them off completely we avoid using both libgcc and the
10652      non-standard millicode routines and use the HP DI millicode routines
10653      instead.  */
10654 
10655   set_optab_libfunc (sdiv_optab, SImode, 0);
10656   set_optab_libfunc (udiv_optab, SImode, 0);
10657   set_optab_libfunc (smod_optab, SImode, 0);
10658   set_optab_libfunc (umod_optab, SImode, 0);
10659 
10660   set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10661   set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10662   set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10663   set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10664 
10665   /* HP-UX libc has TF min/max/abs routines in it.  */
10666   set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10667   set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10668   set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10669 
10670   /* ia64_expand_compare uses this.  */
10671   cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10672 
10673   /* These should never be used.  */
10674   set_optab_libfunc (eq_optab, TFmode, 0);
10675   set_optab_libfunc (ne_optab, TFmode, 0);
10676   set_optab_libfunc (gt_optab, TFmode, 0);
10677   set_optab_libfunc (ge_optab, TFmode, 0);
10678   set_optab_libfunc (lt_optab, TFmode, 0);
10679   set_optab_libfunc (le_optab, TFmode, 0);
10680 }
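
/* The practical effect (a sketch): a 32-bit division that cannot be
   open-coded is widened and ends up calling __milli_divI with DImode
   arguments, rather than going through libgcc's __divsi3, because the
   SImode optab entries were cleared above.  */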
10681 
10682 /* Rename the division and modulus functions in VMS.  */
10683 
10684 static void
10685 ia64_vms_init_libfuncs (void)
10686 {
10687   set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10688   set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10689   set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10690   set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10691   set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10692   set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10693   set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10694   set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10695 #ifdef MEM_LIBFUNCS_INIT
10696   MEM_LIBFUNCS_INIT;
10697 #endif
10698 }
10699 
10700 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10701    the HPUX conventions.  */
10702 
10703 static void
10704 ia64_sysv4_init_libfuncs (void)
10705 {
10706   ia64_init_libfuncs ();
10707 
10708   /* These functions are not part of the HPUX TFmode interface.  We
10709      use them instead of _U_Qfcmp, which doesn't work the way we
10710      expect.  */
10711   set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10712   set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10713   set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10714   set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10715   set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10716   set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10717 
10718   /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10719      glibc doesn't have them.  */
10720 }
10721 
10722 /* Use soft-fp.  */
10723 
10724 static void
10725 ia64_soft_fp_init_libfuncs (void)
10726 {
10727 }
10728 
10729 static bool
10730 ia64_vms_valid_pointer_mode (machine_mode mode)
10731 {
10732   return (mode == SImode || mode == DImode);
10733 }
10734 
10735 /* For HPUX, it is illegal to have relocations in shared segments.  */
10736 
10737 static int
10738 ia64_hpux_reloc_rw_mask (void)
10739 {
10740   return 3;
10741 }
10742 
10743 /* For others, relax this so that relocations to local data go in
10744    read-only segments, but we still cannot allow global relocations
10745    in read-only segments.  */
10746 
10747 static int
10748 ia64_reloc_rw_mask (void)
10749 {
10750   return flag_pic ? 3 : 2;
10751 }
10752 
10753 /* Return the section to use for X.  The only special thing we do here
10754    is to honor small data.  */
10755 
10756 static section *
10757 ia64_select_rtx_section (machine_mode mode, rtx x,
10758 			 unsigned HOST_WIDE_INT align)
10759 {
10760   if (GET_MODE_SIZE (mode) > 0
10761       && GET_MODE_SIZE (mode) <= ia64_section_threshold
10762       && !TARGET_NO_SDATA)
10763     return sdata_section;
10764   else
10765     return default_elf_select_rtx_section (mode, x, align);
10766 }
10767 
10768 static unsigned int
10769 ia64_section_type_flags (tree decl, const char *name, int reloc)
10770 {
10771   unsigned int flags = 0;
10772 
10773   if (strcmp (name, ".sdata") == 0
10774       || strncmp (name, ".sdata.", 7) == 0
10775       || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10776       || strncmp (name, ".sdata2.", 8) == 0
10777       || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10778       || strcmp (name, ".sbss") == 0
10779       || strncmp (name, ".sbss.", 6) == 0
10780       || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10781     flags = SECTION_SMALL;
10782 
10783   flags |= default_section_type_flags (decl, name, reloc);
10784   return flags;
10785 }
10786 
10787 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10788    structure type and that the address of that type should be passed
10789    in out0, rather than in r8.  */
10790 
10791 static bool
10792 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10793 {
10794   tree ret_type = TREE_TYPE (fntype);
10795 
10796   /* The Itanium C++ ABI requires that out0, rather than r8, be used
10797      as the structure return address parameter, if the return value
10798      type has a non-trivial copy constructor or destructor.  It is not
10799      clear if this same convention should be used for other
10800      programming languages.  Until G++ 3.4, we incorrectly used r8 for
10801      these return values.  */
10802   return (abi_version_at_least (2)
10803 	  && ret_type
10804 	  && TYPE_MODE (ret_type) == BLKmode
10805 	  && TREE_ADDRESSABLE (ret_type)
10806 	  && lang_GNU_CXX ());
10807 }
10808 
10809 /* Output the assembler code for a thunk function.  THUNK_DECL is the
10810    declaration for the thunk function itself, FUNCTION is the decl for
10811    the target function.  DELTA is an immediate constant offset to be
10812    added to THIS.  If VCALL_OFFSET is nonzero, the word at
10813    *(*this + vcall_offset) should be added to THIS.  */
10814 
10815 static void
10816 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10817 		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10818 		      tree function)
10819 {
10820   rtx this_rtx, funexp;
10821   rtx_insn *insn;
10822   unsigned int this_parmno;
10823   unsigned int this_regno;
10824   rtx delta_rtx;
10825 
10826   reload_completed = 1;
10827   epilogue_completed = 1;
10828 
10829   /* Set things up as ia64_expand_prologue might.  */
10830   last_scratch_gr_reg = 15;
10831 
10832   memset (&current_frame_info, 0, sizeof (current_frame_info));
10833   current_frame_info.spill_cfa_off = -16;
10834   current_frame_info.n_input_regs = 1;
10835   current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10836 
10837   /* Mark the end of the (empty) prologue.  */
10838   emit_note (NOTE_INSN_PROLOGUE_END);
10839 
10840   /* Figure out whether "this" will be the first parameter (the
10841      typical case) or the second parameter (as happens when the
10842      virtual function returns certain class objects).  */
10843   this_parmno
10844     = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10845        ? 1 : 0);
10846   this_regno = IN_REG (this_parmno);
10847   if (!TARGET_REG_NAMES)
10848     reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10849 
10850   this_rtx = gen_rtx_REG (Pmode, this_regno);
10851 
10852   /* Apply the constant offset, if required.  */
10853   delta_rtx = GEN_INT (delta);
10854   if (TARGET_ILP32)
10855     {
10856       rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10857       REG_POINTER (tmp) = 1;
10858       if (delta && satisfies_constraint_I (delta_rtx))
10859 	{
10860 	  emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10861 	  delta = 0;
10862 	}
10863       else
10864 	emit_insn (gen_ptr_extend (this_rtx, tmp));
10865     }
10866   if (delta)
10867     {
10868       if (!satisfies_constraint_I (delta_rtx))
10869 	{
10870 	  rtx tmp = gen_rtx_REG (Pmode, 2);
10871 	  emit_move_insn (tmp, delta_rtx);
10872 	  delta_rtx = tmp;
10873 	}
10874       emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10875     }
10876 
10877   /* Apply the offset from the vtable, if required.  */
10878   if (vcall_offset)
10879     {
10880       rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10881       rtx tmp = gen_rtx_REG (Pmode, 2);
10882 
10883       if (TARGET_ILP32)
10884 	{
10885 	  rtx t = gen_rtx_REG (ptr_mode, 2);
10886 	  REG_POINTER (t) = 1;
10887 	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10888 	  if (satisfies_constraint_I (vcall_offset_rtx))
10889 	    {
10890 	      emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10891 	      vcall_offset = 0;
10892 	    }
10893 	  else
10894 	    emit_insn (gen_ptr_extend (tmp, t));
10895 	}
10896       else
10897 	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10898 
10899       if (vcall_offset)
10900 	{
10901 	  if (!satisfies_constraint_J (vcall_offset_rtx))
10902 	    {
10903 	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10904 	      emit_move_insn (tmp2, vcall_offset_rtx);
10905 	      vcall_offset_rtx = tmp2;
10906 	    }
10907 	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10908 	}
10909 
10910       if (TARGET_ILP32)
10911 	emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10912       else
10913 	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10914 
10915       emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10916     }
10917 
10918   /* Generate a tail call to the target function.  */
10919   if (! TREE_USED (function))
10920     {
10921       assemble_external (function);
10922       TREE_USED (function) = 1;
10923     }
10924   funexp = XEXP (DECL_RTL (function), 0);
10925   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10926   ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10927   insn = get_last_insn ();
10928   SIBLING_CALL_P (insn) = 1;
10929 
10930   /* Code generation for calls relies on splitting.  */
10931   reload_completed = 1;
10932   epilogue_completed = 1;
10933   try_split (PATTERN (insn), insn, 0);
10934 
10935   emit_barrier ();
10936 
10937   /* Run just enough of rest_of_compilation to get the insns emitted.
10938      There's not really enough bulk here to make other passes such as
10939      instruction scheduling worthwhile.  Note that use_thunk calls
10940      assemble_start_function and assemble_end_function.  */
10941 
10942   emit_all_insn_group_barriers (NULL);
10943   insn = get_insns ();
10944   shorten_branches (insn);
10945   final_start_function (insn, file, 1);
10946   final (insn, file, 1);
10947   final_end_function ();
10948 
10949   reload_completed = 0;
10950   epilogue_completed = 0;
10951 }
10952 
10953 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
10954 
10955 static rtx
10956 ia64_struct_value_rtx (tree fntype,
10957 		       int incoming ATTRIBUTE_UNUSED)
10958 {
10959   if (TARGET_ABI_OPEN_VMS
10960       || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10961     return NULL_RTX;
10962   return gen_rtx_REG (Pmode, GR_REG (8));
10963 }
10964 
10965 static bool
10966 ia64_scalar_mode_supported_p (machine_mode mode)
10967 {
10968   switch (mode)
10969     {
10970     case QImode:
10971     case HImode:
10972     case SImode:
10973     case DImode:
10974     case TImode:
10975       return true;
10976 
10977     case SFmode:
10978     case DFmode:
10979     case XFmode:
10980     case RFmode:
10981       return true;
10982 
10983     case TFmode:
10984       return true;
10985 
10986     default:
10987       return false;
10988     }
10989 }
10990 
10991 static bool
10992 ia64_vector_mode_supported_p (machine_mode mode)
10993 {
10994   switch (mode)
10995     {
10996     case V8QImode:
10997     case V4HImode:
10998     case V2SImode:
10999       return true;
11000 
11001     case V2SFmode:
11002       return true;
11003 
11004     default:
11005       return false;
11006     }
11007 }
11008 
11009 /* Implement the FUNCTION_PROFILER macro.  */
11010 
11011 void
11012 ia64_output_function_profiler (FILE *file, int labelno)
11013 {
11014   bool indirect_call;
11015 
11016   /* If the function needs a static chain and the static chain
11017      register is r15, we use an indirect call so as to bypass
11018      the PLT stub in case the executable is dynamically linked,
11019      because the stub clobbers r15 as per 5.3.6 of the psABI.
11020      We don't need to do that in non-canonical PIC mode.  */
11021 
11022   if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11023     {
11024       gcc_assert (STATIC_CHAIN_REGNUM == 15);
11025       indirect_call = true;
11026     }
11027   else
11028     indirect_call = false;
11029 
11030   if (TARGET_GNU_AS)
11031     fputs ("\t.prologue 4, r40\n", file);
11032   else
11033     fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11034   fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
11035 
11036   if (NO_PROFILE_COUNTERS)
11037     fputs ("\tmov out3 = r0\n", file);
11038   else
11039     {
11040       char buf[20];
11041       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11042 
11043       if (TARGET_AUTO_PIC)
11044 	fputs ("\tmovl out3 = @gprel(", file);
11045       else
11046 	fputs ("\taddl out3 = @ltoff(", file);
11047       assemble_name (file, buf);
11048       if (TARGET_AUTO_PIC)
11049 	fputs (")\n", file);
11050       else
11051 	fputs ("), r1\n", file);
11052     }
11053 
11054   if (indirect_call)
11055     fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11056   fputs ("\t;;\n", file);
11057 
11058   fputs ("\t.save rp, r42\n", file);
11059   fputs ("\tmov out2 = b0\n", file);
11060   if (indirect_call)
11061     fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11062   fputs ("\t.body\n", file);
11063   fputs ("\tmov out1 = r1\n", file);
11064   if (indirect_call)
11065     {
11066       fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11067       fputs ("\tmov b6 = r16\n", file);
11068       fputs ("\tld8 r1 = [r14]\n", file);
11069       fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11070     }
11071   else
11072     fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11073 }
11074 
11075 static GTY(()) rtx mcount_func_rtx;
11076 static rtx
11077 gen_mcount_func_rtx (void)
11078 {
11079   if (!mcount_func_rtx)
11080     mcount_func_rtx = init_one_libfunc ("_mcount");
11081   return mcount_func_rtx;
11082 }
11083 
11084 void
11085 ia64_profile_hook (int labelno)
11086 {
11087   rtx label, ip;
11088 
11089   if (NO_PROFILE_COUNTERS)
11090     label = const0_rtx;
11091   else
11092     {
11093       char buf[30];
11094       const char *label_name;
11095       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11096       label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11097       label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11098       SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11099     }
11100   ip = gen_reg_rtx (Pmode);
11101   emit_insn (gen_ip_value (ip));
11102   emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11103                      VOIDmode, 3,
11104 		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11105 		     ip, Pmode,
11106 		     label, Pmode);
11107 }
11108 
11109 /* Return the mangling of TYPE if it is an extended fundamental type.  */
11110 
11111 static const char *
11112 ia64_mangle_type (const_tree type)
11113 {
11114   type = TYPE_MAIN_VARIANT (type);
11115 
11116   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11117       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11118     return NULL;
11119 
11120   /* On HP-UX, "long double" is mangled as "e" so __float128 is
11121      mangled as "e".  */
11122   if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11123     return "g";
11124   /* On HP-UX, "e" is not available as a mangling of __float80 so use
11125      an extended mangling.  Elsewhere, "e" is available since long
11126      double is 80 bits.  */
11127   if (TYPE_MODE (type) == XFmode)
11128     return TARGET_HPUX ? "u9__float80" : "e";
11129   if (TYPE_MODE (type) == RFmode)
11130     return "u7__fpreg";
11131   return NULL;
11132 }
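
/* Roughly, for the types handled above: __float128 mangles as "g"
   (except on HP-UX, where it is long double and uses "e"), __float80
   mangles as "e", or as "u9__float80" on HP-UX, and __fpreg always
   mangles as "u7__fpreg"; e.g. a C++ function foo (__fpreg) would be
   emitted roughly as _Z3foou7__fpreg.  */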
11133 
11134 /* Return the diagnostic message string if conversion from FROMTYPE to
11135    TOTYPE is not allowed, NULL otherwise.  */
11136 static const char *
11137 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11138 {
11139   /* Reject nontrivial conversion to or from __fpreg.  */
11140   if (TYPE_MODE (fromtype) == RFmode
11141       && TYPE_MODE (totype) != RFmode
11142       && TYPE_MODE (totype) != VOIDmode)
11143     return N_("invalid conversion from %<__fpreg%>");
11144   if (TYPE_MODE (totype) == RFmode
11145       && TYPE_MODE (fromtype) != RFmode)
11146     return N_("invalid conversion to %<__fpreg%>");
11147   return NULL;
11148 }
11149 
11150 /* Return the diagnostic message string if the unary operation OP is
11151    not permitted on TYPE, NULL otherwise.  */
11152 static const char *
11153 ia64_invalid_unary_op (int op, const_tree type)
11154 {
11155   /* Reject operations on __fpreg other than unary + or &.  */
11156   if (TYPE_MODE (type) == RFmode
11157       && op != CONVERT_EXPR
11158       && op != ADDR_EXPR)
11159     return N_("invalid operation on %<__fpreg%>");
11160   return NULL;
11161 }
11162 
11163 /* Return the diagnostic message string if the binary operation OP is
11164    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
11165 static const char *
11166 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11167 {
11168   /* Reject operations on __fpreg.  */
11169   if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11170     return N_("invalid operation on %<__fpreg%>");
11171   return NULL;
11172 }
11173 
11174 /* HP-UX version_id attribute.
11175    For object foo, if the version_id is set to 1234, put out an alias
11176    of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
11177    other than an alias statement because it is an illegal symbol name.  */
11178 
11179 static tree
11180 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11181                                  tree name ATTRIBUTE_UNUSED,
11182                                  tree args,
11183                                  int flags ATTRIBUTE_UNUSED,
11184                                  bool *no_add_attrs)
11185 {
11186   tree arg = TREE_VALUE (args);
11187 
11188   if (TREE_CODE (arg) != STRING_CST)
11189     {
11190       error ("version attribute is not a string");
11191       *no_add_attrs = true;
11192       return NULL_TREE;
11193     }
11194   return NULL_TREE;
11195 }
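
/* Illustrative use of the attribute validated above:

       extern int foo (int) __attribute__ ((version_id ("1234")));

   which, per the comment above, results in an alias of the form
   .alias foo "foo{1234}" being emitted for the versioned symbol.  */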
11196 
11197 /* Target hook for c_mode_for_suffix.  */
11198 
11199 static machine_mode
11200 ia64_c_mode_for_suffix (char suffix)
11201 {
11202   if (suffix == 'q')
11203     return TFmode;
11204   if (suffix == 'w')
11205     return XFmode;
11206 
11207   return VOIDmode;
11208 }
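
/* With this hook, floating constants written with a 'q' suffix get
   TFmode and those with a 'w' suffix get XFmode, e.g. (illustrative):

       __float128 qpi = 3.141592653589793238462643383279502884q;
       __float80  wpi = 3.141592653589793238462643383279502884w;
*/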
11209 
11210 static GTY(()) rtx ia64_dconst_0_5_rtx;
11211 
11212 rtx
11213 ia64_dconst_0_5 (void)
11214 {
11215   if (! ia64_dconst_0_5_rtx)
11216     {
11217       REAL_VALUE_TYPE rv;
11218       real_from_string (&rv, "0.5");
11219       ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11220     }
11221   return ia64_dconst_0_5_rtx;
11222 }
11223 
11224 static GTY(()) rtx ia64_dconst_0_375_rtx;
11225 
11226 rtx
11227 ia64_dconst_0_375 (void)
11228 {
11229   if (! ia64_dconst_0_375_rtx)
11230     {
11231       REAL_VALUE_TYPE rv;
11232       real_from_string (&rv, "0.375");
11233       ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11234     }
11235   return ia64_dconst_0_375_rtx;
11236 }
11237 
11238 static machine_mode
11239 ia64_get_reg_raw_mode (int regno)
11240 {
11241   if (FR_REGNO_P (regno))
11242     return XFmode;
11243   return default_get_reg_raw_mode (regno);
11244 }
11245 
11246 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK.  ??? Might not be needed
11247    anymore.  */
11248 
11249 bool
11250 ia64_member_type_forces_blk (const_tree, machine_mode mode)
11251 {
11252   return TARGET_HPUX && mode == TFmode;
11253 }
11254 
11255 /* Always default to .text section until HP-UX linker is fixed.  */
11256 
11257 ATTRIBUTE_UNUSED static section *
11258 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11259 			    enum node_frequency freq ATTRIBUTE_UNUSED,
11260 			    bool startup ATTRIBUTE_UNUSED,
11261 			    bool exit ATTRIBUTE_UNUSED)
11262 {
11263   return NULL;
11264 }
11265 
11266 /* Construct (set target (vec_select op0 (parallel perm))) and
11267    return true if that's a valid instruction in the active ISA.  */
11268 
11269 static bool
11270 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11271 {
11272   rtx rperm[MAX_VECT_LEN], x;
11273   unsigned i;
11274 
11275   for (i = 0; i < nelt; ++i)
11276     rperm[i] = GEN_INT (perm[i]);
11277 
11278   x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11279   x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11280   x = gen_rtx_SET (target, x);
11281 
11282   rtx_insn *insn = emit_insn (x);
11283   if (recog_memoized (insn) < 0)
11284     {
11285       remove_insn (insn);
11286       return false;
11287     }
11288   return true;
11289 }
11290 
11291 /* Similar, but generate a vec_concat from op0 and op1 as well.  */
11292 
11293 static bool
11294 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11295 			const unsigned char *perm, unsigned nelt)
11296 {
11297   machine_mode v2mode;
11298   rtx x;
11299 
11300   v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
11301   x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11302   return expand_vselect (target, x, perm, nelt);
11303 }
11304 
11305 /* Try to expand a no-op permutation.  */
11306 
11307 static bool
11308 expand_vec_perm_identity (struct expand_vec_perm_d *d)
11309 {
11310   unsigned i, nelt = d->nelt;
11311 
11312   for (i = 0; i < nelt; ++i)
11313     if (d->perm[i] != i)
11314       return false;
11315 
11316   if (!d->testing_p)
11317     emit_move_insn (d->target, d->op0);
11318 
11319   return true;
11320 }
11321 
11322 /* Try to expand D via a shrp instruction.  */
11323 
11324 static bool
11325 expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11326 {
11327   unsigned i, nelt = d->nelt, shift, mask;
11328   rtx tmp, hi, lo;
11329 
11330   /* ??? Don't force V2SFmode into the integer registers.  */
11331   if (d->vmode == V2SFmode)
11332     return false;
11333 
11334   mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11335 
11336   shift = d->perm[0];
11337   if (BYTES_BIG_ENDIAN && shift > nelt)
11338     return false;
11339 
11340   for (i = 1; i < nelt; ++i)
11341     if (d->perm[i] != ((shift + i) & mask))
11342       return false;
11343 
11344   if (d->testing_p)
11345     return true;
11346 
11347   hi = shift < nelt ? d->op1 : d->op0;
11348   lo = shift < nelt ? d->op0 : d->op1;
11349 
11350   shift %= nelt;
11351 
11352   shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11353 
11354   /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
11355   gcc_assert (IN_RANGE (shift, 1, 63));
11356 
11357   /* Recall that big-endian elements are numbered starting at the top of
11358      the register.  Ideally we'd have a shift-left-pair.  But since we
11359      don't, convert to a shift the other direction.  */
11360   if (BYTES_BIG_ENDIAN)
11361     shift = 64 - shift;
11362 
11363   tmp = gen_reg_rtx (DImode);
11364   hi = gen_lowpart (DImode, hi);
11365   lo = gen_lowpart (DImode, lo);
11366   emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11367 
11368   emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11369   return true;
11370 }
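
/* A worked case for the routine above (little-endian V4HImode): the
   two-operand permutation { 1, 2, 3, 4 } selects a contiguous window
   straddling op0 and op1, so it is emitted as a single

       shrp rT = rHI, rLO, 16

   i.e. a shift-right-pair by one 16-bit element (register names here
   are illustrative).  */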
11371 
11372 /* Try to instantiate D in a single instruction.  */
11373 
11374 static bool
11375 expand_vec_perm_1 (struct expand_vec_perm_d *d)
11376 {
11377   unsigned i, nelt = d->nelt;
11378   unsigned char perm2[MAX_VECT_LEN];
11379 
11380   /* Try single-operand selections.  */
11381   if (d->one_operand_p)
11382     {
11383       if (expand_vec_perm_identity (d))
11384 	return true;
11385       if (expand_vselect (d->target, d->op0, d->perm, nelt))
11386 	return true;
11387     }
11388 
11389   /* Try two operand selections.  */
11390   if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11391     return true;
11392 
11393   /* Recognize interleave style patterns with reversed operands.  */
11394   if (!d->one_operand_p)
11395     {
11396       for (i = 0; i < nelt; ++i)
11397 	{
11398 	  unsigned e = d->perm[i];
11399 	  if (e >= nelt)
11400 	    e -= nelt;
11401 	  else
11402 	    e += nelt;
11403 	  perm2[i] = e;
11404 	}
11405 
11406       if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11407 	return true;
11408     }
11409 
11410   if (expand_vec_perm_shrp (d))
11411     return true;
11412 
11413   /* ??? Look for deposit-like permutations where most of the result
11414      comes from one vector unchanged and the rest comes from a
11415      sequential hunk of the other vector.  */
11416 
11417   return false;
11418 }
11419 
11420 /* Pattern match broadcast permutations.  */
11421 
11422 static bool
11423 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11424 {
11425   unsigned i, elt, nelt = d->nelt;
11426   unsigned char perm2[2];
11427   rtx temp;
11428   bool ok;
11429 
11430   if (!d->one_operand_p)
11431     return false;
11432 
11433   elt = d->perm[0];
11434   for (i = 1; i < nelt; ++i)
11435     if (d->perm[i] != elt)
11436       return false;
11437 
11438   switch (d->vmode)
11439     {
11440     case V2SImode:
11441     case V2SFmode:
11442       /* Implementable by interleave.  */
11443       perm2[0] = elt;
11444       perm2[1] = elt + 2;
11445       ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11446       gcc_assert (ok);
11447       break;
11448 
11449     case V8QImode:
11450       /* Implementable by extract + broadcast.  */
11451       if (BYTES_BIG_ENDIAN)
11452 	elt = 7 - elt;
11453       elt *= BITS_PER_UNIT;
11454       temp = gen_reg_rtx (DImode);
11455       emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11456 			    GEN_INT (8), GEN_INT (elt)));
11457       emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11458       break;
11459 
11460     case V4HImode:
11461       /* Should have been matched directly by vec_select.  */
11462     default:
11463       gcc_unreachable ();
11464     }
11465 
11466   return true;
11467 }
11468 
11469 /* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
11470    two vector permutation into a single vector permutation by using
11471    an interleave operation to merge the vectors.  */
11472 
11473 static bool
11474 expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11475 {
11476   struct expand_vec_perm_d dremap, dfinal;
11477   unsigned char remap[2 * MAX_VECT_LEN];
11478   unsigned contents, i, nelt, nelt2;
11479   unsigned h0, h1, h2, h3;
11480   rtx_insn *seq;
11481   bool ok;
11482 
11483   if (d->one_operand_p)
11484     return false;
11485 
11486   nelt = d->nelt;
11487   nelt2 = nelt / 2;
11488 
11489   /* Examine from whence the elements come.  */
11490   contents = 0;
11491   for (i = 0; i < nelt; ++i)
11492     contents |= 1u << d->perm[i];
11493 
11494   memset (remap, 0xff, sizeof (remap));
11495   dremap = *d;
11496 
11497   h0 = (1u << nelt2) - 1;
11498   h1 = h0 << nelt2;
11499   h2 = h0 << nelt;
11500   h3 = h0 << (nelt + nelt2);
11501 
11502   if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
11503     {
11504       for (i = 0; i < nelt; ++i)
11505 	{
11506 	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
11507 	  remap[which] = i;
11508 	  dremap.perm[i] = which;
11509 	}
11510     }
11511   else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
11512     {
11513       for (i = 0; i < nelt; ++i)
11514 	{
11515 	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11516 	  remap[which] = i;
11517 	  dremap.perm[i] = which;
11518 	}
11519     }
11520   else if ((contents & 0x5555) == contents)	/* mix even elements */
11521     {
11522       for (i = 0; i < nelt; ++i)
11523 	{
11524 	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11525 	  remap[which] = i;
11526 	  dremap.perm[i] = which;
11527 	}
11528     }
11529   else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
11530     {
11531       for (i = 0; i < nelt; ++i)
11532 	{
11533 	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11534 	  remap[which] = i;
11535 	  dremap.perm[i] = which;
11536 	}
11537     }
11538   else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
11539     {
11540       unsigned shift = ctz_hwi (contents);
11541       for (i = 0; i < nelt; ++i)
11542 	{
11543 	  unsigned which = (i + shift) & (2 * nelt - 1);
11544 	  remap[which] = i;
11545 	  dremap.perm[i] = which;
11546 	}
11547     }
11548   else
11549     return false;
11550 
11551   /* Use the remapping array set up above to move the elements from their
11552      swizzled locations into their final destinations.  */
11553   dfinal = *d;
11554   for (i = 0; i < nelt; ++i)
11555     {
11556       unsigned e = remap[d->perm[i]];
11557       gcc_assert (e < nelt);
11558       dfinal.perm[i] = e;
11559     }
11560   if (d->testing_p)
11561     dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
11562   else
11563     dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11564   dfinal.op1 = dfinal.op0;
11565   dfinal.one_operand_p = true;
11566   dremap.target = dfinal.op0;
11567 
11568   /* Test if the final remap can be done with a single insn.  For V4HImode
11569      this *will* succeed.  For V8QImode or V2SImode it may not.  */
11570   start_sequence ();
11571   ok = expand_vec_perm_1 (&dfinal);
11572   seq = get_insns ();
11573   end_sequence ();
11574   if (!ok)
11575     return false;
11576   if (d->testing_p)
11577     return true;
11578 
11579   ok = expand_vec_perm_1 (&dremap);
11580   gcc_assert (ok);
11581 
11582   emit_insn (seq);
11583   return true;
11584 }
11585 
11586 /* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
11587    constant permutation via two mux2 and a merge.  */
11588 
11589 static bool
11590 expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11591 {
11592   unsigned char perm2[4];
11593   rtx rmask[4];
11594   unsigned i;
11595   rtx t0, t1, mask, x;
11596   bool ok;
11597 
11598   if (d->vmode != V4HImode || d->one_operand_p)
11599     return false;
11600   if (d->testing_p)
11601     return true;
11602 
11603   for (i = 0; i < 4; ++i)
11604     {
11605       perm2[i] = d->perm[i] & 3;
11606       rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11607     }
11608   mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11609   mask = force_reg (V4HImode, mask);
11610 
11611   t0 = gen_reg_rtx (V4HImode);
11612   t1 = gen_reg_rtx (V4HImode);
11613 
11614   ok = expand_vselect (t0, d->op0, perm2, 4);
11615   gcc_assert (ok);
11616   ok = expand_vselect (t1, d->op1, perm2, 4);
11617   gcc_assert (ok);
11618 
11619   x = gen_rtx_AND (V4HImode, mask, t0);
11620   emit_insn (gen_rtx_SET (t0, x));
11621 
11622   x = gen_rtx_NOT (V4HImode, mask);
11623   x = gen_rtx_AND (V4HImode, x, t1);
11624   emit_insn (gen_rtx_SET (t1, x));
11625 
11626   x = gen_rtx_IOR (V4HImode, t0, t1);
11627   emit_insn (gen_rtx_SET (d->target, x));
11628 
11629   return true;
11630 }
11631 
11632 /* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11633    With all of the interface bits taken care of, perform the expansion
11634    in D and return true on success.  */
11635 
11636 static bool
11637 ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11638 {
11639   if (expand_vec_perm_1 (d))
11640     return true;
11641   if (expand_vec_perm_broadcast (d))
11642     return true;
11643   if (expand_vec_perm_interleave_2 (d))
11644     return true;
11645   if (expand_vec_perm_v4hi_5 (d))
11646     return true;
11647   return false;
11648 }
11649 
11650 bool
11651 ia64_expand_vec_perm_const (rtx operands[4])
11652 {
11653   struct expand_vec_perm_d d;
11654   unsigned char perm[MAX_VECT_LEN];
11655   int i, nelt, which;
11656   rtx sel;
11657 
11658   d.target = operands[0];
11659   d.op0 = operands[1];
11660   d.op1 = operands[2];
11661   sel = operands[3];
11662 
11663   d.vmode = GET_MODE (d.target);
11664   gcc_assert (VECTOR_MODE_P (d.vmode));
11665   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11666   d.testing_p = false;
11667 
11668   gcc_assert (GET_CODE (sel) == CONST_VECTOR);
11669   gcc_assert (XVECLEN (sel, 0) == nelt);
11670   gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
11671 
11672   for (i = which = 0; i < nelt; ++i)
11673     {
11674       rtx e = XVECEXP (sel, 0, i);
11675       int ei = INTVAL (e) & (2 * nelt - 1);
11676 
11677       which |= (ei < nelt ? 1 : 2);
11678       d.perm[i] = ei;
11679       perm[i] = ei;
11680     }
11681 
11682   switch (which)
11683     {
11684     default:
11685       gcc_unreachable ();
11686 
11687     case 3:
11688       if (!rtx_equal_p (d.op0, d.op1))
11689 	{
11690 	  d.one_operand_p = false;
11691 	  break;
11692 	}
11693 
11694       /* The elements of PERM do not suggest that only the first operand
11695 	 is used, but both operands are identical.  Allow easier matching
11696 	 of the permutation by folding the permutation into the single
11697 	 input vector.  */
11698       for (i = 0; i < nelt; ++i)
11699 	if (d.perm[i] >= nelt)
11700 	  d.perm[i] -= nelt;
11701       /* FALLTHRU */
11702 
11703     case 1:
11704       d.op1 = d.op0;
11705       d.one_operand_p = true;
11706       break;
11707 
11708     case 2:
11709       for (i = 0; i < nelt; ++i)
11710         d.perm[i] -= nelt;
11711       d.op0 = d.op1;
11712       d.one_operand_p = true;
11713       break;
11714     }
11715 
11716   if (ia64_expand_vec_perm_const_1 (&d))
11717     return true;
11718 
11719   /* If the mask says both arguments are needed but they are identical,
11720      the above tried to expand with one_operand_p true.  If that failed,
11721      retry with one_operand_p false, which is what the _ok hook tested.  */
11722   if (which == 3 && d.one_operand_p)
11723     {
11724       memcpy (d.perm, perm, sizeof (perm));
11725       d.one_operand_p = false;
11726       return ia64_expand_vec_perm_const_1 (&d);
11727     }
11728 
11729   return false;
11730 }
11731 
11732 /* Implement targetm.vectorize.vec_perm_const_ok.  */
11733 
11734 static bool
11735 ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
11736 				  const unsigned char *sel)
11737 {
11738   struct expand_vec_perm_d d;
11739   unsigned int i, nelt, which;
11740   bool ret;
11741 
11742   d.vmode = vmode;
11743   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11744   d.testing_p = true;
11745 
11746   /* Copy the selector values from SEL into the permutation
11747      array in D.  */
11748   memcpy (d.perm, sel, nelt);
11749   for (i = which = 0; i < nelt; ++i)
11750     {
11751       unsigned char e = d.perm[i];
11752       gcc_assert (e < 2 * nelt);
11753       which |= (e < nelt ? 1 : 2);
11754     }
11755 
11756   /* If all elements come from the second vector, fold them onto the first.  */
11757   if (which == 2)
11758     for (i = 0; i < nelt; ++i)
11759       d.perm[i] -= nelt;
11760 
11761   /* Check whether the mask can be applied to the vector type.  */
11762   /* Record whether the selector references only a single operand.  */
11763 
11764   /* We have to go through the motions and see if we can figure out
11765      how to generate the requested permutation.  */
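  /* The raw registers just past LAST_VIRTUAL_REGISTER merely stand in for
     operands during this trial, and the start_sequence/end_sequence pair
     below throws away any RTL the trial expansion may generate.  */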
11766   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11767   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11768   if (!d.one_operand_p)
11769     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11770 
11771   start_sequence ();
11772   ret = ia64_expand_vec_perm_const_1 (&d);
11773   end_sequence ();
11774 
11775   return ret;
11776 }
11777 
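/* Implement vec_set for V2SFmode: insert the SFmode value OPERANDS[1] as
   element number OPERANDS[2] of the V2SF vector OPERANDS[0].  The new value
   is packed together with 0.0 into a scratch vector with fpack, and a
   two-operand constant permutation then merges it into the requested
   element while the other element is carried over unchanged.  */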
11778 void
11779 ia64_expand_vec_setv2sf (rtx operands[3])
11780 {
11781   struct expand_vec_perm_d d;
11782   unsigned int which;
11783   bool ok;
11784 
11785   d.target = operands[0];
11786   d.op0 = operands[0];
11787   d.op1 = gen_reg_rtx (V2SFmode);
11788   d.vmode = V2SFmode;
11789   d.nelt = 2;
11790   d.one_operand_p = false;
11791   d.testing_p = false;
11792 
11793   which = INTVAL (operands[2]);
11794   gcc_assert (which <= 1);
11795   d.perm[0] = 1 - which;
11796   d.perm[1] = which + 2;
11797 
11798   emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
11799 
11800   ok = ia64_expand_vec_perm_const_1 (&d);
11801   gcc_assert (ok);
11802 }
11803 
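/* Expand an even/odd extraction: permute the concatenation of OP0 and OP1
   into TARGET, taking the odd-numbered elements if ODD is nonzero and the
   even-numbered elements otherwise.  */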
11804 void
11805 ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
11806 {
11807   struct expand_vec_perm_d d;
11808   machine_mode vmode = GET_MODE (target);
11809   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
11810   bool ok;
11811 
11812   d.target = target;
11813   d.op0 = op0;
11814   d.op1 = op1;
11815   d.vmode = vmode;
11816   d.nelt = nelt;
11817   d.one_operand_p = false;
11818   d.testing_p = false;
11819 
11820   for (i = 0; i < nelt; ++i)
11821     d.perm[i] = i * 2 + odd;
11822 
11823   ok = ia64_expand_vec_perm_const_1 (&d);
11824   gcc_assert (ok);
11825 }
11826 
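/* A minimal scalar model, for illustration only, of the even/odd extraction
   above; vec_perm_even_odd_model is a hypothetical helper, not part of GCC,
   and is guarded out of the build.  For nelt == 4 and odd == 1 the
   permutation built above is { 1, 3, 5, 7 }, i.e. the odd-numbered elements
   of the eight-element concatenation of OP0 and OP1.  */
#if 0
static void
vec_perm_even_odd_model (const int *op0, const int *op1, int nelt, int odd,
			 int *out)
{
  for (int i = 0; i < nelt; ++i)
    {
      int idx = i * 2 + odd;			/* d.perm[i] */
      out[i] = idx < nelt ? op0[idx] : op1[idx - nelt];
    }
}
#endif
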
11827 #include "gt-ia64.h"
11828