1 /* Definitions of target machine for GNU compiler.
2    Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
3    Free Software Foundation, Inc.
4    Contributed by James E. Wilson <wilson@cygnus.com> and
5 		  David Mosberger <davidm@hpl.hp.com>.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13 
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING.  If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA.  */
23 
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "real.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "except.h"
42 #include "function.h"
43 #include "ggc.h"
44 #include "basic-block.h"
45 #include "toplev.h"
46 #include "sched-int.h"
47 #include "timevar.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "tm_p.h"
51 #include "hashtab.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
55 #include "intl.h"
56 #include "debug.h"
57 #include "params.h"
58 
59 /* This is used for communication between ASM_OUTPUT_LABEL and
60    ASM_OUTPUT_LABELREF.  */
61 int ia64_asm_output_label = 0;
62 
63 /* Define the information needed to generate branch and scc insns.  This is
64    stored from the compare operation.  */
65 struct rtx_def * ia64_compare_op0;
66 struct rtx_def * ia64_compare_op1;
67 
68 /* Register names for ia64_expand_prologue.  */
69 static const char * const ia64_reg_numbers[96] =
70 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
71   "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
72   "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
73   "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
74   "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
75   "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
76   "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
77   "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
78   "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
79   "r104","r105","r106","r107","r108","r109","r110","r111",
80   "r112","r113","r114","r115","r116","r117","r118","r119",
81   "r120","r121","r122","r123","r124","r125","r126","r127"};
82 
83 /* ??? These strings could be shared with REGISTER_NAMES.  */
84 static const char * const ia64_input_reg_names[8] =
85 { "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
86 
87 /* ??? These strings could be shared with REGISTER_NAMES.  */
88 static const char * const ia64_local_reg_names[80] =
89 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
90   "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
91   "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
92   "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
93   "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
94   "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
95   "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
96   "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
97   "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
98   "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
99 
100 /* ??? These strings could be shared with REGISTER_NAMES.  */
101 static const char * const ia64_output_reg_names[8] =
102 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
103 
104 /* Which CPU we are scheduling for.  */
105 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
106 
107 /* Determines whether we run our final scheduling pass or not.  We always
108    avoid the normal second scheduling pass.  */
109 static int ia64_flag_schedule_insns2;
110 
111 /* Determines whether we run variable tracking in machine dependent
112    reorganization.  */
113 static int ia64_flag_var_tracking;
114 
115 /* Variables which are this size or smaller are put in the sdata/sbss
116    sections.  */
117 
118 unsigned int ia64_section_threshold;
119 
120 /* The following variable is used by the DFA insn scheduler.  The value is
121    TRUE if we do insn bundling instead of insn scheduling.  */
122 int bundling_p = 0;
123 
124 /* Structure to be filled in by ia64_compute_frame_size with register
125    save masks and offsets for the current function.  */
126 
127 struct ia64_frame_info
128 {
129   HOST_WIDE_INT total_size;	/* size of the stack frame, not including
130 				   the caller's scratch area.  */
131   HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
132   HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
133   HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
134   HARD_REG_SET mask;		/* mask of saved registers.  */
135   unsigned int gr_used_mask;	/* mask of registers in use as gr spill
136 				   registers or long-term scratches.  */
137   int n_spilled;		/* number of spilled registers.  */
138   int reg_fp;			/* register for fp.  */
139   int reg_save_b0;		/* save register for b0.  */
140   int reg_save_pr;		/* save register for prs.  */
141   int reg_save_ar_pfs;		/* save register for ar.pfs.  */
142   int reg_save_ar_unat;		/* save register for ar.unat.  */
143   int reg_save_ar_lc;		/* save register for ar.lc.  */
144   int reg_save_gp;		/* save register for gp.  */
145   int n_input_regs;		/* number of input registers used.  */
146   int n_local_regs;		/* number of local registers used.  */
147   int n_output_regs;		/* number of output registers used.  */
148   int n_rotate_regs;		/* number of rotating registers used.  */
149 
150   char need_regstk;		/* true if a .regstk directive needed.  */
151   char initialized;		/* true if the data is finalized.  */
152 };
153 
154 /* Current frame information calculated by ia64_compute_frame_size.  */
155 static struct ia64_frame_info current_frame_info;
156 
157 static int ia64_first_cycle_multipass_dfa_lookahead (void);
158 static void ia64_dependencies_evaluation_hook (rtx, rtx);
159 static void ia64_init_dfa_pre_cycle_insn (void);
160 static rtx ia64_dfa_pre_cycle_insn (void);
161 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
162 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx);
163 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
164 static void ia64_h_i_d_extended (void);
165 static int ia64_mode_to_int (enum machine_mode);
166 static void ia64_set_sched_flags (spec_info_t);
167 static int ia64_speculate_insn (rtx, ds_t, rtx *);
168 static rtx ia64_gen_spec_insn (rtx, ds_t, int, bool, bool);
169 static bool ia64_needs_block_p (rtx);
170 static rtx ia64_gen_check (rtx, rtx, bool);
171 static int ia64_spec_check_p (rtx);
172 static int ia64_spec_check_src_p (rtx);
173 static rtx gen_tls_get_addr (void);
174 static rtx gen_thread_pointer (void);
175 static int find_gr_spill (int);
176 static int next_scratch_gr_reg (void);
177 static void mark_reg_gr_used_mask (rtx, void *);
178 static void ia64_compute_frame_size (HOST_WIDE_INT);
179 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
180 static void finish_spill_pointers (void);
181 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
182 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
183 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
184 static rtx gen_movdi_x (rtx, rtx, rtx);
185 static rtx gen_fr_spill_x (rtx, rtx, rtx);
186 static rtx gen_fr_restore_x (rtx, rtx, rtx);
187 
188 static enum machine_mode hfa_element_mode (tree, bool);
189 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
190 					 tree, int *, int);
191 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
192 				   tree, bool);
193 static bool ia64_function_ok_for_sibcall (tree, tree);
194 static bool ia64_return_in_memory (tree, tree);
195 static bool ia64_rtx_costs (rtx, int, int, int *);
196 static void fix_range (const char *);
197 static bool ia64_handle_option (size_t, const char *, int);
198 static struct machine_function * ia64_init_machine_status (void);
199 static void emit_insn_group_barriers (FILE *);
200 static void emit_all_insn_group_barriers (FILE *);
201 static void final_emit_insn_group_barriers (FILE *);
202 static void emit_predicate_relation_info (void);
203 static void ia64_reorg (void);
204 static bool ia64_in_small_data_p (tree);
205 static void process_epilogue (FILE *, rtx, bool, bool);
206 static int process_set (FILE *, rtx, rtx, bool, bool);
207 
208 static bool ia64_assemble_integer (rtx, unsigned int, int);
209 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
210 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
211 static void ia64_output_function_end_prologue (FILE *);
212 
213 static int ia64_issue_rate (void);
214 static int ia64_adjust_cost_2 (rtx, int, rtx, int);
215 static void ia64_sched_init (FILE *, int, int);
216 static void ia64_sched_init_global (FILE *, int, int);
217 static void ia64_sched_finish_global (FILE *, int);
218 static void ia64_sched_finish (FILE *, int);
219 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
220 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
221 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
222 static int ia64_variable_issue (FILE *, int, rtx, int);
223 
224 static struct bundle_state *get_free_bundle_state (void);
225 static void free_bundle_state (struct bundle_state *);
226 static void initiate_bundle_states (void);
227 static void finish_bundle_states (void);
228 static unsigned bundle_state_hash (const void *);
229 static int bundle_state_eq_p (const void *, const void *);
230 static int insert_bundle_state (struct bundle_state *);
231 static void initiate_bundle_state_table (void);
232 static void finish_bundle_state_table (void);
233 static int try_issue_nops (struct bundle_state *, int);
234 static int try_issue_insn (struct bundle_state *, rtx);
235 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
236 static int get_max_pos (state_t);
237 static int get_template (state_t, int);
238 
239 static rtx get_next_important_insn (rtx, rtx);
240 static void bundling (FILE *, int, rtx, rtx);
241 
242 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
243 				  HOST_WIDE_INT, tree);
244 static void ia64_file_start (void);
245 
246 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
247 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
248 static section *ia64_select_rtx_section (enum machine_mode, rtx,
249 					 unsigned HOST_WIDE_INT);
250 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
251      ATTRIBUTE_UNUSED;
252 static unsigned int ia64_section_type_flags (tree, const char *, int);
253 static void ia64_init_libfuncs (void)
254      ATTRIBUTE_UNUSED;
255 static void ia64_hpux_init_libfuncs (void)
256      ATTRIBUTE_UNUSED;
257 static void ia64_sysv4_init_libfuncs (void)
258      ATTRIBUTE_UNUSED;
259 static void ia64_vms_init_libfuncs (void)
260      ATTRIBUTE_UNUSED;
261 
262 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
263 static void ia64_encode_section_info (tree, rtx, int);
264 static rtx ia64_struct_value_rtx (tree, int);
265 static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
266 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
267 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
268 static bool ia64_cannot_force_const_mem (rtx);
269 static const char *ia64_mangle_fundamental_type (tree);
270 static const char *ia64_invalid_conversion (tree, tree);
271 static const char *ia64_invalid_unary_op (int, tree);
272 static const char *ia64_invalid_binary_op (int, tree, tree);
273 
274 /* Table of valid machine attributes.  */
275 static const struct attribute_spec ia64_attribute_table[] =
276 {
277   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
278   { "syscall_linkage", 0, 0, false, true,  true,  NULL },
279   { "model",	       1, 1, true, false, false, ia64_handle_model_attribute },
280   { NULL,	       0, 0, false, false, false, NULL }
281 };
282 
283 /* Initialize the GCC target structure.  */
284 #undef TARGET_ATTRIBUTE_TABLE
285 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
286 
287 #undef TARGET_INIT_BUILTINS
288 #define TARGET_INIT_BUILTINS ia64_init_builtins
289 
290 #undef TARGET_EXPAND_BUILTIN
291 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
292 
293 #undef TARGET_ASM_BYTE_OP
294 #define TARGET_ASM_BYTE_OP "\tdata1\t"
295 #undef TARGET_ASM_ALIGNED_HI_OP
296 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
297 #undef TARGET_ASM_ALIGNED_SI_OP
298 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
299 #undef TARGET_ASM_ALIGNED_DI_OP
300 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
301 #undef TARGET_ASM_UNALIGNED_HI_OP
302 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
303 #undef TARGET_ASM_UNALIGNED_SI_OP
304 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
305 #undef TARGET_ASM_UNALIGNED_DI_OP
306 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
307 #undef TARGET_ASM_INTEGER
308 #define TARGET_ASM_INTEGER ia64_assemble_integer
309 
310 #undef TARGET_ASM_FUNCTION_PROLOGUE
311 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
312 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
313 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
314 #undef TARGET_ASM_FUNCTION_EPILOGUE
315 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
316 
317 #undef TARGET_IN_SMALL_DATA_P
318 #define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
319 
320 #undef TARGET_SCHED_ADJUST_COST_2
321 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
322 #undef TARGET_SCHED_ISSUE_RATE
323 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
324 #undef TARGET_SCHED_VARIABLE_ISSUE
325 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
326 #undef TARGET_SCHED_INIT
327 #define TARGET_SCHED_INIT ia64_sched_init
328 #undef TARGET_SCHED_FINISH
329 #define TARGET_SCHED_FINISH ia64_sched_finish
330 #undef TARGET_SCHED_INIT_GLOBAL
331 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
332 #undef TARGET_SCHED_FINISH_GLOBAL
333 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
334 #undef TARGET_SCHED_REORDER
335 #define TARGET_SCHED_REORDER ia64_sched_reorder
336 #undef TARGET_SCHED_REORDER2
337 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
338 
339 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
340 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
341 
342 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
343 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
344 
345 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
346 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
347 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
348 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
349 
350 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
351 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
352   ia64_first_cycle_multipass_dfa_lookahead_guard
353 
354 #undef TARGET_SCHED_DFA_NEW_CYCLE
355 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
356 
357 #undef TARGET_SCHED_H_I_D_EXTENDED
358 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
359 
360 #undef TARGET_SCHED_SET_SCHED_FLAGS
361 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
362 
363 #undef TARGET_SCHED_SPECULATE_INSN
364 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
365 
366 #undef TARGET_SCHED_NEEDS_BLOCK_P
367 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
368 
369 #undef TARGET_SCHED_GEN_CHECK
370 #define TARGET_SCHED_GEN_CHECK ia64_gen_check
371 
372 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
373 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
374   ia64_first_cycle_multipass_dfa_lookahead_guard_spec
375 
376 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
377 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
378 #undef TARGET_ARG_PARTIAL_BYTES
379 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
380 
381 #undef TARGET_ASM_OUTPUT_MI_THUNK
382 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
383 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
384 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
385 
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START ia64_file_start
388 
389 #undef TARGET_RTX_COSTS
390 #define TARGET_RTX_COSTS ia64_rtx_costs
391 #undef TARGET_ADDRESS_COST
392 #define TARGET_ADDRESS_COST hook_int_rtx_0
393 
394 #undef TARGET_MACHINE_DEPENDENT_REORG
395 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
396 
397 #undef TARGET_ENCODE_SECTION_INFO
398 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
399 
400 #undef  TARGET_SECTION_TYPE_FLAGS
401 #define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
402 
403 #ifdef HAVE_AS_TLS
404 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
405 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
406 #endif
407 
408 /* ??? ABI doesn't allow us to define this.  */
409 #if 0
410 #undef TARGET_PROMOTE_FUNCTION_ARGS
411 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
412 #endif
413 
414 /* ??? ABI doesn't allow us to define this.  */
415 #if 0
416 #undef TARGET_PROMOTE_FUNCTION_RETURN
417 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
418 #endif
419 
420 /* ??? Investigate.  */
421 #if 0
422 #undef TARGET_PROMOTE_PROTOTYPES
423 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
424 #endif
425 
426 #undef TARGET_STRUCT_VALUE_RTX
427 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
428 #undef TARGET_RETURN_IN_MEMORY
429 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
430 #undef TARGET_SETUP_INCOMING_VARARGS
431 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
432 #undef TARGET_STRICT_ARGUMENT_NAMING
433 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
434 #undef TARGET_MUST_PASS_IN_STACK
435 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
436 
437 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
438 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
439 
440 #undef TARGET_UNWIND_EMIT
441 #define TARGET_UNWIND_EMIT process_for_unwind_directive
442 
443 #undef TARGET_SCALAR_MODE_SUPPORTED_P
444 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
445 #undef TARGET_VECTOR_MODE_SUPPORTED_P
446 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
447 
448 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
449    in an order different from the specified program order.  */
450 #undef TARGET_RELAXED_ORDERING
451 #define TARGET_RELAXED_ORDERING true
452 
453 #undef TARGET_DEFAULT_TARGET_FLAGS
454 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
455 #undef TARGET_HANDLE_OPTION
456 #define TARGET_HANDLE_OPTION ia64_handle_option
457 
458 #undef TARGET_CANNOT_FORCE_CONST_MEM
459 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
460 
461 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
462 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ia64_mangle_fundamental_type
463 
464 #undef TARGET_INVALID_CONVERSION
465 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
466 #undef TARGET_INVALID_UNARY_OP
467 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
468 #undef TARGET_INVALID_BINARY_OP
469 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
470 
471 struct gcc_target targetm = TARGET_INITIALIZER;
472 
473 typedef enum
474   {
475     ADDR_AREA_NORMAL,	/* normal address area */
476     ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
477   }
478 ia64_addr_area;
479 
480 static GTY(()) tree small_ident1;
481 static GTY(()) tree small_ident2;
482 
483 static void
484 init_idents (void)
485 {
486   if (small_ident1 == 0)
487     {
488       small_ident1 = get_identifier ("small");
489       small_ident2 = get_identifier ("__small__");
490     }
491 }
492 
493 /* Retrieve the address area that has been chosen for the given decl.  */
494 
495 static ia64_addr_area
496 ia64_get_addr_area (tree decl)
497 {
498   tree model_attr;
499 
500   model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
501   if (model_attr)
502     {
503       tree id;
504 
505       init_idents ();
506       id = TREE_VALUE (TREE_VALUE (model_attr));
507       if (id == small_ident1 || id == small_ident2)
508 	return ADDR_AREA_SMALL;
509     }
510   return ADDR_AREA_NORMAL;
511 }
512 
513 static tree
514 ia64_handle_model_attribute (tree *node, tree name, tree args,
515 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
516 {
517   ia64_addr_area addr_area = ADDR_AREA_NORMAL;
518   ia64_addr_area area;
519   tree arg, decl = *node;
520 
521   init_idents ();
522   arg = TREE_VALUE (args);
523   if (arg == small_ident1 || arg == small_ident2)
524     {
525       addr_area = ADDR_AREA_SMALL;
526     }
527   else
528     {
529       warning (OPT_Wattributes, "invalid argument of %qs attribute",
530 	       IDENTIFIER_POINTER (name));
531       *no_add_attrs = true;
532     }
533 
534   switch (TREE_CODE (decl))
535     {
536     case VAR_DECL:
537       if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
538 	   == FUNCTION_DECL)
539 	  && !TREE_STATIC (decl))
540 	{
541 	  error ("%Jan address area attribute cannot be specified for "
542 		 "local variables", decl);
543 	  *no_add_attrs = true;
544 	}
545       area = ia64_get_addr_area (decl);
546       if (area != ADDR_AREA_NORMAL && addr_area != area)
547 	{
548 	  error ("address area of %q+D conflicts with previous "
549 		 "declaration", decl);
550 	  *no_add_attrs = true;
551 	}
552       break;
553 
554     case FUNCTION_DECL:
555       error ("%Jaddress area attribute cannot be specified for functions",
556 	     decl);
557       *no_add_attrs = true;
558       break;
559 
560     default:
561       warning (OPT_Wattributes, "%qs attribute ignored",
562 	       IDENTIFIER_POINTER (name));
563       *no_add_attrs = true;
564       break;
565     }
566 
567   return NULL_TREE;
568 }
569 
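/* Illustrative sketch, added for this listing only: user code requests
   the small address area through the "model" attribute registered in
   ia64_attribute_table and validated by ia64_handle_model_attribute
   above.  The variable and function names are hypothetical; the
   argument is written as an identifier (small or __small__), matching
   the get_identifier comparison in the handler.  */
#if 0	/* example only, not compiled */
int hot_counter __attribute__ ((model (small)));

static int
bump_hot_counter (void)
{
  /* hot_counter lives in the small address area, so its address is
     reachable with a single addl relative to gp.  */
  return ++hot_counter;
}
#endif
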
570 static void
571 ia64_encode_addr_area (tree decl, rtx symbol)
572 {
573   int flags;
574 
575   flags = SYMBOL_REF_FLAGS (symbol);
576   switch (ia64_get_addr_area (decl))
577     {
578     case ADDR_AREA_NORMAL: break;
579     case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
580     default: gcc_unreachable ();
581     }
582   SYMBOL_REF_FLAGS (symbol) = flags;
583 }
584 
585 static void
586 ia64_encode_section_info (tree decl, rtx rtl, int first)
587 {
588   default_encode_section_info (decl, rtl, first);
589 
590   /* Careful not to prod global register variables.  */
591   if (TREE_CODE (decl) == VAR_DECL
592       && GET_CODE (DECL_RTL (decl)) == MEM
593       && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
594       && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
595     ia64_encode_addr_area (decl, XEXP (rtl, 0));
596 }
597 
598 /* Implement CONST_OK_FOR_LETTER_P.  */
599 
600 bool
601 ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
602 {
603   switch (c)
604     {
605     case 'I':
606       return CONST_OK_FOR_I (value);
607     case 'J':
608       return CONST_OK_FOR_J (value);
609     case 'K':
610       return CONST_OK_FOR_K (value);
611     case 'L':
612       return CONST_OK_FOR_L (value);
613     case 'M':
614       return CONST_OK_FOR_M (value);
615     case 'N':
616       return CONST_OK_FOR_N (value);
617     case 'O':
618       return CONST_OK_FOR_O (value);
619     case 'P':
620       return CONST_OK_FOR_P (value);
621     default:
622       return false;
623     }
624 }
625 
626 /* Implement CONST_DOUBLE_OK_FOR_LETTER_P.  */
627 
628 bool
629 ia64_const_double_ok_for_letter_p (rtx value, char c)
630 {
631   switch (c)
632     {
633     case 'G':
634       return CONST_DOUBLE_OK_FOR_G (value);
635     default:
636       return false;
637     }
638 }
639 
640 /* Implement EXTRA_CONSTRAINT.  */
641 
642 bool
643 ia64_extra_constraint (rtx value, char c)
644 {
645   switch (c)
646     {
647     case 'Q':
648       /* Non-volatile memory for FP_REG loads/stores.  */
649       return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value);
650 
651     case 'R':
652       /* 1..4 for shladd arguments.  */
653       return (GET_CODE (value) == CONST_INT
654 	      && INTVAL (value) >= 1 && INTVAL (value) <= 4);
655 
656     case 'S':
657       /* Non-post-inc memory for asms and other unsavory creatures.  */
658       return (GET_CODE (value) == MEM
659 	      && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
660 	      && (reload_in_progress || memory_operand (value, VOIDmode)));
661 
662     case 'T':
663       /* Symbol ref to small-address-area.  */
664       return small_addr_symbolic_operand (value, VOIDmode);
665 
666     case 'U':
667       /* Vector zero.  */
668       return value == CONST0_RTX (GET_MODE (value));
669 
670     case 'W':
671       /* An integer vector, such that conversion to an integer yields a
672 	 value appropriate for an integer 'J' constraint.  */
673       if (GET_CODE (value) == CONST_VECTOR
674 	  && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
675 	{
676 	  value = simplify_subreg (DImode, value, GET_MODE (value), 0);
677 	  return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
678 	}
679       return false;
680 
681     case 'Y':
682       /* A V2SF vector containing elements that satisfy 'G'.  */
683       return
684 	(GET_CODE (value) == CONST_VECTOR
685 	 && GET_MODE (value) == V2SFmode
686 	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
687 	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
688 
689     default:
690       return false;
691     }
692 }
693 
694 /* Return 1 if the operands of a move are ok.  */
695 
696 int
697 ia64_move_ok (rtx dst, rtx src)
698 {
699   /* If we're under init_recog_no_volatile, we'll not be able to use
700      memory_operand.  So check the code directly and don't worry about
701      the validity of the underlying address, which should have been
702      checked elsewhere anyway.  */
703   if (GET_CODE (dst) != MEM)
704     return 1;
705   if (GET_CODE (src) == MEM)
706     return 0;
707   if (register_operand (src, VOIDmode))
708     return 1;
709 
710   /* Otherwise, this must be a constant, and then one of 0, 0.0, or 1.0.  */
711   if (INTEGRAL_MODE_P (GET_MODE (dst)))
712     return src == const0_rtx;
713   else
714     return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
715 }
716 
717 /* Return 1 if the operands are ok for a floating point load pair.  */
718 
719 int
720 ia64_load_pair_ok (rtx dst, rtx src)
721 {
722   if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
723     return 0;
724   if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
725     return 0;
726   switch (GET_CODE (XEXP (src, 0)))
727     {
728     case REG:
729     case POST_INC:
730       break;
731     case POST_DEC:
732       return 0;
733     case POST_MODIFY:
734       {
735 	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
736 
737 	if (GET_CODE (adjust) != CONST_INT
738 	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
739 	  return 0;
740       }
741       break;
742     default:
743       abort ();
744     }
745   return 1;
746 }
747 
748 int
749 addp4_optimize_ok (rtx op1, rtx op2)
750 {
751   return (basereg_operand (op1, GET_MODE(op1)) !=
752 	  basereg_operand (op2, GET_MODE(op2)));
753 }
754 
755 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
756    Return the length of the field, or <= 0 on failure.  */
757 
758 int
759 ia64_depz_field_mask (rtx rop, rtx rshift)
760 {
761   unsigned HOST_WIDE_INT op = INTVAL (rop);
762   unsigned HOST_WIDE_INT shift = INTVAL (rshift);
763 
764   /* Get rid of the zero bits we're shifting in.  */
765   op >>= shift;
766 
767   /* We must now have a solid block of 1's at bit 0.  */
768   return exact_log2 (op + 1);
769 }
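
/* Worked example, added for this listing only: for ROP = 0x0ff0 and
   RSHIFT = 4, the shift leaves OP = 0x00ff and exact_log2 (0x00ff + 1)
   is 8, i.e. a deposit field of length 8 starting at bit 4.  For a
   mask that is not a solid block above the shift count, such as
   0xf0f0 with RSHIFT = 4, OP + 1 = 0x0f10 is not a power of two, so
   exact_log2 returns -1 and the caller rejects the combination.  */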
770 
771 /* Return the TLS model to use for ADDR.  */
772 
773 static enum tls_model
774 tls_symbolic_operand_type (rtx addr)
775 {
776   enum tls_model tls_kind = 0;
777 
778   if (GET_CODE (addr) == CONST)
779     {
780       if (GET_CODE (XEXP (addr, 0)) == PLUS
781 	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
782         tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
783     }
784   else if (GET_CODE (addr) == SYMBOL_REF)
785     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
786 
787   return tls_kind;
788 }
789 
790 /* Return true if X is a constant that is valid for some immediate
791    field in an instruction.  */
792 
793 bool
794 ia64_legitimate_constant_p (rtx x)
795 {
796   switch (GET_CODE (x))
797     {
798     case CONST_INT:
799     case LABEL_REF:
800       return true;
801 
802     case CONST_DOUBLE:
803       if (GET_MODE (x) == VOIDmode)
804 	return true;
805       return CONST_DOUBLE_OK_FOR_G (x);
806 
807     case CONST:
808     case SYMBOL_REF:
809       /* ??? Short term workaround for PR 28490.  We must make the code here
810 	 match the code in ia64_expand_move and move_operand, even though they
811 	 are both technically wrong.  */
812       if (tls_symbolic_operand_type (x) == 0)
813 	{
814 	  HOST_WIDE_INT addend = 0;
815 	  rtx op = x;
816 
817 	  if (GET_CODE (op) == CONST
818 	      && GET_CODE (XEXP (op, 0)) == PLUS
819 	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
820 	    {
821 	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
822 	      op = XEXP (XEXP (op, 0), 0);
823 	    }
824 
825           if (any_offset_symbol_operand (op, GET_MODE (op))
826               || function_operand (op, GET_MODE (op)))
827             return true;
828 	  if (aligned_offset_symbol_operand (op, GET_MODE (op)))
829 	    return (addend & 0x3fff) == 0;
830 	  return false;
831 	}
832       return false;
833 
834     case CONST_VECTOR:
835       {
836 	enum machine_mode mode = GET_MODE (x);
837 
838 	if (mode == V2SFmode)
839 	  return ia64_extra_constraint (x, 'Y');
840 
841 	return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
842 		&& GET_MODE_SIZE (mode) <= 8);
843       }
844 
845     default:
846       return false;
847     }
848 }
849 
850 /* Don't allow TLS addresses to get spilled to memory.  */
851 
852 static bool
853 ia64_cannot_force_const_mem (rtx x)
854 {
855   return tls_symbolic_operand_type (x) != 0;
856 }
857 
858 /* Expand a symbolic constant load.  */
859 
860 bool
861 ia64_expand_load_address (rtx dest, rtx src)
862 {
863   gcc_assert (GET_CODE (dest) == REG);
864 
865   /* ILP32 mode still loads 64 bits of data from the GOT.  This avoids
866      having to pointer-extend the value afterward.  Other forms of address
867      computation below are also more natural to compute as 64-bit quantities.
868      If we've been given an SImode destination register, change it.  */
869   if (GET_MODE (dest) != Pmode)
870     dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
871 
872   if (TARGET_NO_PIC)
873     return false;
874   if (small_addr_symbolic_operand (src, VOIDmode))
875     return false;
876 
877   if (TARGET_AUTO_PIC)
878     emit_insn (gen_load_gprel64 (dest, src));
879   else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
880     emit_insn (gen_load_fptr (dest, src));
881   else if (sdata_symbolic_operand (src, VOIDmode))
882     emit_insn (gen_load_gprel (dest, src));
883   else
884     {
885       HOST_WIDE_INT addend = 0;
886       rtx tmp;
887 
888       /* We did split constant offsets in ia64_expand_move, and we did try
889 	 to keep them split in move_operand, but we also allowed reload to
890 	 rematerialize arbitrary constants rather than spill the value to
891 	 the stack and reload it.  So we have to be prepared here to split
892 	 them apart again.  */
893       if (GET_CODE (src) == CONST)
894 	{
895 	  HOST_WIDE_INT hi, lo;
896 
897 	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
898 	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
899 	  hi = hi - lo;
900 
901 	  if (lo != 0)
902 	    {
903 	      addend = lo;
904 	      src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
905 	    }
906 	}
907 
908       tmp = gen_rtx_HIGH (Pmode, src);
909       tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
910       emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
911 
912       tmp = gen_rtx_LO_SUM (Pmode, dest, src);
913       emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
914 
915       if (addend)
916 	{
917 	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
918 	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
919 	}
920     }
921 
922   return true;
923 }
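
/* Worked example of the split above, added for this listing only: for
   a constant offset of 0x12345, lo = ((0x2345 ^ 0x2000) - 0x2000)
   = -0x1cbb and hi = 0x12345 - lo = 0x14000.  HI is a multiple of
   0x4000 and is folded into the symbolic HIGH/LO_SUM address, while
   the signed 14-bit remainder LO (in [-0x2000, 0x1fff]) is added back
   with the separate add emitted at the end.  */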
924 
925 static GTY(()) rtx gen_tls_tga;
926 static rtx
927 gen_tls_get_addr (void)
928 {
929   if (!gen_tls_tga)
930     gen_tls_tga = init_one_libfunc ("__tls_get_addr");
931   return gen_tls_tga;
932 }
933 
934 static GTY(()) rtx thread_pointer_rtx;
935 static rtx
gen_thread_pointer(void)936 gen_thread_pointer (void)
937 {
938   if (!thread_pointer_rtx)
939     thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
940   return thread_pointer_rtx;
941 }
942 
943 static rtx
944 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
945 			 rtx orig_op1, HOST_WIDE_INT addend)
946 {
947   rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
948   rtx orig_op0 = op0;
949   HOST_WIDE_INT addend_lo, addend_hi;
950 
951   switch (tls_kind)
952     {
953     case TLS_MODEL_GLOBAL_DYNAMIC:
954       start_sequence ();
955 
956       tga_op1 = gen_reg_rtx (Pmode);
957       emit_insn (gen_load_dtpmod (tga_op1, op1));
958 
959       tga_op2 = gen_reg_rtx (Pmode);
960       emit_insn (gen_load_dtprel (tga_op2, op1));
961 
962       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
963 					 LCT_CONST, Pmode, 2, tga_op1,
964 					 Pmode, tga_op2, Pmode);
965 
966       insns = get_insns ();
967       end_sequence ();
968 
969       if (GET_MODE (op0) != Pmode)
970 	op0 = tga_ret;
971       emit_libcall_block (insns, op0, tga_ret, op1);
972       break;
973 
974     case TLS_MODEL_LOCAL_DYNAMIC:
975       /* ??? This isn't the completely proper way to do local-dynamic.
976 	 If the call to __tls_get_addr is used only by a single symbol,
977 	 then we should (somehow) move the dtprel to the second arg
978 	 to avoid the extra add.  */
979       start_sequence ();
980 
981       tga_op1 = gen_reg_rtx (Pmode);
982       emit_insn (gen_load_dtpmod (tga_op1, op1));
983 
984       tga_op2 = const0_rtx;
985 
986       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
987 					 LCT_CONST, Pmode, 2, tga_op1,
988 					 Pmode, tga_op2, Pmode);
989 
990       insns = get_insns ();
991       end_sequence ();
992 
993       tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
994 				UNSPEC_LD_BASE);
995       tmp = gen_reg_rtx (Pmode);
996       emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
997 
998       if (!register_operand (op0, Pmode))
999 	op0 = gen_reg_rtx (Pmode);
1000       if (TARGET_TLS64)
1001 	{
1002 	  emit_insn (gen_load_dtprel (op0, op1));
1003 	  emit_insn (gen_adddi3 (op0, tmp, op0));
1004 	}
1005       else
1006 	emit_insn (gen_add_dtprel (op0, op1, tmp));
1007       break;
1008 
1009     case TLS_MODEL_INITIAL_EXEC:
1010       addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1011       addend_hi = addend - addend_lo;
1012 
1013       op1 = plus_constant (op1, addend_hi);
1014       addend = addend_lo;
1015 
1016       tmp = gen_reg_rtx (Pmode);
1017       emit_insn (gen_load_tprel (tmp, op1));
1018 
1019       if (!register_operand (op0, Pmode))
1020 	op0 = gen_reg_rtx (Pmode);
1021       emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1022       break;
1023 
1024     case TLS_MODEL_LOCAL_EXEC:
1025       if (!register_operand (op0, Pmode))
1026 	op0 = gen_reg_rtx (Pmode);
1027 
1028       op1 = orig_op1;
1029       addend = 0;
1030       if (TARGET_TLS64)
1031 	{
1032 	  emit_insn (gen_load_tprel (op0, op1));
1033 	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1034 	}
1035       else
1036 	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1037       break;
1038 
1039     default:
1040       gcc_unreachable ();
1041     }
1042 
1043   if (addend)
1044     op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1045 			       orig_op0, 1, OPTAB_DIRECT);
1046   if (orig_op0 == op0)
1047     return NULL_RTX;
1048   if (GET_MODE (orig_op0) == Pmode)
1049     return op0;
1050   return gen_lowpart (GET_MODE (orig_op0), op0);
1051 }
1052 
1053 rtx
1054 ia64_expand_move (rtx op0, rtx op1)
1055 {
1056   enum machine_mode mode = GET_MODE (op0);
1057 
1058   if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1059     op1 = force_reg (mode, op1);
1060 
1061   if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1062     {
1063       HOST_WIDE_INT addend = 0;
1064       enum tls_model tls_kind;
1065       rtx sym = op1;
1066 
1067       if (GET_CODE (op1) == CONST
1068 	  && GET_CODE (XEXP (op1, 0)) == PLUS
1069 	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1070 	{
1071 	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1072 	  sym = XEXP (XEXP (op1, 0), 0);
1073 	}
1074 
1075       tls_kind = tls_symbolic_operand_type (sym);
1076       if (tls_kind)
1077 	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1078 
1079       if (any_offset_symbol_operand (sym, mode))
1080 	addend = 0;
1081       else if (aligned_offset_symbol_operand (sym, mode))
1082 	{
1083 	  HOST_WIDE_INT addend_lo, addend_hi;
1084 
1085 	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1086 	  addend_hi = addend - addend_lo;
1087 
1088 	  if (addend_lo != 0)
1089 	    {
1090 	      op1 = plus_constant (sym, addend_hi);
1091 	      addend = addend_lo;
1092 	    }
1093 	  else
1094 	    addend = 0;
1095 	}
1096       else
1097 	op1 = sym;
1098 
1099       if (reload_completed)
1100 	{
1101 	  /* We really should have taken care of this offset earlier.  */
1102 	  gcc_assert (addend == 0);
1103 	  if (ia64_expand_load_address (op0, op1))
1104 	    return NULL_RTX;
1105 	}
1106 
1107       if (addend)
1108 	{
1109 	  rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
1110 
1111 	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1112 
1113 	  op1 = expand_simple_binop (mode, PLUS, subtarget,
1114 				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1115 	  if (op0 == op1)
1116 	    return NULL_RTX;
1117 	}
1118     }
1119 
1120   return op1;
1121 }
1122 
1123 /* Split a move from OP1 to OP0 conditional on COND.  */
1124 
1125 void
1126 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1127 {
1128   rtx insn, first = get_last_insn ();
1129 
1130   emit_move_insn (op0, op1);
1131 
1132   for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1133     if (INSN_P (insn))
1134       PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1135 					  PATTERN (insn));
1136 }
1137 
1138 /* Split a post-reload TImode or TFmode reference into two DImode
1139    components.  This is made extra difficult by the fact that we do
1140    not get any scratch registers to work with, because reload cannot
1141    be prevented from giving us a scratch that overlaps the register
1142    pair involved.  So instead, when addressing memory, we tweak the
1143    pointer register up and back down with POST_INCs.  Or up and not
1144    back down when we can get away with it.
1145 
1146    REVERSED is true when the loads must be done in reversed order
1147    (high word first) for correctness.  DEAD is true when the pointer
1148    dies with the second insn we generate and therefore the second
1149    address must not carry a postmodify.
1150 
1151    May return an insn which is to be emitted after the moves.  */
1152 
1153 static rtx
1154 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1155 {
1156   rtx fixup = 0;
1157 
1158   switch (GET_CODE (in))
1159     {
1160     case REG:
1161       out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1162       out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1163       break;
1164 
1165     case CONST_INT:
1166     case CONST_DOUBLE:
1167       /* Cannot occur reversed.  */
1168       gcc_assert (!reversed);
1169 
1170       if (GET_MODE (in) != TFmode)
1171 	split_double (in, &out[0], &out[1]);
1172       else
1173 	/* split_double does not understand how to split a TFmode
1174 	   quantity into a pair of DImode constants.  */
1175 	{
1176 	  REAL_VALUE_TYPE r;
1177 	  unsigned HOST_WIDE_INT p[2];
1178 	  long l[4];  /* TFmode is 128 bits */
1179 
1180 	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1181 	  real_to_target (l, &r, TFmode);
1182 
1183 	  if (FLOAT_WORDS_BIG_ENDIAN)
1184 	    {
1185 	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1186 	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1187 	    }
1188 	  else
1189 	    {
1190 	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1191 	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1192 	    }
1193 	  out[0] = GEN_INT (p[0]);
1194 	  out[1] = GEN_INT (p[1]);
1195 	}
1196       break;
1197 
1198     case MEM:
1199       {
1200 	rtx base = XEXP (in, 0);
1201 	rtx offset;
1202 
1203 	switch (GET_CODE (base))
1204 	  {
1205 	  case REG:
1206 	    if (!reversed)
1207 	      {
1208 		out[0] = adjust_automodify_address
1209 		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1210 		out[1] = adjust_automodify_address
1211 		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1212 	      }
1213 	    else
1214 	      {
1215 		/* Reversal requires a pre-increment, which can only
1216 		   be done as a separate insn.  */
1217 		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1218 		out[0] = adjust_automodify_address
1219 		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1220 		out[1] = adjust_address (in, DImode, 0);
1221 	      }
1222 	    break;
1223 
1224 	  case POST_INC:
1225 	    gcc_assert (!reversed && !dead);
1226 
1227 	    /* Just do the increment in two steps.  */
1228 	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1229 	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1230 	    break;
1231 
1232 	  case POST_DEC:
1233 	    gcc_assert (!reversed && !dead);
1234 
1235 	    /* Add 8, subtract 24.  */
1236 	    base = XEXP (base, 0);
1237 	    out[0] = adjust_automodify_address
1238 	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1239 	    out[1] = adjust_automodify_address
1240 	      (in, DImode,
1241 	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1242 	       8);
1243 	    break;
1244 
1245 	  case POST_MODIFY:
1246 	    gcc_assert (!reversed && !dead);
1247 
1248 	    /* Extract and adjust the modification.  This case is
1249 	       trickier than the others, because we might have an
1250 	       index register, or we might have a combined offset that
1251 	       doesn't fit a signed 9-bit displacement field.  We can
1252 	       assume the incoming expression is already legitimate.  */
1253 	    offset = XEXP (base, 1);
1254 	    base = XEXP (base, 0);
1255 
1256 	    out[0] = adjust_automodify_address
1257 	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1258 
1259 	    if (GET_CODE (XEXP (offset, 1)) == REG)
1260 	      {
1261 		/* Can't adjust the postmodify to match.  Emit the
1262 		   original, then a separate addition insn.  */
1263 		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1264 		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1265 	      }
1266 	    else
1267 	      {
1268 		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1269 		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1270 		  {
1271 		    /* Again the postmodify cannot be made to match,
1272 		       but in this case it's more efficient to get rid
1273 		       of the postmodify entirely and fix up with an
1274 		       add insn.  */
1275 		    out[1] = adjust_automodify_address (in, DImode, base, 8);
1276 		    fixup = gen_adddi3
1277 		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1278 		  }
1279 		else
1280 		  {
1281 		    /* Combined offset still fits in the displacement field.
1282 		       (We cannot overflow it at the high end.)  */
1283 		    out[1] = adjust_automodify_address
1284 		      (in, DImode, gen_rtx_POST_MODIFY
1285 		       (Pmode, base, gen_rtx_PLUS
1286 			(Pmode, base,
1287 			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1288 		       8);
1289 		  }
1290 	      }
1291 	    break;
1292 
1293 	  default:
1294 	    gcc_unreachable ();
1295 	  }
1296 	break;
1297       }
1298 
1299     default:
1300       gcc_unreachable ();
1301     }
1302 
1303   return fixup;
1304 }
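
/* Worked example, added for this listing only: splitting a TImode load
   from (mem (reg rp)) in the normal (non-reversed) REG case above
   yields out[0] = (mem (post_inc rp)) at offset 0 and out[1] =
   (mem (post_dec rp)) at offset 8, i.e. roughly "ld8 lo = [rp], 8"
   followed by "ld8 hi = [rp], -8", leaving rp unchanged afterwards.
   When DEAD is set, the second reference simply uses the
   already-incremented pointer with no postmodify, so rp is left
   pointing 8 bytes further on.  */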
1305 
1306 /* Split a TImode or TFmode move instruction after reload.
1307    This is used by *movtf_internal and *movti_internal.  */
1308 void
1309 ia64_split_tmode_move (rtx operands[])
1310 {
1311   rtx in[2], out[2], insn;
1312   rtx fixup[2];
1313   bool dead = false;
1314   bool reversed = false;
1315 
1316   /* It is possible for reload to decide to overwrite a pointer with
1317      the value it points to.  In that case we have to do the loads in
1318      the appropriate order so that the pointer is not destroyed too
1319      early.  Also we must not generate a postmodify for that second
1320      load, or rws_access_regno will die.  */
1321   if (GET_CODE (operands[1]) == MEM
1322       && reg_overlap_mentioned_p (operands[0], operands[1]))
1323     {
1324       rtx base = XEXP (operands[1], 0);
1325       while (GET_CODE (base) != REG)
1326 	base = XEXP (base, 0);
1327 
1328       if (REGNO (base) == REGNO (operands[0]))
1329 	reversed = true;
1330       dead = true;
1331     }
1332   /* Another reason to do the moves in reversed order is if the first
1333      element of the target register pair is also the second element of
1334      the source register pair.  */
1335   if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1336       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1337     reversed = true;
1338 
1339   fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1340   fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1341 
1342 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1343   if (GET_CODE (EXP) == MEM						\
1344       && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1345 	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1346 	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1347     REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
1348 					  XEXP (XEXP (EXP, 0), 0),	\
1349 					  REG_NOTES (INSN))
1350 
1351   insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1352   MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1353   MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1354 
1355   insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1356   MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1357   MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1358 
1359   if (fixup[0])
1360     emit_insn (fixup[0]);
1361   if (fixup[1])
1362     emit_insn (fixup[1]);
1363 
1364 #undef MAYBE_ADD_REG_INC_NOTE
1365 }
1366 
1367 /* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1368    through memory plus an extra GR scratch register.  Except that you can
1369    either get the first from SECONDARY_MEMORY_NEEDED or the second from
1370    SECONDARY_RELOAD_CLASS, but not both.
1371 
1372    We got into problems in the first place by allowing a construct like
1373    (subreg:XF (reg:TI)), which we got from a union containing a long double.
1374    This solution attempts to prevent this situation from occurring.  When
1375    we see something like the above, we spill the inner register to memory.  */
1376 
1377 static rtx
1378 spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1379 {
1380   if (GET_CODE (in) == SUBREG
1381       && GET_MODE (SUBREG_REG (in)) == TImode
1382       && GET_CODE (SUBREG_REG (in)) == REG)
1383     {
1384       rtx memt = assign_stack_temp (TImode, 16, 0);
1385       emit_move_insn (memt, SUBREG_REG (in));
1386       return adjust_address (memt, mode, 0);
1387     }
1388   else if (force && GET_CODE (in) == REG)
1389     {
1390       rtx memx = assign_stack_temp (mode, 16, 0);
1391       emit_move_insn (memx, in);
1392       return memx;
1393     }
1394   else
1395     return in;
1396 }
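
/* Illustrative sketch, added for this listing only: the problematic
   (subreg:XF (reg:TI)) construct described above typically comes from
   a union that overlays a long double with integer words, as in the
   hypothetical type below.  */
#if 0	/* example only, not compiled */
union xf_overlay
{
  long double value;		/* XFmode member */
  unsigned long words[2];	/* the same bytes viewed as DImode words */
};

static long double
xf_from_words (unsigned long lo, unsigned long hi)
{
  union xf_overlay u;

  /* Building the value through the integer members and then reading
     the long double member is the kind of access that can surface as
     (subreg:XF (reg:TI)); spill_xfmode_rfmode_operand forces such a
     value through a stack temporary instead.  */
  u.words[0] = lo;
  u.words[1] = hi;
  return u.value;
}
#endif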
1397 
1398 /* Expand the movxf or movrf pattern (MODE says which) with the given
1399    OPERANDS, returning true if the pattern should then invoke
1400    DONE.  */
1401 
1402 bool
1403 ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1404 {
1405   rtx op0 = operands[0];
1406 
1407   if (GET_CODE (op0) == SUBREG)
1408     op0 = SUBREG_REG (op0);
1409 
1410   /* We must support XFmode loads into general registers for stdarg/vararg,
1411      unprototyped calls, and a rare case where a long double is passed as
1412      an argument after a float HFA fills the FP registers.  We split them into
1413      DImode loads for convenience.  We also need to support XFmode stores
1414      for the last case.  This case does not happen for stdarg/vararg routines,
1415      because we do a block store to memory of unnamed arguments.  */
1416 
1417   if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1418     {
1419       rtx out[2];
1420 
1421       /* We're hoping to transform everything that deals with XFmode
1422 	 quantities and GR registers early in the compiler.  */
1423       gcc_assert (!no_new_pseudos);
1424 
1425       /* Struct to register can just use TImode instead.  */
1426       if ((GET_CODE (operands[1]) == SUBREG
1427 	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1428 	  || (GET_CODE (operands[1]) == REG
1429 	      && GR_REGNO_P (REGNO (operands[1]))))
1430 	{
1431 	  rtx op1 = operands[1];
1432 
1433 	  if (GET_CODE (op1) == SUBREG)
1434 	    op1 = SUBREG_REG (op1);
1435 	  else
1436 	    op1 = gen_rtx_REG (TImode, REGNO (op1));
1437 
1438 	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1439 	  return true;
1440 	}
1441 
1442       if (GET_CODE (operands[1]) == CONST_DOUBLE)
1443 	{
1444 	  /* Don't word-swap when reading in the constant.  */
1445 	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1446 			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
1447 					   0, mode));
1448 	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1449 			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1450 					   0, mode));
1451 	  return true;
1452 	}
1453 
1454       /* If the quantity is in a register not known to be GR, spill it.  */
1455       if (register_operand (operands[1], mode))
1456 	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1457 
1458       gcc_assert (GET_CODE (operands[1]) == MEM);
1459 
1460       /* Don't word-swap when reading in the value.  */
1461       out[0] = gen_rtx_REG (DImode, REGNO (op0));
1462       out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1463 
1464       emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1465       emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1466       return true;
1467     }
1468 
1469   if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1470     {
1471       /* We're hoping to transform everything that deals with XFmode
1472 	 quantities and GR registers early in the compiler.  */
1473       gcc_assert (!no_new_pseudos);
1474 
1475       /* Op0 can't be a GR_REG here, as that case is handled above.
1476 	 If op0 is a register, then we spill op1, so that we now have a
1477 	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
1478 	 to force the spill.  */
1479       if (register_operand (operands[0], mode))
1480 	{
1481 	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1482 	  op1 = gen_rtx_SUBREG (mode, op1, 0);
1483 	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1484 	}
1485 
1486       else
1487 	{
1488 	  rtx in[2];
1489 
1490 	  gcc_assert (GET_CODE (operands[0]) == MEM);
1491 
1492 	  /* Don't word-swap when writing out the value.  */
1493 	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1494 	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1495 
1496 	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1497 	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1498 	  return true;
1499 	}
1500     }
1501 
1502   if (!reload_in_progress && !reload_completed)
1503     {
1504       operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1505 
1506       if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1507 	{
1508 	  rtx memt, memx, in = operands[1];
1509 	  if (CONSTANT_P (in))
1510 	    in = validize_mem (force_const_mem (mode, in));
1511 	  if (GET_CODE (in) == MEM)
1512 	    memt = adjust_address (in, TImode, 0);
1513 	  else
1514 	    {
1515 	      memt = assign_stack_temp (TImode, 16, 0);
1516 	      memx = adjust_address (memt, mode, 0);
1517 	      emit_move_insn (memx, in);
1518 	    }
1519 	  emit_move_insn (op0, memt);
1520 	  return true;
1521 	}
1522 
1523       if (!ia64_move_ok (operands[0], operands[1]))
1524 	operands[1] = force_reg (mode, operands[1]);
1525     }
1526 
1527   return false;
1528 }
1529 
1530 /* Emit comparison instruction if necessary, returning the expression
1531    that holds the compare result in the proper mode.  */
1532 
1533 static GTY(()) rtx cmptf_libfunc;
1534 
1535 rtx
1536 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1537 {
1538   rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1539   rtx cmp;
1540 
1541   /* If we have a BImode input, then we already have a compare result, and
1542      do not need to emit another comparison.  */
1543   if (GET_MODE (op0) == BImode)
1544     {
1545       gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
1546       cmp = op0;
1547     }
1548   /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1549      magic number as its third argument, that indicates what to do.
1550      magic number as its third argument that indicates what to do.
1551   else if (GET_MODE (op0) == TFmode)
1552     {
1553       enum qfcmp_magic {
1554 	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
1555 	QCMP_UNORD = 2,
1556 	QCMP_EQ = 4,
1557 	QCMP_LT = 8,
1558 	QCMP_GT = 16
1559       } magic;
1560       enum rtx_code ncode;
1561       rtx ret, insns;
1562 
1563       gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
1564       switch (code)
1565 	{
1566 	  /* 1 = equal, 0 = not equal.  Equality operators do
1567 	     not raise FP_INVALID when given an SNaN operand.  */
1568 	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1569 	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1570 	  /* isunordered() from C99.  */
1571 	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1572 	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1573 	  /* Relational operators raise FP_INVALID when given
1574 	     an SNaN operand.  */
1575 	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1576 	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1577 	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1578 	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1579 	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1580 	     Expanders for buneq etc. would have to be added to ia64.md
1581 	     for this to be useful.  */
1582 	default: gcc_unreachable ();
1583 	}
1584 
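      /* Illustration (derived from the table above): a LE comparison is
	 lowered to
	   ret = _U_Qfcmp (op0, op1, QCMP_LT|QCMP_EQ|QCMP_INV);
	 and the final result is the BImode test "ret != 0".  */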
1585       start_sequence ();
1586 
1587       ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1588 				     op0, TFmode, op1, TFmode,
1589 				     GEN_INT (magic), DImode);
1590       cmp = gen_reg_rtx (BImode);
1591       emit_insn (gen_rtx_SET (VOIDmode, cmp,
1592 			      gen_rtx_fmt_ee (ncode, BImode,
1593 					      ret, const0_rtx)));
1594 
1595       insns = get_insns ();
1596       end_sequence ();
1597 
1598       emit_libcall_block (insns, cmp, cmp,
1599 			  gen_rtx_fmt_ee (code, BImode, op0, op1));
1600       code = NE;
1601     }
1602   else
1603     {
1604       cmp = gen_reg_rtx (BImode);
1605       emit_insn (gen_rtx_SET (VOIDmode, cmp,
1606 			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
1607       code = NE;
1608     }
1609 
1610   return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1611 }
1612 
1613 /* Generate an integral vector comparison.  Return true if the condition has
1614    been reversed, and so the sense of the comparison should be inverted.  */
1615 
1616 static bool
1617 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1618 			    rtx dest, rtx op0, rtx op1)
1619 {
1620   bool negate = false;
1621   rtx x;
1622 
1623   /* Canonicalize the comparison to EQ, GT, GTU.  */
1624   switch (code)
1625     {
1626     case EQ:
1627     case GT:
1628     case GTU:
1629       break;
1630 
1631     case NE:
1632     case LE:
1633     case LEU:
1634       code = reverse_condition (code);
1635       negate = true;
1636       break;
1637 
1638     case GE:
1639     case GEU:
1640       code = reverse_condition (code);
1641       negate = true;
1642       /* FALLTHRU */
1643 
1644     case LT:
1645     case LTU:
1646       code = swap_condition (code);
1647       x = op0, op0 = op1, op1 = x;
1648       break;
1649 
1650     default:
1651       gcc_unreachable ();
1652     }
1653 
1654   /* Unsigned parallel compare is not supported by the hardware.  Play some
1655      tricks to turn this into a signed comparison against 0.  */
1656   if (code == GTU)
1657     {
1658       switch (mode)
1659 	{
1660 	case V2SImode:
1661 	  {
1662 	    rtx t1, t2, mask;
1663 
1664 	    /* Perform a parallel modulo subtraction.  */
1665 	    t1 = gen_reg_rtx (V2SImode);
1666 	    emit_insn (gen_subv2si3 (t1, op0, op1));
1667 
1668 	    /* Extract the original sign bit of op0.  */
1669 	    mask = GEN_INT (-0x80000000);
1670 	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1671 	    mask = force_reg (V2SImode, mask);
1672 	    t2 = gen_reg_rtx (V2SImode);
1673 	    emit_insn (gen_andv2si3 (t2, op0, mask));
1674 
1675 	    /* XOR it back into the result of the subtraction.  This results
1676 	       in the sign bit set iff we saw unsigned underflow.  */
1677 	    x = gen_reg_rtx (V2SImode);
1678 	    emit_insn (gen_xorv2si3 (x, t1, t2));
1679 
1680 	    code = GT;
1681 	    op0 = x;
1682 	    op1 = CONST0_RTX (mode);
1683 	  }
1684 	  break;
1685 
1686 	case V8QImode:
1687 	case V4HImode:
1688 	  /* Perform a parallel unsigned saturating subtraction.  */
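	  /* For example, with unsigned elements a = 9 and b = 5 the
	     saturating difference is 4, the EQ-against-zero test below is
	     false, and the toggled NEGATE tells the caller to invert that,
	     correctly reporting 9 >u 5.  For a = 5, b = 9 the difference
	     saturates to 0 and the inverted result is false.  */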
1689 	  x = gen_reg_rtx (mode);
1690 	  emit_insn (gen_rtx_SET (VOIDmode, x,
1691 				  gen_rtx_US_MINUS (mode, op0, op1)));
1692 
1693 	  code = EQ;
1694 	  op0 = x;
1695 	  op1 = CONST0_RTX (mode);
1696 	  negate = !negate;
1697 	  break;
1698 
1699 	default:
1700 	  gcc_unreachable ();
1701 	}
1702     }
1703 
1704   x = gen_rtx_fmt_ee (code, mode, op0, op1);
1705   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1706 
1707   return negate;
1708 }
1709 
1710 /* Emit an integral vector conditional move.  */
1711 
1712 void
1713 ia64_expand_vecint_cmov (rtx operands[])
1714 {
1715   enum machine_mode mode = GET_MODE (operands[0]);
1716   enum rtx_code code = GET_CODE (operands[3]);
1717   bool negate;
1718   rtx cmp, x, ot, of;
1719 
1720   cmp = gen_reg_rtx (mode);
1721   negate = ia64_expand_vecint_compare (code, mode, cmp,
1722 				       operands[4], operands[5]);
1723 
1724   ot = operands[1+negate];
1725   of = operands[2-negate];
1726 
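  /* Conceptually the result is (cmp & OT) | (~cmp & OF), relying on the
     vector compare producing all-ones or all-zeros per element; the cases
     below simply drop the terms that are known to be zero.  */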
1727   if (ot == CONST0_RTX (mode))
1728     {
1729       if (of == CONST0_RTX (mode))
1730 	{
1731 	  emit_move_insn (operands[0], ot);
1732 	  return;
1733 	}
1734 
1735       x = gen_rtx_NOT (mode, cmp);
1736       x = gen_rtx_AND (mode, x, of);
1737       emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1738     }
1739   else if (of == CONST0_RTX (mode))
1740     {
1741       x = gen_rtx_AND (mode, cmp, ot);
1742       emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1743     }
1744   else
1745     {
1746       rtx t, f;
1747 
1748       t = gen_reg_rtx (mode);
1749       x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1750       emit_insn (gen_rtx_SET (VOIDmode, t, x));
1751 
1752       f = gen_reg_rtx (mode);
1753       x = gen_rtx_NOT (mode, cmp);
1754       x = gen_rtx_AND (mode, x, operands[2-negate]);
1755       emit_insn (gen_rtx_SET (VOIDmode, f, x));
1756 
1757       x = gen_rtx_IOR (mode, t, f);
1758       emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1759     }
1760 }
1761 
1762 /* Emit an integral vector min or max operation.  Return true if all done.  */
1763 
1764 bool
1765 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1766 			   rtx operands[])
1767 {
1768   rtx xops[6];
1769 
1770   /* These four combinations are supported directly.  */
1771   if (mode == V8QImode && (code == UMIN || code == UMAX))
1772     return false;
1773   if (mode == V4HImode && (code == SMIN || code == SMAX))
1774     return false;
1775 
1776   /* This combination can be implemented with only saturating subtraction.  */
1777   if (mode == V4HImode && code == UMAX)
1778     {
1779       rtx x, tmp = gen_reg_rtx (mode);
1780 
1781       x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1782       emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1783 
1784       emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
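      /* umax (a, b) == (a -us b) + b: the saturating difference is
	 a - b when a > b and 0 otherwise.  */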
1785       return true;
1786     }
1787 
1788   /* Everything else implemented via vector comparisons.  */
1789   xops[0] = operands[0];
1790   xops[4] = xops[1] = operands[1];
1791   xops[5] = xops[2] = operands[2];
1792 
1793   switch (code)
1794     {
1795     case UMIN:
1796       code = LTU;
1797       break;
1798     case UMAX:
1799       code = GTU;
1800       break;
1801     case SMIN:
1802       code = LT;
1803       break;
1804     case SMAX:
1805       code = GT;
1806       break;
1807     default:
1808       gcc_unreachable ();
1809     }
1810   xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1811 
1812   ia64_expand_vecint_cmov (xops);
1813   return true;
1814 }
1815 
1816 /* Emit an integral vector widening sum operation.  */
1817 
1818 void
1819 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1820 {
1821   rtx l, h, x, s;
1822   enum machine_mode wmode, mode;
1823   rtx (*unpack_l) (rtx, rtx, rtx);
1824   rtx (*unpack_h) (rtx, rtx, rtx);
1825   rtx (*plus) (rtx, rtx, rtx);
1826 
1827   wmode = GET_MODE (operands[0]);
1828   mode = GET_MODE (operands[1]);
1829 
1830   switch (mode)
1831     {
1832     case V8QImode:
1833       unpack_l = gen_unpack1_l;
1834       unpack_h = gen_unpack1_h;
1835       plus = gen_addv4hi3;
1836       break;
1837     case V4HImode:
1838       unpack_l = gen_unpack2_l;
1839       unpack_h = gen_unpack2_h;
1840       plus = gen_addv2si3;
1841       break;
1842     default:
1843       gcc_unreachable ();
1844     }
1845 
1846   /* Fill in x with the sign extension of each element in op1.  */
1847   if (unsignedp)
1848     x = CONST0_RTX (mode);
1849   else
1850     {
1851       bool neg;
1852 
1853       x = gen_reg_rtx (mode);
1854 
1855       neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1856 					CONST0_RTX (mode));
1857       gcc_assert (!neg);
1858     }
1859 
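  /* The unpack insns interleave the elements of operands[1] with X, pairing
     each narrow element with either zero or its sign mask so that the
     interleaved result holds the widened (zero- or sign-extended) values.  */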
1860   l = gen_reg_rtx (wmode);
1861   h = gen_reg_rtx (wmode);
1862   s = gen_reg_rtx (wmode);
1863 
1864   emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1865   emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1866   emit_insn (plus (s, l, operands[2]));
1867   emit_insn (plus (operands[0], h, s));
1868 }
1869 
1870 /* Emit a signed or unsigned V8QI dot product operation.  */
1871 
1872 void
1873 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1874 {
1875   rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1876 
1877   /* Fill in x1 and x2 with the sign extension of each element.  */
1878   if (unsignedp)
1879     x1 = x2 = CONST0_RTX (V8QImode);
1880   else
1881     {
1882       bool neg;
1883 
1884       x1 = gen_reg_rtx (V8QImode);
1885       x2 = gen_reg_rtx (V8QImode);
1886 
1887       neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1888 					CONST0_RTX (V8QImode));
1889       gcc_assert (!neg);
1890       neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1891 					CONST0_RTX (V8QImode));
1892       gcc_assert (!neg);
1893     }
1894 
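  /* Widen both inputs to V4HImode halves, form the four V2SImode partial
     products with pmpy2, and accumulate them together with operands[3].  */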
1895   l1 = gen_reg_rtx (V4HImode);
1896   l2 = gen_reg_rtx (V4HImode);
1897   h1 = gen_reg_rtx (V4HImode);
1898   h2 = gen_reg_rtx (V4HImode);
1899 
1900   emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1901   emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1902   emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1903   emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1904 
1905   p1 = gen_reg_rtx (V2SImode);
1906   p2 = gen_reg_rtx (V2SImode);
1907   p3 = gen_reg_rtx (V2SImode);
1908   p4 = gen_reg_rtx (V2SImode);
1909   emit_insn (gen_pmpy2_r (p1, l1, l2));
1910   emit_insn (gen_pmpy2_l (p2, l1, l2));
1911   emit_insn (gen_pmpy2_r (p3, h1, h2));
1912   emit_insn (gen_pmpy2_l (p4, h1, h2));
1913 
1914   s1 = gen_reg_rtx (V2SImode);
1915   s2 = gen_reg_rtx (V2SImode);
1916   s3 = gen_reg_rtx (V2SImode);
1917   emit_insn (gen_addv2si3 (s1, p1, p2));
1918   emit_insn (gen_addv2si3 (s2, p3, p4));
1919   emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1920   emit_insn (gen_addv2si3 (operands[0], s2, s3));
1921 }
1922 
1923 /* Emit the appropriate sequence for a call.  */
1924 
1925 void
1926 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1927 		  int sibcall_p)
1928 {
1929   rtx insn, b0;
1930 
1931   addr = XEXP (addr, 0);
1932   addr = convert_memory_address (DImode, addr);
1933   b0 = gen_rtx_REG (DImode, R_BR (0));
1934 
1935   /* ??? Should do this for functions known to bind local too.  */
1936   if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1937     {
1938       if (sibcall_p)
1939 	insn = gen_sibcall_nogp (addr);
1940       else if (! retval)
1941 	insn = gen_call_nogp (addr, b0);
1942       else
1943 	insn = gen_call_value_nogp (retval, addr, b0);
1944       insn = emit_call_insn (insn);
1945     }
1946   else
1947     {
1948       if (sibcall_p)
1949 	insn = gen_sibcall_gp (addr);
1950       else if (! retval)
1951 	insn = gen_call_gp (addr, b0);
1952       else
1953 	insn = gen_call_value_gp (retval, addr, b0);
1954       insn = emit_call_insn (insn);
1955 
1956       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1957     }
1958 
1959   if (sibcall_p)
1960     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1961 }
1962 
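/* Restore the global pointer after a call that may have clobbered it:
   either copy it back from the general register in which it was saved, or
   reload it from its stack slot at spill_cfa_off + spill_size.  */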
1963 void
1964 ia64_reload_gp (void)
1965 {
1966   rtx tmp;
1967 
1968   if (current_frame_info.reg_save_gp)
1969     tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1970   else
1971     {
1972       HOST_WIDE_INT offset;
1973 
1974       offset = (current_frame_info.spill_cfa_off
1975 	        + current_frame_info.spill_size);
1976       if (frame_pointer_needed)
1977         {
1978           tmp = hard_frame_pointer_rtx;
1979           offset = -offset;
1980         }
1981       else
1982         {
1983           tmp = stack_pointer_rtx;
1984           offset = current_frame_info.total_size - offset;
1985         }
1986 
1987       if (CONST_OK_FOR_I (offset))
1988         emit_insn (gen_adddi3 (pic_offset_table_rtx,
1989 			       tmp, GEN_INT (offset)));
1990       else
1991         {
1992           emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1993           emit_insn (gen_adddi3 (pic_offset_table_rtx,
1994 			         pic_offset_table_rtx, tmp));
1995         }
1996 
1997       tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1998     }
1999 
2000   emit_move_insn (pic_offset_table_rtx, tmp);
2001 }
2002 
2003 void
2004 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2005 		 rtx scratch_b, int noreturn_p, int sibcall_p)
2006 {
2007   rtx insn;
2008   bool is_desc = false;
2009 
2010   /* If we find we're calling through a register, then we're actually
2011      calling through a descriptor, so load up the values.  */
2012   if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2013     {
2014       rtx tmp;
2015       bool addr_dead_p;
2016 
2017       /* ??? We are currently constrained to *not* use peep2, because
2018 	 we can legitimately change the global lifetime of the GP
2019 	 (in the form of killing where previously live).  This is
2020 	 because a call through a descriptor doesn't use the previous
2021 	 value of the GP, while a direct call does, and we do not
2022 	 commit to either form until the split here.
2023 
2024 	 That said, this means that we lack precise life info for
2025 	 whether ADDR is dead after this call.  This is not terribly
2026 	 important, since we can fix things up essentially for free
2027 	 with the POST_DEC below, but it's nice to not use it when we
2028 	 can immediately tell it's not necessary.  */
2029       addr_dead_p = ((noreturn_p || sibcall_p
2030 		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2031 					    REGNO (addr)))
2032 		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2033 
2034       /* Load the code address into scratch_b.  */
2035       tmp = gen_rtx_POST_INC (Pmode, addr);
2036       tmp = gen_rtx_MEM (Pmode, tmp);
2037       emit_move_insn (scratch_r, tmp);
2038       emit_move_insn (scratch_b, scratch_r);
2039 
2040       /* Load the GP address.  If ADDR is not dead here, then we must
2041 	 revert the change made above via the POST_INCREMENT.  */
2042       if (!addr_dead_p)
2043 	tmp = gen_rtx_POST_DEC (Pmode, addr);
2044       else
2045 	tmp = addr;
2046       tmp = gen_rtx_MEM (Pmode, tmp);
2047       emit_move_insn (pic_offset_table_rtx, tmp);
2048 
2049       is_desc = true;
2050       addr = scratch_b;
2051     }
2052 
2053   if (sibcall_p)
2054     insn = gen_sibcall_nogp (addr);
2055   else if (retval)
2056     insn = gen_call_value_nogp (retval, addr, retaddr);
2057   else
2058     insn = gen_call_nogp (addr, retaddr);
2059   emit_call_insn (insn);
2060 
2061   if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2062     ia64_reload_gp ();
2063 }
2064 
2065 /* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2066 
2067    This differs from the generic code in that we know about the zero-extending
2068    properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2069    also know that ld.acq+cmpxchg.rel equals a full barrier.
2070 
2071    The loop we want to generate looks like
2072 
2073 	cmp_reg = mem;
2074       label:
2075         old_reg = cmp_reg;
2076 	new_reg = cmp_reg op val;
2077 	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2078 	if (cmp_reg != old_reg)
2079 	  goto label;
2080 
2081    Note that we only do the plain load from memory once.  Subsequent
2082    iterations use the value loaded by the compare-and-swap pattern.  */
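
/* For PLUS and MINUS with an immediate accepted by fetchadd_operand, no
   compare-and-swap loop is needed; the special case at the top of the
   function emits a memory barrier followed by a single fetchadd.acq.  */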
2083 
2084 void
2085 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2086 		       rtx old_dst, rtx new_dst)
2087 {
2088   enum machine_mode mode = GET_MODE (mem);
2089   rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2090   enum insn_code icode;
2091 
2092   /* Special case for using fetchadd.  */
2093   if ((mode == SImode || mode == DImode)
2094       && (code == PLUS || code == MINUS)
2095       && fetchadd_operand (val, mode))
2096     {
2097       if (code == MINUS)
2098 	val = GEN_INT (-INTVAL (val));
2099 
2100       if (!old_dst)
2101         old_dst = gen_reg_rtx (mode);
2102 
2103       emit_insn (gen_memory_barrier ());
2104 
2105       if (mode == SImode)
2106 	icode = CODE_FOR_fetchadd_acq_si;
2107       else
2108 	icode = CODE_FOR_fetchadd_acq_di;
2109       emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2110 
2111       if (new_dst)
2112 	{
2113 	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2114 					 true, OPTAB_WIDEN);
2115 	  if (new_reg != new_dst)
2116 	    emit_move_insn (new_dst, new_reg);
2117 	}
2118       return;
2119     }
2120 
2121   /* Because of the volatile mem read, we get an ld.acq, which is the
2122      front half of the full barrier.  The end half is the cmpxchg.rel.  */
2123   gcc_assert (MEM_VOLATILE_P (mem));
2124 
2125   old_reg = gen_reg_rtx (DImode);
2126   cmp_reg = gen_reg_rtx (DImode);
2127   label = gen_label_rtx ();
2128 
2129   if (mode != DImode)
2130     {
2131       val = simplify_gen_subreg (DImode, val, mode, 0);
2132       emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2133     }
2134   else
2135     emit_move_insn (cmp_reg, mem);
2136 
2137   emit_label (label);
2138 
2139   ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2140   emit_move_insn (old_reg, cmp_reg);
2141   emit_move_insn (ar_ccv, cmp_reg);
2142 
2143   if (old_dst)
2144     emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2145 
2146   new_reg = cmp_reg;
2147   if (code == NOT)
2148     {
2149       new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
2150       code = AND;
2151     }
2152   new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2153 				 true, OPTAB_DIRECT);
2154 
2155   if (mode != DImode)
2156     new_reg = gen_lowpart (mode, new_reg);
2157   if (new_dst)
2158     emit_move_insn (new_dst, new_reg);
2159 
2160   switch (mode)
2161     {
2162     case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
2163     case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
2164     case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
2165     case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
2166     default:
2167       gcc_unreachable ();
2168     }
2169 
2170   emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2171 
2172   emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2173 }
2174 
2175 /* Begin the assembly file.  */
2176 
2177 static void
2178 ia64_file_start (void)
2179 {
2180   /* Variable tracking should be run after all optimizations which change order
2181      of insns.  It also needs a valid CFG.  This can't be done in
2182      ia64_override_options, because flag_var_tracking is finalized after
2183      that.  */
2184   ia64_flag_var_tracking = flag_var_tracking;
2185   flag_var_tracking = 0;
2186 
2187   default_file_start ();
2188   emit_safe_across_calls ();
2189 }
2190 
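/* Emit the ".pred.safe_across_calls" directive naming the predicate
   registers that are not call-used.  With the usual IA-64 conventions this
   typically comes out as ".pred.safe_across_calls p1-p5,p16-p63".  */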
2191 void
2192 emit_safe_across_calls (void)
2193 {
2194   unsigned int rs, re;
2195   int out_state;
2196 
2197   rs = 1;
2198   out_state = 0;
2199   while (1)
2200     {
2201       while (rs < 64 && call_used_regs[PR_REG (rs)])
2202 	rs++;
2203       if (rs >= 64)
2204 	break;
2205       for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2206 	continue;
2207       if (out_state == 0)
2208 	{
2209 	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
2210 	  out_state = 1;
2211 	}
2212       else
2213 	fputc (',', asm_out_file);
2214       if (re == rs + 1)
2215 	fprintf (asm_out_file, "p%u", rs);
2216       else
2217 	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2218       rs = re + 1;
2219     }
2220   if (out_state)
2221     fputc ('\n', asm_out_file);
2222 }
2223 
2224 /* Helper function for ia64_compute_frame_size: find an appropriate general
2225    register to spill some special register to.  current_frame_info.gr_used_mask
2226    tracks the bits in GR0 to GR31 that have already been allocated by this routine.
2227    TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2228 
2229 static int
2230 find_gr_spill (int try_locals)
2231 {
2232   int regno;
2233 
2234   /* If this is a leaf function, first try an otherwise unused
2235      call-clobbered register.  */
2236   if (current_function_is_leaf)
2237     {
2238       for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2239 	if (! regs_ever_live[regno]
2240 	    && call_used_regs[regno]
2241 	    && ! fixed_regs[regno]
2242 	    && ! global_regs[regno]
2243 	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2244 	  {
2245 	    current_frame_info.gr_used_mask |= 1 << regno;
2246 	    return regno;
2247 	  }
2248     }
2249 
2250   if (try_locals)
2251     {
2252       regno = current_frame_info.n_local_regs;
2253       /* If there is a frame pointer, then we can't use loc79, because
2254 	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2255 	 reg_name switching code in ia64_expand_prologue.  */
2256       if (regno < (80 - frame_pointer_needed))
2257 	{
2258 	  current_frame_info.n_local_regs = regno + 1;
2259 	  return LOC_REG (0) + regno;
2260 	}
2261     }
2262 
2263   /* Failed to find a general register to spill to.  Must use stack.  */
2264   return 0;
2265 }
2266 
2267 /* In order to make for nice schedules, we try to allocate every temporary
2268    to a different register.  We must of course stay away from call-saved,
2269    fixed, and global registers.  We must also stay away from registers
2270    allocated in current_frame_info.gr_used_mask, since those include regs
2271    used all through the prologue.
2272 
2273    Any register allocated here must be used immediately.  The idea is to
2274    aid scheduling, not to solve data flow problems.  */
2275 
2276 static int last_scratch_gr_reg;
2277 
2278 static int
2279 next_scratch_gr_reg (void)
2280 {
2281   int i, regno;
2282 
2283   for (i = 0; i < 32; ++i)
2284     {
2285       regno = (last_scratch_gr_reg + i + 1) & 31;
2286       if (call_used_regs[regno]
2287 	  && ! fixed_regs[regno]
2288 	  && ! global_regs[regno]
2289 	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2290 	{
2291 	  last_scratch_gr_reg = regno;
2292 	  return regno;
2293 	}
2294     }
2295 
2296   /* There must be _something_ available.  */
2297   gcc_unreachable ();
2298 }
2299 
2300 /* Helper function for ia64_compute_frame_size, called through
2301    diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2302 
2303 static void
2304 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2305 {
2306   unsigned int regno = REGNO (reg);
2307   if (regno < 32)
2308     {
2309       unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2310       for (i = 0; i < n; ++i)
2311 	current_frame_info.gr_used_mask |= 1 << (regno + i);
2312     }
2313 }
2314 
2315 /* Compute the frame layout for the current function and record it in
2316    current_frame_info.  SIZE is the number of bytes of space needed for
2317    local variables.  */
2318 
2319 static void
2320 ia64_compute_frame_size (HOST_WIDE_INT size)
2321 {
2322   HOST_WIDE_INT total_size;
2323   HOST_WIDE_INT spill_size = 0;
2324   HOST_WIDE_INT extra_spill_size = 0;
2325   HOST_WIDE_INT pretend_args_size;
2326   HARD_REG_SET mask;
2327   int n_spilled = 0;
2328   int spilled_gr_p = 0;
2329   int spilled_fr_p = 0;
2330   unsigned int regno;
2331   int i;
2332 
2333   if (current_frame_info.initialized)
2334     return;
2335 
2336   memset (&current_frame_info, 0, sizeof current_frame_info);
2337   CLEAR_HARD_REG_SET (mask);
2338 
2339   /* Don't allocate scratches to the return register.  */
2340   diddle_return_value (mark_reg_gr_used_mask, NULL);
2341 
2342   /* Don't allocate scratches to the EH scratch registers.  */
2343   if (cfun->machine->ia64_eh_epilogue_sp)
2344     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2345   if (cfun->machine->ia64_eh_epilogue_bsp)
2346     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2347 
2348   /* Find the size of the register stack frame.  We have only 80 local
2349      registers, because we reserve 8 for the inputs and 8 for the
2350      outputs.  */
2351 
2352   /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2353      since we'll be adjusting that down later.  */
2354   regno = LOC_REG (78) + ! frame_pointer_needed;
2355   for (; regno >= LOC_REG (0); regno--)
2356     if (regs_ever_live[regno])
2357       break;
2358   current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2359 
2360   /* For functions marked with the syscall_linkage attribute, we must mark
2361      all eight input registers as in use, so that locals aren't visible to
2362      the caller.  */
2363 
2364   if (cfun->machine->n_varargs > 0
2365       || lookup_attribute ("syscall_linkage",
2366 			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2367     current_frame_info.n_input_regs = 8;
2368   else
2369     {
2370       for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2371 	if (regs_ever_live[regno])
2372 	  break;
2373       current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2374     }
2375 
2376   for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2377     if (regs_ever_live[regno])
2378       break;
2379   i = regno - OUT_REG (0) + 1;
2380 
2381 #ifndef PROFILE_HOOK
2382   /* When -p profiling, we need one output register for the mcount argument.
2383      Likewise for -a profiling for the bb_init_func argument.  For -ax
2384      profiling, we need two output registers for the two bb_init_trace_func
2385      arguments.  */
2386   if (current_function_profile)
2387     i = MAX (i, 1);
2388 #endif
2389   current_frame_info.n_output_regs = i;
2390 
2391   /* ??? No rotating register support yet.  */
2392   current_frame_info.n_rotate_regs = 0;
2393 
2394   /* Discover which registers need spilling, and how much room that
2395      will take.  Begin with floating point and general registers,
2396      which will always wind up on the stack.  */
2397 
2398   for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2399     if (regs_ever_live[regno] && ! call_used_regs[regno])
2400       {
2401 	SET_HARD_REG_BIT (mask, regno);
2402 	spill_size += 16;
2403 	n_spilled += 1;
2404 	spilled_fr_p = 1;
2405       }
2406 
2407   for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2408     if (regs_ever_live[regno] && ! call_used_regs[regno])
2409       {
2410 	SET_HARD_REG_BIT (mask, regno);
2411 	spill_size += 8;
2412 	n_spilled += 1;
2413 	spilled_gr_p = 1;
2414       }
2415 
2416   for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2417     if (regs_ever_live[regno] && ! call_used_regs[regno])
2418       {
2419 	SET_HARD_REG_BIT (mask, regno);
2420 	spill_size += 8;
2421 	n_spilled += 1;
2422       }
2423 
2424   /* Now come all special registers that might get saved in other
2425      general registers.  */
2426 
2427   if (frame_pointer_needed)
2428     {
2429       current_frame_info.reg_fp = find_gr_spill (1);
2430       /* If we did not get a register, then we take LOC79.  This is guaranteed
2431 	 to be free, even if regs_ever_live is already set, because this is
2432 	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2433 	 as we don't count loc79 above.  */
2434       if (current_frame_info.reg_fp == 0)
2435 	{
2436 	  current_frame_info.reg_fp = LOC_REG (79);
2437 	  current_frame_info.n_local_regs++;
2438 	}
2439     }
2440 
2441   if (! current_function_is_leaf)
2442     {
2443       /* Emit a save of BR0 if we call other functions.  Do this even
2444 	 if this function doesn't return, as EH depends on this to be
2445 	 able to unwind the stack.  */
2446       SET_HARD_REG_BIT (mask, BR_REG (0));
2447 
2448       current_frame_info.reg_save_b0 = find_gr_spill (1);
2449       if (current_frame_info.reg_save_b0 == 0)
2450 	{
2451 	  extra_spill_size += 8;
2452 	  n_spilled += 1;
2453 	}
2454 
2455       /* Similarly for ar.pfs.  */
2456       SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2457       current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2458       if (current_frame_info.reg_save_ar_pfs == 0)
2459 	{
2460 	  extra_spill_size += 8;
2461 	  n_spilled += 1;
2462 	}
2463 
2464       /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2465 	 registers are clobbered, so we fall back to the stack.  */
2466       current_frame_info.reg_save_gp
2467 	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2468       if (current_frame_info.reg_save_gp == 0)
2469 	{
2470 	  SET_HARD_REG_BIT (mask, GR_REG (1));
2471 	  spill_size += 8;
2472 	  n_spilled += 1;
2473 	}
2474     }
2475   else
2476     {
2477       if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2478 	{
2479 	  SET_HARD_REG_BIT (mask, BR_REG (0));
2480 	  extra_spill_size += 8;
2481 	  n_spilled += 1;
2482 	}
2483 
2484       if (regs_ever_live[AR_PFS_REGNUM])
2485 	{
2486 	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2487 	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2488 	  if (current_frame_info.reg_save_ar_pfs == 0)
2489 	    {
2490 	      extra_spill_size += 8;
2491 	      n_spilled += 1;
2492 	    }
2493 	}
2494     }
2495 
2496   /* Unwind descriptor hackery: things are most efficient if we allocate
2497      consecutive GR save registers for RP, PFS, FP in that order. However,
2498      it is absolutely critical that FP get the only hard register that's
2499      guaranteed to be free, so we allocated it first.  If all three did
2500      happen to be allocated hard regs, and are consecutive, rearrange them
2501      into the preferred order now.  */
2502   if (current_frame_info.reg_fp != 0
2503       && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2504       && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2505     {
2506       current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2507       current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2508       current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2509     }
2510 
2511   /* See if we need to store the predicate register block.  */
2512   for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2513     if (regs_ever_live[regno] && ! call_used_regs[regno])
2514       break;
2515   if (regno <= PR_REG (63))
2516     {
2517       SET_HARD_REG_BIT (mask, PR_REG (0));
2518       current_frame_info.reg_save_pr = find_gr_spill (1);
2519       if (current_frame_info.reg_save_pr == 0)
2520 	{
2521 	  extra_spill_size += 8;
2522 	  n_spilled += 1;
2523 	}
2524 
2525       /* ??? Mark them all as used so that register renaming and such
2526 	 are free to use them.  */
2527       for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2528 	regs_ever_live[regno] = 1;
2529     }
2530 
2531   /* If we're forced to use st8.spill, we're forced to save and restore
2532      ar.unat as well.  The check for existing liveness allows inline asm
2533      to touch ar.unat.  */
2534   if (spilled_gr_p || cfun->machine->n_varargs
2535       || regs_ever_live[AR_UNAT_REGNUM])
2536     {
2537       regs_ever_live[AR_UNAT_REGNUM] = 1;
2538       SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2539       current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2540       if (current_frame_info.reg_save_ar_unat == 0)
2541 	{
2542 	  extra_spill_size += 8;
2543 	  n_spilled += 1;
2544 	}
2545     }
2546 
2547   if (regs_ever_live[AR_LC_REGNUM])
2548     {
2549       SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2550       current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2551       if (current_frame_info.reg_save_ar_lc == 0)
2552 	{
2553 	  extra_spill_size += 8;
2554 	  n_spilled += 1;
2555 	}
2556     }
2557 
2558   /* If we have an odd number of words of pretend arguments written to
2559      the stack, then the FR save area will be unaligned.  We round the
2560      size of this area up to keep things 16 byte aligned.  */
2561   if (spilled_fr_p)
2562     pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2563   else
2564     pretend_args_size = current_function_pretend_args_size;
2565 
2566   total_size = (spill_size + extra_spill_size + size + pretend_args_size
2567 		+ current_function_outgoing_args_size);
2568   total_size = IA64_STACK_ALIGN (total_size);
2569 
2570   /* We always use the 16-byte scratch area provided by the caller, but
2571      if we are a leaf function, there's no one to which we need to provide
2572      a scratch area.  */
2573   if (current_function_is_leaf)
2574     total_size = MAX (0, total_size - 16);
2575 
2576   current_frame_info.total_size = total_size;
2577   current_frame_info.spill_cfa_off = pretend_args_size - 16;
2578   current_frame_info.spill_size = spill_size;
2579   current_frame_info.extra_spill_size = extra_spill_size;
2580   COPY_HARD_REG_SET (current_frame_info.mask, mask);
2581   current_frame_info.n_spilled = n_spilled;
2582   current_frame_info.initialized = reload_completed;
2583 }
2584 
2585 /* Compute the initial difference between the specified pair of registers.  */
2586 
2587 HOST_WIDE_INT
2588 ia64_initial_elimination_offset (int from, int to)
2589 {
2590   HOST_WIDE_INT offset;
2591 
2592   ia64_compute_frame_size (get_frame_size ());
2593   switch (from)
2594     {
2595     case FRAME_POINTER_REGNUM:
2596       switch (to)
2597 	{
2598 	case HARD_FRAME_POINTER_REGNUM:
2599 	  if (current_function_is_leaf)
2600 	    offset = -current_frame_info.total_size;
2601 	  else
2602 	    offset = -(current_frame_info.total_size
2603 		       - current_function_outgoing_args_size - 16);
2604 	  break;
2605 
2606 	case STACK_POINTER_REGNUM:
2607 	  if (current_function_is_leaf)
2608 	    offset = 0;
2609 	  else
2610 	    offset = 16 + current_function_outgoing_args_size;
2611 	  break;
2612 
2613 	default:
2614 	  gcc_unreachable ();
2615 	}
2616       break;
2617 
2618     case ARG_POINTER_REGNUM:
2619       /* Arguments start above the 16 byte save area, unless stdarg,
2620 	 in which case we store through the 16 byte save area.  */
2621       switch (to)
2622 	{
2623 	case HARD_FRAME_POINTER_REGNUM:
2624 	  offset = 16 - current_function_pretend_args_size;
2625 	  break;
2626 
2627 	case STACK_POINTER_REGNUM:
2628 	  offset = (current_frame_info.total_size
2629 		    + 16 - current_function_pretend_args_size);
2630 	  break;
2631 
2632 	default:
2633 	  gcc_unreachable ();
2634 	}
2635       break;
2636 
2637     default:
2638       gcc_unreachable ();
2639     }
2640 
2641   return offset;
2642 }
2643 
2644 /* If there are more than a trivial number of register spills, we use
2645    two interleaved iterators so that we can get two memory references
2646    per insn group.
2647 
2648    In order to simplify things in the prologue and epilogue expanders,
2649    we use helper functions to fix up the memory references after the
2650    fact with the appropriate offsets to a POST_MODIFY memory mode.
2651    The following data structure tracks the state of the two iterators
2652    while insns are being emitted.  */
2653 
2654 struct spill_fill_data
2655 {
2656   rtx init_after;		/* point at which to emit initializations */
2657   rtx init_reg[2];		/* initial base register */
2658   rtx iter_reg[2];		/* the iterator registers */
2659   rtx *prev_addr[2];		/* address of last memory use */
2660   rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2661   HOST_WIDE_INT prev_off[2];	/* last offset */
2662   int n_iter;			/* number of iterators in use */
2663   int next_iter;		/* next iterator to use */
2664   unsigned int save_gr_used_mask;
2665 };
2666 
2667 static struct spill_fill_data spill_fill_data;
2668 
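/* Initialize SPILL_FILL_DATA for N_SPILLS spills/fills based at INIT_REG
   and CFA offset CFA_OFF.  A second iterator register is claimed only when
   more than two slots are involved.  */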
2669 static void
2670 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2671 {
2672   int i;
2673 
2674   spill_fill_data.init_after = get_last_insn ();
2675   spill_fill_data.init_reg[0] = init_reg;
2676   spill_fill_data.init_reg[1] = init_reg;
2677   spill_fill_data.prev_addr[0] = NULL;
2678   spill_fill_data.prev_addr[1] = NULL;
2679   spill_fill_data.prev_insn[0] = NULL;
2680   spill_fill_data.prev_insn[1] = NULL;
2681   spill_fill_data.prev_off[0] = cfa_off;
2682   spill_fill_data.prev_off[1] = cfa_off;
2683   spill_fill_data.next_iter = 0;
2684   spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2685 
2686   spill_fill_data.n_iter = 1 + (n_spills > 2);
2687   for (i = 0; i < spill_fill_data.n_iter; ++i)
2688     {
2689       int regno = next_scratch_gr_reg ();
2690       spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2691       current_frame_info.gr_used_mask |= 1 << regno;
2692     }
2693 }
2694 
2695 static void
2696 finish_spill_pointers (void)
2697 {
2698   current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2699 }
2700 
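/* Return a MEM for the spill/fill slot of REG at CFA offset CFA_OFF,
   advancing the current iterator register.  When the displacement from the
   previous use fits, the previous memory reference is rewritten into a
   POST_MODIFY so that no separate add is needed.  */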
2701 static rtx
2702 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2703 {
2704   int iter = spill_fill_data.next_iter;
2705   HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2706   rtx disp_rtx = GEN_INT (disp);
2707   rtx mem;
2708 
2709   if (spill_fill_data.prev_addr[iter])
2710     {
2711       if (CONST_OK_FOR_N (disp))
2712 	{
2713 	  *spill_fill_data.prev_addr[iter]
2714 	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2715 				   gen_rtx_PLUS (DImode,
2716 						 spill_fill_data.iter_reg[iter],
2717 						 disp_rtx));
2718 	  REG_NOTES (spill_fill_data.prev_insn[iter])
2719 	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2720 				 REG_NOTES (spill_fill_data.prev_insn[iter]));
2721 	}
2722       else
2723 	{
2724 	  /* ??? Could use register post_modify for loads.  */
2725 	  if (! CONST_OK_FOR_I (disp))
2726 	    {
2727 	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2728 	      emit_move_insn (tmp, disp_rtx);
2729 	      disp_rtx = tmp;
2730 	    }
2731 	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2732 				 spill_fill_data.iter_reg[iter], disp_rtx));
2733 	}
2734     }
2735   /* Micro-optimization: if we've created a frame pointer, it's at
2736      CFA 0, which may allow the real iterator to be initialized lower,
2737      slightly increasing parallelism.  Also, if there are few saves
2738      it may eliminate the iterator entirely.  */
2739   else if (disp == 0
2740 	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2741 	   && frame_pointer_needed)
2742     {
2743       mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2744       set_mem_alias_set (mem, get_varargs_alias_set ());
2745       return mem;
2746     }
2747   else
2748     {
2749       rtx seq, insn;
2750 
2751       if (disp == 0)
2752 	seq = gen_movdi (spill_fill_data.iter_reg[iter],
2753 			 spill_fill_data.init_reg[iter]);
2754       else
2755 	{
2756 	  start_sequence ();
2757 
2758 	  if (! CONST_OK_FOR_I (disp))
2759 	    {
2760 	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2761 	      emit_move_insn (tmp, disp_rtx);
2762 	      disp_rtx = tmp;
2763 	    }
2764 
2765 	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2766 				 spill_fill_data.init_reg[iter],
2767 				 disp_rtx));
2768 
2769 	  seq = get_insns ();
2770 	  end_sequence ();
2771 	}
2772 
2773       /* Careful for being the first insn in a sequence.  */
2774       if (spill_fill_data.init_after)
2775 	insn = emit_insn_after (seq, spill_fill_data.init_after);
2776       else
2777 	{
2778 	  rtx first = get_insns ();
2779 	  if (first)
2780 	    insn = emit_insn_before (seq, first);
2781 	  else
2782 	    insn = emit_insn (seq);
2783 	}
2784       spill_fill_data.init_after = insn;
2785 
2786       /* If DISP is 0, we may or may not have a further adjustment
2787 	 afterward.  If we do, then the load/store insn may be modified
2788 	 to be a post-modify.  If we don't, then this copy may be
2789 	 eliminated by copyprop_hardreg_forward, which makes this
2790 	 insn garbage, which runs afoul of the sanity check in
2791 	 propagate_one_insn.  So mark this insn as legal to delete.  */
2792       if (disp == 0)
2793 	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2794 					     REG_NOTES (insn));
2795     }
2796 
2797   mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2798 
2799   /* ??? Not all of the spills are for varargs, but some of them are.
2800      The rest of the spills belong in an alias set of their own.  But
2801      it doesn't actually hurt to include them here.  */
2802   set_mem_alias_set (mem, get_varargs_alias_set ());
2803 
2804   spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2805   spill_fill_data.prev_off[iter] = cfa_off;
2806 
2807   if (++iter >= spill_fill_data.n_iter)
2808     iter = 0;
2809   spill_fill_data.next_iter = iter;
2810 
2811   return mem;
2812 }
2813 
2814 static void
2815 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2816 	  rtx frame_reg)
2817 {
2818   int iter = spill_fill_data.next_iter;
2819   rtx mem, insn;
2820 
2821   mem = spill_restore_mem (reg, cfa_off);
2822   insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2823   spill_fill_data.prev_insn[iter] = insn;
2824 
2825   if (frame_reg)
2826     {
2827       rtx base;
2828       HOST_WIDE_INT off;
2829 
2830       RTX_FRAME_RELATED_P (insn) = 1;
2831 
2832       /* Don't even pretend that the unwind code can intuit its way
2833 	 through a pair of interleaved post_modify iterators.  Just
2834 	 provide the correct answer.  */
2835 
2836       if (frame_pointer_needed)
2837 	{
2838 	  base = hard_frame_pointer_rtx;
2839 	  off = - cfa_off;
2840 	}
2841       else
2842 	{
2843 	  base = stack_pointer_rtx;
2844 	  off = current_frame_info.total_size - cfa_off;
2845 	}
2846 
2847       REG_NOTES (insn)
2848 	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2849 		gen_rtx_SET (VOIDmode,
2850 			     gen_rtx_MEM (GET_MODE (reg),
2851 					  plus_constant (base, off)),
2852 			     frame_reg),
2853 		REG_NOTES (insn));
2854     }
2855 }
2856 
2857 static void
2858 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2859 {
2860   int iter = spill_fill_data.next_iter;
2861   rtx insn;
2862 
2863   insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2864 				GEN_INT (cfa_off)));
2865   spill_fill_data.prev_insn[iter] = insn;
2866 }
2867 
2868 /* Wrapper functions that discard the CONST_INT spill offset.  These
2869    exist so that we can give gr_spill/gr_fill the offset they need and
2870    use a consistent function interface.  */
2871 
2872 static rtx
2873 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2874 {
2875   return gen_movdi (dest, src);
2876 }
2877 
2878 static rtx
2879 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2880 {
2881   return gen_fr_spill (dest, src);
2882 }
2883 
2884 static rtx
2885 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2886 {
2887   return gen_fr_restore (dest, src);
2888 }
2889 
2890 /* Called after register allocation to add any instructions needed for the
2891    prologue.  Using a prologue insn is favored compared to putting all of the
2892    instructions in output_function_prologue(), since it allows the scheduler
2893    to intermix instructions with the saves of the caller saved registers.  In
2894    some cases, it might be necessary to emit a barrier instruction as the last
2895    insn to prevent such scheduling.
2896 
2897    Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2898    so that the debug info generation code can handle them properly.
2899 
2900    The register save area is laid out like so:
2901    cfa+16
2902 	[ varargs spill area ]
2903 	[ fr register spill area ]
2904 	[ br register spill area ]
2905 	[ ar register spill area ]
2906 	[ pr register spill area ]
2907 	[ gr register spill area ] */
2908 
2909 /* ??? We get inefficient code when the frame size is larger than what fits
2910    in an adds instruction.  */
2911 
2912 void
2913 ia64_expand_prologue (void)
2914 {
2915   rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2916   int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2917   rtx reg, alt_reg;
2918 
2919   ia64_compute_frame_size (get_frame_size ());
2920   last_scratch_gr_reg = 15;
2921 
2922   /* If there is no epilogue, then we don't need some prologue insns.
2923      We need to avoid emitting the dead prologue insns, because flow
2924      will complain about them.  */
2925   if (optimize)
2926     {
2927       edge e;
2928       edge_iterator ei;
2929 
2930       FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2931 	if ((e->flags & EDGE_FAKE) == 0
2932 	    && (e->flags & EDGE_FALLTHRU) != 0)
2933 	  break;
2934       epilogue_p = (e != NULL);
2935     }
2936   else
2937     epilogue_p = 1;
2938 
2939   /* Set the local, input, and output register names.  We need to do this
2940      for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2941      half.  If we use in/loc/out register names, then we get assembler errors
2942      in crtn.S because there is no alloc insn or regstk directive in there.  */
2943   if (! TARGET_REG_NAMES)
2944     {
2945       int inputs = current_frame_info.n_input_regs;
2946       int locals = current_frame_info.n_local_regs;
2947       int outputs = current_frame_info.n_output_regs;
2948 
2949       for (i = 0; i < inputs; i++)
2950 	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2951       for (i = 0; i < locals; i++)
2952 	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2953       for (i = 0; i < outputs; i++)
2954 	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2955     }
2956 
2957   /* Set the frame pointer register name.  The regnum is logically loc79,
2958      but of course we'll not have allocated that many locals.  Rather than
2959      worrying about renumbering the existing rtxs, we adjust the name.  */
2960   /* ??? This code means that we can never use one local register when
2961      there is a frame pointer.  loc79 gets wasted in this case, as it is
2962      renamed to a register that will never be used.  See also the try_locals
2963      code in find_gr_spill.  */
2964   if (current_frame_info.reg_fp)
2965     {
2966       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2967       reg_names[HARD_FRAME_POINTER_REGNUM]
2968 	= reg_names[current_frame_info.reg_fp];
2969       reg_names[current_frame_info.reg_fp] = tmp;
2970     }
2971 
2972   /* We don't need an alloc instruction if we've used no outputs or locals.  */
2973   if (current_frame_info.n_local_regs == 0
2974       && current_frame_info.n_output_regs == 0
2975       && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2976       && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2977     {
2978       /* If there is no alloc, but there are input registers used, then we
2979 	 need a .regstk directive.  */
2980       current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2981       ar_pfs_save_reg = NULL_RTX;
2982     }
2983   else
2984     {
2985       current_frame_info.need_regstk = 0;
2986 
2987       if (current_frame_info.reg_save_ar_pfs)
2988 	regno = current_frame_info.reg_save_ar_pfs;
2989       else
2990 	regno = next_scratch_gr_reg ();
2991       ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2992 
2993       insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2994 				   GEN_INT (current_frame_info.n_input_regs),
2995 				   GEN_INT (current_frame_info.n_local_regs),
2996 				   GEN_INT (current_frame_info.n_output_regs),
2997 				   GEN_INT (current_frame_info.n_rotate_regs)));
2998       RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2999     }
3000 
3001   /* Set up frame pointer, stack pointer, and spill iterators.  */
3002 
3003   n_varargs = cfun->machine->n_varargs;
3004   setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3005 			stack_pointer_rtx, 0);
3006 
3007   if (frame_pointer_needed)
3008     {
3009       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3010       RTX_FRAME_RELATED_P (insn) = 1;
3011     }
3012 
3013   if (current_frame_info.total_size != 0)
3014     {
3015       rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3016       rtx offset;
3017 
3018       if (CONST_OK_FOR_I (- current_frame_info.total_size))
3019 	offset = frame_size_rtx;
3020       else
3021 	{
3022 	  regno = next_scratch_gr_reg ();
3023 	  offset = gen_rtx_REG (DImode, regno);
3024 	  emit_move_insn (offset, frame_size_rtx);
3025 	}
3026 
3027       insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3028 				    stack_pointer_rtx, offset));
3029 
3030       if (! frame_pointer_needed)
3031 	{
3032 	  RTX_FRAME_RELATED_P (insn) = 1;
3033 	  if (GET_CODE (offset) != CONST_INT)
3034 	    {
3035 	      REG_NOTES (insn)
3036 		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3037 			gen_rtx_SET (VOIDmode,
3038 				     stack_pointer_rtx,
3039 				     gen_rtx_PLUS (DImode,
3040 						   stack_pointer_rtx,
3041 						   frame_size_rtx)),
3042 			REG_NOTES (insn));
3043 	    }
3044 	}
3045 
3046       /* ??? At this point we must generate a magic insn that appears to
3047 	 modify the stack pointer, the frame pointer, and all spill
3048 	 iterators.  This would allow the most scheduling freedom.  For
3049 	 now, just hard stop.  */
3050       emit_insn (gen_blockage ());
3051     }
3052 
3053   /* Must copy out ar.unat before doing any integer spills.  */
3054   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3055     {
3056       if (current_frame_info.reg_save_ar_unat)
3057 	ar_unat_save_reg
3058 	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3059       else
3060 	{
3061 	  alt_regno = next_scratch_gr_reg ();
3062 	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3063 	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3064 	}
3065 
3066       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3067       insn = emit_move_insn (ar_unat_save_reg, reg);
3068       RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
3069 
3070       /* Even if we're not going to generate an epilogue, we still
3071 	 need to save the register so that EH works.  */
3072       if (! epilogue_p && current_frame_info.reg_save_ar_unat)
3073 	emit_insn (gen_prologue_use (ar_unat_save_reg));
3074     }
3075   else
3076     ar_unat_save_reg = NULL_RTX;
3077 
3078   /* Spill all varargs registers.  Do this before spilling any GR registers,
3079      since we want the UNAT bits for the GR registers to override the UNAT
3080      bits from varargs, which we don't care about.  */
3081 
3082   cfa_off = -16;
3083   for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3084     {
3085       reg = gen_rtx_REG (DImode, regno);
3086       do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3087     }
3088 
3089   /* Locate the bottom of the register save area.  */
3090   cfa_off = (current_frame_info.spill_cfa_off
3091 	     + current_frame_info.spill_size
3092 	     + current_frame_info.extra_spill_size);
3093 
3094   /* Save the predicate register block either in a register or in memory.  */
3095   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3096     {
3097       reg = gen_rtx_REG (DImode, PR_REG (0));
3098       if (current_frame_info.reg_save_pr != 0)
3099 	{
3100 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3101 	  insn = emit_move_insn (alt_reg, reg);
3102 
3103 	  /* ??? Denote pr spill/fill by a DImode move that modifies all
3104 	     64 hard registers.  */
3105 	  RTX_FRAME_RELATED_P (insn) = 1;
3106 	  REG_NOTES (insn)
3107 	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3108 			gen_rtx_SET (VOIDmode, alt_reg, reg),
3109 			REG_NOTES (insn));
3110 
3111 	  /* Even if we're not going to generate an epilogue, we still
3112 	     need to save the register so that EH works.  */
3113 	  if (! epilogue_p)
3114 	    emit_insn (gen_prologue_use (alt_reg));
3115 	}
3116       else
3117 	{
3118 	  alt_regno = next_scratch_gr_reg ();
3119 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3120 	  insn = emit_move_insn (alt_reg, reg);
3121 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3122 	  cfa_off -= 8;
3123 	}
3124     }
3125 
3126   /* Handle AR regs in numerical order.  All of them get special handling.  */
3127   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3128       && current_frame_info.reg_save_ar_unat == 0)
3129     {
3130       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3131       do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3132       cfa_off -= 8;
3133     }
3134 
3135   /* The alloc insn already copied ar.pfs into a general register.  The
3136      only thing we have to do now is copy that register to a stack slot
3137      if we'd not allocated a local register for the job.  */
3138   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3139       && current_frame_info.reg_save_ar_pfs == 0)
3140     {
3141       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3142       do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3143       cfa_off -= 8;
3144     }
3145 
3146   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3147     {
3148       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3149       if (current_frame_info.reg_save_ar_lc != 0)
3150 	{
3151 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3152 	  insn = emit_move_insn (alt_reg, reg);
3153 	  RTX_FRAME_RELATED_P (insn) = 1;
3154 
3155 	  /* Even if we're not going to generate an epilogue, we still
3156 	     need to save the register so that EH works.  */
3157 	  if (! epilogue_p)
3158 	    emit_insn (gen_prologue_use (alt_reg));
3159 	}
3160       else
3161 	{
3162 	  alt_regno = next_scratch_gr_reg ();
3163 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3164 	  emit_move_insn (alt_reg, reg);
3165 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3166 	  cfa_off -= 8;
3167 	}
3168     }
3169 
3170   /* Save the return pointer.  */
3171   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3172     {
3173       reg = gen_rtx_REG (DImode, BR_REG (0));
3174       if (current_frame_info.reg_save_b0 != 0)
3175 	{
3176 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3177 	  insn = emit_move_insn (alt_reg, reg);
3178 	  RTX_FRAME_RELATED_P (insn) = 1;
3179 
3180 	  /* Even if we're not going to generate an epilogue, we still
3181 	     need to save the register so that EH works.  */
3182 	  if (! epilogue_p)
3183 	    emit_insn (gen_prologue_use (alt_reg));
3184 	}
3185       else
3186 	{
3187 	  alt_regno = next_scratch_gr_reg ();
3188 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3189 	  emit_move_insn (alt_reg, reg);
3190 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3191 	  cfa_off -= 8;
3192 	}
3193     }
3194 
3195   if (current_frame_info.reg_save_gp)
3196     {
3197       insn = emit_move_insn (gen_rtx_REG (DImode,
3198 					  current_frame_info.reg_save_gp),
3199 			     pic_offset_table_rtx);
3200       /* We don't know for sure yet if this is actually needed, since
3201 	 we've not split the PIC call patterns.  If all of the calls
3202 	 are indirect, and not followed by any uses of the gp, then
3203 	 this save is dead.  Allow it to go away.  */
3204       REG_NOTES (insn)
3205 	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
3206     }
3207 
3208   /* We should now be at the base of the gr/br/fr spill area.  */
3209   gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3210 			  + current_frame_info.spill_size));
3211 
3212   /* Spill all general registers.  */
3213   for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3214     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3215       {
3216 	reg = gen_rtx_REG (DImode, regno);
3217 	do_spill (gen_gr_spill, reg, cfa_off, reg);
3218 	cfa_off -= 8;
3219       }
3220 
3221   /* Spill the rest of the BR registers.  */
3222   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3223     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3224       {
3225 	alt_regno = next_scratch_gr_reg ();
3226 	alt_reg = gen_rtx_REG (DImode, alt_regno);
3227 	reg = gen_rtx_REG (DImode, regno);
3228 	emit_move_insn (alt_reg, reg);
3229 	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3230 	cfa_off -= 8;
3231       }
3232 
3233   /* Align the frame and spill all FR registers.  */
3234   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3235     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3236       {
3237         gcc_assert (!(cfa_off & 15));
3238 	reg = gen_rtx_REG (XFmode, regno);
3239 	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3240 	cfa_off -= 16;
3241       }
3242 
3243   gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3244 
3245   finish_spill_pointers ();
3246 }
3247 
3248 /* Called after register allocation to add any instructions needed for the
3249    epilogue.  Using an epilogue insn is favored compared to putting all of the
3250    instructions in output_function_epilogue(), since it allows the scheduler
3251    to intermix instructions with the restores of the caller saved registers.  In
3252    some cases, it might be necessary to emit a barrier instruction as the last
3253    insn to prevent such scheduling.  */
3254 
3255 void
3256 ia64_expand_epilogue (int sibcall_p)
3257 {
3258   rtx insn, reg, alt_reg, ar_unat_save_reg;
3259   int regno, alt_regno, cfa_off;
3260 
3261   ia64_compute_frame_size (get_frame_size ());
3262 
3263   /* If there is a frame pointer, then we use it instead of the stack
3264      pointer, so that the stack pointer does not need to be valid when
3265      the epilogue starts.  See EXIT_IGNORE_STACK.  */
3266   if (frame_pointer_needed)
3267     setup_spill_pointers (current_frame_info.n_spilled,
3268 			  hard_frame_pointer_rtx, 0);
3269   else
3270     setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3271 			  current_frame_info.total_size);
3272 
3273   if (current_frame_info.total_size != 0)
3274     {
3275       /* ??? At this point we must generate a magic insn that appears to
3276          modify the spill iterators and the frame pointer.  This would
3277 	 allow the most scheduling freedom.  For now, just hard stop.  */
3278       emit_insn (gen_blockage ());
3279     }
3280 
3281   /* Locate the bottom of the register save area.  */
3282   cfa_off = (current_frame_info.spill_cfa_off
3283 	     + current_frame_info.spill_size
3284 	     + current_frame_info.extra_spill_size);
3285 
3286   /* Restore the predicate registers.  */
3287   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3288     {
3289       if (current_frame_info.reg_save_pr != 0)
3290 	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3291       else
3292 	{
3293 	  alt_regno = next_scratch_gr_reg ();
3294 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3295 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3296 	  cfa_off -= 8;
3297 	}
3298       reg = gen_rtx_REG (DImode, PR_REG (0));
3299       emit_move_insn (reg, alt_reg);
3300     }
3301 
3302   /* Restore the application registers.  */
3303 
3304   /* Load the saved unat from the stack, but do not restore it until
3305      after the GRs have been restored.  */
3306   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3307     {
3308       if (current_frame_info.reg_save_ar_unat != 0)
3309         ar_unat_save_reg
3310 	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3311       else
3312 	{
3313 	  alt_regno = next_scratch_gr_reg ();
3314 	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3315 	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3316 	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3317 	  cfa_off -= 8;
3318 	}
3319     }
3320   else
3321     ar_unat_save_reg = NULL_RTX;
3322 
3323   if (current_frame_info.reg_save_ar_pfs != 0)
3324     {
3325       alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3326       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3327       emit_move_insn (reg, alt_reg);
3328     }
3329   else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3330     {
3331       alt_regno = next_scratch_gr_reg ();
3332       alt_reg = gen_rtx_REG (DImode, alt_regno);
3333       do_restore (gen_movdi_x, alt_reg, cfa_off);
3334       cfa_off -= 8;
3335       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3336       emit_move_insn (reg, alt_reg);
3337     }
3338 
3339   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3340     {
3341       if (current_frame_info.reg_save_ar_lc != 0)
3342 	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3343       else
3344 	{
3345 	  alt_regno = next_scratch_gr_reg ();
3346 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3347 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3348 	  cfa_off -= 8;
3349 	}
3350       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3351       emit_move_insn (reg, alt_reg);
3352     }
3353 
3354   /* Restore the return pointer.  */
3355   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3356     {
3357       if (current_frame_info.reg_save_b0 != 0)
3358 	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3359       else
3360 	{
3361 	  alt_regno = next_scratch_gr_reg ();
3362 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3363 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3364 	  cfa_off -= 8;
3365 	}
3366       reg = gen_rtx_REG (DImode, BR_REG (0));
3367       emit_move_insn (reg, alt_reg);
3368     }
3369 
3370   /* We should now be at the base of the gr/br/fr spill area.  */
3371   gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3372 			  + current_frame_info.spill_size));
3373 
3374   /* The GP may be stored on the stack in the prologue, but it's
3375      never restored in the epilogue.  Skip the stack slot.  */
3376   if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3377     cfa_off -= 8;
3378 
3379   /* Restore all general registers.  */
3380   for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3381     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3382       {
3383 	reg = gen_rtx_REG (DImode, regno);
3384 	do_restore (gen_gr_restore, reg, cfa_off);
3385 	cfa_off -= 8;
3386       }
3387 
3388   /* Restore the branch registers.  */
3389   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3390     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3391       {
3392 	alt_regno = next_scratch_gr_reg ();
3393 	alt_reg = gen_rtx_REG (DImode, alt_regno);
3394 	do_restore (gen_movdi_x, alt_reg, cfa_off);
3395 	cfa_off -= 8;
3396 	reg = gen_rtx_REG (DImode, regno);
3397 	emit_move_insn (reg, alt_reg);
3398       }
3399 
3400   /* Restore floating point registers.  */
3401   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3402     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3403       {
3404         gcc_assert (!(cfa_off & 15));
3405 	reg = gen_rtx_REG (XFmode, regno);
3406 	do_restore (gen_fr_restore_x, reg, cfa_off);
3407 	cfa_off -= 16;
3408       }
3409 
3410   /* Restore ar.unat for real.  */
3411   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3412     {
3413       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3414       emit_move_insn (reg, ar_unat_save_reg);
3415     }
3416 
3417   gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3418 
3419   finish_spill_pointers ();
3420 
3421   if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3422     {
3423       /* ??? At this point we must generate a magic insn that appears to
3424          modify the spill iterators, the stack pointer, and the frame
3425 	 pointer.  This would allow the most scheduling freedom.  For now,
3426 	 just hard stop.  */
3427       emit_insn (gen_blockage ());
3428     }
3429 
3430   if (cfun->machine->ia64_eh_epilogue_sp)
3431     emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3432   else if (frame_pointer_needed)
3433     {
3434       insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3435       RTX_FRAME_RELATED_P (insn) = 1;
3436     }
3437   else if (current_frame_info.total_size)
3438     {
3439       rtx offset, frame_size_rtx;
3440 
3441       frame_size_rtx = GEN_INT (current_frame_info.total_size);
3442       if (CONST_OK_FOR_I (current_frame_info.total_size))
3443 	offset = frame_size_rtx;
3444       else
3445 	{
3446 	  regno = next_scratch_gr_reg ();
3447 	  offset = gen_rtx_REG (DImode, regno);
3448 	  emit_move_insn (offset, frame_size_rtx);
3449 	}
3450 
3451       insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3452 				    offset));
3453 
3454       RTX_FRAME_RELATED_P (insn) = 1;
3455       if (GET_CODE (offset) != CONST_INT)
3456 	{
3457 	  REG_NOTES (insn)
3458 	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3459 			gen_rtx_SET (VOIDmode,
3460 				     stack_pointer_rtx,
3461 				     gen_rtx_PLUS (DImode,
3462 						   stack_pointer_rtx,
3463 						   frame_size_rtx)),
3464 			REG_NOTES (insn));
3465 	}
3466     }
3467 
3468   if (cfun->machine->ia64_eh_epilogue_bsp)
3469     emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3470 
3471   if (! sibcall_p)
3472     emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3473   else
3474     {
3475       int fp = GR_REG (2);
3476       /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
3477 	 first available call-clobbered register.  If there was a frame_pointer
3478 	 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3479 	 so we have to make sure we're using the string "r2" when emitting
3480 	 the register name for the assembler.  */
3481       if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3482 	fp = HARD_FRAME_POINTER_REGNUM;
3483 
3484       /* We must emit an alloc to force the input registers to become output
3485 	 registers.  Otherwise, if the callee tries to pass its parameters
3486 	 through to another call without an intervening alloc, then these
3487 	 values get lost.  */
3488       /* ??? We don't need to preserve all input registers.  We only need to
3489 	 preserve those input registers used as arguments to the sibling call.
3490 	 It is unclear how to compute that number here.  */
3491       if (current_frame_info.n_input_regs != 0)
3492 	{
3493 	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3494 	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3495 				const0_rtx, const0_rtx,
3496 				n_inputs, const0_rtx));
3497 	  RTX_FRAME_RELATED_P (insn) = 1;
3498 	}
3499     }
3500 }
3501 
3502 /* Return 1 if br.ret can do all the work required to return from a
3503    function.  */
3504 
3505 int
3506 ia64_direct_return (void)
3507 {
3508   if (reload_completed && ! frame_pointer_needed)
3509     {
3510       ia64_compute_frame_size (get_frame_size ());
3511 
3512       return (current_frame_info.total_size == 0
3513 	      && current_frame_info.n_spilled == 0
3514 	      && current_frame_info.reg_save_b0 == 0
3515 	      && current_frame_info.reg_save_pr == 0
3516 	      && current_frame_info.reg_save_ar_pfs == 0
3517 	      && current_frame_info.reg_save_ar_unat == 0
3518 	      && current_frame_info.reg_save_ar_lc == 0);
3519     }
3520   return 0;
3521 }
3522 
3523 /* Return the magic cookie that we use to hold the return address
3524    during early compilation.  */
3525 
3526 rtx
3527 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3528 {
3529   if (count != 0)
3530     return NULL;
3531   return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3532 }
3533 
3534 /* Split this value after reload, now that we know where the return
3535    address is saved.  */
3536 
3537 void
3538 ia64_split_return_addr_rtx (rtx dest)
3539 {
3540   rtx src;
3541 
3542   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3543     {
3544       if (current_frame_info.reg_save_b0 != 0)
3545 	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3546       else
3547 	{
3548 	  HOST_WIDE_INT off;
3549 	  unsigned int regno;
3550 
3551 	  /* Compute offset from CFA for BR0.  */
3552 	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
3553 	  off = (current_frame_info.spill_cfa_off
3554 		 + current_frame_info.spill_size);
3555 	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3556 	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3557 	      off -= 8;
3558 
3559 	  /* Convert CFA offset to a register based offset.  */
3560 	  if (frame_pointer_needed)
3561 	    src = hard_frame_pointer_rtx;
3562 	  else
3563 	    {
3564 	      src = stack_pointer_rtx;
3565 	      off += current_frame_info.total_size;
3566 	    }
3567 
3568 	  /* Load address into scratch register.  */
3569 	  if (CONST_OK_FOR_I (off))
3570 	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3571 	  else
3572 	    {
3573 	      emit_move_insn (dest, GEN_INT (off));
3574 	      emit_insn (gen_adddi3 (dest, src, dest));
3575 	    }
3576 
3577 	  src = gen_rtx_MEM (Pmode, dest);
3578 	}
3579     }
3580   else
3581     src = gen_rtx_REG (DImode, BR_REG (0));
3582 
3583   emit_move_insn (dest, src);
3584 }
3585 
3586 int
3587 ia64_hard_regno_rename_ok (int from, int to)
3588 {
3589   /* Don't clobber any of the registers we reserved for the prologue.  */
3590   if (to == current_frame_info.reg_fp
3591       || to == current_frame_info.reg_save_b0
3592       || to == current_frame_info.reg_save_pr
3593       || to == current_frame_info.reg_save_ar_pfs
3594       || to == current_frame_info.reg_save_ar_unat
3595       || to == current_frame_info.reg_save_ar_lc)
3596     return 0;
3597 
3598   if (from == current_frame_info.reg_fp
3599       || from == current_frame_info.reg_save_b0
3600       || from == current_frame_info.reg_save_pr
3601       || from == current_frame_info.reg_save_ar_pfs
3602       || from == current_frame_info.reg_save_ar_unat
3603       || from == current_frame_info.reg_save_ar_lc)
3604     return 0;
3605 
3606   /* Don't use output registers outside the register frame.  */
3607   if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3608     return 0;
3609 
3610   /* Retain even/oddness on predicate register pairs.  */
3611   if (PR_REGNO_P (from) && PR_REGNO_P (to))
3612     return (from & 1) == (to & 1);
3613 
3614   return 1;
3615 }
3616 
3617 /* Target hook for assembling integer objects.  Handle word-sized
3618    aligned objects and detect the cases when @fptr is needed.  */
3619 
3620 static bool
3621 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3622 {
3623   if (size == POINTER_SIZE / BITS_PER_UNIT
3624       && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3625       && GET_CODE (x) == SYMBOL_REF
3626       && SYMBOL_REF_FUNCTION_P (x))
3627     {
3628       static const char * const directive[2][2] = {
3629 	  /* 64-bit pointer */  /* 32-bit pointer */
3630 	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
3631 	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
3632       };
3633       fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3634       output_addr_const (asm_out_file, x);
3635       fputs (")\n", asm_out_file);
3636       return true;
3637     }
3638   return default_assemble_integer (x, size, aligned_p);
3639 }
3640 
3641 /* Emit the function prologue.  */
3642 
3643 static void
3644 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3645 {
3646   int mask, grsave, grsave_prev;
3647 
3648   if (current_frame_info.need_regstk)
3649     fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3650 	     current_frame_info.n_input_regs,
3651 	     current_frame_info.n_local_regs,
3652 	     current_frame_info.n_output_regs,
3653 	     current_frame_info.n_rotate_regs);
3654 
3655   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3656     return;
3657 
3658   /* Emit the .prologue directive.  */
3659 
3660   mask = 0;
3661   grsave = grsave_prev = 0;
3662   if (current_frame_info.reg_save_b0 != 0)
3663     {
3664       mask |= 8;
3665       grsave = grsave_prev = current_frame_info.reg_save_b0;
3666     }
3667   if (current_frame_info.reg_save_ar_pfs != 0
3668       && (grsave_prev == 0
3669 	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3670     {
3671       mask |= 4;
3672       if (grsave_prev == 0)
3673 	grsave = current_frame_info.reg_save_ar_pfs;
3674       grsave_prev = current_frame_info.reg_save_ar_pfs;
3675     }
3676   if (current_frame_info.reg_fp != 0
3677       && (grsave_prev == 0
3678 	  || current_frame_info.reg_fp == grsave_prev + 1))
3679     {
3680       mask |= 2;
3681       if (grsave_prev == 0)
3682 	grsave = HARD_FRAME_POINTER_REGNUM;
3683       grsave_prev = current_frame_info.reg_fp;
3684     }
3685   if (current_frame_info.reg_save_pr != 0
3686       && (grsave_prev == 0
3687 	  || current_frame_info.reg_save_pr == grsave_prev + 1))
3688     {
3689       mask |= 1;
3690       if (grsave_prev == 0)
3691 	grsave = current_frame_info.reg_save_pr;
3692     }
3693 
3694   if (mask && TARGET_GNU_AS)
3695     fprintf (file, "\t.prologue %d, %d\n", mask,
3696 	     ia64_dbx_register_number (grsave));
3697   else
3698     fputs ("\t.prologue\n", file);
3699 
3700   /* Emit a .spill directive, if necessary, to relocate the base of
3701      the register spill area.  */
3702   if (current_frame_info.spill_cfa_off != -16)
3703     fprintf (file, "\t.spill %ld\n",
3704 	     (long) (current_frame_info.spill_cfa_off
3705 		     + current_frame_info.spill_size));
3706 }
3707 
3708 /* Emit the .body directive at the scheduled end of the prologue.  */
3709 
3710 static void
3711 ia64_output_function_end_prologue (FILE *file)
3712 {
3713   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3714     return;
3715 
3716   fputs ("\t.body\n", file);
3717 }
3718 
3719 /* Emit the function epilogue.  */
3720 
3721 static void
3722 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3723 			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3724 {
3725   int i;
3726 
3727   if (current_frame_info.reg_fp)
3728     {
3729       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3730       reg_names[HARD_FRAME_POINTER_REGNUM]
3731 	= reg_names[current_frame_info.reg_fp];
3732       reg_names[current_frame_info.reg_fp] = tmp;
3733     }
3734   if (! TARGET_REG_NAMES)
3735     {
3736       for (i = 0; i < current_frame_info.n_input_regs; i++)
3737 	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3738       for (i = 0; i < current_frame_info.n_local_regs; i++)
3739 	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3740       for (i = 0; i < current_frame_info.n_output_regs; i++)
3741 	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3742     }
3743 
3744   current_frame_info.initialized = 0;
3745 }
3746 
3747 int
3748 ia64_dbx_register_number (int regno)
3749 {
3750   /* In ia64_expand_prologue we quite literally renamed the frame pointer
3751      from its home at loc79 to something inside the register frame.  We
3752      must perform the same renumbering here for the debug info.  */
3753   if (current_frame_info.reg_fp)
3754     {
3755       if (regno == HARD_FRAME_POINTER_REGNUM)
3756 	regno = current_frame_info.reg_fp;
3757       else if (regno == current_frame_info.reg_fp)
3758 	regno = HARD_FRAME_POINTER_REGNUM;
3759     }
3760 
3761   if (IN_REGNO_P (regno))
3762     return 32 + regno - IN_REG (0);
3763   else if (LOC_REGNO_P (regno))
3764     return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3765   else if (OUT_REGNO_P (regno))
3766     return (32 + current_frame_info.n_input_regs
3767 	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
3768   else
3769     return regno;
3770 }
3771 
3772 void
3773 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3774 {
3775   rtx addr_reg, eight = GEN_INT (8);
3776 
3777   /* The Intel assembler requires that the global __ia64_trampoline symbol
3778      be declared explicitly.  */
3779   if (!TARGET_GNU_AS)
3780     {
3781       static bool declared_ia64_trampoline = false;
3782 
3783       if (!declared_ia64_trampoline)
3784 	{
3785 	  declared_ia64_trampoline = true;
3786 	  (*targetm.asm_out.globalize_label) (asm_out_file,
3787 					      "__ia64_trampoline");
3788 	}
3789     }
3790 
3791   /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3792   addr = convert_memory_address (Pmode, addr);
3793   fnaddr = convert_memory_address (Pmode, fnaddr);
3794   static_chain = convert_memory_address (Pmode, static_chain);
3795 
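  /* Sketch of the trampoline laid out by the stores below, assuming
     8-byte Pmode words:
	[ADDR +  0]  __ia64_trampoline   \  fake function descriptor
	[ADDR +  8]  ADDR + 16           /
	[ADDR + 16]  FNADDR (the target's function descriptor)
	[ADDR + 24]  STATIC_CHAIN  */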
3796   /* Load up our iterator.  */
3797   addr_reg = gen_reg_rtx (Pmode);
3798   emit_move_insn (addr_reg, addr);
3799 
3800   /* The first two words are the fake descriptor:
3801      __ia64_trampoline, ADDR+16.  */
3802   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3803 		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3804   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3805 
3806   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3807 		  copy_to_reg (plus_constant (addr, 16)));
3808   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3809 
3810   /* The third word is the target descriptor.  */
3811   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3812   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3813 
3814   /* The fourth word is the static chain.  */
3815   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3816 }
3817 
3818 /* Do any needed setup for a variadic function.  CUM has not been updated
3819    for the last named argument which has type TYPE and mode MODE.
3820 
3821    We generate the actual spill instructions during prologue generation.  */
3822 
3823 static void
3824 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3825 			     tree type, int * pretend_size,
3826 			     int second_time ATTRIBUTE_UNUSED)
3827 {
3828   CUMULATIVE_ARGS next_cum = *cum;
3829 
3830   /* Skip the current argument.  */
3831   ia64_function_arg_advance (&next_cum, mode, type, 1);
3832 
3833   if (next_cum.words < MAX_ARGUMENT_SLOTS)
3834     {
3835       int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3836       *pretend_size = n * UNITS_PER_WORD;
3837       cfun->machine->n_varargs = n;
3838     }
3839 }
3840 
3841 /* Check whether TYPE is a homogeneous floating point aggregate.  If
3842    it is, return the mode of the floating point type that appears
3843    in all leaves.  If it is not, return VOIDmode.
3844 
3845    An aggregate is a homogeneous floating point aggregate if all
3846    fields/elements in it have the same floating point type (e.g.,
3847    SFmode).  128-bit quad-precision floats are excluded.
3848 
3849    Variable sized aggregates should never arrive here, since we should
3850    have already decided to pass them by reference.  Top-level zero-sized
3851    aggregates are excluded because our parallels crash the middle-end.  */
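/* A rough illustration of the rule above, using hypothetical C types:
   "struct { float x, y, z; }" is an HFA and yields SFmode, since every
   leaf has the same floating point type; "struct { float x; double y; }"
   mixes leaf types and yields VOIDmode; an aggregate whose leaves are
   128-bit quad precision (TFmode) also yields VOIDmode, per the
   exclusion above.  */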
3852 
3853 static enum machine_mode
3854 hfa_element_mode (tree type, bool nested)
3855 {
3856   enum machine_mode element_mode = VOIDmode;
3857   enum machine_mode mode;
3858   enum tree_code code = TREE_CODE (type);
3859   int know_element_mode = 0;
3860   tree t;
3861 
3862   if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3863     return VOIDmode;
3864 
3865   switch (code)
3866     {
3867     case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
3868     case BOOLEAN_TYPE:	case POINTER_TYPE:
3869     case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
3870     case LANG_TYPE:		case FUNCTION_TYPE:
3871       return VOIDmode;
3872 
3873       /* Fortran complex types are supposed to be HFAs, so we need to handle
3874 	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
3875 	 types though.  */
3876     case COMPLEX_TYPE:
3877       if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3878 	  && TYPE_MODE (type) != TCmode)
3879 	return GET_MODE_INNER (TYPE_MODE (type));
3880       else
3881 	return VOIDmode;
3882 
3883     case REAL_TYPE:
3884       /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3885 	 mode if this is contained within an aggregate.  */
3886       if (nested && TYPE_MODE (type) != TFmode)
3887 	return TYPE_MODE (type);
3888       else
3889 	return VOIDmode;
3890 
3891     case ARRAY_TYPE:
3892       return hfa_element_mode (TREE_TYPE (type), 1);
3893 
3894     case RECORD_TYPE:
3895     case UNION_TYPE:
3896     case QUAL_UNION_TYPE:
3897       for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3898 	{
3899 	  if (TREE_CODE (t) != FIELD_DECL)
3900 	    continue;
3901 
3902 	  mode = hfa_element_mode (TREE_TYPE (t), 1);
3903 	  if (know_element_mode)
3904 	    {
3905 	      if (mode != element_mode)
3906 		return VOIDmode;
3907 	    }
3908 	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3909 	    return VOIDmode;
3910 	  else
3911 	    {
3912 	      know_element_mode = 1;
3913 	      element_mode = mode;
3914 	    }
3915 	}
3916       return element_mode;
3917 
3918     default:
3919       /* If we reach here, we probably have some front-end specific type
3920 	 that the backend doesn't know about.  This can happen via the
3921 	 aggregate_value_p call in init_function_start.  All we can do is
3922 	 ignore unknown tree types.  */
3923       return VOIDmode;
3924     }
3925 
3926   return VOIDmode;
3927 }
3928 
3929 /* Return the number of words required to hold a quantity of TYPE and MODE
3930    when passed as an argument.  */
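/* For example, assuming 8-byte argument words, a 12-byte BLKmode
   aggregate needs (12 + 7) / 8 = 2 words and a DImode scalar needs 1
   (illustrative arithmetic only).  */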
3931 static int
3932 ia64_function_arg_words (tree type, enum machine_mode mode)
3933 {
3934   int words;
3935 
3936   if (mode == BLKmode)
3937     words = int_size_in_bytes (type);
3938   else
3939     words = GET_MODE_SIZE (mode);
3940 
3941   return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
3942 }
3943 
3944 /* Return the number of registers that should be skipped so the current
3945    argument (described by TYPE and WORDS) will be properly aligned.
3946 
3947    Integer and float arguments larger than 8 bytes start at the next
3948    even boundary.  Aggregates larger than 8 bytes start at the next
3949    even boundary if the aggregate has 16 byte alignment.  Note that
3950    in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3951    but are still to be aligned in registers.
3952 
3953    ??? The ABI does not specify how to handle aggregates with
3954    alignment from 9 to 15 bytes, or greater than 16.  We handle them
3955    all as if they had 16 byte alignment.  Such aggregates can occur
3956    only if gcc extensions are used.  */
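/* Illustrative reading of the rule above: when CUM->words is odd, a
   16-byte-aligned aggregate or a two-word scalar skips one register so
   that it starts on an even slot, while an 8-byte scalar does not.  */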
3957 static int
3958 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3959 {
3960   if ((cum->words & 1) == 0)
3961     return 0;
3962 
3963   if (type
3964       && TREE_CODE (type) != INTEGER_TYPE
3965       && TREE_CODE (type) != REAL_TYPE)
3966     return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3967   else
3968     return words > 1;
3969 }
3970 
3971 /* Return rtx for register where argument is passed, or zero if it is passed
3972    on the stack.  */
3973 /* ??? 128-bit quad-precision floats are always passed in general
3974    registers.  */
3975 
3976 rtx
3977 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3978 		   int named, int incoming)
3979 {
3980   int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3981   int words = ia64_function_arg_words (type, mode);
3982   int offset = ia64_function_arg_offset (cum, type, words);
3983   enum machine_mode hfa_mode = VOIDmode;
3984 
3985   /* If all argument slots are used, then it must go on the stack.  */
3986   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3987     return 0;
3988 
3989   /* Check for and handle homogeneous FP aggregates.  */
3990   if (type)
3991     hfa_mode = hfa_element_mode (type, 0);
3992 
3993   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3994      and unprototyped hfas are passed specially.  */
3995   if (hfa_mode != VOIDmode && (! cum->prototype || named))
3996     {
3997       rtx loc[16];
3998       int i = 0;
3999       int fp_regs = cum->fp_regs;
4000       int int_regs = cum->words + offset;
4001       int hfa_size = GET_MODE_SIZE (hfa_mode);
4002       int byte_size;
4003       int args_byte_size;
4004 
4005       /* If prototyped, pass it in FR regs then GR regs.
4006 	 If not prototyped, pass it in both FR and GR regs.
4007 
4008 	 If this is an SFmode aggregate, then it is possible to run out of
4009 	 FR regs while GR regs are still left.  In that case, we pass the
4010 	 remaining part in the GR regs.  */
4011 
4012       /* Fill the FP regs.  We do this always.  We stop if we reach the end
4013 	 of the argument, the last FP register, or the last argument slot.  */
4014 
4015       byte_size = ((mode == BLKmode)
4016 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4017       args_byte_size = int_regs * UNITS_PER_WORD;
4018       offset = 0;
4019       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4020 	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4021 	{
4022 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4023 				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4024 							      + fp_regs)),
4025 				      GEN_INT (offset));
4026 	  offset += hfa_size;
4027 	  args_byte_size += hfa_size;
4028 	  fp_regs++;
4029 	}
4030 
4031       /* If no prototype, then the whole thing must go in GR regs.  */
4032       if (! cum->prototype)
4033 	offset = 0;
4034       /* If this is an SFmode aggregate, then we might have some left over
4035 	 that needs to go in GR regs.  */
4036       else if (byte_size != offset)
4037 	int_regs += offset / UNITS_PER_WORD;
4038 
4039       /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4040 
4041       for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4042 	{
4043 	  enum machine_mode gr_mode = DImode;
4044 	  unsigned int gr_size;
4045 
4046 	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
4047 	     then this goes in a GR reg left adjusted/little endian, right
4048 	     adjusted/big endian.  */
4049 	  /* ??? Currently this is handled wrong, because 4-byte hunks are
4050 	     always right adjusted/little endian.  */
4051 	  if (offset & 0x4)
4052 	    gr_mode = SImode;
4053 	  /* If we have an even 4 byte hunk because the aggregate is a
4054 	     multiple of 4 bytes in size, then this goes in a GR reg right
4055 	     adjusted/little endian.  */
4056 	  else if (byte_size - offset == 4)
4057 	    gr_mode = SImode;
4058 
4059 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4060 				      gen_rtx_REG (gr_mode, (basereg
4061 							     + int_regs)),
4062 				      GEN_INT (offset));
4063 
4064 	  gr_size = GET_MODE_SIZE (gr_mode);
4065 	  offset += gr_size;
4066 	  if (gr_size == UNITS_PER_WORD
4067 	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4068 	    int_regs++;
4069 	  else if (gr_size > UNITS_PER_WORD)
4070 	    int_regs += gr_size / UNITS_PER_WORD;
4071 	}
4072       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4073     }
4074 
4075   /* Integral and aggregates go in general registers.  If we have run out of
4076      FR registers, then FP values must also go in general registers.  This can
4077      happen when we have an SFmode HFA.  */
4078   else if (mode == TFmode || mode == TCmode
4079 	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4080     {
4081       int byte_size = ((mode == BLKmode)
4082                        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4083       if (BYTES_BIG_ENDIAN
4084 	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4085 	&& byte_size < UNITS_PER_WORD
4086 	&& byte_size > 0)
4087 	{
4088 	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4089 					  gen_rtx_REG (DImode,
4090 						       (basereg + cum->words
4091 							+ offset)),
4092 					  const0_rtx);
4093 	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4094 	}
4095       else
4096 	return gen_rtx_REG (mode, basereg + cum->words + offset);
4097 
4098     }
4099 
4100   /* If there is a prototype, then FP values go in a FR register when
4101      named, and in a GR register when unnamed.  */
4102   else if (cum->prototype)
4103     {
4104       if (named)
4105 	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4106       /* In big-endian mode, an anonymous SFmode value must be represented
4107          as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4108 	 the value into the high half of the general register.  */
4109       else if (BYTES_BIG_ENDIAN && mode == SFmode)
4110 	return gen_rtx_PARALLEL (mode,
4111 		 gen_rtvec (1,
4112                    gen_rtx_EXPR_LIST (VOIDmode,
4113 		     gen_rtx_REG (DImode, basereg + cum->words + offset),
4114 				      const0_rtx)));
4115       else
4116 	return gen_rtx_REG (mode, basereg + cum->words + offset);
4117     }
4118   /* If there is no prototype, then FP values go in both FR and GR
4119      registers.  */
4120   else
4121     {
4122       /* See comment above.  */
4123       enum machine_mode inner_mode =
4124 	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4125 
4126       rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4127 				      gen_rtx_REG (mode, (FR_ARG_FIRST
4128 							  + cum->fp_regs)),
4129 				      const0_rtx);
4130       rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4131 				      gen_rtx_REG (inner_mode,
4132 						   (basereg + cum->words
4133 						    + offset)),
4134 				      const0_rtx);
4135 
4136       return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4137     }
4138 }
4139 
4140 /* Return number of bytes, at the beginning of the argument, that must be
4141    put in registers.  0 if the argument is entirely in registers or entirely
4142    in memory.  */
4143 
4144 static int
4145 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4146 			tree type, bool named ATTRIBUTE_UNUSED)
4147 {
4148   int words = ia64_function_arg_words (type, mode);
4149   int offset = ia64_function_arg_offset (cum, type, words);
4150 
4151   /* If all argument slots are used, then it must go on the stack.  */
4152   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4153     return 0;
4154 
4155   /* It doesn't matter whether the argument goes in FR or GR regs.  If
4156      it fits within the 8 argument slots, then it goes entirely in
4157      registers.  If it extends past the last argument slot, then the rest
4158      goes on the stack.  */
4159 
4160   if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4161     return 0;
4162 
4163   return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4164 }
4165 
4166 /* Update CUM to point after this argument.  This is patterned after
4167    ia64_function_arg.  */
4168 
4169 void
4170 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4171 			   tree type, int named)
4172 {
4173   int words = ia64_function_arg_words (type, mode);
4174   int offset = ia64_function_arg_offset (cum, type, words);
4175   enum machine_mode hfa_mode = VOIDmode;
4176 
4177   /* If all arg slots are already full, then there is nothing to do.  */
4178   if (cum->words >= MAX_ARGUMENT_SLOTS)
4179     return;
4180 
4181   cum->words += words + offset;
4182 
4183   /* Check for and handle homogeneous FP aggregates.  */
4184   if (type)
4185     hfa_mode = hfa_element_mode (type, 0);
4186 
4187   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4188      and unprototyped hfas are passed specially.  */
4189   if (hfa_mode != VOIDmode && (! cum->prototype || named))
4190     {
4191       int fp_regs = cum->fp_regs;
4192       /* This is the original value of cum->words + offset.  */
4193       int int_regs = cum->words - words;
4194       int hfa_size = GET_MODE_SIZE (hfa_mode);
4195       int byte_size;
4196       int args_byte_size;
4197 
4198       /* If prototyped, pass it in FR regs then GR regs.
4199 	 If not prototyped, pass it in both FR and GR regs.
4200 
4201 	 If this is an SFmode aggregate, then it is possible to run out of
4202 	 FR regs while GR regs are still left.  In that case, we pass the
4203 	 remaining part in the GR regs.  */
4204 
4205       /* Fill the FP regs.  We do this always.  We stop if we reach the end
4206 	 of the argument, the last FP register, or the last argument slot.  */
4207 
4208       byte_size = ((mode == BLKmode)
4209 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4210       args_byte_size = int_regs * UNITS_PER_WORD;
4211       offset = 0;
4212       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4213 	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4214 	{
4215 	  offset += hfa_size;
4216 	  args_byte_size += hfa_size;
4217 	  fp_regs++;
4218 	}
4219 
4220       cum->fp_regs = fp_regs;
4221     }
4222 
4223   /* Integral and aggregates go in general registers.  So do TFmode FP values.
4224      If we have run out of FR registers, then other FP values must also go in
4225      general registers.  This can happen when we have an SFmode HFA.  */
4226   else if (mode == TFmode || mode == TCmode
4227            || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4228     cum->int_regs = cum->words;
4229 
4230   /* If there is a prototype, then FP values go in a FR register when
4231      named, and in a GR register when unnamed.  */
4232   else if (cum->prototype)
4233     {
4234       if (! named)
4235 	cum->int_regs = cum->words;
4236       else
4237 	/* ??? Complex types should not reach here.  */
4238 	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4239     }
4240   /* If there is no prototype, then FP values go in both FR and GR
4241      registers.  */
4242   else
4243     {
4244       /* ??? Complex types should not reach here.  */
4245       cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4246       cum->int_regs = cum->words;
4247     }
4248 }
4249 
4250 /* Arguments with alignment larger than 8 bytes start at the next even
4251    boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
4252    even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
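/* For instance, assuming a 64-bit PARM_BOUNDARY, a 16-byte-aligned type
   (or, with no type, a mode wider than 8 bytes) is reported as 128-bit
   aligned, while an 8-byte scalar stays at 64 bits (illustrative only).  */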
4253 
4254 int
4255 ia64_function_arg_boundary (enum machine_mode mode, tree type)
4256 {
4257 
4258   if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4259     return PARM_BOUNDARY * 2;
4260 
4261   if (type)
4262     {
4263       if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4264         return PARM_BOUNDARY * 2;
4265       else
4266         return PARM_BOUNDARY;
4267     }
4268 
4269   if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4270     return PARM_BOUNDARY * 2;
4271   else
4272     return PARM_BOUNDARY;
4273 }
4274 
4275 /* True if it is OK to do sibling call optimization for the specified
4276    call expression EXP.  DECL will be the called function, or NULL if
4277    this is an indirect call.  */
4278 static bool
4279 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4280 {
4281   /* We can't perform a sibcall if the current function has the syscall_linkage
4282      attribute.  */
4283   if (lookup_attribute ("syscall_linkage",
4284 			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4285     return false;
4286 
4287   /* We must always return with our current GP.  This means we can
4288      only sibcall to functions defined in the current module.  */
4289   return decl && (*targetm.binds_local_p) (decl);
4290 }
4291 
4292 
4293 /* Implement va_arg.  */
4294 
4295 static tree
4296 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4297 {
4298   /* Variable sized types are passed by reference.  */
4299   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4300     {
4301       tree ptrtype = build_pointer_type (type);
4302       tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4303       return build_va_arg_indirect_ref (addr);
4304     }
4305 
4306   /* Aggregate arguments with alignment larger than 8 bytes start at
4307      the next even boundary.  Integer and floating point arguments
4308      do so if they are larger than 8 bytes, whether or not they are
4309      also aligned larger than 8 bytes.  */
4310   if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4311       ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4312     {
4313       tree t = build2 (PLUS_EXPR, TREE_TYPE (valist), valist,
4314 		       build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
4315       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4316 		  build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
4317       t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4318       gimplify_and_add (t, pre_p);
4319     }
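  /* With 8-byte words, the statement built above amounts to
     valist = (valist + 15) & -16, i.e. it rounds the argument pointer up
     to the next 16-byte boundary (illustrative restatement only).  */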
4320 
4321   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4322 }
4323 
4324 /* Return 1 if the function return value is returned in memory.  Return 0 if it is
4325    in a register.  */
4326 
4327 static bool
4328 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
4329 {
4330   enum machine_mode mode;
4331   enum machine_mode hfa_mode;
4332   HOST_WIDE_INT byte_size;
4333 
4334   mode = TYPE_MODE (valtype);
4335   byte_size = GET_MODE_SIZE (mode);
4336   if (mode == BLKmode)
4337     {
4338       byte_size = int_size_in_bytes (valtype);
4339       if (byte_size < 0)
4340 	return true;
4341     }
4342 
4343   /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
4344 
4345   hfa_mode = hfa_element_mode (valtype, 0);
4346   if (hfa_mode != VOIDmode)
4347     {
4348       int hfa_size = GET_MODE_SIZE (hfa_mode);
4349 
4350       if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4351 	return true;
4352       else
4353 	return false;
4354     }
4355   else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4356     return true;
4357   else
4358     return false;
4359 }
4360 
4361 /* Return rtx for register that holds the function return value.  */
4362 
4363 rtx
4364 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4365 {
4366   enum machine_mode mode;
4367   enum machine_mode hfa_mode;
4368 
4369   mode = TYPE_MODE (valtype);
4370   hfa_mode = hfa_element_mode (valtype, 0);
4371 
4372   if (hfa_mode != VOIDmode)
4373     {
4374       rtx loc[8];
4375       int i;
4376       int hfa_size;
4377       int byte_size;
4378       int offset;
4379 
4380       hfa_size = GET_MODE_SIZE (hfa_mode);
4381       byte_size = ((mode == BLKmode)
4382 		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4383       offset = 0;
4384       for (i = 0; offset < byte_size; i++)
4385 	{
4386 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4387 				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4388 				      GEN_INT (offset));
4389 	  offset += hfa_size;
4390 	}
4391       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4392     }
4393   else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4394     return gen_rtx_REG (mode, FR_ARG_FIRST);
4395   else
4396     {
4397       bool need_parallel = false;
4398 
4399       /* In big-endian mode, we need to manage the layout of aggregates
4400 	 in the registers so that we get the bits properly aligned in
4401 	 the highpart of the registers.  */
4402       if (BYTES_BIG_ENDIAN
4403 	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4404 	need_parallel = true;
4405 
4406       /* Something like struct S { long double x; char a[0] } is not an
4407 	 HFA structure, and therefore doesn't go in fp registers.  But
4408 	 the middle-end will give it XFmode anyway, and XFmode values
4409 	 don't normally fit in integer registers.  So we need to smuggle
4410 	 the value inside a parallel.  */
4411       else if (mode == XFmode || mode == XCmode || mode == RFmode)
4412 	need_parallel = true;
4413 
4414       if (need_parallel)
4415 	{
4416 	  rtx loc[8];
4417 	  int offset;
4418 	  int bytesize;
4419 	  int i;
4420 
4421 	  offset = 0;
4422 	  bytesize = int_size_in_bytes (valtype);
4423 	  /* An empty PARALLEL is invalid here, but the return value
4424 	     doesn't matter for empty structs.  */
4425 	  if (bytesize == 0)
4426 	    return gen_rtx_REG (mode, GR_RET_FIRST);
4427 	  for (i = 0; offset < bytesize; i++)
4428 	    {
4429 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4430 					  gen_rtx_REG (DImode,
4431 						       GR_RET_FIRST + i),
4432 					  GEN_INT (offset));
4433 	      offset += UNITS_PER_WORD;
4434 	    }
4435 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4436 	}
4437 
4438       return gen_rtx_REG (mode, GR_RET_FIRST);
4439     }
4440 }
4441 
4442 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4443    We need to emit DTP-relative relocations.  */
4444 
4445 static void
4446 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4447 {
4448   gcc_assert (size == 4 || size == 8);
4449   if (size == 4)
4450     fputs ("\tdata4.ua\t@dtprel(", file);
4451   else
4452     fputs ("\tdata8.ua\t@dtprel(", file);
4453   output_addr_const (file, x);
4454   fputs (")", file);
4455 }
4456 
4457 /* Print a memory address as an operand to reference that memory location.  */
4458 
4459 /* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
4460    also call this from ia64_print_operand for memory addresses.  */
4461 
4462 void
4463 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4464 			    rtx address ATTRIBUTE_UNUSED)
4465 {
4466 }
4467 
4468 /* Print an operand to an assembler instruction.
4469    C	Swap and print a comparison operator.
4470    D	Print an FP comparison operator.
4471    E    Print 32 - constant, for SImode shifts as extract.
4472    e    Print 64 - constant, for DImode rotates.
4473    F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4474         a floating point register emitted normally.
4475    I	Invert a predicate register by adding 1.
4476    J    Select the proper predicate register for a condition.
4477    j    Select the inverse predicate register for a condition.
4478    O	Append .acq for volatile load.
4479    P	Postincrement of a MEM.
4480    Q	Append .rel for volatile store.
4481    S	Shift amount for shladd instruction.
4482    T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4483 	for Intel assembler.
4484    U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4485 	for Intel assembler.
4486    X	A pair of floating point registers.
4487    r	Print register name, or constant 0 as r0.  HP compatibility for
4488 	Linux kernel.
4489    v    Print vector constant value as an 8-byte integer value.  */
4490 
4491 void
4492 ia64_print_operand (FILE * file, rtx x, int code)
4493 {
4494   const char *str;
4495 
4496   switch (code)
4497     {
4498     case 0:
4499       /* Handled below.  */
4500       break;
4501 
4502     case 'C':
4503       {
4504 	enum rtx_code c = swap_condition (GET_CODE (x));
4505 	fputs (GET_RTX_NAME (c), file);
4506 	return;
4507       }
4508 
4509     case 'D':
4510       switch (GET_CODE (x))
4511 	{
4512 	case NE:
4513 	  str = "neq";
4514 	  break;
4515 	case UNORDERED:
4516 	  str = "unord";
4517 	  break;
4518 	case ORDERED:
4519 	  str = "ord";
4520 	  break;
4521 	default:
4522 	  str = GET_RTX_NAME (GET_CODE (x));
4523 	  break;
4524 	}
4525       fputs (str, file);
4526       return;
4527 
4528     case 'E':
4529       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4530       return;
4531 
4532     case 'e':
4533       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4534       return;
4535 
4536     case 'F':
4537       if (x == CONST0_RTX (GET_MODE (x)))
4538 	str = reg_names [FR_REG (0)];
4539       else if (x == CONST1_RTX (GET_MODE (x)))
4540 	str = reg_names [FR_REG (1)];
4541       else
4542 	{
4543 	  gcc_assert (GET_CODE (x) == REG);
4544 	  str = reg_names [REGNO (x)];
4545 	}
4546       fputs (str, file);
4547       return;
4548 
4549     case 'I':
4550       fputs (reg_names [REGNO (x) + 1], file);
4551       return;
4552 
4553     case 'J':
4554     case 'j':
4555       {
4556 	unsigned int regno = REGNO (XEXP (x, 0));
4557 	if (GET_CODE (x) == EQ)
4558 	  regno += 1;
4559 	if (code == 'j')
4560 	  regno ^= 1;
4561         fputs (reg_names [regno], file);
4562       }
4563       return;
4564 
4565     case 'O':
4566       if (MEM_VOLATILE_P (x))
4567 	fputs(".acq", file);
4568       return;
4569 
4570     case 'P':
4571       {
4572 	HOST_WIDE_INT value;
4573 
4574 	switch (GET_CODE (XEXP (x, 0)))
4575 	  {
4576 	  default:
4577 	    return;
4578 
4579 	  case POST_MODIFY:
4580 	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4581 	    if (GET_CODE (x) == CONST_INT)
4582 	      value = INTVAL (x);
4583 	    else
4584 	      {
4585 		gcc_assert (GET_CODE (x) == REG);
4586 		fprintf (file, ", %s", reg_names[REGNO (x)]);
4587 		return;
4588 	      }
4589 	    break;
4590 
4591 	  case POST_INC:
4592 	    value = GET_MODE_SIZE (GET_MODE (x));
4593 	    break;
4594 
4595 	  case POST_DEC:
4596 	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4597 	    break;
4598 	  }
4599 
4600 	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4601 	return;
4602       }
4603 
4604     case 'Q':
4605       if (MEM_VOLATILE_P (x))
4606 	fputs(".rel", file);
4607       return;
4608 
4609     case 'S':
4610       fprintf (file, "%d", exact_log2 (INTVAL (x)));
4611       return;
4612 
4613     case 'T':
4614       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4615 	{
4616 	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4617 	  return;
4618 	}
4619       break;
4620 
4621     case 'U':
4622       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4623 	{
4624 	  const char *prefix = "0x";
4625 	  if (INTVAL (x) & 0x80000000)
4626 	    {
4627 	      fprintf (file, "0xffffffff");
4628 	      prefix = "";
4629 	    }
4630 	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4631 	  return;
4632 	}
4633       break;
4634 
4635     case 'X':
4636       {
4637 	unsigned int regno = REGNO (x);
4638 	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4639       }
4640       return;
4641 
4642     case 'r':
4643       /* If this operand is the constant zero, write it as register zero.
4644 	 Any register, zero, or CONST_INT value is OK here.  */
4645       if (GET_CODE (x) == REG)
4646 	fputs (reg_names[REGNO (x)], file);
4647       else if (x == CONST0_RTX (GET_MODE (x)))
4648 	fputs ("r0", file);
4649       else if (GET_CODE (x) == CONST_INT)
4650 	output_addr_const (file, x);
4651       else
4652 	output_operand_lossage ("invalid %%r value");
4653       return;
4654 
4655     case 'v':
4656       gcc_assert (GET_CODE (x) == CONST_VECTOR);
4657       x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4658       break;
4659 
4660     case '+':
4661       {
4662 	const char *which;
4663 
4664 	/* For conditional branches, returns or calls, substitute
4665 	   sptk, dptk, dpnt, or spnt for %s.  */
4666 	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4667 	if (x)
4668 	  {
4669 	    int pred_val = INTVAL (XEXP (x, 0));
4670 
4671 	    /* Guess top and bottom 2% statically predicted.  */
4672 	    if (pred_val < REG_BR_PROB_BASE / 50
4673 		&& br_prob_note_reliable_p (x))
4674 	      which = ".spnt";
4675 	    else if (pred_val < REG_BR_PROB_BASE / 2)
4676 	      which = ".dpnt";
4677 	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
4678 		     || !br_prob_note_reliable_p (x))
4679 	      which = ".dptk";
4680 	    else
4681 	      which = ".sptk";
4682 	  }
4683 	else if (GET_CODE (current_output_insn) == CALL_INSN)
4684 	  which = ".sptk";
4685 	else
4686 	  which = ".dptk";
4687 
4688 	fputs (which, file);
4689 	return;
4690       }
4691 
4692     case ',':
4693       x = current_insn_predicate;
4694       if (x)
4695 	{
4696 	  unsigned int regno = REGNO (XEXP (x, 0));
4697 	  if (GET_CODE (x) == EQ)
4698 	    regno += 1;
4699           fprintf (file, "(%s) ", reg_names [regno]);
4700 	}
4701       return;
4702 
4703     default:
4704       output_operand_lossage ("ia64_print_operand: unknown code");
4705       return;
4706     }
4707 
4708   switch (GET_CODE (x))
4709     {
4710       /* This happens for the spill/restore instructions.  */
4711     case POST_INC:
4712     case POST_DEC:
4713     case POST_MODIFY:
4714       x = XEXP (x, 0);
4715       /* ... fall through ...  */
4716 
4717     case REG:
4718       fputs (reg_names [REGNO (x)], file);
4719       break;
4720 
4721     case MEM:
4722       {
4723 	rtx addr = XEXP (x, 0);
4724 	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4725 	  addr = XEXP (addr, 0);
4726 	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4727 	break;
4728       }
4729 
4730     default:
4731       output_addr_const (file, x);
4732       break;
4733     }
4734 
4735   return;
4736 }
4737 
4738 /* Compute a (partial) cost for rtx X.  Return true if the complete
4739    cost has been computed, and false if subexpressions should be
4740    scanned.  In either case, *TOTAL contains the cost result.  */
4741 /* ??? This is incomplete.  */
4742 
4743 static bool
4744 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4745 {
4746   switch (code)
4747     {
4748     case CONST_INT:
4749       switch (outer_code)
4750         {
4751         case SET:
4752 	  *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4753 	  return true;
4754         case PLUS:
4755 	  if (CONST_OK_FOR_I (INTVAL (x)))
4756 	    *total = 0;
4757 	  else if (CONST_OK_FOR_J (INTVAL (x)))
4758 	    *total = 1;
4759 	  else
4760 	    *total = COSTS_N_INSNS (1);
4761 	  return true;
4762         default:
4763 	  if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4764 	    *total = 0;
4765 	  else
4766 	    *total = COSTS_N_INSNS (1);
4767 	  return true;
4768 	}
4769 
4770     case CONST_DOUBLE:
4771       *total = COSTS_N_INSNS (1);
4772       return true;
4773 
4774     case CONST:
4775     case SYMBOL_REF:
4776     case LABEL_REF:
4777       *total = COSTS_N_INSNS (3);
4778       return true;
4779 
4780     case MULT:
4781       /* For multiplies wider than HImode, we have to go to the FPU,
4782          which normally involves copies.  Plus there's the latency
4783          of the multiply itself, and the latency of the instructions to
4784          transfer integer regs to FP regs.  */
4785       /* ??? Check for FP mode.  */
4786       if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4787         *total = COSTS_N_INSNS (10);
4788       else
4789 	*total = COSTS_N_INSNS (2);
4790       return true;
4791 
4792     case PLUS:
4793     case MINUS:
4794     case ASHIFT:
4795     case ASHIFTRT:
4796     case LSHIFTRT:
4797       *total = COSTS_N_INSNS (1);
4798       return true;
4799 
4800     case DIV:
4801     case UDIV:
4802     case MOD:
4803     case UMOD:
4804       /* We make divide expensive, so that divide-by-constant will be
4805          optimized to a multiply.  */
4806       *total = COSTS_N_INSNS (60);
4807       return true;
4808 
4809     default:
4810       return false;
4811     }
4812 }
4813 
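/* Editorial example (hypothetical, not from the original source): because
   DIV, UDIV, MOD and UMOD are costed at COSTS_N_INSNS (60) above, the
   middle end will expand, e.g.,

	unsigned q = n / 10;

   into a multiply-by-reciprocal and shift sequence (roughly the cost of a
   MULT) rather than a full runtime division.  */
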
4814 /* Calculate the cost of moving data from a register in class FROM to
4815    one in class TO, using MODE.  */
4816 
4817 int
4818 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4819 			 enum reg_class to)
4820 {
4821   /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
4822   if (to == ADDL_REGS)
4823     to = GR_REGS;
4824   if (from == ADDL_REGS)
4825     from = GR_REGS;
4826 
4827   /* All costs are symmetric, so reduce cases by putting the
4828      lower number class as the destination.  */
4829   if (from < to)
4830     {
4831       enum reg_class tmp = to;
4832       to = from, from = tmp;
4833     }
4834 
4835   /* Moving from FR<->GR in XFmode must be more expensive than 2,
4836      so that we get secondary memory reloads.  Between FR_REGS,
4837      we have to make this at least as expensive as MEMORY_MOVE_COST
4838      to avoid spectacularly poor register class preferencing.  */
4839   if (mode == XFmode || mode == RFmode)
4840     {
4841       if (to != GR_REGS || from != GR_REGS)
4842         return MEMORY_MOVE_COST (mode, to, 0);
4843       else
4844 	return 3;
4845     }
4846 
4847   switch (to)
4848     {
4849     case PR_REGS:
4850       /* Moving between PR registers takes two insns.  */
4851       if (from == PR_REGS)
4852 	return 3;
4853       /* Moving between PR and anything but GR is impossible.  */
4854       if (from != GR_REGS)
4855 	return MEMORY_MOVE_COST (mode, to, 0);
4856       break;
4857 
4858     case BR_REGS:
4859       /* Moving between BR and anything but GR is impossible.  */
4860       if (from != GR_REGS && from != GR_AND_BR_REGS)
4861 	return MEMORY_MOVE_COST (mode, to, 0);
4862       break;
4863 
4864     case AR_I_REGS:
4865     case AR_M_REGS:
4866       /* Moving between AR and anything but GR is impossible.  */
4867       if (from != GR_REGS)
4868 	return MEMORY_MOVE_COST (mode, to, 0);
4869       break;
4870 
4871     case GR_REGS:
4872     case FR_REGS:
4873     case FP_REGS:
4874     case GR_AND_FR_REGS:
4875     case GR_AND_BR_REGS:
4876     case ALL_REGS:
4877       break;
4878 
4879     default:
4880       gcc_unreachable ();
4881     }
4882 
4883   return 2;
4884 }
4885 
4886 /* Implement PREFERRED_RELOAD_CLASS.  Place additional restrictions on CLASS
4887    to use when copying X into that class.  */
4888 
4889 enum reg_class
4890 ia64_preferred_reload_class (rtx x, enum reg_class class)
4891 {
4892   switch (class)
4893     {
4894     case FR_REGS:
4895     case FP_REGS:
4896       /* Don't allow volatile mem reloads into floating point registers.
4897 	 This is defined to force reload to choose the r/m case instead
4898 	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
4899       if (MEM_P (x) && MEM_VOLATILE_P (x))
4900 	return NO_REGS;
4901 
4902       /* Force all unrecognized constants into the constant pool.  */
4903       if (CONSTANT_P (x))
4904 	return NO_REGS;
4905       break;
4906 
4907     case AR_M_REGS:
4908     case AR_I_REGS:
4909       if (!OBJECT_P (x))
4910 	return NO_REGS;
4911       break;
4912 
4913     default:
4914       break;
4915     }
4916 
4917   return class;
4918 }
4919 
4920 /* This function returns the register class required for a secondary
4921    register when copying between one of the registers in CLASS, and X,
4922    using MODE.  A return value of NO_REGS means that no secondary register
4923    is required.  */
4924 
4925 enum reg_class
4926 ia64_secondary_reload_class (enum reg_class class,
4927 			     enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4928 {
4929   int regno = -1;
4930 
4931   if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4932     regno = true_regnum (x);
4933 
4934   switch (class)
4935     {
4936     case BR_REGS:
4937     case AR_M_REGS:
4938     case AR_I_REGS:
4939       /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4940 	 interaction.  We end up with two pseudos with overlapping lifetimes
4941 	 both of which are equiv to the same constant, and both which need
4942 	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
4943 	 changes depending on the path length, which means the qty_first_reg
4944 	 check in make_regs_eqv can give different answers at different times.
4945 	 At some point I'll probably need a reload_indi pattern to handle
4946 	 this.
4947 
4948 	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4949 	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
4950 	 non-general registers for good measure.  */
4951       if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4952 	return GR_REGS;
4953 
4954       /* This is needed if a pseudo used as a call_operand gets spilled to a
4955 	 stack slot.  */
4956       if (GET_CODE (x) == MEM)
4957 	return GR_REGS;
4958       break;
4959 
4960     case FR_REGS:
4961     case FP_REGS:
4962       /* Need to go through general registers to get to other class regs.  */
4963       if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4964 	return GR_REGS;
4965 
4966       /* This can happen when a paradoxical subreg is an operand to the
4967 	 muldi3 pattern.  */
4968       /* ??? This shouldn't be necessary after instruction scheduling is
4969 	 enabled, because paradoxical subregs are not accepted by
4970 	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
4971 	 stop the paradoxical subreg stupidity in the *_operand functions
4972 	 in recog.c.  */
4973       if (GET_CODE (x) == MEM
4974 	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4975 	      || GET_MODE (x) == QImode))
4976 	return GR_REGS;
4977 
4978       /* This can happen because of the ior/and/etc patterns that accept FP
4979 	 registers as operands.  If the third operand is a constant, then it
4980 	 needs to be reloaded into a FP register.  */
4981       if (GET_CODE (x) == CONST_INT)
4982 	return GR_REGS;
4983 
4984       /* This can happen because of register elimination in a muldi3 insn.
4985 	 E.g. `26107 * (unsigned long)&u'.  */
4986       if (GET_CODE (x) == PLUS)
4987 	return GR_REGS;
4988       break;
4989 
4990     case PR_REGS:
4991       /* ??? This happens if we cse/gcse a BImode value across a call,
4992 	 and the function has a nonlocal goto.  This is because global
4993 	 does not allocate call crossing pseudos to hard registers when
4994 	 current_function_has_nonlocal_goto is true.  This is relatively
4995 	 common for C++ programs that use exceptions.  To reproduce,
4996 	 return NO_REGS and compile libstdc++.  */
4997       if (GET_CODE (x) == MEM)
4998 	return GR_REGS;
4999 
5000       /* This can happen when we take a BImode subreg of a DImode value,
5001 	 and that DImode value winds up in some non-GR register.  */
5002       if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5003 	return GR_REGS;
5004       break;
5005 
5006     default:
5007       break;
5008     }
5009 
5010   return NO_REGS;
5011 }
5012 
5013 
5014 /* Parse the -mfixed-range= option string.  */
5015 
5016 static void
5017 fix_range (const char *const_str)
5018 {
5019   int i, first, last;
5020   char *str, *dash, *comma;
5021 
5022   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5023      REG2 are either register names or register numbers.  The effect
5024      of this option is to mark the registers in the range from REG1 to
5025      REG2 as ``fixed'' so they won't be used by the compiler.  This is
5026      used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
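
  /* For example (hypothetical option value, not from the original source):
     "-mfixed-range=f32-f127,f2-f5" marks f32..f127 and f2..f5 as fixed
     and call-used.  */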
5027 
5028   i = strlen (const_str);
5029   str = (char *) alloca (i + 1);
5030   memcpy (str, const_str, i + 1);
5031 
5032   while (1)
5033     {
5034       dash = strchr (str, '-');
5035       if (!dash)
5036 	{
5037 	  warning (0, "value of -mfixed-range must have form REG1-REG2");
5038 	  return;
5039 	}
5040       *dash = '\0';
5041 
5042       comma = strchr (dash + 1, ',');
5043       if (comma)
5044 	*comma = '\0';
5045 
5046       first = decode_reg_name (str);
5047       if (first < 0)
5048 	{
5049 	  warning (0, "unknown register name: %s", str);
5050 	  return;
5051 	}
5052 
5053       last = decode_reg_name (dash + 1);
5054       if (last < 0)
5055 	{
5056 	  warning (0, "unknown register name: %s", dash + 1);
5057 	  return;
5058 	}
5059 
5060       *dash = '-';
5061 
5062       if (first > last)
5063 	{
5064 	  warning (0, "%s-%s is an empty range", str, dash + 1);
5065 	  return;
5066 	}
5067 
5068       for (i = first; i <= last; ++i)
5069 	fixed_regs[i] = call_used_regs[i] = 1;
5070 
5071       if (!comma)
5072 	break;
5073 
5074       *comma = ',';
5075       str = comma + 1;
5076     }
5077 }
5078 
5079 /* Implement TARGET_HANDLE_OPTION.  */
5080 
5081 static bool
5082 ia64_handle_option (size_t code, const char *arg, int value)
5083 {
5084   switch (code)
5085     {
5086     case OPT_mfixed_range_:
5087       fix_range (arg);
5088       return true;
5089 
5090     case OPT_mtls_size_:
5091       if (value != 14 && value != 22 && value != 64)
5092 	error ("bad value %<%s%> for -mtls-size= switch", arg);
5093       return true;
5094 
5095     case OPT_mtune_:
5096       {
5097 	static struct pta
5098 	  {
5099 	    const char *name;		/* processor name or nickname.  */
5100 	    enum processor_type processor;
5101 	  }
5102 	const processor_alias_table[] =
5103 	  {
5104 	    {"itanium", PROCESSOR_ITANIUM},
5105 	    {"itanium1", PROCESSOR_ITANIUM},
5106 	    {"merced", PROCESSOR_ITANIUM},
5107 	    {"itanium2", PROCESSOR_ITANIUM2},
5108 	    {"mckinley", PROCESSOR_ITANIUM2},
5109 	  };
5110 	int const pta_size = ARRAY_SIZE (processor_alias_table);
5111 	int i;
5112 
5113 	for (i = 0; i < pta_size; i++)
5114 	  if (!strcmp (arg, processor_alias_table[i].name))
5115 	    {
5116 	      ia64_tune = processor_alias_table[i].processor;
5117 	      break;
5118 	    }
5119 	if (i == pta_size)
5120 	  error ("bad value %<%s%> for -mtune= switch", arg);
5121 	return true;
5122       }
5123 
5124     default:
5125       return true;
5126     }
5127 }
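
/* Editorial note (illustrative, not from the original source): e.g.
   "-mtune=mckinley" selects PROCESSOR_ITANIUM2 through the alias table
   above, while "-mtls-size=48" is rejected because only 14, 22 and 64
   are accepted.  */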
5128 
5129 /* Implement OVERRIDE_OPTIONS.  */
5130 
5131 void
5132 ia64_override_options (void)
5133 {
5134   if (TARGET_AUTO_PIC)
5135     target_flags |= MASK_CONST_GP;
5136 
5137   if (TARGET_INLINE_SQRT == INL_MIN_LAT)
5138     {
5139       warning (0, "not yet implemented: latency-optimized inline square root");
5140       TARGET_INLINE_SQRT = INL_MAX_THR;
5141     }
5142 
5143   ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5144   flag_schedule_insns_after_reload = 0;
5145 
5146   ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5147 
5148   init_machine_status = ia64_init_machine_status;
5149 }
5150 
5151 static struct machine_function *
5152 ia64_init_machine_status (void)
5153 {
5154   return ggc_alloc_cleared (sizeof (struct machine_function));
5155 }
5156 
5157 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5158 static enum attr_type ia64_safe_type (rtx);
5159 
5160 static enum attr_itanium_class
5161 ia64_safe_itanium_class (rtx insn)
5162 {
5163   if (recog_memoized (insn) >= 0)
5164     return get_attr_itanium_class (insn);
5165   else
5166     return ITANIUM_CLASS_UNKNOWN;
5167 }
5168 
5169 static enum attr_type
5170 ia64_safe_type (rtx insn)
5171 {
5172   if (recog_memoized (insn) >= 0)
5173     return get_attr_type (insn);
5174   else
5175     return TYPE_UNKNOWN;
5176 }
5177 
5178 /* The following collection of routines emits instruction group stop bits as
5179    necessary to avoid dependencies.  */
5180 
5181 /* Need to track some additional registers as far as serialization is
5182    concerned so we can properly handle br.call and br.ret.  We could
5183    make these registers visible to gcc, but since these registers are
5184    never explicitly used in gcc generated code, it seems wasteful to
5185    do so (plus it would make the call and return patterns needlessly
5186    complex).  */
5187 #define REG_RP		(BR_REG (0))
5188 #define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
5189 /* This is used for volatile asms which may require a stop bit immediately
5190    before and after them.  */
5191 #define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
5192 #define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
5193 #define NUM_REGS	(AR_UNAT_BIT_0 + 64)
5194 
5195 /* For each register, we keep track of how it has been written in the
5196    current instruction group.
5197 
5198    If a register is written unconditionally (no qualifying predicate),
5199    WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5200 
5201    If a register is written if its qualifying predicate P is true, we
5202    set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
5203    may be written again by the complement of P (P^1) and when this happens,
5204    WRITE_COUNT gets set to 2.
5205 
5206    The result of this is that whenever an insn attempts to write a register
5207    whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5208 
5209    If a predicate register is written by a floating-point insn, we set
5210    WRITTEN_BY_FP to true.
5211 
5212    If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5213    to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
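
/* Editorial example (hypothetical insn group, not from the original
   source), assuming p6/p7 form a complementary predicate pair:

	(p6) mov r8 = r9	WRITE_COUNT goes 0 -> 1, FIRST_PRED = p6
	(p7) mov r8 = r10	complement of p6, WRITE_COUNT goes 1 -> 2
	     mov r8 = r11	WRITE_COUNT already 2, a stop bit is needed  */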
5214 
5215 struct reg_write_state
5216 {
5217   unsigned int write_count : 2;
5218   unsigned int first_pred : 16;
5219   unsigned int written_by_fp : 1;
5220   unsigned int written_by_and : 1;
5221   unsigned int written_by_or : 1;
5222 };
5223 
5224 /* Cumulative info for the current instruction group.  */
5225 struct reg_write_state rws_sum[NUM_REGS];
5226 /* Info for the current instruction.  This gets copied to rws_sum after a
5227    stop bit is emitted.  */
5228 struct reg_write_state rws_insn[NUM_REGS];
5229 
5230 /* Indicates whether this is the first instruction after a stop bit,
5231    in which case we don't need another stop bit.  Without this,
5232    ia64_variable_issue will die when scheduling an alloc.  */
5233 static int first_instruction;
5234 
5235 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5236    RTL for one instruction.  */
5237 struct reg_flags
5238 {
5239   unsigned int is_write : 1;	/* Is register being written?  */
5240   unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
5241   unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
5242   unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
5243   unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
5244   unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
5245 };
5246 
5247 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
5248 static int rws_access_regno (int, struct reg_flags, int);
5249 static int rws_access_reg (rtx, struct reg_flags, int);
5250 static void update_set_flags (rtx, struct reg_flags *);
5251 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5252 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5253 static void init_insn_group_barriers (void);
5254 static int group_barrier_needed (rtx);
5255 static int safe_group_barrier_needed (rtx);
5256 
5257 /* Update *RWS for REGNO, which is being written by the current instruction,
5258    with predicate PRED, and associated register flags in FLAGS.  */
5259 
5260 static void
5261 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
5262 {
5263   if (pred)
5264     rws[regno].write_count++;
5265   else
5266     rws[regno].write_count = 2;
5267   rws[regno].written_by_fp |= flags.is_fp;
5268   /* ??? Not tracking and/or across differing predicates.  */
5269   rws[regno].written_by_and = flags.is_and;
5270   rws[regno].written_by_or = flags.is_or;
5271   rws[regno].first_pred = pred;
5272 }
5273 
5274 /* Handle an access to register REGNO of type FLAGS using predicate register
5275    PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
5276    a dependency with an earlier instruction in the same group.  */
5277 
5278 static int
5279 rws_access_regno (int regno, struct reg_flags flags, int pred)
5280 {
5281   int need_barrier = 0;
5282 
5283   gcc_assert (regno < NUM_REGS);
5284 
5285   if (! PR_REGNO_P (regno))
5286     flags.is_and = flags.is_or = 0;
5287 
5288   if (flags.is_write)
5289     {
5290       int write_count;
5291 
5292       /* One insn writes same reg multiple times?  */
5293       gcc_assert (!rws_insn[regno].write_count);
5294 
5295       /* Update info for current instruction.  */
5296       rws_update (rws_insn, regno, flags, pred);
5297       write_count = rws_sum[regno].write_count;
5298 
5299       switch (write_count)
5300 	{
5301 	case 0:
5302 	  /* The register has not been written yet.  */
5303 	  rws_update (rws_sum, regno, flags, pred);
5304 	  break;
5305 
5306 	case 1:
5307 	  /* The register has been written via a predicate.  If this is
5308 	     not a complementary predicate, then we need a barrier.  */
5309 	  /* ??? This assumes that P and P+1 are always complementary
5310 	     predicates for P even.  */
5311 	  if (flags.is_and && rws_sum[regno].written_by_and)
5312 	    ;
5313 	  else if (flags.is_or && rws_sum[regno].written_by_or)
5314 	    ;
5315 	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
5316 	    need_barrier = 1;
5317 	  rws_update (rws_sum, regno, flags, pred);
5318 	  break;
5319 
5320 	case 2:
5321 	  /* The register has been unconditionally written already.  We
5322 	     need a barrier.  */
5323 	  if (flags.is_and && rws_sum[regno].written_by_and)
5324 	    ;
5325 	  else if (flags.is_or && rws_sum[regno].written_by_or)
5326 	    ;
5327 	  else
5328 	    need_barrier = 1;
5329 	  rws_sum[regno].written_by_and = flags.is_and;
5330 	  rws_sum[regno].written_by_or = flags.is_or;
5331 	  break;
5332 
5333 	default:
5334 	  gcc_unreachable ();
5335 	}
5336     }
5337   else
5338     {
5339       if (flags.is_branch)
5340 	{
5341 	  /* Branches have several RAW exceptions that allow us to avoid
5342 	     barriers.  */
5343 
5344 	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5345 	    /* RAW dependencies on branch regs are permissible as long
5346 	       as the writer is a non-branch instruction.  Since we
5347 	       never generate code that uses a branch register written
5348 	       by a branch instruction, handling this case is
5349 	       easy.  */
5350 	    return 0;
5351 
5352 	  if (REGNO_REG_CLASS (regno) == PR_REGS
5353 	      && ! rws_sum[regno].written_by_fp)
5354 	    /* The predicates of a branch are available within the
5355 	       same insn group as long as the predicate was written by
5356 	       something other than a floating-point instruction.  */
5357 	    return 0;
5358 	}
5359 
5360       if (flags.is_and && rws_sum[regno].written_by_and)
5361 	return 0;
5362       if (flags.is_or && rws_sum[regno].written_by_or)
5363 	return 0;
5364 
5365       switch (rws_sum[regno].write_count)
5366 	{
5367 	case 0:
5368 	  /* The register has not been written yet.  */
5369 	  break;
5370 
5371 	case 1:
5372 	  /* The register has been written via a predicate.  If this is
5373 	     not a complementary predicate, then we need a barrier.  */
5374 	  /* ??? This assumes that P and P+1 are always complementary
5375 	     predicates for P even.  */
5376 	  if ((rws_sum[regno].first_pred ^ 1) != pred)
5377 	    need_barrier = 1;
5378 	  break;
5379 
5380 	case 2:
5381 	  /* The register has been unconditionally written already.  We
5382 	     need a barrier.  */
5383 	  need_barrier = 1;
5384 	  break;
5385 
5386 	default:
5387 	  gcc_unreachable ();
5388 	}
5389     }
5390 
5391   return need_barrier;
5392 }
5393 
5394 static int
5395 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5396 {
5397   int regno = REGNO (reg);
5398   int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5399 
5400   if (n == 1)
5401     return rws_access_regno (regno, flags, pred);
5402   else
5403     {
5404       int need_barrier = 0;
5405       while (--n >= 0)
5406 	need_barrier |= rws_access_regno (regno + n, flags, pred);
5407       return need_barrier;
5408     }
5409 }
5410 
5411 /* Examine X, which is a SET rtx, and update the flags stored in *PFLAGS
5412    accordingly.  */
5413 
5414 static void
5415 update_set_flags (rtx x, struct reg_flags *pflags)
5416 {
5417   rtx src = SET_SRC (x);
5418 
5419   switch (GET_CODE (src))
5420     {
5421     case CALL:
5422       return;
5423 
5424     case IF_THEN_ELSE:
5425       /* There are four cases here:
5426 	 (1) The destination is (pc), in which case this is a branch,
5427 	 nothing here applies.
5428 	 (2) The destination is ar.lc, in which case this is a
5429 	 doloop_end_internal,
5430 	 (3) The destination is an fp register, in which case this is
5431 	 an fselect instruction.
5432 	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5433 	 this is a check load.
5434 	 In all cases, nothing we do in this function applies.  */
5435       return;
5436 
5437     default:
5438       if (COMPARISON_P (src)
5439 	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5440 	/* Set pflags->is_fp to 1 so that we know we're dealing
5441 	   with a floating point comparison when processing the
5442 	   destination of the SET.  */
5443 	pflags->is_fp = 1;
5444 
5445       /* Discover if this is a parallel comparison.  We only handle
5446 	 and.orcm and or.andcm at present, since we must retain a
5447 	 strict inverse on the predicate pair.  */
5448       else if (GET_CODE (src) == AND)
5449 	pflags->is_and = 1;
5450       else if (GET_CODE (src) == IOR)
5451 	pflags->is_or = 1;
5452 
5453       break;
5454     }
5455 }
5456 
5457 /* Subroutine of rtx_needs_barrier; this function determines whether the
5458    source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
5459    are as in rtx_needs_barrier.  */
5461 
5462 static int
5463 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5464 {
5465   int need_barrier = 0;
5466   rtx dst;
5467   rtx src = SET_SRC (x);
5468 
5469   if (GET_CODE (src) == CALL)
5470     /* We don't need to worry about the result registers that
5471        get written by subroutine call.  */
5472     return rtx_needs_barrier (src, flags, pred);
5473   else if (SET_DEST (x) == pc_rtx)
5474     {
5475       /* X is a conditional branch.  */
5476       /* ??? This seems redundant, as the caller sets this bit for
5477 	 all JUMP_INSNs.  */
5478       if (!ia64_spec_check_src_p (src))
5479 	flags.is_branch = 1;
5480       return rtx_needs_barrier (src, flags, pred);
5481     }
5482 
5483   if (ia64_spec_check_src_p (src))
5484     /* Avoid checking one register twice (in the condition
5485        and in the 'then' section) for the ldc pattern.  */
5486     {
5487       gcc_assert (REG_P (XEXP (src, 2)));
5488       need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5489 
5490       /* We process MEM below.  */
5491       src = XEXP (src, 1);
5492     }
5493 
5494   need_barrier |= rtx_needs_barrier (src, flags, pred);
5495 
5496   dst = SET_DEST (x);
5497   if (GET_CODE (dst) == ZERO_EXTRACT)
5498     {
5499       need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5500       need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5501     }
5502   return need_barrier;
5503 }
5504 
5505 /* Handle an access to rtx X of type FLAGS using predicate register
5506    PRED.  Return 1 if this access creates a dependency with an earlier
5507    instruction in the same group.  */
5508 
5509 static int
5510 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5511 {
5512   int i, j;
5513   int is_complemented = 0;
5514   int need_barrier = 0;
5515   const char *format_ptr;
5516   struct reg_flags new_flags;
5517   rtx cond;
5518 
5519   if (! x)
5520     return 0;
5521 
5522   new_flags = flags;
5523 
5524   switch (GET_CODE (x))
5525     {
5526     case SET:
5527       update_set_flags (x, &new_flags);
5528       need_barrier = set_src_needs_barrier (x, new_flags, pred);
5529       if (GET_CODE (SET_SRC (x)) != CALL)
5530 	{
5531 	  new_flags.is_write = 1;
5532 	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5533 	}
5534       break;
5535 
5536     case CALL:
5537       new_flags.is_write = 0;
5538       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5539 
5540       /* Avoid multiple register writes, in case this is a pattern with
5541 	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
5542       if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5543 	{
5544 	  new_flags.is_write = 1;
5545 	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5546 	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5547 	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5548 	}
5549       break;
5550 
5551     case COND_EXEC:
5552       /* X is a predicated instruction.  */
5553 
5554       cond = COND_EXEC_TEST (x);
5555       gcc_assert (!pred);
5556       need_barrier = rtx_needs_barrier (cond, flags, 0);
5557 
5558       if (GET_CODE (cond) == EQ)
5559 	is_complemented = 1;
5560       cond = XEXP (cond, 0);
5561       gcc_assert (GET_CODE (cond) == REG
5562 		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5563       pred = REGNO (cond);
5564       if (is_complemented)
5565 	++pred;
5566 
5567       need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5568       return need_barrier;
5569 
5570     case CLOBBER:
5571     case USE:
5572       /* CLOBBER and USE are for earlier compiler phases only.  */
5573       break;
5574 
5575     case ASM_OPERANDS:
5576     case ASM_INPUT:
5577       /* We always emit stop bits for traditional asms.  We emit stop bits
5578 	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
5579       if (GET_CODE (x) != ASM_OPERANDS
5580 	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5581 	{
5582 	  /* Avoid writing the register multiple times if we have multiple
5583 	     asm outputs.  This avoids a failure in rws_access_reg.  */
5584 	  if (! rws_insn[REG_VOLATILE].write_count)
5585 	    {
5586 	      new_flags.is_write = 1;
5587 	      rws_access_regno (REG_VOLATILE, new_flags, pred);
5588 	    }
5589 	  return 1;
5590 	}
5591 
5592       /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5593 	 We cannot just fall through here since then we would be confused
5594 	 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5595 	 a traditional asm, unlike its normal usage.  */
5596 
5597       for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5598 	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5599 	  need_barrier = 1;
5600       break;
5601 
5602     case PARALLEL:
5603       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5604 	{
5605 	  rtx pat = XVECEXP (x, 0, i);
5606 	  switch (GET_CODE (pat))
5607 	    {
5608 	    case SET:
5609 	      update_set_flags (pat, &new_flags);
5610 	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5611 	      break;
5612 
5613 	    case USE:
5614 	    case CALL:
5615 	    case ASM_OPERANDS:
5616 	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
5617 	      break;
5618 
5619 	    case CLOBBER:
5620 	    case RETURN:
5621 	      break;
5622 
5623 	    default:
5624 	      gcc_unreachable ();
5625 	    }
5626 	}
5627       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5628 	{
5629 	  rtx pat = XVECEXP (x, 0, i);
5630 	  if (GET_CODE (pat) == SET)
5631 	    {
5632 	      if (GET_CODE (SET_SRC (pat)) != CALL)
5633 		{
5634 		  new_flags.is_write = 1;
5635 		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5636 						     pred);
5637 		}
5638 	    }
5639 	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5640 	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
5641 	}
5642       break;
5643 
5644     case SUBREG:
5645       need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5646       break;
5647     case REG:
5648       if (REGNO (x) == AR_UNAT_REGNUM)
5649 	{
5650 	  for (i = 0; i < 64; ++i)
5651 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5652 	}
5653       else
5654 	need_barrier = rws_access_reg (x, flags, pred);
5655       break;
5656 
5657     case MEM:
5658       /* Find the regs used in memory address computation.  */
5659       new_flags.is_write = 0;
5660       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5661       break;
5662 
5663     case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
5664     case SYMBOL_REF:  case LABEL_REF:     case CONST:
5665       break;
5666 
5667       /* Operators with side-effects.  */
5668     case POST_INC:    case POST_DEC:
5669       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5670 
5671       new_flags.is_write = 0;
5672       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5673       new_flags.is_write = 1;
5674       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5675       break;
5676 
5677     case POST_MODIFY:
5678       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5679 
5680       new_flags.is_write = 0;
5681       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5682       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5683       new_flags.is_write = 1;
5684       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5685       break;
5686 
5687       /* Handle common unary and binary ops for efficiency.  */
5688     case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
5689     case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
5690     case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
5691     case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
5692     case NE:       case EQ:      case GE:      case GT:        case LE:
5693     case LT:       case GEU:     case GTU:     case LEU:       case LTU:
5694       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5695       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5696       break;
5697 
5698     case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
5699     case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
5700     case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
5701     case SQRT:     case FFS:		case POPCOUNT:
5702       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5703       break;
5704 
5705     case VEC_SELECT:
5706       /* VEC_SELECT's second argument is a PARALLEL with integers that
5707 	 describe the elements selected.  On ia64, those integers are
5708 	 always constants.  Avoid walking the PARALLEL so that we don't
5709 	 get confused with "normal" parallels and then die.  */
5710       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5711       break;
5712 
5713     case UNSPEC:
5714       switch (XINT (x, 1))
5715 	{
5716 	case UNSPEC_LTOFF_DTPMOD:
5717 	case UNSPEC_LTOFF_DTPREL:
5718 	case UNSPEC_DTPREL:
5719 	case UNSPEC_LTOFF_TPREL:
5720 	case UNSPEC_TPREL:
5721 	case UNSPEC_PRED_REL_MUTEX:
5722 	case UNSPEC_PIC_CALL:
5723         case UNSPEC_MF:
5724         case UNSPEC_FETCHADD_ACQ:
5725 	case UNSPEC_BSP_VALUE:
5726 	case UNSPEC_FLUSHRS:
5727 	case UNSPEC_BUNDLE_SELECTOR:
5728           break;
5729 
5730 	case UNSPEC_GR_SPILL:
5731 	case UNSPEC_GR_RESTORE:
5732 	  {
5733 	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5734 	    HOST_WIDE_INT bit = (offset >> 3) & 63;
5735 
5736 	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5737 	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5738 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5739 					      new_flags, pred);
5740 	    break;
5741 	  }
5742 
5743 	case UNSPEC_FR_SPILL:
5744 	case UNSPEC_FR_RESTORE:
5745 	case UNSPEC_GETF_EXP:
5746 	case UNSPEC_SETF_EXP:
5747         case UNSPEC_ADDP4:
5748 	case UNSPEC_FR_SQRT_RECIP_APPROX:
5749 	case UNSPEC_LDA:
5750 	case UNSPEC_LDS:
5751 	case UNSPEC_LDSA:
5752 	case UNSPEC_CHKACLR:
5753         case UNSPEC_CHKS:
5754 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5755 	  break;
5756 
5757 	case UNSPEC_FR_RECIP_APPROX:
5758 	case UNSPEC_SHRP:
5759 	case UNSPEC_COPYSIGN:
5760 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5761 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5762 	  break;
5763 
5764         case UNSPEC_CMPXCHG_ACQ:
5765 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5766 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5767 	  break;
5768 
5769 	default:
5770 	  gcc_unreachable ();
5771 	}
5772       break;
5773 
5774     case UNSPEC_VOLATILE:
5775       switch (XINT (x, 1))
5776 	{
5777 	case UNSPECV_ALLOC:
5778 	  /* Alloc must always be the first instruction of a group.
5779 	     We force this by always returning true.  */
5780 	  /* ??? We might get better scheduling if we explicitly check for
5781 	     input/local/output register dependencies, and modify the
5782 	     scheduler so that alloc is always reordered to the start of
5783 	     the current group.  We could then eliminate all of the
5784 	     first_instruction code.  */
5785 	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
5786 
5787 	  new_flags.is_write = 1;
5788 	  rws_access_regno (REG_AR_CFM, new_flags, pred);
5789 	  return 1;
5790 
5791 	case UNSPECV_SET_BSP:
5792 	  need_barrier = 1;
5793           break;
5794 
5795 	case UNSPECV_BLOCKAGE:
5796 	case UNSPECV_INSN_GROUP_BARRIER:
5797 	case UNSPECV_BREAK:
5798 	case UNSPECV_PSAC_ALL:
5799 	case UNSPECV_PSAC_NORMAL:
5800 	  return 0;
5801 
5802 	default:
5803 	  gcc_unreachable ();
5804 	}
5805       break;
5806 
5807     case RETURN:
5808       new_flags.is_write = 0;
5809       need_barrier  = rws_access_regno (REG_RP, flags, pred);
5810       need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5811 
5812       new_flags.is_write = 1;
5813       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5814       need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5815       break;
5816 
5817     default:
5818       format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5819       for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5820 	switch (format_ptr[i])
5821 	  {
5822 	  case '0':	/* unused field */
5823 	  case 'i':	/* integer */
5824 	  case 'n':	/* note */
5825 	  case 'w':	/* wide integer */
5826 	  case 's':	/* pointer to string */
5827 	  case 'S':	/* optional pointer to string */
5828 	    break;
5829 
5830 	  case 'e':
5831 	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5832 	      need_barrier = 1;
5833 	    break;
5834 
5835 	  case 'E':
5836 	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5837 	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5838 		need_barrier = 1;
5839 	    break;
5840 
5841 	  default:
5842 	    gcc_unreachable ();
5843 	  }
5844       break;
5845     }
5846   return need_barrier;
5847 }
5848 
5849 /* Clear out the state for group_barrier_needed at the start of a
5850    sequence of insns.  */
5851 
5852 static void
5853 init_insn_group_barriers (void)
5854 {
5855   memset (rws_sum, 0, sizeof (rws_sum));
5856   first_instruction = 1;
5857 }
5858 
5859 /* Given the current state, determine whether a group barrier (a stop bit) is
5860    necessary before INSN.  Return nonzero if so.  This modifies the state to
5861    include the effects of INSN as a side-effect.  */
5862 
5863 static int
5864 group_barrier_needed (rtx insn)
5865 {
5866   rtx pat;
5867   int need_barrier = 0;
5868   struct reg_flags flags;
5869 
5870   memset (&flags, 0, sizeof (flags));
5871   switch (GET_CODE (insn))
5872     {
5873     case NOTE:
5874       break;
5875 
5876     case BARRIER:
5877       /* A barrier doesn't imply an instruction group boundary.  */
5878       break;
5879 
5880     case CODE_LABEL:
5881       memset (rws_insn, 0, sizeof (rws_insn));
5882       return 1;
5883 
5884     case CALL_INSN:
5885       flags.is_branch = 1;
5886       flags.is_sibcall = SIBLING_CALL_P (insn);
5887       memset (rws_insn, 0, sizeof (rws_insn));
5888 
5889       /* Don't bundle a call following another call.  */
5890       if ((pat = prev_active_insn (insn))
5891 	  && GET_CODE (pat) == CALL_INSN)
5892 	{
5893 	  need_barrier = 1;
5894 	  break;
5895 	}
5896 
5897       need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5898       break;
5899 
5900     case JUMP_INSN:
5901       if (!ia64_spec_check_p (insn))
5902 	flags.is_branch = 1;
5903 
5904       /* Don't bundle a jump following a call.  */
5905       if ((pat = prev_active_insn (insn))
5906 	  && GET_CODE (pat) == CALL_INSN)
5907 	{
5908 	  need_barrier = 1;
5909 	  break;
5910 	}
5911       /* FALLTHRU */
5912 
5913     case INSN:
5914       if (GET_CODE (PATTERN (insn)) == USE
5915 	  || GET_CODE (PATTERN (insn)) == CLOBBER)
5916 	/* Don't care about USE and CLOBBER "insns"---those are used to
5917 	   indicate to the optimizer that it shouldn't get rid of
5918 	   certain operations.  */
5919 	break;
5920 
5921       pat = PATTERN (insn);
5922 
5923       /* Ug.  Hack hacks hacked elsewhere.  */
5924       switch (recog_memoized (insn))
5925 	{
5926 	  /* We play dependency tricks with the epilogue in order
5927 	     to get proper schedules.  Undo this for dv analysis.  */
5928 	case CODE_FOR_epilogue_deallocate_stack:
5929 	case CODE_FOR_prologue_allocate_stack:
5930 	  pat = XVECEXP (pat, 0, 0);
5931 	  break;
5932 
5933 	  /* The pattern we use for br.cloop confuses the code above.
5934 	     The second element of the vector is representative.  */
5935 	case CODE_FOR_doloop_end_internal:
5936 	  pat = XVECEXP (pat, 0, 1);
5937 	  break;
5938 
5939 	  /* Doesn't generate code.  */
5940 	case CODE_FOR_pred_rel_mutex:
5941 	case CODE_FOR_prologue_use:
5942 	  return 0;
5943 
5944 	default:
5945 	  break;
5946 	}
5947 
5948       memset (rws_insn, 0, sizeof (rws_insn));
5949       need_barrier = rtx_needs_barrier (pat, flags, 0);
5950 
5951       /* Check to see if the previous instruction was a volatile
5952 	 asm.  */
5953       if (! need_barrier)
5954 	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5955       break;
5956 
5957     default:
5958       gcc_unreachable ();
5959     }
5960 
5961   if (first_instruction && INSN_P (insn)
5962       && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5963       && GET_CODE (PATTERN (insn)) != USE
5964       && GET_CODE (PATTERN (insn)) != CLOBBER)
5965     {
5966       need_barrier = 0;
5967       first_instruction = 0;
5968     }
5969 
5970   return need_barrier;
5971 }
5972 
5973 /* Like group_barrier_needed, but do not clobber the current state.  */
5974 
5975 static int
5976 safe_group_barrier_needed (rtx insn)
5977 {
5978   struct reg_write_state rws_saved[NUM_REGS];
5979   int saved_first_instruction;
5980   int t;
5981 
5982   memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5983   saved_first_instruction = first_instruction;
5984 
5985   t = group_barrier_needed (insn);
5986 
5987   memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5988   first_instruction = saved_first_instruction;
5989 
5990   return t;
5991 }
5992 
5993 /* Scan the current function and insert stop bits as necessary to
5994    eliminate dependencies.  This function assumes that a final
5995    instruction scheduling pass has been run which has already
5996    inserted most of the necessary stop bits.  This function only
5997    inserts new ones at basic block boundaries, since these are
5998    invisible to the scheduler.  */
5999 
6000 static void
6001 emit_insn_group_barriers (FILE *dump)
6002 {
6003   rtx insn;
6004   rtx last_label = 0;
6005   int insns_since_last_label = 0;
6006 
6007   init_insn_group_barriers ();
6008 
6009   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6010     {
6011       if (GET_CODE (insn) == CODE_LABEL)
6012 	{
6013 	  if (insns_since_last_label)
6014 	    last_label = insn;
6015 	  insns_since_last_label = 0;
6016 	}
6017       else if (GET_CODE (insn) == NOTE
6018 	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
6019 	{
6020 	  if (insns_since_last_label)
6021 	    last_label = insn;
6022 	  insns_since_last_label = 0;
6023 	}
6024       else if (GET_CODE (insn) == INSN
6025 	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6026 	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6027 	{
6028 	  init_insn_group_barriers ();
6029 	  last_label = 0;
6030 	}
6031       else if (INSN_P (insn))
6032 	{
6033 	  insns_since_last_label = 1;
6034 
6035 	  if (group_barrier_needed (insn))
6036 	    {
6037 	      if (last_label)
6038 		{
6039 		  if (dump)
6040 		    fprintf (dump, "Emitting stop before label %d\n",
6041 			     INSN_UID (last_label));
6042 		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6043 		  insn = last_label;
6044 
6045 		  init_insn_group_barriers ();
6046 		  last_label = 0;
6047 		}
6048 	    }
6049 	}
6050     }
6051 }
6052 
6053 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6054    This function has to emit all necessary group barriers.  */
6055 
6056 static void
6057 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6058 {
6059   rtx insn;
6060 
6061   init_insn_group_barriers ();
6062 
6063   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6064     {
6065       if (GET_CODE (insn) == BARRIER)
6066 	{
6067 	  rtx last = prev_active_insn (insn);
6068 
6069 	  if (! last)
6070 	    continue;
6071 	  if (GET_CODE (last) == JUMP_INSN
6072 	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6073 	    last = prev_active_insn (last);
6074 	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6075 	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6076 
6077 	  init_insn_group_barriers ();
6078 	}
6079       else if (INSN_P (insn))
6080 	{
6081 	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6082 	    init_insn_group_barriers ();
6083 	  else if (group_barrier_needed (insn))
6084 	    {
6085 	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6086 	      init_insn_group_barriers ();
6087 	      group_barrier_needed (insn);
6088 	    }
6089 	}
6090     }
6091 }
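
/* Editorial example (hypothetical, not from the original source): a stop
   bit emitted via gen_insn_group_barrier shows up in the assembly output
   as ";;", e.g.

	add r14 = r32, r33
	;;
	ld8 r15 = [r14]

   keeping the address computation and the dependent load in different
   instruction groups.  */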
6092 
6093 
6094 
6095 /* Instruction scheduling support.  */
6096 
6097 #define NR_BUNDLES 10
6098 
6099 /* A list of names of all available bundles.  */
6100 
6101 static const char *bundle_name [NR_BUNDLES] =
6102 {
6103   ".mii",
6104   ".mmi",
6105   ".mfi",
6106   ".mmf",
6107 #if NR_BUNDLES == 10
6108   ".bbb",
6109   ".mbb",
6110 #endif
6111   ".mib",
6112   ".mmb",
6113   ".mfb",
6114   ".mlx"
6115 };
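
/* Editorial example (hypothetical, not from the original source): an
   ".mfi" bundle in the emitted assembly might look like

	{ .mfi
	  ld8 r14 = [r32]
	  fma.s0 f6 = f7, f8, f9
	  add r15 = r16, r17
	}

   with one M, one F and one I slot filled.  */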
6116 
6117 /* Nonzero if we should insert stop bits into the schedule.  */
6118 
6119 int ia64_final_schedule = 0;
6120 
6121 /* Codes of the corresponding queried units: */
6122 
6123 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6124 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6125 
6126 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6127 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6128 
6129 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6130 
6131 /* The following variable value is an insn group barrier.  */
6132 
6133 static rtx dfa_stop_insn;
6134 
6135 /* The following variable value is the last issued insn.  */
6136 
6137 static rtx last_scheduled_insn;
6138 
6139 /* The following variable value is the size of the DFA state.  */
6140 
6141 static size_t dfa_state_size;
6142 
6143 /* The following variable value is a pointer to a DFA state used as
6144    a temporary variable.  */
6145 
6146 static state_t temp_dfa_state = NULL;
6147 
6148 /* The following variable value is the DFA state after issuing the last
6149    insn.  */
6150 
6151 static state_t prev_cycle_state = NULL;
6152 
6153 /* The following array element values are TRUE if the corresponding
6154    insn requires stop bits to be added before it.  */
6155 
6156 static char *stops_p = NULL;
6157 
6158 /* The following array element values are ZERO for non-speculative
6159    instructions and hold the corresponding speculation check number for
6160    speculative instructions.  */
6161 static int *spec_check_no = NULL;
6162 
6163 /* Size of spec_check_no array.  */
6164 static int max_uid = 0;
6165 
6166 /* The following variable is used to set up the array mentioned above.  */
6167 
6168 static int stop_before_p = 0;
6169 
6170 /* The following variable value is the length of the arrays `clocks' and
6171    `add_cycles'. */
6172 
6173 static int clocks_length;
6174 
6175 /* The following array element values are cycles on which the
6176    corresponding insn will be issued.  The array is used only for
6177    Itanium1.  */
6178 
6179 static int *clocks;
6180 
6181 /* The following array element values are the numbers of cycles that should
6182    be added to improve insn scheduling for MM insns on Itanium1.  */
6183 
6184 static int *add_cycles;
6185 
6186 /* The following variable value is the number of data speculations in progress.  */
6187 static int pending_data_specs = 0;
6188 
6189 static rtx ia64_single_set (rtx);
6190 static void ia64_emit_insn_before (rtx, rtx);
6191 
6192 /* Map a bundle number to its pseudo-op.  */
6193 
6194 const char *
6195 get_bundle_name (int b)
6196 {
6197   return bundle_name[b];
6198 }
6199 
6200 
6201 /* Return the maximum number of instructions a cpu can issue.  */
6202 
6203 static int
6204 ia64_issue_rate (void)
6205 {
6206   return 6;
6207 }
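
/* Editorial note (not from the original source): an issue rate of 6
   corresponds to the two three-slot bundles an Itanium core can issue
   per clock.  */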
6208 
6209 /* Helper function - like single_set, but look inside COND_EXEC.  */
6210 
6211 static rtx
6212 ia64_single_set (rtx insn)
6213 {
6214   rtx x = PATTERN (insn), ret;
6215   if (GET_CODE (x) == COND_EXEC)
6216     x = COND_EXEC_CODE (x);
6217   if (GET_CODE (x) == SET)
6218     return x;
6219 
6220   /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
6221      Although they are not classical single sets, the second set is there just
6222      to keep the insn from being moved past FP-relative stack accesses.  */
6223   switch (recog_memoized (insn))
6224     {
6225     case CODE_FOR_prologue_allocate_stack:
6226     case CODE_FOR_epilogue_deallocate_stack:
6227       ret = XVECEXP (x, 0, 0);
6228       break;
6229 
6230     default:
6231       ret = single_set_2 (insn, x);
6232       break;
6233     }
6234 
6235   return ret;
6236 }
6237 
6238 /* Adjust the cost of a scheduling dependency.
6239    Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
6240    COST is the current cost.  */
6241 
6242 static int
6243 ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost)
6244 {
6245   enum reg_note dep_type = (enum reg_note) dep_type1;
6246   enum attr_itanium_class dep_class;
6247   enum attr_itanium_class insn_class;
6248 
6249   if (dep_type != REG_DEP_OUTPUT)
6250     return cost;
6251 
6252   insn_class = ia64_safe_itanium_class (insn);
6253   dep_class = ia64_safe_itanium_class (dep_insn);
6254   if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6255       || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6256     return 0;
6257 
6258   return cost;
6259 }
6260 
6261 /* Like emit_insn_before, but skip cycle_display notes.
6262    ??? When cycle display notes are implemented, update this.  */
6263 
6264 static void
6265 ia64_emit_insn_before (rtx insn, rtx before)
6266 {
6267   emit_insn_before (insn, before);
6268 }
6269 
6270 /* The following function marks insns that produce addresses for load
6271    and store insns.  Such insns will be placed into M slots because this
6272    decreases latency for Itanium1 (see function
6273    `ia64_produce_address_p' and the DFA descriptions).  */
6274 
6275 static void
6276 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6277 {
6278   rtx insn, link, next, next_tail;
6279 
6280   /* Before reload, which_alternative is not set, which means that
6281      ia64_safe_itanium_class will produce wrong results for (at least)
6282      move instructions.  */
6283   if (!reload_completed)
6284     return;
6285 
6286   next_tail = NEXT_INSN (tail);
6287   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6288     if (INSN_P (insn))
6289       insn->call = 0;
6290   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6291     if (INSN_P (insn)
6292 	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6293       {
6294 	for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6295 	  {
6296 	    enum attr_itanium_class c;
6297 
6298 	    if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
6299 	      continue;
6300 	    next = XEXP (link, 0);
6301 	    c = ia64_safe_itanium_class (next);
6302 	    if ((c == ITANIUM_CLASS_ST
6303 		 || c == ITANIUM_CLASS_STF)
6304 		&& ia64_st_address_bypass_p (insn, next))
6305 	      break;
6306 	    else if ((c == ITANIUM_CLASS_LD
6307 		      || c == ITANIUM_CLASS_FLD
6308 		      || c == ITANIUM_CLASS_FLDP)
6309 		     && ia64_ld_address_bypass_p (insn, next))
6310 	      break;
6311 	  }
6312 	insn->call = link != 0;
6313       }
6314 }
6315 
6316 /* We're beginning a new block.  Initialize data structures as necessary.  */
6317 
6318 static void
6319 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6320 		 int sched_verbose ATTRIBUTE_UNUSED,
6321 		 int max_ready ATTRIBUTE_UNUSED)
6322 {
6323 #ifdef ENABLE_CHECKING
6324   rtx insn;
6325 
6326   if (reload_completed)
6327     for (insn = NEXT_INSN (current_sched_info->prev_head);
6328 	 insn != current_sched_info->next_tail;
6329 	 insn = NEXT_INSN (insn))
6330       gcc_assert (!SCHED_GROUP_P (insn));
6331 #endif
6332   last_scheduled_insn = NULL_RTX;
6333   init_insn_group_barriers ();
6334 }
6335 
6336 /* We're beginning a scheduling pass.  Check assertion.  */
6337 
6338 static void
6339 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6340                         int sched_verbose ATTRIBUTE_UNUSED,
6341                         int max_ready ATTRIBUTE_UNUSED)
6342 {
6343   gcc_assert (!pending_data_specs);
6344 }
6345 
6346 /* The scheduling pass is now finished.  Free/reset the static variables.  */
6347 static void
6348 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6349 			  int sched_verbose ATTRIBUTE_UNUSED)
6350 {
6351   free (spec_check_no);
6352   spec_check_no = 0;
6353   max_uid = 0;
6354 }
6355 
6356 /* We are about to begin issuing insns for this clock cycle.
6357    Override the default sort algorithm to better slot instructions.  */
6358 
6359 static int
6360 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6361 			int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6362 			int reorder_type)
6363 {
6364   int n_asms;
6365   int n_ready = *pn_ready;
6366   rtx *e_ready = ready + n_ready;
6367   rtx *insnp;
6368 
6369   if (sched_verbose)
6370     fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6371 
6372   if (reorder_type == 0)
6373     {
6374       /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6375       n_asms = 0;
6376       for (insnp = ready; insnp < e_ready; insnp++)
6377 	if (insnp < e_ready)
6378 	  {
6379 	    rtx insn = *insnp;
6380 	    enum attr_type t = ia64_safe_type (insn);
6381 	    if (t == TYPE_UNKNOWN)
6382 	      {
6383 		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6384 		    || asm_noperands (PATTERN (insn)) >= 0)
6385 		  {
6386 		    rtx lowest = ready[n_asms];
6387 		    ready[n_asms] = insn;
6388 		    *insnp = lowest;
6389 		    n_asms++;
6390 		  }
6391 		else
6392 		  {
6393 		    rtx highest = ready[n_ready - 1];
6394 		    ready[n_ready - 1] = insn;
6395 		    *insnp = highest;
6396 		    return 1;
6397 		  }
6398 	      }
6399 	  }
6400 
6401       if (n_asms < n_ready)
6402 	{
6403 	  /* Some normal insns to process.  Skip the asms.  */
6404 	  ready += n_asms;
6405 	  n_ready -= n_asms;
6406 	}
6407       else if (n_ready > 0)
6408 	return 1;
6409     }
6410 
6411   if (ia64_final_schedule)
6412     {
6413       int deleted = 0;
6414       int nr_need_stop = 0;
6415 
6416       for (insnp = ready; insnp < e_ready; insnp++)
6417 	if (safe_group_barrier_needed (*insnp))
6418 	  nr_need_stop++;
6419 
6420       if (reorder_type == 1 && n_ready == nr_need_stop)
6421 	return 0;
6422       if (reorder_type == 0)
6423 	return 1;
6424       insnp = e_ready;
6425       /* Move down everything that needs a stop bit, preserving
6426 	 relative order.  */
6427       while (insnp-- > ready + deleted)
6428 	while (insnp >= ready + deleted)
6429 	  {
6430 	    rtx insn = *insnp;
6431 	    if (! safe_group_barrier_needed (insn))
6432 	      break;
6433 	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6434 	    *ready = insn;
6435 	    deleted++;
6436 	  }
6437       n_ready -= deleted;
6438       ready += deleted;
6439     }
6440 
6441   return 1;
6442 }
6443 
6444 /* We are about to begin issuing insns for this clock cycle.  Override
6445    the default sort algorithm to better slot instructions.  */
6446 
6447 static int
6448 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6449 		    int clock_var)
6450 {
6451   return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6452 				 pn_ready, clock_var, 0);
6453 }
6454 
6455 /* Like ia64_sched_reorder, but called after issuing each insn.
6456    Override the default sort algorithm to better slot instructions.  */
6457 
6458 static int
6459 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6460 		     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6461 		     int *pn_ready, int clock_var)
6462 {
6463   if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6464     clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6465   return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6466 				 clock_var, 1);
6467 }
6468 
6469 /* We are about to issue INSN.  Return the number of insns left on the
6470    ready queue that can be issued this cycle.  */
6471 
6472 static int
6473 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6474 		     int sched_verbose ATTRIBUTE_UNUSED,
6475 		     rtx insn ATTRIBUTE_UNUSED,
6476 		     int can_issue_more ATTRIBUTE_UNUSED)
6477 {
6478   if (current_sched_info->flags & DO_SPECULATION)
6479     /* Modulo scheduling does not extend h_i_d when emitting
6480        new instructions.  Deal with it.  */
6481     {
6482       if (DONE_SPEC (insn) & BEGIN_DATA)
6483 	pending_data_specs++;
6484       if (CHECK_SPEC (insn) & BEGIN_DATA)
6485 	pending_data_specs--;
6486     }
6487 
6488   last_scheduled_insn = insn;
6489   memcpy (prev_cycle_state, curr_state, dfa_state_size);
6490   if (reload_completed)
6491     {
6492       int needed = group_barrier_needed (insn);
6493 
6494       gcc_assert (!needed);
6495       if (GET_CODE (insn) == CALL_INSN)
6496 	init_insn_group_barriers ();
6497       stops_p [INSN_UID (insn)] = stop_before_p;
6498       stop_before_p = 0;
6499     }
6500   return 1;
6501 }
6502 
6503 /* We are choosing insn from the ready queue.  Return nonzero if INSN
6504    can be chosen.  */
6505 
6506 static int
6507 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6508 {
6509   gcc_assert (insn  && INSN_P (insn));
6510   return ((!reload_completed
6511 	   || !safe_group_barrier_needed (insn))
6512 	  && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn));
6513 }
6514 
6515 /* We are choosing insn from the ready queue.  Return nonzero if INSN
6516    can be chosen.  */
6517 
6518 static bool
6519 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx insn)
6520 {
6521   gcc_assert (insn  && INSN_P (insn));
6522   /* The size of the ALAT is 32.  Since we perform conservative data
6523      speculation, we keep the ALAT half-empty.  */
6524   return (pending_data_specs < 16
6525 	  || !(TODO_SPEC (insn) & BEGIN_DATA));
6526 }
6527 
6528 /* The following variable value is a pseudo-insn used by the DFA insn
6529    scheduler to change the DFA state when the simulated clock is
6530    increased.  */
6531 
6532 static rtx dfa_pre_cycle_insn;
6533 
6534 /* We are about to begin issuing INSN.  Return nonzero if we cannot
6535    issue it on the given cycle CLOCK and return zero if we should not sort
6536    the ready queue on the next clock start.  */
6537 
6538 static int
6539 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6540 		    int clock, int *sort_p)
6541 {
6542   int setup_clocks_p = FALSE;
6543 
6544   gcc_assert (insn && INSN_P (insn));
6545   if ((reload_completed && safe_group_barrier_needed (insn))
6546       || (last_scheduled_insn
6547 	  && (GET_CODE (last_scheduled_insn) == CALL_INSN
6548 	      || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6549 	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6550     {
6551       init_insn_group_barriers ();
6552       if (verbose && dump)
6553 	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
6554 		 last_clock == clock ? " + cycle advance" : "");
6555       stop_before_p = 1;
6556       if (last_clock == clock)
6557 	{
6558 	  state_transition (curr_state, dfa_stop_insn);
6559 	  if (TARGET_EARLY_STOP_BITS)
6560 	    *sort_p = (last_scheduled_insn == NULL_RTX
6561 		       || GET_CODE (last_scheduled_insn) != CALL_INSN);
6562 	  else
6563 	    *sort_p = 0;
6564 	  return 1;
6565 	}
6566       else if (reload_completed)
6567 	setup_clocks_p = TRUE;
6568       if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6569 	  || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6570 	state_reset (curr_state);
6571       else
6572 	{
6573 	  memcpy (curr_state, prev_cycle_state, dfa_state_size);
6574 	  state_transition (curr_state, dfa_stop_insn);
6575 	  state_transition (curr_state, dfa_pre_cycle_insn);
6576 	  state_transition (curr_state, NULL);
6577 	}
6578     }
6579   else if (reload_completed)
6580     setup_clocks_p = TRUE;
6581   if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6582       && GET_CODE (PATTERN (insn)) != ASM_INPUT
6583       && asm_noperands (PATTERN (insn)) < 0)
6584     {
6585       enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6586 
6587       if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6588 	{
6589 	  rtx link;
6590 	  int d = -1;
6591 
6592 	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6593 	    if (REG_NOTE_KIND (link) == 0)
6594 	      {
6595 		enum attr_itanium_class dep_class;
6596 		rtx dep_insn = XEXP (link, 0);
6597 
6598 		dep_class = ia64_safe_itanium_class (dep_insn);
6599 		if ((dep_class == ITANIUM_CLASS_MMMUL
6600 		     || dep_class == ITANIUM_CLASS_MMSHF)
6601 		    && last_clock - clocks [INSN_UID (dep_insn)] < 4
6602 		    && (d < 0
6603 			|| last_clock - clocks [INSN_UID (dep_insn)] < d))
6604 		  d = last_clock - clocks [INSN_UID (dep_insn)];
6605 	      }
6606 	  if (d >= 0)
6607 	    add_cycles [INSN_UID (insn)] = 3 - d;
6608 	}
6609     }
6610   return 0;
6611 }
6612 
6613 /* Implement targetm.sched.h_i_d_extended hook.
6614    Extend internal data structures.  */
6615 static void
6616 ia64_h_i_d_extended (void)
6617 {
6618   if (current_sched_info->flags & DO_SPECULATION)
6619     {
6620       int new_max_uid = get_max_uid () + 1;
6621 
6622       spec_check_no = xrecalloc (spec_check_no, new_max_uid,
6623 				 max_uid, sizeof (*spec_check_no));
6624       max_uid = new_max_uid;
6625     }
6626 
6627   if (stops_p != NULL)
6628     {
6629       int new_clocks_length = get_max_uid () + 1;
6630 
6631       stops_p = xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
6632 
6633       if (ia64_tune == PROCESSOR_ITANIUM)
6634 	{
6635 	  clocks = xrecalloc (clocks, new_clocks_length, clocks_length,
6636 			      sizeof (int));
6637 	  add_cycles = xrecalloc (add_cycles, new_clocks_length, clocks_length,
6638 				  sizeof (int));
6639 	}
6640 
6641       clocks_length = new_clocks_length;
6642     }
6643 }
6644 
6645 /* Constants that help mapping 'enum machine_mode' to int.  */
6646 enum SPEC_MODES
6647   {
6648     SPEC_MODE_INVALID = -1,
6649     SPEC_MODE_FIRST = 0,
6650     SPEC_MODE_FOR_EXTEND_FIRST = 1,
6651     SPEC_MODE_FOR_EXTEND_LAST = 3,
6652     SPEC_MODE_LAST = 8
6653   };
6654 
6655 /* Return index of the MODE.  */
6656 static int
6657 ia64_mode_to_int (enum machine_mode mode)
6658 {
6659   switch (mode)
6660     {
6661     case BImode: return 0; /* SPEC_MODE_FIRST  */
6662     case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
6663     case HImode: return 2;
6664     case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
6665     case DImode: return 4;
6666     case SFmode: return 5;
6667     case DFmode: return 6;
6668     case XFmode: return 7;
6669     case TImode:
6670       /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
6671 	 mentioned in itanium[12].md.  Predicate fp_register_operand also
6672 	 needs to be defined.  Bottom line: better disable for now.  */
6673       return SPEC_MODE_INVALID;
6674     default:     return SPEC_MODE_INVALID;
6675     }
6676 }
6677 
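/* For illustration only -- a minimal sketch (a hypothetical helper, not
   used anywhere in this file) showing how the SPEC_MODES constants above
   relate to the mapping: only modes inside [SPEC_MODE_FOR_EXTEND_FIRST,
   SPEC_MODE_FOR_EXTEND_LAST] get zero-extending speculative loads (see
   ia64_speculate_insn below).  */
#if 0
static void
ia64_mode_to_int_example (void)
{
  /* HImode maps to 2, which lies inside the "extend" range.  */
  gcc_assert (SPEC_MODE_FOR_EXTEND_FIRST <= ia64_mode_to_int (HImode)
	      && ia64_mode_to_int (HImode) <= SPEC_MODE_FOR_EXTEND_LAST);
  /* XFmode maps to 7; only a plain (non-extending) speculative load
     is possible for it.  */
  gcc_assert (ia64_mode_to_int (XFmode) > SPEC_MODE_FOR_EXTEND_LAST);
  /* TImode is currently rejected altogether.  */
  gcc_assert (ia64_mode_to_int (TImode) == SPEC_MODE_INVALID);
}
#endif
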
6678 /* Provide information about speculation capabilities.  */
6679 static void
6680 ia64_set_sched_flags (spec_info_t spec_info)
6681 {
6682   unsigned int *flags = &(current_sched_info->flags);
6683 
6684   if (*flags & SCHED_RGN
6685       || *flags & SCHED_EBB)
6686     {
6687       int mask = 0;
6688 
6689       if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
6690 	  || (mflag_sched_ar_data_spec && reload_completed))
6691 	{
6692 	  mask |= BEGIN_DATA;
6693 
6694 	  if ((mflag_sched_br_in_data_spec && !reload_completed)
6695 	      || (mflag_sched_ar_in_data_spec && reload_completed))
6696 	    mask |= BE_IN_DATA;
6697 	}
6698 
6699       if (mflag_sched_control_spec)
6700 	{
6701 	  mask |= BEGIN_CONTROL;
6702 
6703 	  if (mflag_sched_in_control_spec)
6704 	    mask |= BE_IN_CONTROL;
6705 	}
6706 
6707       gcc_assert (*flags & USE_GLAT);
6708 
6709       if (mask)
6710 	{
6711 	  *flags |= USE_DEPS_LIST | DETACH_LIFE_INFO | DO_SPECULATION;
6712 
6713 	  spec_info->mask = mask;
6714 	  spec_info->flags = 0;
6715 
6716 	  if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
6717 	    spec_info->flags |= PREFER_NON_DATA_SPEC;
6718 
6719 	  if ((mask & CONTROL_SPEC)
6720 	      && mflag_sched_prefer_non_control_spec_insns)
6721 	    spec_info->flags |= PREFER_NON_CONTROL_SPEC;
6722 
6723 	  if (mflag_sched_spec_verbose)
6724 	    {
6725 	      if (sched_verbose >= 1)
6726 		spec_info->dump = sched_dump;
6727 	      else
6728 		spec_info->dump = stderr;
6729 	    }
6730 	  else
6731 	    spec_info->dump = 0;
6732 
6733 	  if (mflag_sched_count_spec_in_critical_path)
6734 	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
6735 	}
6736     }
6737 }
6738 
6739 /* Implement targetm.sched.speculate_insn hook.
6740    Check if the INSN can be TS speculative.
6741    If 'no' - return -1.
6742    If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
6743    If current pattern of the INSN already provides TS speculation, return 0.  */
6744 static int
6745 ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
6746 {
6747   rtx pat, reg, mem, mem_reg;
6748   int mode_no, gen_p = 1;
6749   bool extend_p;
6750 
6751   gcc_assert (!(ts & ~BEGIN_SPEC) && ts);
6752 
6753   pat = PATTERN (insn);
6754 
6755   if (GET_CODE (pat) == COND_EXEC)
6756     pat = COND_EXEC_CODE (pat);
6757 
6758   /* This should be a SET ...  */
6759   if (GET_CODE (pat) != SET)
6760     return -1;
6761 
6762   reg = SET_DEST (pat);
6763   /* ... to the general/fp register ...  */
6764   if (!REG_P (reg) || !(GR_REGNO_P (REGNO (reg)) || FP_REGNO_P (REGNO (reg))))
6765     return -1;
6766 
6767   /* ... from the mem ...  */
6768   mem = SET_SRC (pat);
6769 
6770   /* ... that can, possibly, be a zero_extend ...  */
6771   if (GET_CODE (mem) == ZERO_EXTEND)
6772     {
6773       mem = XEXP (mem, 0);
6774       extend_p = true;
6775     }
6776   else
6777     extend_p = false;
6778 
6779   /* ... or a speculative load.  */
6780   if (GET_CODE (mem) == UNSPEC)
6781     {
6782       int code;
6783 
6784       code = XINT (mem, 1);
6785       if (code != UNSPEC_LDA && code != UNSPEC_LDS && code != UNSPEC_LDSA)
6786 	return -1;
6787 
6788       if ((code == UNSPEC_LDA && !(ts & BEGIN_CONTROL))
6789 	  || (code == UNSPEC_LDS && !(ts & BEGIN_DATA))
6790 	  || code == UNSPEC_LDSA)
6791 	gen_p = 0;
6792 
6793       mem = XVECEXP (mem, 0, 0);
6794       gcc_assert (MEM_P (mem));
6795     }
6796 
6797   /* Source should be a mem ...  */
6798   if (!MEM_P (mem))
6799     return -1;
6800 
6801   /* ... addressed by a register.  */
6802   mem_reg = XEXP (mem, 0);
6803   if (!REG_P (mem_reg))
6804     return -1;
6805 
6806   /* We should use MEM's mode since REG's mode in the presence of ZERO_EXTEND
6807      will always be DImode.  */
6808   mode_no = ia64_mode_to_int (GET_MODE (mem));
6809 
6810   if (mode_no == SPEC_MODE_INVALID
6811       || (extend_p
6812 	  && !(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
6813 	       && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)))
6814     return -1;
6815 
6816   extract_insn_cached (insn);
6817   gcc_assert (reg == recog_data.operand[0] && mem == recog_data.operand[1]);
6818 
6819   *new_pat = ia64_gen_spec_insn (insn, ts, mode_no, gen_p != 0, extend_p);
6820 
6821   return gen_p;
6822 }
6823 
6824 enum
6825   {
6826     /* Offset to reach ZERO_EXTEND patterns.  */
6827     SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1,
6828     /* Number of patterns for each speculation mode.  */
6829     SPEC_N = (SPEC_MODE_LAST
6830               + SPEC_MODE_FOR_EXTEND_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 2)
6831   };
6832 
6833 enum SPEC_GEN_LD_MAP
6834   {
6835     /* Offset to ld.a patterns.  */
6836     SPEC_GEN_A = 0 * SPEC_N,
6837     /* Offset to ld.s patterns.  */
6838     SPEC_GEN_S = 1 * SPEC_N,
6839     /* Offset to ld.sa patterns.  */
6840     SPEC_GEN_SA = 2 * SPEC_N,
6841     /* Offset to ld.sa patterns.  For these patterns the corresponding ld.c
6842        will mutate to chk.s.  */
6843     SPEC_GEN_SA_FOR_S = 3 * SPEC_N
6844   };
6845 
6846 /* These offsets are used to get (4 * SPEC_N).  */
6847 enum SPEC_GEN_CHECK_OFFSET
6848   {
6849     SPEC_GEN_CHKA_FOR_A_OFFSET = 4 * SPEC_N - SPEC_GEN_A,
6850     SPEC_GEN_CHKA_FOR_SA_OFFSET = 4 * SPEC_N - SPEC_GEN_SA
6851   };
6852 
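/* A worked example of the arithmetic above: with SPEC_MODE_LAST == 8,
   SPEC_MODE_FOR_EXTEND_FIRST == 1 and SPEC_MODE_FOR_EXTEND_LAST == 3,
   SPEC_N == 8 + 3 - 1 + 2 == 12.  Accordingly, the gen_load[] table in
   ia64_gen_spec_insn below consists of four consecutive groups of 12
   generators each (9 mov patterns plus 3 zero-extend patterns), one
   group per SPEC_GEN_LD_MAP offset, and the branchy-check offset
   SPEC_GEN_CHECK_MUTATION_OFFSET used later equals 5 * 12 == 60.  */
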
6853 /* If GEN_P is true, calculate the index of needed speculation check and return
6854    speculative pattern for INSN with speculative mode TS, machine mode
6855    MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
6856    If GEN_P is false, just calculate the index of needed speculation check.  */
6857 static rtx
6858 ia64_gen_spec_insn (rtx insn, ds_t ts, int mode_no, bool gen_p, bool extend_p)
6859 {
6860   rtx pat, new_pat;
6861   int load_no;
6862   int shift = 0;
6863 
6864   static rtx (* const gen_load[]) (rtx, rtx) = {
6865     gen_movbi_advanced,
6866     gen_movqi_advanced,
6867     gen_movhi_advanced,
6868     gen_movsi_advanced,
6869     gen_movdi_advanced,
6870     gen_movsf_advanced,
6871     gen_movdf_advanced,
6872     gen_movxf_advanced,
6873     gen_movti_advanced,
6874     gen_zero_extendqidi2_advanced,
6875     gen_zero_extendhidi2_advanced,
6876     gen_zero_extendsidi2_advanced,
6877 
6878     gen_movbi_speculative,
6879     gen_movqi_speculative,
6880     gen_movhi_speculative,
6881     gen_movsi_speculative,
6882     gen_movdi_speculative,
6883     gen_movsf_speculative,
6884     gen_movdf_speculative,
6885     gen_movxf_speculative,
6886     gen_movti_speculative,
6887     gen_zero_extendqidi2_speculative,
6888     gen_zero_extendhidi2_speculative,
6889     gen_zero_extendsidi2_speculative,
6890 
6891     gen_movbi_speculative_advanced,
6892     gen_movqi_speculative_advanced,
6893     gen_movhi_speculative_advanced,
6894     gen_movsi_speculative_advanced,
6895     gen_movdi_speculative_advanced,
6896     gen_movsf_speculative_advanced,
6897     gen_movdf_speculative_advanced,
6898     gen_movxf_speculative_advanced,
6899     gen_movti_speculative_advanced,
6900     gen_zero_extendqidi2_speculative_advanced,
6901     gen_zero_extendhidi2_speculative_advanced,
6902     gen_zero_extendsidi2_speculative_advanced,
6903 
6904     gen_movbi_speculative_advanced,
6905     gen_movqi_speculative_advanced,
6906     gen_movhi_speculative_advanced,
6907     gen_movsi_speculative_advanced,
6908     gen_movdi_speculative_advanced,
6909     gen_movsf_speculative_advanced,
6910     gen_movdf_speculative_advanced,
6911     gen_movxf_speculative_advanced,
6912     gen_movti_speculative_advanced,
6913     gen_zero_extendqidi2_speculative_advanced,
6914     gen_zero_extendhidi2_speculative_advanced,
6915     gen_zero_extendsidi2_speculative_advanced
6916   };
6917 
6918   load_no = extend_p ? mode_no + SPEC_GEN_EXTEND_OFFSET : mode_no;
6919 
6920   if (ts & BEGIN_DATA)
6921     {
6922       /* We don't need recovery because even if this is ld.sa,
6923	 the ALAT entry will be allocated only if the NAT bit is set to zero.
6924	 So it is enough to use ld.c here.  */
6925 
6926       if (ts & BEGIN_CONTROL)
6927 	{
6928 	  load_no += SPEC_GEN_SA;
6929 
6930 	  if (!mflag_sched_ldc)
6931 	    shift = SPEC_GEN_CHKA_FOR_SA_OFFSET;
6932 	}
6933       else
6934 	{
6935 	  load_no += SPEC_GEN_A;
6936 
6937 	  if (!mflag_sched_ldc)
6938 	    shift = SPEC_GEN_CHKA_FOR_A_OFFSET;
6939 	}
6940     }
6941   else if (ts & BEGIN_CONTROL)
6942     {
6943       /* ld.sa can be used instead of ld.s to avoid basic block splitting.  */
6944       if (!mflag_control_ldc)
6945 	load_no += SPEC_GEN_S;
6946       else
6947 	{
6948 	  gcc_assert (mflag_sched_ldc);
6949 	  load_no += SPEC_GEN_SA_FOR_S;
6950 	}
6951     }
6952   else
6953     gcc_unreachable ();
6954 
6955   /* Set the desired check index.  We add '1' because a zero element in this
6956      array means that an instruction with that uid is non-speculative.  */
6957   spec_check_no[INSN_UID (insn)] = load_no + shift + 1;
6958 
6959   if (!gen_p)
6960     return 0;
6961 
6962   new_pat = gen_load[load_no] (copy_rtx (recog_data.operand[0]),
6963 			       copy_rtx (recog_data.operand[1]));
6964 
6965   pat = PATTERN (insn);
6966   if (GET_CODE (pat) == COND_EXEC)
6967     new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx
6968 				 (COND_EXEC_TEST (pat)), new_pat);
6969 
6970   return new_pat;
6971 }
6972 
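/* A concrete example of the index bookkeeping above (assuming
   mflag_sched_ldc is nonzero): a DImode load that is only
   data-speculative (TS == BEGIN_DATA) has mode_no == 4 and
   extend_p == false, so load_no == 4 + SPEC_GEN_A == 4 and shift == 0.
   gen_load[4] is gen_movdi_advanced (an ld.a), spec_check_no for the
   insn becomes 5, and ia64_gen_check below will therefore pick
   gen_check[4], i.e. gen_movdi_clr (an ld.c form).  */
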
6973 /* Offset to branchy checks.  */
6974 enum { SPEC_GEN_CHECK_MUTATION_OFFSET = 5 * SPEC_N };
6975 
6976 /* Return nonzero, if INSN needs branchy recovery check.  */
6977 static bool
6978 ia64_needs_block_p (rtx insn)
6979 {
6980   int check_no;
6981 
6982   check_no = spec_check_no[INSN_UID(insn)] - 1;
6983   gcc_assert (0 <= check_no && check_no < SPEC_GEN_CHECK_MUTATION_OFFSET);
6984 
6985   return ((SPEC_GEN_S <= check_no && check_no < SPEC_GEN_S + SPEC_N)
6986 	  || (4 * SPEC_N <= check_no && check_no < 4 * SPEC_N + SPEC_N));
6987 }
6988 
6989 /* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN.
6990    If (LABEL != 0 || MUTATE_P), generate branchy recovery check.
6991    Otherwise, generate a simple check.  */
6992 static rtx
6993 ia64_gen_check (rtx insn, rtx label, bool mutate_p)
6994 {
6995   rtx op1, pat, check_pat;
6996 
6997   static rtx (* const gen_check[]) (rtx, rtx) = {
6998     gen_movbi_clr,
6999     gen_movqi_clr,
7000     gen_movhi_clr,
7001     gen_movsi_clr,
7002     gen_movdi_clr,
7003     gen_movsf_clr,
7004     gen_movdf_clr,
7005     gen_movxf_clr,
7006     gen_movti_clr,
7007     gen_zero_extendqidi2_clr,
7008     gen_zero_extendhidi2_clr,
7009     gen_zero_extendsidi2_clr,
7010 
7011     gen_speculation_check_bi,
7012     gen_speculation_check_qi,
7013     gen_speculation_check_hi,
7014     gen_speculation_check_si,
7015     gen_speculation_check_di,
7016     gen_speculation_check_sf,
7017     gen_speculation_check_df,
7018     gen_speculation_check_xf,
7019     gen_speculation_check_ti,
7020     gen_speculation_check_di,
7021     gen_speculation_check_di,
7022     gen_speculation_check_di,
7023 
7024     gen_movbi_clr,
7025     gen_movqi_clr,
7026     gen_movhi_clr,
7027     gen_movsi_clr,
7028     gen_movdi_clr,
7029     gen_movsf_clr,
7030     gen_movdf_clr,
7031     gen_movxf_clr,
7032     gen_movti_clr,
7033     gen_zero_extendqidi2_clr,
7034     gen_zero_extendhidi2_clr,
7035     gen_zero_extendsidi2_clr,
7036 
7037     gen_movbi_clr,
7038     gen_movqi_clr,
7039     gen_movhi_clr,
7040     gen_movsi_clr,
7041     gen_movdi_clr,
7042     gen_movsf_clr,
7043     gen_movdf_clr,
7044     gen_movxf_clr,
7045     gen_movti_clr,
7046     gen_zero_extendqidi2_clr,
7047     gen_zero_extendhidi2_clr,
7048     gen_zero_extendsidi2_clr,
7049 
7050     gen_advanced_load_check_clr_bi,
7051     gen_advanced_load_check_clr_qi,
7052     gen_advanced_load_check_clr_hi,
7053     gen_advanced_load_check_clr_si,
7054     gen_advanced_load_check_clr_di,
7055     gen_advanced_load_check_clr_sf,
7056     gen_advanced_load_check_clr_df,
7057     gen_advanced_load_check_clr_xf,
7058     gen_advanced_load_check_clr_ti,
7059     gen_advanced_load_check_clr_di,
7060     gen_advanced_load_check_clr_di,
7061     gen_advanced_load_check_clr_di,
7062 
7063     /* Following checks are generated during mutation.  */
7064     gen_advanced_load_check_clr_bi,
7065     gen_advanced_load_check_clr_qi,
7066     gen_advanced_load_check_clr_hi,
7067     gen_advanced_load_check_clr_si,
7068     gen_advanced_load_check_clr_di,
7069     gen_advanced_load_check_clr_sf,
7070     gen_advanced_load_check_clr_df,
7071     gen_advanced_load_check_clr_xf,
7072     gen_advanced_load_check_clr_ti,
7073     gen_advanced_load_check_clr_di,
7074     gen_advanced_load_check_clr_di,
7075     gen_advanced_load_check_clr_di,
7076 
7077     0,0,0,0,0,0,0,0,0,0,0,0,
7078 
7079     gen_advanced_load_check_clr_bi,
7080     gen_advanced_load_check_clr_qi,
7081     gen_advanced_load_check_clr_hi,
7082     gen_advanced_load_check_clr_si,
7083     gen_advanced_load_check_clr_di,
7084     gen_advanced_load_check_clr_sf,
7085     gen_advanced_load_check_clr_df,
7086     gen_advanced_load_check_clr_xf,
7087     gen_advanced_load_check_clr_ti,
7088     gen_advanced_load_check_clr_di,
7089     gen_advanced_load_check_clr_di,
7090     gen_advanced_load_check_clr_di,
7091 
7092     gen_speculation_check_bi,
7093     gen_speculation_check_qi,
7094     gen_speculation_check_hi,
7095     gen_speculation_check_si,
7096     gen_speculation_check_di,
7097     gen_speculation_check_sf,
7098     gen_speculation_check_df,
7099     gen_speculation_check_xf,
7100     gen_speculation_check_ti,
7101     gen_speculation_check_di,
7102     gen_speculation_check_di,
7103     gen_speculation_check_di
7104   };
7105 
7106   extract_insn_cached (insn);
7107 
7108   if (label)
7109     {
7110       gcc_assert (mutate_p || ia64_needs_block_p (insn));
7111       op1 = label;
7112     }
7113   else
7114     {
7115       gcc_assert (!mutate_p && !ia64_needs_block_p (insn));
7116       op1 = copy_rtx (recog_data.operand[1]);
7117     }
7118 
7119   if (mutate_p)
7120     /* INSN is ld.c.
7121        Find the speculation check number by searching for original
7122        speculative load in the RESOLVED_DEPS list of INSN.
7123        As long as patterns are unique for each instruction, this can be
7124        accomplished by matching ORIG_PAT fields.  */
7125     {
7126       rtx link;
7127       int check_no = 0;
7128       rtx orig_pat = ORIG_PAT (insn);
7129 
7130       for (link = RESOLVED_DEPS (insn); link; link = XEXP (link, 1))
7131 	{
7132 	  rtx x = XEXP (link, 0);
7133 
7134 	  if (ORIG_PAT (x) == orig_pat)
7135 	    check_no = spec_check_no[INSN_UID (x)];
7136 	}
7137       gcc_assert (check_no);
7138 
7139       spec_check_no[INSN_UID (insn)] = (check_no
7140 					+ SPEC_GEN_CHECK_MUTATION_OFFSET);
7141     }
7142 
7143   check_pat = (gen_check[spec_check_no[INSN_UID (insn)] - 1]
7144 	       (copy_rtx (recog_data.operand[0]), op1));
7145 
7146   pat = PATTERN (insn);
7147   if (GET_CODE (pat) == COND_EXEC)
7148     check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7149 				   check_pat);
7150 
7151   return check_pat;
7152 }
7153 
7154 /* Return nonzero, if X is branchy recovery check.  */
7155 static int
7156 ia64_spec_check_p (rtx x)
7157 {
7158   x = PATTERN (x);
7159   if (GET_CODE (x) == COND_EXEC)
7160     x = COND_EXEC_CODE (x);
7161   if (GET_CODE (x) == SET)
7162     return ia64_spec_check_src_p (SET_SRC (x));
7163   return 0;
7164 }
7165 
7166 /* Return nonzero, if SRC belongs to recovery check.  */
7167 static int
7168 ia64_spec_check_src_p (rtx src)
7169 {
7170   if (GET_CODE (src) == IF_THEN_ELSE)
7171     {
7172       rtx t;
7173 
7174       t = XEXP (src, 0);
7175       if (GET_CODE (t) == NE)
7176 	{
7177 	  t = XEXP (t, 0);
7178 
7179 	  if (GET_CODE (t) == UNSPEC)
7180 	    {
7181 	      int code;
7182 
7183 	      code = XINT (t, 1);
7184 
7185 	      if (code == UNSPEC_CHKACLR
7186 		  || code == UNSPEC_CHKS
7187 		  || code == UNSPEC_LDCCLR)
7188 		{
7189 		  gcc_assert (code != 0);
7190 		  return code;
7191 		}
7192 	    }
7193 	}
7194     }
7195   return 0;
7196 }
7197 
7198 
7199 /* The following page contains abstract data `bundle states' which are
7200    used for bundling insns (inserting nops and template generation).  */
7201 
7202 /* The following describes state of insn bundling.  */
7203 
7204 struct bundle_state
7205 {
7206   /* Unique bundle state number to identify them in the debugging
7207      output  */
7208   int unique_num;
7209   rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
7210   /* number of nops before and after the insn  */
7211   short before_nops_num, after_nops_num;
7212   int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
7213                    insn)  */
7214   int cost;     /* cost of the state in cycles */
7215   int accumulated_insns_num; /* number of all previous insns including
7216 				nops.  An L insn is counted as 2 insns */
7217   int branch_deviation; /* deviation of previous branches from 3rd slots  */
7218   struct bundle_state *next;  /* next state with the same insn_num  */
7219   struct bundle_state *originator; /* originator (previous insn state)  */
7220   /* All bundle states are in the following chain.  */
7221   struct bundle_state *allocated_states_chain;
7222   /* The DFA State after issuing the insn and the nops.  */
7223   state_t dfa_state;
7224 };
7225 
7226 /* The following array maps an insn number to the corresponding bundle state.  */
7227 
7228 static struct bundle_state **index_to_bundle_states;
7229 
7230 /* The unique number of the next bundle state.  */
7231 
7232 static int bundle_states_num;
7233 
7234 /* All allocated bundle states are in the following chain.  */
7235 
7236 static struct bundle_state *allocated_bundle_states_chain;
7237 
7238 /* All allocated but not used bundle states are in the following
7239    chain.  */
7240 
7241 static struct bundle_state *free_bundle_state_chain;
7242 
7243 
7244 /* The following function returns a free bundle state.  */
7245 
7246 static struct bundle_state *
7247 get_free_bundle_state (void)
7248 {
7249   struct bundle_state *result;
7250 
7251   if (free_bundle_state_chain != NULL)
7252     {
7253       result = free_bundle_state_chain;
7254       free_bundle_state_chain = result->next;
7255     }
7256   else
7257     {
7258       result = xmalloc (sizeof (struct bundle_state));
7259       result->dfa_state = xmalloc (dfa_state_size);
7260       result->allocated_states_chain = allocated_bundle_states_chain;
7261       allocated_bundle_states_chain = result;
7262     }
7263   result->unique_num = bundle_states_num++;
7264   return result;
7265 
7266 }
7267 
7268 /* The following function frees given bundle state.  */
7269 
7270 static void
7271 free_bundle_state (struct bundle_state *state)
7272 {
7273   state->next = free_bundle_state_chain;
7274   free_bundle_state_chain = state;
7275 }
7276 
7277 /* Start work with abstract data `bundle states'.  */
7278 
7279 static void
7280 initiate_bundle_states (void)
7281 {
7282   bundle_states_num = 0;
7283   free_bundle_state_chain = NULL;
7284   allocated_bundle_states_chain = NULL;
7285 }
7286 
7287 /* Finish work with abstract data `bundle states'.  */
7288 
7289 static void
7290 finish_bundle_states (void)
7291 {
7292   struct bundle_state *curr_state, *next_state;
7293 
7294   for (curr_state = allocated_bundle_states_chain;
7295        curr_state != NULL;
7296        curr_state = next_state)
7297     {
7298       next_state = curr_state->allocated_states_chain;
7299       free (curr_state->dfa_state);
7300       free (curr_state);
7301     }
7302 }
7303 
7304 /* Hash table of the bundle states.  The key is dfa_state and insn_num
7305    of the bundle states.  */
7306 
7307 static htab_t bundle_state_table;
7308 
7309 /* The function returns hash of BUNDLE_STATE.  */
7310 
7311 static unsigned
7312 bundle_state_hash (const void *bundle_state)
7313 {
7314   const struct bundle_state *state = (struct bundle_state *) bundle_state;
7315   unsigned result, i;
7316 
7317   for (result = i = 0; i < dfa_state_size; i++)
7318     result += (((unsigned char *) state->dfa_state) [i]
7319 	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
7320   return result + state->insn_num;
7321 }
7322 
7323 /* The function returns nonzero if the bundle state keys are equal.  */
7324 
7325 static int
7326 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
7327 {
7328   const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
7329   const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
7330 
7331   return (state1->insn_num == state2->insn_num
7332 	  && memcmp (state1->dfa_state, state2->dfa_state,
7333 		     dfa_state_size) == 0);
7334 }
7335 
7336 /* The function inserts the BUNDLE_STATE into the hash table.  The
7337    function returns nonzero if the bundle has been inserted into the
7338    table.  The table contains the best bundle state with given key.  */
7339 
7340 static int
7341 insert_bundle_state (struct bundle_state *bundle_state)
7342 {
7343   void **entry_ptr;
7344 
7345   entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
7346   if (*entry_ptr == NULL)
7347     {
7348       bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
7349       index_to_bundle_states [bundle_state->insn_num] = bundle_state;
7350       *entry_ptr = (void *) bundle_state;
7351       return TRUE;
7352     }
7353   else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
7354 	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
7355 	       && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
7356 		   > bundle_state->accumulated_insns_num
7357 		   || (((struct bundle_state *)
7358 			*entry_ptr)->accumulated_insns_num
7359 		       == bundle_state->accumulated_insns_num
7360 		       && ((struct bundle_state *)
7361 			   *entry_ptr)->branch_deviation
7362 		       > bundle_state->branch_deviation))))
7363 
7364     {
7365       struct bundle_state temp;
7366 
7367       temp = *(struct bundle_state *) *entry_ptr;
7368       *(struct bundle_state *) *entry_ptr = *bundle_state;
7369       ((struct bundle_state *) *entry_ptr)->next = temp.next;
7370       *bundle_state = temp;
7371     }
7372   return FALSE;
7373 }
7374 
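/* For clarity, the replacement condition in insert_bundle_state above is
   a strict lexicographic comparison on (cost, accumulated_insns_num,
   branch_deviation).  The hypothetical helper below (a sketch only, not
   used by this file) spells that ordering out.  */
#if 0
static int
bundle_state_better_p (const struct bundle_state *a,
		       const struct bundle_state *b)
{
  /* Returns nonzero iff A is strictly preferable to B.  */
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  return a->branch_deviation < b->branch_deviation;
}
#endif
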
7375 /* Start work with the hash table.  */
7376 
7377 static void
7378 initiate_bundle_state_table (void)
7379 {
7380   bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
7381 				    (htab_del) 0);
7382 }
7383 
7384 /* Finish work with the hash table.  */
7385 
7386 static void
7387 finish_bundle_state_table (void)
7388 {
7389   htab_delete (bundle_state_table);
7390 }
7391 
7392 
7393 
7394 /* The following variable is an insn `nop' used to check bundle states
7395    with different numbers of inserted nops.  */
7396 
7397 static rtx ia64_nop;
7398 
7399 /* The following function tries to issue NOPS_NUM nops for the current
7400    state without advancing processor cycle.  If it failed, the
7401    function returns FALSE and frees the current state.  */
7402 
7403 static int
7404 try_issue_nops (struct bundle_state *curr_state, int nops_num)
7405 {
7406   int i;
7407 
7408   for (i = 0; i < nops_num; i++)
7409     if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
7410       {
7411 	free_bundle_state (curr_state);
7412 	return FALSE;
7413       }
7414   return TRUE;
7415 }
7416 
7417 /* The following function tries to issue INSN for the current
7418    state without advancing processor cycle.  If it failed, the
7419    function returns FALSE and frees the current state.  */
7420 
7421 static int
7422 try_issue_insn (struct bundle_state *curr_state, rtx insn)
7423 {
7424   if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
7425     {
7426       free_bundle_state (curr_state);
7427       return FALSE;
7428     }
7429   return TRUE;
7430 }
7431 
7432 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
7433    starting with ORIGINATOR without advancing the processor cycle.  If
7434    TRY_BUNDLE_END_P is TRUE, the function also/only (if
7435    ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
7436    If it was successful, the function creates a new bundle state and
7437    inserts it into the hash table and into `index_to_bundle_states'.  */
7438 
7439 static void
7440 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
7441 		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
7442 {
7443   struct bundle_state *curr_state;
7444 
7445   curr_state = get_free_bundle_state ();
7446   memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
7447   curr_state->insn = insn;
7448   curr_state->insn_num = originator->insn_num + 1;
7449   curr_state->cost = originator->cost;
7450   curr_state->originator = originator;
7451   curr_state->before_nops_num = before_nops_num;
7452   curr_state->after_nops_num = 0;
7453   curr_state->accumulated_insns_num
7454     = originator->accumulated_insns_num + before_nops_num;
7455   curr_state->branch_deviation = originator->branch_deviation;
7456   gcc_assert (insn);
7457   if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
7458     {
7459       gcc_assert (GET_MODE (insn) != TImode);
7460       if (!try_issue_nops (curr_state, before_nops_num))
7461 	return;
7462       if (!try_issue_insn (curr_state, insn))
7463 	return;
7464       memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
7465       if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
7466 	  && curr_state->accumulated_insns_num % 3 != 0)
7467 	{
7468 	  free_bundle_state (curr_state);
7469 	  return;
7470 	}
7471     }
7472   else if (GET_MODE (insn) != TImode)
7473     {
7474       if (!try_issue_nops (curr_state, before_nops_num))
7475 	return;
7476       if (!try_issue_insn (curr_state, insn))
7477 	return;
7478       curr_state->accumulated_insns_num++;
7479       gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
7480 		  && asm_noperands (PATTERN (insn)) < 0);
7481 
7482       if (ia64_safe_type (insn) == TYPE_L)
7483 	curr_state->accumulated_insns_num++;
7484     }
7485   else
7486     {
7487       /* If this is an insn that must be first in a group, then don't allow
7488 	 nops to be emitted before it.  Currently, alloc is the only such
7489 	 supported instruction.  */
7490       /* ??? The bundling automatons should handle this for us, but they do
7491 	 not yet have support for the first_insn attribute.  */
7492       if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
7493 	{
7494 	  free_bundle_state (curr_state);
7495 	  return;
7496 	}
7497 
7498       state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
7499       state_transition (curr_state->dfa_state, NULL);
7500       curr_state->cost++;
7501       if (!try_issue_nops (curr_state, before_nops_num))
7502 	return;
7503       if (!try_issue_insn (curr_state, insn))
7504 	return;
7505       curr_state->accumulated_insns_num++;
7506       if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7507 	  || asm_noperands (PATTERN (insn)) >= 0)
7508 	{
7509 	  /* Finish bundle containing asm insn.  */
7510 	  curr_state->after_nops_num
7511 	    = 3 - curr_state->accumulated_insns_num % 3;
7512 	  curr_state->accumulated_insns_num
7513 	    += 3 - curr_state->accumulated_insns_num % 3;
7514 	}
7515       else if (ia64_safe_type (insn) == TYPE_L)
7516 	curr_state->accumulated_insns_num++;
7517     }
7518   if (ia64_safe_type (insn) == TYPE_B)
7519     curr_state->branch_deviation
7520       += 2 - (curr_state->accumulated_insns_num - 1) % 3;
7521   if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
7522     {
7523       if (!only_bundle_end_p && insert_bundle_state (curr_state))
7524 	{
7525 	  state_t dfa_state;
7526 	  struct bundle_state *curr_state1;
7527 	  struct bundle_state *allocated_states_chain;
7528 
7529 	  curr_state1 = get_free_bundle_state ();
7530 	  dfa_state = curr_state1->dfa_state;
7531 	  allocated_states_chain = curr_state1->allocated_states_chain;
7532 	  *curr_state1 = *curr_state;
7533 	  curr_state1->dfa_state = dfa_state;
7534 	  curr_state1->allocated_states_chain = allocated_states_chain;
7535 	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
7536 		  dfa_state_size);
7537 	  curr_state = curr_state1;
7538 	}
7539       if (!try_issue_nops (curr_state,
7540 			   3 - curr_state->accumulated_insns_num % 3))
7541 	return;
7542       curr_state->after_nops_num
7543 	= 3 - curr_state->accumulated_insns_num % 3;
7544       curr_state->accumulated_insns_num
7545 	+= 3 - curr_state->accumulated_insns_num % 3;
7546     }
7547   if (!insert_bundle_state (curr_state))
7548     free_bundle_state (curr_state);
7549   return;
7550 }
7551 
7552 /* The following function returns the position in the two-bundle window
7553    for the given STATE.  */
7554 
7555 static int
7556 get_max_pos (state_t state)
7557 {
7558   if (cpu_unit_reservation_p (state, pos_6))
7559     return 6;
7560   else if (cpu_unit_reservation_p (state, pos_5))
7561     return 5;
7562   else if (cpu_unit_reservation_p (state, pos_4))
7563     return 4;
7564   else if (cpu_unit_reservation_p (state, pos_3))
7565     return 3;
7566   else if (cpu_unit_reservation_p (state, pos_2))
7567     return 2;
7568   else if (cpu_unit_reservation_p (state, pos_1))
7569     return 1;
7570   else
7571     return 0;
7572 }
7573 
7574 /* The function returns the code of a possible template for the given
7575    position and state.  The function should be called only with the two
7576    position values 3 or 6.  We avoid generating F NOPs by putting
7577    templates containing F insns at the end of the template search,
7578    because an undocumented anomaly in McKinley-derived cores can
7579    cause stalls if an F-unit insn (including a NOP) is issued within a
7580    six-cycle window after reading certain application registers (such
7581    as ar.bsp).  Furthermore, power considerations also argue against
7582    the use of F-unit instructions unless they're really needed.  */
7583 
7584 static int
7585 get_template (state_t state, int pos)
7586 {
7587   switch (pos)
7588     {
7589     case 3:
7590       if (cpu_unit_reservation_p (state, _0mmi_))
7591 	return 1;
7592       else if (cpu_unit_reservation_p (state, _0mii_))
7593 	return 0;
7594       else if (cpu_unit_reservation_p (state, _0mmb_))
7595 	return 7;
7596       else if (cpu_unit_reservation_p (state, _0mib_))
7597 	return 6;
7598       else if (cpu_unit_reservation_p (state, _0mbb_))
7599 	return 5;
7600       else if (cpu_unit_reservation_p (state, _0bbb_))
7601 	return 4;
7602       else if (cpu_unit_reservation_p (state, _0mmf_))
7603 	return 3;
7604       else if (cpu_unit_reservation_p (state, _0mfi_))
7605 	return 2;
7606       else if (cpu_unit_reservation_p (state, _0mfb_))
7607 	return 8;
7608       else if (cpu_unit_reservation_p (state, _0mlx_))
7609 	return 9;
7610       else
7611 	gcc_unreachable ();
7612     case 6:
7613       if (cpu_unit_reservation_p (state, _1mmi_))
7614 	return 1;
7615       else if (cpu_unit_reservation_p (state, _1mii_))
7616 	return 0;
7617       else if (cpu_unit_reservation_p (state, _1mmb_))
7618 	return 7;
7619       else if (cpu_unit_reservation_p (state, _1mib_))
7620 	return 6;
7621       else if (cpu_unit_reservation_p (state, _1mbb_))
7622 	return 5;
7623       else if (cpu_unit_reservation_p (state, _1bbb_))
7624 	return 4;
7625       else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
7626 	return 3;
7627       else if (cpu_unit_reservation_p (state, _1mfi_))
7628 	return 2;
7629       else if (cpu_unit_reservation_p (state, _1mfb_))
7630 	return 8;
7631       else if (cpu_unit_reservation_p (state, _1mlx_))
7632 	return 9;
7633       else
7634 	gcc_unreachable ();
7635     default:
7636       gcc_unreachable ();
7637     }
7638 }
7639 
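/* As can be read off the unit reservations checked above, the template
   codes returned by get_template are: 0 MII, 1 MMI, 2 MFI, 3 MMF,
   4 BBB, 5 MBB, 6 MIB, 7 MMB, 8 MFB and 9 MLX; bundling () below relies
   on this encoding when it rewrites an MLX selector (9) into MFI (2).  */
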
7640 /* The following function returns the next insn important for insn
7641    bundling, starting at INSN and stopping before TAIL.  */
7642 
7643 static rtx
7644 get_next_important_insn (rtx insn, rtx tail)
7645 {
7646   for (; insn && insn != tail; insn = NEXT_INSN (insn))
7647     if (INSN_P (insn)
7648 	&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7649 	&& GET_CODE (PATTERN (insn)) != USE
7650 	&& GET_CODE (PATTERN (insn)) != CLOBBER)
7651       return insn;
7652   return NULL_RTX;
7653 }
7654 
7655 /* Add a bundle selector TEMPLATE0 before INSN.  */
7656 
7657 static void
7658 ia64_add_bundle_selector_before (int template0, rtx insn)
7659 {
7660   rtx b = gen_bundle_selector (GEN_INT (template0));
7661 
7662   ia64_emit_insn_before (b, insn);
7663 #if NR_BUNDLES == 10
7664   if ((template0 == 4 || template0 == 5)
7665       && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
7666     {
7667       int i;
7668       rtx note = NULL_RTX;
7669 
7670       /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
7671 	 first or second slot.  If it is and has a REG_EH_REGION note, copy it
7672 	 to the following nops, as br.call sets rp to the address of the following
7673 	 bundle and therefore an EH region end must be on a bundle
7674 	 boundary.  */
7675       insn = PREV_INSN (insn);
7676       for (i = 0; i < 3; i++)
7677 	{
7678 	  do
7679 	    insn = next_active_insn (insn);
7680 	  while (GET_CODE (insn) == INSN
7681 		 && get_attr_empty (insn) == EMPTY_YES);
7682 	  if (GET_CODE (insn) == CALL_INSN)
7683 	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
7684 	  else if (note)
7685 	    {
7686 	      int code;
7687 
7688 	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
7689 			  || code == CODE_FOR_nop_b);
7690 	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
7691 		note = NULL_RTX;
7692 	      else
7693 		REG_NOTES (insn)
7694 		  = gen_rtx_EXPR_LIST (REG_EH_REGION, XEXP (note, 0),
7695 				       REG_NOTES (insn));
7696 	    }
7697 	}
7698     }
7699 #endif
7700 }
7701 
7702 /* The following function does insn bundling.  Bundling means
7703    inserting templates and nop insns to fit insn groups into permitted
7704    templates.  Instruction scheduling uses an NDFA (non-deterministic
7705    finite automaton) encoding information about the templates and the
7706    inserted nops.  The nondeterminism of the automaton permits following
7707    all possible insn sequences very quickly.
7708 
7709    Unfortunately it is not possible to get information about inserting
7710    nop insns and used templates from the automata states.  The
7711    automaton only says that we can issue an insn, possibly inserting
7712    some nops before it and using some template.  Therefore insn
7713    bundling in this function is implemented by using a DFA
7714    (deterministic finite automaton).  We follow all possible insn
7715    sequences by inserting 0-2 nops (that is what the NDFA describes for
7716    insn scheduling) before/after each insn being bundled.  We know the
7717    start of simulated processor cycle from insn scheduling (insn
7718    starting a new cycle has TImode).
7719 
7720    A simple implementation of insn bundling would create an enormous
7721    number of possible insn sequences satisfying the information about new
7722    cycle ticks taken from insn scheduling.  To make the algorithm
7723    practical we use dynamic programming.  Each decision (about
7724    inserting nops and implicitly about previous decisions) is described
7725    by structure bundle_state (see above).  If we generate the same
7726    bundle state (the key is the automaton state after issuing the insns
7727    and nops for it), we reuse the already generated one.  As a consequence we
7728    reject some decisions which cannot improve the solution and
7729    reduce memory for the algorithm.
7730 
7731    When we reach the end of the EBB (extended basic block), we choose the
7732    best sequence and then, moving back through the EBB, insert templates for
7733    the best alternative.  The templates are taken by querying the
7734    automaton state for each insn in the chosen bundle states.
7735 
7736    So the algorithm makes two (forward and backward) passes through the
7737    EBB.  There is an additional forward pass through the EBB for the
7738    Itanium1 processor.  This pass inserts more nops to make the dependency
7739    between a producer insn and MMMUL/MMSHF at least 4 cycles long.  */
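
/* Schematically, the forward pass is a small dynamic program: for each
   important insn and each state reached after the previous insn
   (index_to_bundle_states [insn_num - 1]), issue_nops_and_insn is tried
   with up to two preceding nops (and, at a possible bundle end, also
   with the trailing nops needed to close the bundle), and
   insert_bundle_state keeps only the best state for every
   (DFA state, insn count) key.  */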
7740 
7741 static void
7742 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
7743 {
7744   struct bundle_state *curr_state, *next_state, *best_state;
7745   rtx insn, next_insn;
7746   int insn_num;
7747   int i, bundle_end_p, only_bundle_end_p, asm_p;
7748   int pos = 0, max_pos, template0, template1;
7749   rtx b;
7750   rtx nop;
7751   enum attr_type type;
7752 
7753   insn_num = 0;
7754   /* Count insns in the EBB.  */
7755   for (insn = NEXT_INSN (prev_head_insn);
7756        insn && insn != tail;
7757        insn = NEXT_INSN (insn))
7758     if (INSN_P (insn))
7759       insn_num++;
7760   if (insn_num == 0)
7761     return;
7762   bundling_p = 1;
7763   dfa_clean_insn_cache ();
7764   initiate_bundle_state_table ();
7765   index_to_bundle_states = xmalloc ((insn_num + 2)
7766 				    * sizeof (struct bundle_state *));
7767   /* First (forward) pass -- generation of bundle states.  */
7768   curr_state = get_free_bundle_state ();
7769   curr_state->insn = NULL;
7770   curr_state->before_nops_num = 0;
7771   curr_state->after_nops_num = 0;
7772   curr_state->insn_num = 0;
7773   curr_state->cost = 0;
7774   curr_state->accumulated_insns_num = 0;
7775   curr_state->branch_deviation = 0;
7776   curr_state->next = NULL;
7777   curr_state->originator = NULL;
7778   state_reset (curr_state->dfa_state);
7779   index_to_bundle_states [0] = curr_state;
7780   insn_num = 0;
7781   /* Shift the cycle mark if it is put on an insn which could be ignored.  */
7782   for (insn = NEXT_INSN (prev_head_insn);
7783        insn != tail;
7784        insn = NEXT_INSN (insn))
7785     if (INSN_P (insn)
7786 	&& (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7787 	    || GET_CODE (PATTERN (insn)) == USE
7788 	    || GET_CODE (PATTERN (insn)) == CLOBBER)
7789 	&& GET_MODE (insn) == TImode)
7790       {
7791 	PUT_MODE (insn, VOIDmode);
7792 	for (next_insn = NEXT_INSN (insn);
7793 	     next_insn != tail;
7794 	     next_insn = NEXT_INSN (next_insn))
7795 	  if (INSN_P (next_insn)
7796 	      && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
7797 	      && GET_CODE (PATTERN (next_insn)) != USE
7798 	      && GET_CODE (PATTERN (next_insn)) != CLOBBER)
7799 	    {
7800 	      PUT_MODE (next_insn, TImode);
7801 	      break;
7802 	    }
7803       }
7804   /* Forward pass: generation of bundle states.  */
7805   for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7806        insn != NULL_RTX;
7807        insn = next_insn)
7808     {
7809       gcc_assert (INSN_P (insn)
7810 		  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7811 		  && GET_CODE (PATTERN (insn)) != USE
7812 		  && GET_CODE (PATTERN (insn)) != CLOBBER);
7813       type = ia64_safe_type (insn);
7814       next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7815       insn_num++;
7816       index_to_bundle_states [insn_num] = NULL;
7817       for (curr_state = index_to_bundle_states [insn_num - 1];
7818 	   curr_state != NULL;
7819 	   curr_state = next_state)
7820 	{
7821 	  pos = curr_state->accumulated_insns_num % 3;
7822 	  next_state = curr_state->next;
7823 	  /* We must fill up the current bundle in order to start a
7824	     subsequent asm insn in a new bundle.  An asm insn is always
7825 	     placed in a separate bundle.  */
7826 	  only_bundle_end_p
7827 	    = (next_insn != NULL_RTX
7828 	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
7829 	       && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
7830 	  /* We may fill up the current bundle if it is the cycle end
7831 	     without a group barrier.  */
7832 	  bundle_end_p
7833 	    = (only_bundle_end_p || next_insn == NULL_RTX
7834 	       || (GET_MODE (next_insn) == TImode
7835 		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
7836 	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7837 	      || type == TYPE_S
7838 	      /* We need to insert 2 nops for cases like M_MII.  To
7839 		 guarantee issuing all insns on the same cycle for
7840 		 Itanium 1, we need to issue 2 nops after the first M
7841 		 insn (MnnMII where n is a nop insn).  */
7842 	      || ((type == TYPE_M || type == TYPE_A)
7843 		  && ia64_tune == PROCESSOR_ITANIUM
7844 		  && !bundle_end_p && pos == 1))
7845 	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
7846 				 only_bundle_end_p);
7847 	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
7848 			       only_bundle_end_p);
7849 	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
7850 			       only_bundle_end_p);
7851 	}
7852       gcc_assert (index_to_bundle_states [insn_num]);
7853       for (curr_state = index_to_bundle_states [insn_num];
7854 	   curr_state != NULL;
7855 	   curr_state = curr_state->next)
7856 	if (verbose >= 2 && dump)
7857 	  {
7858 	    /* This structure is taken from generated code of the
7859 	       pipeline hazard recognizer (see file insn-attrtab.c).
7860 	       Please don't forget to change the structure if a new
7861 	       automaton is added to .md file.  */
7862 	    struct DFA_chip
7863 	    {
7864 	      unsigned short one_automaton_state;
7865 	      unsigned short oneb_automaton_state;
7866 	      unsigned short two_automaton_state;
7867 	      unsigned short twob_automaton_state;
7868 	    };
7869 
7870 	    fprintf
7871 	      (dump,
7872 	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7873 	       curr_state->unique_num,
7874 	       (curr_state->originator == NULL
7875 		? -1 : curr_state->originator->unique_num),
7876 	       curr_state->cost,
7877 	       curr_state->before_nops_num, curr_state->after_nops_num,
7878 	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
7879 	       (ia64_tune == PROCESSOR_ITANIUM
7880 		? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7881 		: ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7882 	       INSN_UID (insn));
7883 	  }
7884     }
7885 
7886   /* We should find a solution because the 2nd insn scheduling has
7887      found one.  */
7888   gcc_assert (index_to_bundle_states [insn_num]);
7889   /* Find a state corresponding to the best insn sequence.  */
7890   best_state = NULL;
7891   for (curr_state = index_to_bundle_states [insn_num];
7892        curr_state != NULL;
7893        curr_state = curr_state->next)
7894     /* We only look at states whose last bundle is completely filled.
7895        First we prefer insn sequences with minimal cost, then those with
7896        the fewest inserted nops, and finally those with branch insns
7897        placed in 3rd slots.  */
7898     if (curr_state->accumulated_insns_num % 3 == 0
7899 	&& (best_state == NULL || best_state->cost > curr_state->cost
7900 	    || (best_state->cost == curr_state->cost
7901 		&& (curr_state->accumulated_insns_num
7902 		    < best_state->accumulated_insns_num
7903 		    || (curr_state->accumulated_insns_num
7904 			== best_state->accumulated_insns_num
7905 			&& curr_state->branch_deviation
7906 			< best_state->branch_deviation)))))
7907       best_state = curr_state;
7908   /* Second (backward) pass: adding nops and templates.  */
7909   insn_num = best_state->before_nops_num;
7910   template0 = template1 = -1;
7911   for (curr_state = best_state;
7912        curr_state->originator != NULL;
7913        curr_state = curr_state->originator)
7914     {
7915       insn = curr_state->insn;
7916       asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
7917 	       || asm_noperands (PATTERN (insn)) >= 0);
7918       insn_num++;
7919       if (verbose >= 2 && dump)
7920 	{
7921 	  struct DFA_chip
7922 	  {
7923 	    unsigned short one_automaton_state;
7924 	    unsigned short oneb_automaton_state;
7925 	    unsigned short two_automaton_state;
7926 	    unsigned short twob_automaton_state;
7927 	  };
7928 
7929 	  fprintf
7930 	    (dump,
7931 	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7932 	     curr_state->unique_num,
7933 	     (curr_state->originator == NULL
7934 	      ? -1 : curr_state->originator->unique_num),
7935 	     curr_state->cost,
7936 	     curr_state->before_nops_num, curr_state->after_nops_num,
7937 	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
7938 	     (ia64_tune == PROCESSOR_ITANIUM
7939 	      ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7940 	      : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7941 	     INSN_UID (insn));
7942 	}
7943       /* Find the position in the current bundle window.  The window can
7944	 contain at most two bundles.  A two-bundle window means that
7945	 the processor will make two bundle rotations.  */
7946       max_pos = get_max_pos (curr_state->dfa_state);
7947       if (max_pos == 6
7948 	  /* The following (negative template number) means that the
7949 	     processor did one bundle rotation.  */
7950 	  || (max_pos == 3 && template0 < 0))
7951 	{
7952 	  /* We are at the end of the window -- find template(s) for
7953 	     its bundle(s).  */
7954 	  pos = max_pos;
7955 	  if (max_pos == 3)
7956 	    template0 = get_template (curr_state->dfa_state, 3);
7957 	  else
7958 	    {
7959 	      template1 = get_template (curr_state->dfa_state, 3);
7960 	      template0 = get_template (curr_state->dfa_state, 6);
7961 	    }
7962 	}
7963       if (max_pos > 3 && template1 < 0)
7964 	/* It may happen when we have the stop inside a bundle.  */
7965 	{
7966 	  gcc_assert (pos <= 3);
7967 	  template1 = get_template (curr_state->dfa_state, 3);
7968 	  pos += 3;
7969 	}
7970       if (!asm_p)
7971 	/* Emit nops after the current insn.  */
7972 	for (i = 0; i < curr_state->after_nops_num; i++)
7973 	  {
7974 	    nop = gen_nop ();
7975 	    emit_insn_after (nop, insn);
7976 	    pos--;
7977 	    gcc_assert (pos >= 0);
7978 	    if (pos % 3 == 0)
7979 	      {
7980 		/* We are at the start of a bundle: emit the template
7981 		   (it should be defined).  */
7982 		gcc_assert (template0 >= 0);
7983 		ia64_add_bundle_selector_before (template0, nop);
7984		/* If we have a two-bundle window, we make one bundle
7985 		   rotation.  Otherwise template0 will be undefined
7986 		   (negative value).  */
7987 		template0 = template1;
7988 		template1 = -1;
7989 	      }
7990 	  }
7991       /* Move the position backward in the window.  Group barrier has
7992	 no slot.  An asm insn takes a whole bundle.  */
7993       if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7994 	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
7995 	  && asm_noperands (PATTERN (insn)) < 0)
7996 	pos--;
7997       /* Long insn takes 2 slots.  */
7998       if (ia64_safe_type (insn) == TYPE_L)
7999 	pos--;
8000       gcc_assert (pos >= 0);
8001       if (pos % 3 == 0
8002 	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8003 	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
8004 	  && asm_noperands (PATTERN (insn)) < 0)
8005 	{
8006 	  /* The current insn is at the bundle start: emit the
8007 	     template.  */
8008 	  gcc_assert (template0 >= 0);
8009 	  ia64_add_bundle_selector_before (template0, insn);
8010 	  b = PREV_INSN (insn);
8011 	  insn = b;
8012 	  /* See comment above in analogous place for emitting nops
8013 	     after the insn.  */
8014 	  template0 = template1;
8015 	  template1 = -1;
8016 	}
8017       /* Emit nops before the current insn.  */
8018       for (i = 0; i < curr_state->before_nops_num; i++)
8019 	{
8020 	  nop = gen_nop ();
8021 	  ia64_emit_insn_before (nop, insn);
8022 	  nop = PREV_INSN (insn);
8023 	  insn = nop;
8024 	  pos--;
8025 	  gcc_assert (pos >= 0);
8026 	  if (pos % 3 == 0)
8027 	    {
8028 	      /* See comment above in analogous place for emitting nops
8029 		 after the insn.  */
8030 	      gcc_assert (template0 >= 0);
8031 	      ia64_add_bundle_selector_before (template0, insn);
8032 	      b = PREV_INSN (insn);
8033 	      insn = b;
8034 	      template0 = template1;
8035 	      template1 = -1;
8036 	    }
8037 	}
8038     }
8039   if (ia64_tune == PROCESSOR_ITANIUM)
8040     /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
8041        Itanium1 has a quirky design: if the distance between an insn
8042        and a dependent MM-insn is less than 4 cycles, there is an
8043        additional 6-cycle stall.  So we make the distance equal to 4
8044        cycles if it is less.  */
8045     for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8046 	 insn != NULL_RTX;
8047 	 insn = next_insn)
8048       {
8049 	gcc_assert (INSN_P (insn)
8050 		    && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8051 		    && GET_CODE (PATTERN (insn)) != USE
8052 		    && GET_CODE (PATTERN (insn)) != CLOBBER);
8053 	next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8054 	if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
8055 	  /* We found a MM-insn which needs additional cycles.  */
8056 	  {
8057 	    rtx last;
8058 	    int i, j, n;
8059 	    int pred_stop_p;
8060 
8061 	    /* Now we are searching for the template of the bundle in
8062 	       which the MM-insn is placed and for the position of the
8063 	       insn in the bundle (0, 1, 2).  We also check whether
8064 	       there is a stop before the insn.  */
8065 	    last = prev_active_insn (insn);
8066 	    pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
8067 	    if (pred_stop_p)
8068 	      last = prev_active_insn (last);
8069 	    n = 0;
8070 	    for (;; last = prev_active_insn (last))
8071 	      if (recog_memoized (last) == CODE_FOR_bundle_selector)
8072 		{
8073 		  template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
8074 		  if (template0 == 9)
8075 		    /* The insn is in an MLX bundle.  Change the template
8076 		       to MFI because we will add nops before the
8077 		       insn; this simplifies the subsequent code a lot.  */
8078 		    PATTERN (last)
8079 		      = gen_bundle_selector (const2_rtx); /* -> MFI */
8080 		  break;
8081 		}
8082 	      else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
8083 		       && (ia64_safe_itanium_class (last)
8084 			   != ITANIUM_CLASS_IGNORE))
8085 		n++;
8086 	    /* Sanity checks: the stop is not at the bundle start,
8087 	       there are no more than 3 insns in the bundle, and the
8088 	       MM-insn is not at the start of a bundle with template
8089 	       MLX.  */
8090 	    gcc_assert ((!pred_stop_p || n)
8091 			&& n <= 2
8092 			&& (template0 != 9 || !n));
8093 	    /* Put nops after the insn in the bundle.  */
8094 	    for (j = 3 - n; j > 0; j --)
8095 	      ia64_emit_insn_before (gen_nop (), insn);
8096 	    /* This takes into account that we will add N more nops
8097 	       before the insn later -- see the code below.  */
8098 	    add_cycles [INSN_UID (insn)]--;
8099 	    if (!pred_stop_p || add_cycles [INSN_UID (insn)])
8100 	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8101 				     insn);
8102 	    if (pred_stop_p)
8103 	      add_cycles [INSN_UID (insn)]--;
8104 	    for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
8105 	      {
8106 		/* Insert "MII;" template.  */
8107 		ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
8108 				       insn);
8109 		ia64_emit_insn_before (gen_nop (), insn);
8110 		ia64_emit_insn_before (gen_nop (), insn);
8111 		if (i > 1)
8112 		  {
8113 		    /* To decrease code size, we use "MI;I;"
8114 		       template.  */
8115 		    ia64_emit_insn_before
8116 		      (gen_insn_group_barrier (GEN_INT (3)), insn);
8117 		    i--;
8118 		  }
8119 		ia64_emit_insn_before (gen_nop (), insn);
8120 		ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8121 				       insn);
8122 	      }
8123 	    /* Put the MM-insn in the same slot of a bundle with the
8124 	       same template as the original one.  */
8125 	    ia64_add_bundle_selector_before (template0, insn);
8126 	    /* To put the insn in the same slot, add necessary number
8127 	       of nops.  */
8128 	    for (j = n; j > 0; j --)
8129 	      ia64_emit_insn_before (gen_nop (), insn);
8130 	    /* Put the stop if the original bundle had it.  */
8131 	    if (pred_stop_p)
8132 	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8133 				     insn);
8134 	  }
8135       }
8136   free (index_to_bundle_states);
8137   finish_bundle_state_table ();
8138   bundling_p = 0;
8139   dfa_clean_insn_cache ();
8140 }
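/* Editor's illustrative note (not part of the original source): on IA-64
   each 128-bit bundle holds three 41-bit instruction slots plus a 5-bit
   template that fixes the slot types and stop positions.  The template
   numbers used by the bundling code above follow the encoding assumed by
   gen_bundle_selector:

       0  ->  MII   (the "MII;" padding emitted for extra cycles)
       2  ->  MFI   (the MLX -> MFI rewrite before adding nops)
       9  ->  MLX   (a bundle with a long-immediate L+X slot pair)

   A negative template value simply means "not chosen yet" in this pass,
   as the comments about bundle rotation describe.  */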
8141 
8142 /* The following function is called at the end of scheduling a BB or
8143    an EBB.  After reload, it inserts stop bits and does insn bundling.  */
8144 
8145 static void
8146 ia64_sched_finish (FILE *dump, int sched_verbose)
8147 {
8148   if (sched_verbose)
8149     fprintf (dump, "// Finishing schedule.\n");
8150   if (!reload_completed)
8151     return;
8152   if (reload_completed)
8153     {
8154       final_emit_insn_group_barriers (dump);
8155       bundling (dump, sched_verbose, current_sched_info->prev_head,
8156 		current_sched_info->next_tail);
8157       if (sched_verbose && dump)
8158 	fprintf (dump, "//    finishing %d-%d\n",
8159 		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8160 		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
8161 
8162       return;
8163     }
8164 }
8165 
8166 /* The following function inserts stop bits in scheduled BB or EBB.  */
8167 
8168 static void
8169 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
8170 {
8171   rtx insn;
8172   int need_barrier_p = 0;
8173   rtx prev_insn = NULL_RTX;
8174 
8175   init_insn_group_barriers ();
8176 
8177   for (insn = NEXT_INSN (current_sched_info->prev_head);
8178        insn != current_sched_info->next_tail;
8179        insn = NEXT_INSN (insn))
8180     {
8181       if (GET_CODE (insn) == BARRIER)
8182 	{
8183 	  rtx last = prev_active_insn (insn);
8184 
8185 	  if (! last)
8186 	    continue;
8187 	  if (GET_CODE (last) == JUMP_INSN
8188 	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8189 	    last = prev_active_insn (last);
8190 	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8191 	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
8192 
8193 	  init_insn_group_barriers ();
8194 	  need_barrier_p = 0;
8195 	  prev_insn = NULL_RTX;
8196 	}
8197       else if (INSN_P (insn))
8198 	{
8199 	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
8200 	    {
8201 	      init_insn_group_barriers ();
8202 	      need_barrier_p = 0;
8203 	      prev_insn = NULL_RTX;
8204 	    }
8205 	  else if (need_barrier_p || group_barrier_needed (insn))
8206 	    {
8207 	      if (TARGET_EARLY_STOP_BITS)
8208 		{
8209 		  rtx last;
8210 
8211 		  for (last = insn;
8212 		       last != current_sched_info->prev_head;
8213 		       last = PREV_INSN (last))
8214 		    if (INSN_P (last) && GET_MODE (last) == TImode
8215 			&& stops_p [INSN_UID (last)])
8216 		      break;
8217 		  if (last == current_sched_info->prev_head)
8218 		    last = insn;
8219 		  last = prev_active_insn (last);
8220 		  if (last
8221 		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8222 		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8223 				     last);
8224 		  init_insn_group_barriers ();
8225 		  for (last = NEXT_INSN (last);
8226 		       last != insn;
8227 		       last = NEXT_INSN (last))
8228 		    if (INSN_P (last))
8229 		      group_barrier_needed (last);
8230 		}
8231 	      else
8232 		{
8233 		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8234 				    insn);
8235 		  init_insn_group_barriers ();
8236 		}
8237 	      group_barrier_needed (insn);
8238 	      prev_insn = NULL_RTX;
8239 	    }
8240 	  else if (recog_memoized (insn) >= 0)
8241 	    prev_insn = insn;
8242 	  need_barrier_p = (GET_CODE (insn) == CALL_INSN
8243 			    || GET_CODE (PATTERN (insn)) == ASM_INPUT
8244 			    || asm_noperands (PATTERN (insn)) >= 0);
8245 	}
8246     }
8247 }
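/* Editor's illustrative note (not part of the original source): the
   insn_group_barrier insns inserted above become the architectural stop
   bit, printed as ";;" in the assembly output (the profiler code later
   in this file writes the same ";;" literally).  A sketch of the effect:

       add r14 = r32, r33
       ;;                      // stop: the next group may now read r14
       ld8 r15 = [r14]

   The register numbers are invented for illustration only.  */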
8248 
8249 
8250 
8251 /* If the following function returns TRUE, we will use the DFA
8252    insn scheduler.  */
8253 
8254 static int
8255 ia64_first_cycle_multipass_dfa_lookahead (void)
8256 {
8257   return (reload_completed ? 6 : 4);
8258 }
8259 
8260 /* The following function initiates variable `dfa_pre_cycle_insn'.  */
8261 
8262 static void
8263 ia64_init_dfa_pre_cycle_insn (void)
8264 {
8265   if (temp_dfa_state == NULL)
8266     {
8267       dfa_state_size = state_size ();
8268       temp_dfa_state = xmalloc (dfa_state_size);
8269       prev_cycle_state = xmalloc (dfa_state_size);
8270     }
8271   dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
8272   PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
8273   recog_memoized (dfa_pre_cycle_insn);
8274   dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
8275   PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
8276   recog_memoized (dfa_stop_insn);
8277 }
8278 
8279 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
8280    used by the DFA insn scheduler.  */
8281 
8282 static rtx
8283 ia64_dfa_pre_cycle_insn (void)
8284 {
8285   return dfa_pre_cycle_insn;
8286 }
8287 
8288 /* The following function returns TRUE if PRODUCER (of type ilog or
8289    ld) produces the address for CONSUMER (of type st or stf).  */
8290 
8291 int
8292 ia64_st_address_bypass_p (rtx producer, rtx consumer)
8293 {
8294   rtx dest, reg, mem;
8295 
8296   gcc_assert (producer && consumer);
8297   dest = ia64_single_set (producer);
8298   gcc_assert (dest);
8299   reg = SET_DEST (dest);
8300   gcc_assert (reg);
8301   if (GET_CODE (reg) == SUBREG)
8302     reg = SUBREG_REG (reg);
8303   gcc_assert (GET_CODE (reg) == REG);
8304 
8305   dest = ia64_single_set (consumer);
8306   gcc_assert (dest);
8307   mem = SET_DEST (dest);
8308   gcc_assert (mem && GET_CODE (mem) == MEM);
8309   return reg_mentioned_p (reg, mem);
8310 }
8311 
8312 /* The following function returns TRUE if PRODUCER (of type ilog or
8313    ld) produces the address for CONSUMER (of type ld or fld).  */
8314 
8315 int
8316 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
8317 {
8318   rtx dest, src, reg, mem;
8319 
8320   gcc_assert (producer && consumer);
8321   dest = ia64_single_set (producer);
8322   gcc_assert (dest);
8323   reg = SET_DEST (dest);
8324   gcc_assert (reg);
8325   if (GET_CODE (reg) == SUBREG)
8326     reg = SUBREG_REG (reg);
8327   gcc_assert (GET_CODE (reg) == REG);
8328 
8329   src = ia64_single_set (consumer);
8330   gcc_assert (src);
8331   mem = SET_SRC (src);
8332   gcc_assert (mem);
8333 
8334   if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
8335     mem = XVECEXP (mem, 0, 0);
8336   else if (GET_CODE (mem) == IF_THEN_ELSE)
8337     /* ??? Is this bypass necessary for ld.c?  */
8338     {
8339       gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
8340       mem = XEXP (mem, 1);
8341     }
8342 
8343   while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
8344     mem = XEXP (mem, 0);
8345 
8346   if (GET_CODE (mem) == UNSPEC)
8347     {
8348       int c = XINT (mem, 1);
8349 
8350       gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDSA);
8351       mem = XVECEXP (mem, 0, 0);
8352     }
8353 
8354   /* Note that LO_SUM is used for GOT loads.  */
8355   gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
8356 
8357   return reg_mentioned_p (reg, mem);
8358 }
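/* Editor's illustrative note (not part of the original source): the two
   bypass predicates above recognize the "producer computes the address
   that the consumer dereferences" pattern, e.g. (made-up registers):

       add r14 = r32, r33      // producer of type ilog
       ;;
       ld8 r15 = [r14]         // consumer: ia64_ld_address_bypass_p
       st8 [r14] = r16         // consumer: ia64_st_address_bypass_p

   The scheduler's bypass descriptions in the machine description use
   these predicates to model the extra address-generation latency.  */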
8359 
8360 /* The following function returns TRUE if INSN produces an address for
8361    a load/store insn.  We place such insns into an M slot because that
8362    decreases their latency.  */
8363 
8364 int
8365 ia64_produce_address_p (rtx insn)
8366 {
8367   return insn->call;
8368 }
8369 
8370 
8371 /* Emit pseudo-ops for the assembler to describe predicate relations.
8372    At present this assumes that we only consider predicate pairs to
8373    be mutex, and that the assembler can deduce proper values from
8374    straight-line code.  */
8375 
8376 static void
8377 emit_predicate_relation_info (void)
8378 {
8379   basic_block bb;
8380 
8381   FOR_EACH_BB_REVERSE (bb)
8382     {
8383       int r;
8384       rtx head = BB_HEAD (bb);
8385 
8386       /* We only need such notes at code labels.  */
8387       if (GET_CODE (head) != CODE_LABEL)
8388 	continue;
8389       if (GET_CODE (NEXT_INSN (head)) == NOTE
8390 	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
8391 	head = NEXT_INSN (head);
8392 
8393       /* Skip p0, which may be thought to be live due to (reg:DI p0)
8394 	 grabbing the entire block of predicate registers.  */
8395       for (r = PR_REG (2); r < PR_REG (64); r += 2)
8396 	if (REGNO_REG_SET_P (bb->il.rtl->global_live_at_start, r))
8397 	  {
8398 	    rtx p = gen_rtx_REG (BImode, r);
8399 	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
8400 	    if (head == BB_END (bb))
8401 	      BB_END (bb) = n;
8402 	    head = n;
8403 	  }
8404     }
8405 
8406   /* Look for conditional calls that do not return, and protect predicate
8407      relations around them.  Otherwise the assembler will assume the call
8408      returns, and complain about uses of call-clobbered predicates after
8409      the call.  */
8410   FOR_EACH_BB_REVERSE (bb)
8411     {
8412       rtx insn = BB_HEAD (bb);
8413 
8414       while (1)
8415 	{
8416 	  if (GET_CODE (insn) == CALL_INSN
8417 	      && GET_CODE (PATTERN (insn)) == COND_EXEC
8418 	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
8419 	    {
8420 	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
8421 	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
8422 	      if (BB_HEAD (bb) == insn)
8423 		BB_HEAD (bb) = b;
8424 	      if (BB_END (bb) == insn)
8425 		BB_END (bb) = a;
8426 	    }
8427 
8428 	  if (insn == BB_END (bb))
8429 	    break;
8430 	  insn = NEXT_INSN (insn);
8431 	}
8432     }
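/* Editor's illustrative note (not part of the original source): the
   pred_rel_mutex and safe_across_calls patterns emitted above expand in
   the machine description to assembler pseudo-ops, presumably along the
   lines of:

       .pred.rel.mutex p6, p7          // one mutex predicate pair
       .pred.safe_across_calls p1-p5   // around a noreturn conditional call

   telling the assembler which predicate registers it may assume to be
   mutually exclusive or preserved across the call.  */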
8433 }
8434 
8435 /* Perform machine dependent operations on the rtl chain INSNS.  */
8436 
8437 static void
8438 ia64_reorg (void)
8439 {
8440   /* We are freeing block_for_insn in the toplev to keep compatibility
8441      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
8442   compute_bb_for_insn ();
8443 
8444   /* If optimizing, we'll have split before scheduling.  */
8445   if (optimize == 0)
8446     split_all_insns (0);
8447 
8448   /* ??? update_life_info_in_dirty_blocks fails to terminate during
8449      non-optimizing bootstrap.  */
8450   update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
8451 
8452   if (optimize && ia64_flag_schedule_insns2)
8453     {
8454       timevar_push (TV_SCHED2);
8455       ia64_final_schedule = 1;
8456 
8457       initiate_bundle_states ();
8458       ia64_nop = make_insn_raw (gen_nop ());
8459       PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
8460       recog_memoized (ia64_nop);
8461       clocks_length = get_max_uid () + 1;
8462       stops_p = xcalloc (1, clocks_length);
8463       if (ia64_tune == PROCESSOR_ITANIUM)
8464 	{
8465 	  clocks = xcalloc (clocks_length, sizeof (int));
8466 	  add_cycles = xcalloc (clocks_length, sizeof (int));
8467 	}
8468       if (ia64_tune == PROCESSOR_ITANIUM2)
8469 	{
8470 	  pos_1 = get_cpu_unit_code ("2_1");
8471 	  pos_2 = get_cpu_unit_code ("2_2");
8472 	  pos_3 = get_cpu_unit_code ("2_3");
8473 	  pos_4 = get_cpu_unit_code ("2_4");
8474 	  pos_5 = get_cpu_unit_code ("2_5");
8475 	  pos_6 = get_cpu_unit_code ("2_6");
8476 	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
8477 	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
8478 	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
8479 	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
8480 	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
8481 	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
8482 	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
8483 	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
8484 	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
8485 	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
8486 	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
8487 	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
8488 	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
8489 	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
8490 	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
8491 	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
8492 	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
8493 	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
8494 	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
8495 	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
8496 	}
8497       else
8498 	{
8499 	  pos_1 = get_cpu_unit_code ("1_1");
8500 	  pos_2 = get_cpu_unit_code ("1_2");
8501 	  pos_3 = get_cpu_unit_code ("1_3");
8502 	  pos_4 = get_cpu_unit_code ("1_4");
8503 	  pos_5 = get_cpu_unit_code ("1_5");
8504 	  pos_6 = get_cpu_unit_code ("1_6");
8505 	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
8506 	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
8507 	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
8508 	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
8509 	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
8510 	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
8511 	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
8512 	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
8513 	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
8514 	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
8515 	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
8516 	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
8517 	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
8518 	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
8519 	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
8520 	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
8521 	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
8522 	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
8523 	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
8524 	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
8525 	}
8526       schedule_ebbs ();
8527       finish_bundle_states ();
8528       if (ia64_tune == PROCESSOR_ITANIUM)
8529 	{
8530 	  free (add_cycles);
8531 	  free (clocks);
8532 	}
8533       free (stops_p);
8534       stops_p = NULL;
8535       emit_insn_group_barriers (dump_file);
8536 
8537       ia64_final_schedule = 0;
8538       timevar_pop (TV_SCHED2);
8539     }
8540   else
8541     emit_all_insn_group_barriers (dump_file);
8542 
8543   /* A call must not be the last instruction in a function, so that the
8544      return address is still within the function and unwinding works
8545      properly.  Note that IA-64 differs from dwarf2 on this point.  */
8546   if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
8547     {
8548       rtx insn;
8549       int saw_stop = 0;
8550 
8551       insn = get_last_insn ();
8552       if (! INSN_P (insn))
8553         insn = prev_active_insn (insn);
8554       /* Skip over insns that expand to nothing.  */
8555       while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
8556         {
8557 	  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
8558 	      && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
8559 	    saw_stop = 1;
8560 	  insn = prev_active_insn (insn);
8561 	}
8562       if (GET_CODE (insn) == CALL_INSN)
8563 	{
8564 	  if (! saw_stop)
8565 	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8566 	  emit_insn (gen_break_f ());
8567 	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8568 	}
8569     }
8570 
8571   emit_predicate_relation_info ();
8572 
8573   if (ia64_flag_var_tracking)
8574     {
8575       timevar_push (TV_VAR_TRACKING);
8576       variable_tracking_main ();
8577       timevar_pop (TV_VAR_TRACKING);
8578     }
8579 }
8580 
8581 /* Return true if REGNO is used by the epilogue.  */
8582 
8583 int
8584 ia64_epilogue_uses (int regno)
8585 {
8586   switch (regno)
8587     {
8588     case R_GR (1):
8589       /* With a call to a function in another module, we will write a new
8590 	 value to "gp".  After returning from such a call, we need to make
8591 	 sure the function restores the original gp-value, even if the
8592 	 function itself does not use the gp anymore.  */
8593       return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
8594 
8595     case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
8596     case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
8597       /* For functions defined with the syscall_linkage attribute, all
8598 	 input registers are marked as live at all function exits.  This
8599 	 prevents the register allocator from using the input registers,
8600 	 which in turn makes it possible to restart a system call after
8601 	 an interrupt without having to save/restore the input registers.
8602 	 This also prevents kernel data from leaking to application code.  */
8603       return lookup_attribute ("syscall_linkage",
8604 	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
8605 
8606     case R_BR (0):
8607       /* Conditional return patterns can't represent the use of `b0' as
8608          the return address, so we force the value live this way.  */
8609       return 1;
8610 
8611     case AR_PFS_REGNUM:
8612       /* Likewise for ar.pfs, which is used by br.ret.  */
8613       return 1;
8614 
8615     default:
8616       return 0;
8617     }
8618 }
8619 
8620 /* Return true if REGNO is used by the frame unwinder.  */
8621 
8622 int
8623 ia64_eh_uses (int regno)
8624 {
8625   if (! reload_completed)
8626     return 0;
8627 
8628   if (current_frame_info.reg_save_b0
8629       && regno == current_frame_info.reg_save_b0)
8630     return 1;
8631   if (current_frame_info.reg_save_pr
8632       && regno == current_frame_info.reg_save_pr)
8633     return 1;
8634   if (current_frame_info.reg_save_ar_pfs
8635       && regno == current_frame_info.reg_save_ar_pfs)
8636     return 1;
8637   if (current_frame_info.reg_save_ar_unat
8638       && regno == current_frame_info.reg_save_ar_unat)
8639     return 1;
8640   if (current_frame_info.reg_save_ar_lc
8641       && regno == current_frame_info.reg_save_ar_lc)
8642     return 1;
8643 
8644   return 0;
8645 }
8646 
8647 /* Return true if this goes in small data/bss.  */
8648 
8649 /* ??? We could also support our own long data here, generating
8650    movl/add/ld8 instead of addl,ld8/ld8.  This makes the code bigger, but
8651    should make the code faster because there is one less load.  This also
8652    includes incomplete types which can't go in sdata/sbss.  */
8653 
8654 static bool
8655 ia64_in_small_data_p (tree exp)
8656 {
8657   if (TARGET_NO_SDATA)
8658     return false;
8659 
8660   /* We want to merge strings, so we never consider them small data.  */
8661   if (TREE_CODE (exp) == STRING_CST)
8662     return false;
8663 
8664   /* Functions are never small data.  */
8665   if (TREE_CODE (exp) == FUNCTION_DECL)
8666     return false;
8667 
8668   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
8669     {
8670       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
8671 
8672       if (strcmp (section, ".sdata") == 0
8673 	  || strncmp (section, ".sdata.", 7) == 0
8674 	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
8675 	  || strcmp (section, ".sbss") == 0
8676 	  || strncmp (section, ".sbss.", 6) == 0
8677 	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
8678 	return true;
8679     }
8680   else
8681     {
8682       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
8683 
8684       /* If this is an incomplete type with size 0, then we can't put it
8685 	 in sdata because it might be too big when completed.  */
8686       if (size > 0 && size <= ia64_section_threshold)
8687 	return true;
8688     }
8689 
8690   return false;
8691 }
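/* Editor's illustrative note (not part of the original source): objects
   placed in .sdata/.sbss are reached gp-relative with a short sequence,
   roughly

       addl r14 = @gprel(small_var), r1    // r1 is the gp
       ;;
       ld8  r15 = [r14]

   versus the longer movl/add/ld8 sequence needed for ordinary data, as
   the ??? comment above notes.  "small_var" is a made-up name.  */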
8692 
8693 /* Output assembly directives for prologue regions.  */
8694 
8695 /* The current basic block number.  */
8696 
8697 static bool last_block;
8698 
8699 /* True if we need a copy_state command at the start of the next block.  */
8700 
8701 static bool need_copy_state;
8702 
8703 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
8704 # define MAX_ARTIFICIAL_LABEL_BYTES 30
8705 #endif
8706 
8707 /* Emit a debugging label after a call-frame-related insn.  We'd
8708    rather output the label right away, but we'd have to output it
8709    after, not before, the instruction, and the instruction has not
8710    been output yet.  So we emit the label after the insn, delete it to
8711    avoid introducing basic blocks, and mark it as preserved, such that
8712    it is still output, given that it is referenced in debug info.  */
8713 
8714 static const char *
8715 ia64_emit_deleted_label_after_insn (rtx insn)
8716 {
8717   char label[MAX_ARTIFICIAL_LABEL_BYTES];
8718   rtx lb = gen_label_rtx ();
8719   rtx label_insn = emit_label_after (lb, insn);
8720 
8721   LABEL_PRESERVE_P (lb) = 1;
8722 
8723   delete_insn (label_insn);
8724 
8725   ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
8726 
8727   return xstrdup (label);
8728 }
8729 
8730 /* Define the CFA after INSN with the steady-state definition.  */
8731 
8732 static void
8733 ia64_dwarf2out_def_steady_cfa (rtx insn)
8734 {
8735   rtx fp = frame_pointer_needed
8736     ? hard_frame_pointer_rtx
8737     : stack_pointer_rtx;
8738 
8739   dwarf2out_def_cfa
8740     (ia64_emit_deleted_label_after_insn (insn),
8741      REGNO (fp),
8742      ia64_initial_elimination_offset
8743      (REGNO (arg_pointer_rtx), REGNO (fp))
8744      + ARG_POINTER_CFA_OFFSET (current_function_decl));
8745 }
8746 
8747 /* The generic dwarf2 frame debug info generator does not define a
8748    separate region for the very end of the epilogue, so refrain from
8749    doing so in the IA64-specific code as well.  */
8750 
8751 #define IA64_CHANGE_CFA_IN_EPILOGUE 0
8752 
8753 /* The function emits unwind directives for the start of an epilogue.  */
8754 
8755 static void
8756 process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
8757 {
8758   /* If this isn't the last block of the function, then we need to label the
8759      current state, and copy it back in at the start of the next block.  */
8760 
8761   if (!last_block)
8762     {
8763       if (unwind)
8764 	fprintf (asm_out_file, "\t.label_state %d\n",
8765 		 ++cfun->machine->state_num);
8766       need_copy_state = true;
8767     }
8768 
8769   if (unwind)
8770     fprintf (asm_out_file, "\t.restore sp\n");
8771   if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
8772     dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
8773 		       STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
8774 }
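/* Editor's illustrative note (not part of the original source): for an
   epilogue that is not in the last block, the directives emitted here
   and in process_for_unwind_directive below pair up like this:

       .label_state 1
       .restore sp
       ...                  // epilogue block(s)
       .body
       .copy_state 1

   so the unwind state recorded just before the epilogue is reinstated
   at the start of the following block.  */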
8775 
8776 /* This function processes a SET pattern looking for specific patterns
8777    which result in emitting an assembly directive required for unwinding.  */
8778 
8779 static int
8780 process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
8781 {
8782   rtx src = SET_SRC (pat);
8783   rtx dest = SET_DEST (pat);
8784   int src_regno, dest_regno;
8785 
8786   /* Look for the ALLOC insn.  */
8787   if (GET_CODE (src) == UNSPEC_VOLATILE
8788       && XINT (src, 1) == UNSPECV_ALLOC
8789       && GET_CODE (dest) == REG)
8790     {
8791       dest_regno = REGNO (dest);
8792 
8793       /* If this is the final destination for ar.pfs, then this must
8794 	 be the alloc in the prologue.  */
8795       if (dest_regno == current_frame_info.reg_save_ar_pfs)
8796 	{
8797 	  if (unwind)
8798 	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
8799 		     ia64_dbx_register_number (dest_regno));
8800 	}
8801       else
8802 	{
8803 	  /* This must be an alloc before a sibcall.  We must drop the
8804 	     old frame info.  The easiest way to drop the old frame
8805 	     info is to ensure we had a ".restore sp" directive
8806 	     followed by a new prologue.  If the procedure doesn't
8807 	     have a memory-stack frame, we'll issue a dummy ".restore
8808 	     sp" now.  */
8809 	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
8810 	    /* If we haven't done process_epilogue () yet, do it now.  */
8811 	    process_epilogue (asm_out_file, insn, unwind, frame);
8812 	  if (unwind)
8813 	    fprintf (asm_out_file, "\t.prologue\n");
8814 	}
8815       return 1;
8816     }
8817 
8818   /* Look for SP = ....  */
8819   if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
8820     {
8821       if (GET_CODE (src) == PLUS)
8822         {
8823 	  rtx op0 = XEXP (src, 0);
8824 	  rtx op1 = XEXP (src, 1);
8825 
8826 	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
8827 
8828 	  if (INTVAL (op1) < 0)
8829 	    {
8830 	      gcc_assert (!frame_pointer_needed);
8831 	      if (unwind)
8832 		fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
8833 			 -INTVAL (op1));
8834 	      if (frame)
8835 		ia64_dwarf2out_def_steady_cfa (insn);
8836 	    }
8837 	  else
8838 	    process_epilogue (asm_out_file, insn, unwind, frame);
8839 	}
8840       else
8841 	{
8842 	  gcc_assert (GET_CODE (src) == REG
8843 		      && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
8844 	  process_epilogue (asm_out_file, insn, unwind, frame);
8845 	}
8846 
8847       return 1;
8848     }
8849 
8850   /* Register move we need to look at.  */
8851   if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
8852     {
8853       src_regno = REGNO (src);
8854       dest_regno = REGNO (dest);
8855 
8856       switch (src_regno)
8857 	{
8858 	case BR_REG (0):
8859 	  /* Saving return address pointer.  */
8860 	  gcc_assert (dest_regno == current_frame_info.reg_save_b0);
8861 	  if (unwind)
8862 	    fprintf (asm_out_file, "\t.save rp, r%d\n",
8863 		     ia64_dbx_register_number (dest_regno));
8864 	  return 1;
8865 
8866 	case PR_REG (0):
8867 	  gcc_assert (dest_regno == current_frame_info.reg_save_pr);
8868 	  if (unwind)
8869 	    fprintf (asm_out_file, "\t.save pr, r%d\n",
8870 		     ia64_dbx_register_number (dest_regno));
8871 	  return 1;
8872 
8873 	case AR_UNAT_REGNUM:
8874 	  gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat);
8875 	  if (unwind)
8876 	    fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
8877 		     ia64_dbx_register_number (dest_regno));
8878 	  return 1;
8879 
8880 	case AR_LC_REGNUM:
8881 	  gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc);
8882 	  if (unwind)
8883 	    fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
8884 		     ia64_dbx_register_number (dest_regno));
8885 	  return 1;
8886 
8887 	case STACK_POINTER_REGNUM:
8888 	  gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
8889 		      && frame_pointer_needed);
8890 	  if (unwind)
8891 	    fprintf (asm_out_file, "\t.vframe r%d\n",
8892 		     ia64_dbx_register_number (dest_regno));
8893 	  if (frame)
8894 	    ia64_dwarf2out_def_steady_cfa (insn);
8895 	  return 1;
8896 
8897 	default:
8898 	  /* Everything else should indicate being stored to memory.  */
8899 	  gcc_unreachable ();
8900 	}
8901     }
8902 
8903   /* Memory store we need to look at.  */
8904   if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
8905     {
8906       long off;
8907       rtx base;
8908       const char *saveop;
8909 
8910       if (GET_CODE (XEXP (dest, 0)) == REG)
8911 	{
8912 	  base = XEXP (dest, 0);
8913 	  off = 0;
8914 	}
8915       else
8916 	{
8917 	  gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
8918 		      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
8919 	  base = XEXP (XEXP (dest, 0), 0);
8920 	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
8921 	}
8922 
8923       if (base == hard_frame_pointer_rtx)
8924 	{
8925 	  saveop = ".savepsp";
8926 	  off = - off;
8927 	}
8928       else
8929 	{
8930 	  gcc_assert (base == stack_pointer_rtx);
8931 	  saveop = ".savesp";
8932 	}
8933 
8934       src_regno = REGNO (src);
8935       switch (src_regno)
8936 	{
8937 	case BR_REG (0):
8938 	  gcc_assert (!current_frame_info.reg_save_b0);
8939 	  if (unwind)
8940 	    fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
8941 	  return 1;
8942 
8943 	case PR_REG (0):
8944 	  gcc_assert (!current_frame_info.reg_save_pr);
8945 	  if (unwind)
8946 	    fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
8947 	  return 1;
8948 
8949 	case AR_LC_REGNUM:
8950 	  gcc_assert (!current_frame_info.reg_save_ar_lc);
8951 	  if (unwind)
8952 	    fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
8953 	  return 1;
8954 
8955 	case AR_PFS_REGNUM:
8956 	  gcc_assert (!current_frame_info.reg_save_ar_pfs);
8957 	  if (unwind)
8958 	    fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
8959 	  return 1;
8960 
8961 	case AR_UNAT_REGNUM:
8962 	  gcc_assert (!current_frame_info.reg_save_ar_unat);
8963 	  if (unwind)
8964 	    fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
8965 	  return 1;
8966 
8967 	case GR_REG (4):
8968 	case GR_REG (5):
8969 	case GR_REG (6):
8970 	case GR_REG (7):
8971 	  if (unwind)
8972 	    fprintf (asm_out_file, "\t.save.g 0x%x\n",
8973 		     1 << (src_regno - GR_REG (4)));
8974 	  return 1;
8975 
8976 	case BR_REG (1):
8977 	case BR_REG (2):
8978 	case BR_REG (3):
8979 	case BR_REG (4):
8980 	case BR_REG (5):
8981 	  if (unwind)
8982 	    fprintf (asm_out_file, "\t.save.b 0x%x\n",
8983 		     1 << (src_regno - BR_REG (1)));
8984 	  return 1;
8985 
8986 	case FR_REG (2):
8987 	case FR_REG (3):
8988 	case FR_REG (4):
8989 	case FR_REG (5):
8990 	  if (unwind)
8991 	    fprintf (asm_out_file, "\t.save.f 0x%x\n",
8992 		     1 << (src_regno - FR_REG (2)));
8993 	  return 1;
8994 
8995 	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
8996 	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
8997 	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
8998 	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
8999 	  if (unwind)
9000 	    fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9001 		     1 << (src_regno - FR_REG (12)));
9002 	  return 1;
9003 
9004 	default:
9005 	  return 0;
9006 	}
9007     }
9008 
9009   return 0;
9010 }
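/* Editor's illustrative note (not part of the original source): the SETs
   handled above turn into unwind annotations such as (register numbers
   made up):

       .save ar.pfs, r34     // alloc result copied to a general register
       .fframe 64            // fixed-size memory stack frame
       .save rp, r35         // return pointer (b0) saved in a register
       .savesp ar.unat, 16   // resource saved to memory at sp + 16

   which let the unwinder recover each resource without decoding the
   prologue instructions themselves.  */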
9011 
9012 
9013 /* This function looks at a single insn and emits any directives
9014    required to unwind this insn.  */
9015 void
9016 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
9017 {
9018   bool unwind = (flag_unwind_tables
9019 		 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9020   bool frame = dwarf2out_do_frame ();
9021 
9022   if (unwind || frame)
9023     {
9024       rtx pat;
9025 
9026       if (GET_CODE (insn) == NOTE
9027 	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
9028 	{
9029 	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9030 
9031 	  /* Restore unwind state from immediately before the epilogue.  */
9032 	  if (need_copy_state)
9033 	    {
9034 	      if (unwind)
9035 		{
9036 		  fprintf (asm_out_file, "\t.body\n");
9037 		  fprintf (asm_out_file, "\t.copy_state %d\n",
9038 			   cfun->machine->state_num);
9039 		}
9040 	      if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9041 		ia64_dwarf2out_def_steady_cfa (insn);
9042 	      need_copy_state = false;
9043 	    }
9044 	}
9045 
9046       if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
9047 	return;
9048 
9049       pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9050       if (pat)
9051 	pat = XEXP (pat, 0);
9052       else
9053 	pat = PATTERN (insn);
9054 
9055       switch (GET_CODE (pat))
9056         {
9057 	case SET:
9058 	  process_set (asm_out_file, pat, insn, unwind, frame);
9059 	  break;
9060 
9061 	case PARALLEL:
9062 	  {
9063 	    int par_index;
9064 	    int limit = XVECLEN (pat, 0);
9065 	    for (par_index = 0; par_index < limit; par_index++)
9066 	      {
9067 		rtx x = XVECEXP (pat, 0, par_index);
9068 		if (GET_CODE (x) == SET)
9069 		  process_set (asm_out_file, x, insn, unwind, frame);
9070 	      }
9071 	    break;
9072 	  }
9073 
9074 	default:
9075 	  gcc_unreachable ();
9076 	}
9077     }
9078 }
9079 
9080 
9081 enum ia64_builtins
9082 {
9083   IA64_BUILTIN_BSP,
9084   IA64_BUILTIN_FLUSHRS
9085 };
9086 
9087 void
9088 ia64_init_builtins (void)
9089 {
9090   tree fpreg_type;
9091   tree float80_type;
9092 
9093   /* The __fpreg type.  */
9094   fpreg_type = make_node (REAL_TYPE);
9095   TYPE_PRECISION (fpreg_type) = 82;
9096   layout_type (fpreg_type);
9097   (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9098 
9099   /* The __float80 type.  */
9100   float80_type = make_node (REAL_TYPE);
9101   TYPE_PRECISION (float80_type) = 80;
9102   layout_type (float80_type);
9103   (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9104 
9105   /* The __float128 type.  */
9106   if (!TARGET_HPUX)
9107     {
9108       tree float128_type = make_node (REAL_TYPE);
9109       TYPE_PRECISION (float128_type) = 128;
9110       layout_type (float128_type);
9111       (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
9112     }
9113   else
9114     /* Under HPUX, this is a synonym for "long double".  */
9115     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
9116 					       "__float128");
9117 
9118 #define def_builtin(name, type, code)					\
9119   lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD,	\
9120 			       NULL, NULL_TREE)
9121 
9122   def_builtin ("__builtin_ia64_bsp",
9123 	       build_function_type (ptr_type_node, void_list_node),
9124 	       IA64_BUILTIN_BSP);
9125 
9126   def_builtin ("__builtin_ia64_flushrs",
9127 	       build_function_type (void_type_node, void_list_node),
9128 	       IA64_BUILTIN_FLUSHRS);
9129 
9130 #undef def_builtin
9131 }
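/* Editor's illustrative note (not part of the original source): from user
   code the two builtins registered above look like

       void *bsp = __builtin_ia64_bsp ();   // current backing store pointer
       __builtin_ia64_flushrs ();           // flush register stack to memory

   matching the prototypes built here: void *__builtin_ia64_bsp (void) and
   void __builtin_ia64_flushrs (void).  */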
9132 
9133 rtx
9134 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9135 		     enum machine_mode mode ATTRIBUTE_UNUSED,
9136 		     int ignore ATTRIBUTE_UNUSED)
9137 {
9138   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9139   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9140 
9141   switch (fcode)
9142     {
9143     case IA64_BUILTIN_BSP:
9144       if (! target || ! register_operand (target, DImode))
9145 	target = gen_reg_rtx (DImode);
9146       emit_insn (gen_bsp_value (target));
9147 #ifdef POINTERS_EXTEND_UNSIGNED
9148       target = convert_memory_address (ptr_mode, target);
9149 #endif
9150       return target;
9151 
9152     case IA64_BUILTIN_FLUSHRS:
9153       emit_insn (gen_flushrs ());
9154       return const0_rtx;
9155 
9156     default:
9157       break;
9158     }
9159 
9160   return NULL_RTX;
9161 }
9162 
9163 /* For HP-UX IA64, aggregate parameters are passed in the most
9164    significant bits of the stack slot.  */
9165 
9166 enum direction
9167 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
9168 {
9169    /* Exception to normal case for structures/unions/etc.  */
9170 
9171    if (type && AGGREGATE_TYPE_P (type)
9172        && int_size_in_bytes (type) < UNITS_PER_WORD)
9173      return upward;
9174 
9175    /* Fall back to the default.  */
9176    return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
9177 }
9178 
9179 /* Emit text to declare externally defined variables and functions, because
9180    the Intel assembler does not support undefined externals.  */
9181 
9182 void
9183 ia64_asm_output_external (FILE *file, tree decl, const char *name)
9184 {
9185   /* We output the name if and only if TREE_SYMBOL_REFERENCED is
9186      set in order to avoid putting out names that are never really
9187      used. */
9188   if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
9189     {
9190       /* maybe_assemble_visibility will return 1 if the assembler
9191 	 visibility directive is output.  */
9192       int need_visibility = ((*targetm.binds_local_p) (decl)
9193 			     && maybe_assemble_visibility (decl));
9194 
9195       /* GNU as does not need anything here, but the HP linker does
9196 	 need something for external functions.  */
9197       if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
9198 	  && TREE_CODE (decl) == FUNCTION_DECL)
9199 	{
9200 	  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
9201 	  (*targetm.asm_out.globalize_label) (file, name);
9202 	}
9203       else if (need_visibility && !TARGET_GNU_AS)
9204 	(*targetm.asm_out.globalize_label) (file, name);
9205     }
9206 }
9207 
9208 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
9209    modes of word_mode and larger.  Rename the TFmode libfuncs using the
9210    HPUX conventions.  __divtf3 is used for XFmode.  We need to keep it
9211    for backward compatibility.  */
9212 
9213 static void
9214 ia64_init_libfuncs (void)
9215 {
9216   set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
9217   set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
9218   set_optab_libfunc (smod_optab, SImode, "__modsi3");
9219   set_optab_libfunc (umod_optab, SImode, "__umodsi3");
9220 
9221   set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
9222   set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
9223   set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
9224   set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
9225   set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
9226 
9227   set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
9228   set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
9229   set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
9230   set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
9231   set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
9232   set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
9233 
9234   set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
9235   set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
9236   set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
9237   set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
9238   set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
9239 
9240   set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
9241   set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
9242   set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
9243   /* HP-UX 11.23 libc does not have a function for unsigned
9244      SImode-to-TFmode conversion.  */
9245   set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
9246 }
9247 
9248 /* Rename all the TFmode libfuncs using the HPUX conventions.  */
9249 
9250 static void
9251 ia64_hpux_init_libfuncs (void)
9252 {
9253   ia64_init_libfuncs ();
9254 
9255   /* The HP SI millicode division and mod functions expect DI arguments.
9256      By turning them off completely we avoid using both libgcc and the
9257      non-standard millicode routines and use the HP DI millicode routines
9258      instead.  */
9259 
9260   set_optab_libfunc (sdiv_optab, SImode, 0);
9261   set_optab_libfunc (udiv_optab, SImode, 0);
9262   set_optab_libfunc (smod_optab, SImode, 0);
9263   set_optab_libfunc (umod_optab, SImode, 0);
9264 
9265   set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
9266   set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
9267   set_optab_libfunc (smod_optab, DImode, "__milli_remI");
9268   set_optab_libfunc (umod_optab, DImode, "__milli_remU");
9269 
9270   /* HP-UX libc has TF min/max/abs routines in it.  */
9271   set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
9272   set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
9273   set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
9274 
9275   /* ia64_expand_compare uses this.  */
9276   cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
9277 
9278   /* These should never be used.  */
9279   set_optab_libfunc (eq_optab, TFmode, 0);
9280   set_optab_libfunc (ne_optab, TFmode, 0);
9281   set_optab_libfunc (gt_optab, TFmode, 0);
9282   set_optab_libfunc (ge_optab, TFmode, 0);
9283   set_optab_libfunc (lt_optab, TFmode, 0);
9284   set_optab_libfunc (le_optab, TFmode, 0);
9285 }
9286 
9287 /* Rename the division and modulus functions in VMS.  */
9288 
9289 static void
9290 ia64_vms_init_libfuncs (void)
9291 {
9292   set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9293   set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9294   set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9295   set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9296   set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9297   set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9298   set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9299   set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9300 }
9301 
9302 /* Rename the TFmode libfuncs available from soft-fp in glibc using
9303    the HPUX conventions.  */
9304 
9305 static void
9306 ia64_sysv4_init_libfuncs (void)
9307 {
9308   ia64_init_libfuncs ();
9309 
9310   /* These functions are not part of the HPUX TFmode interface.  We
9311      use them instead of _U_Qfcmp, which doesn't work the way we
9312      expect.  */
9313   set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
9314   set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
9315   set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
9316   set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
9317   set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
9318   set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
9319 
9320   /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
9321      glibc doesn't have them.  */
9322 }
9323 
9324 /* For HPUX, it is illegal to have relocations in shared segments.  */
9325 
9326 static int
9327 ia64_hpux_reloc_rw_mask (void)
9328 {
9329   return 3;
9330 }
9331 
9332 /* For others, relax this so that relocations to local data go in
9333    read-only segments, but we still cannot allow global relocations
9334    in read-only segments.  */
9335 
9336 static int
9337 ia64_reloc_rw_mask (void)
9338 {
9339   return flag_pic ? 3 : 2;
9340 }
9341 
9342 /* Return the section to use for X.  The only special thing we do here
9343    is to honor small data.  */
9344 
9345 static section *
9346 ia64_select_rtx_section (enum machine_mode mode, rtx x,
9347 			 unsigned HOST_WIDE_INT align)
9348 {
9349   if (GET_MODE_SIZE (mode) > 0
9350       && GET_MODE_SIZE (mode) <= ia64_section_threshold
9351       && !TARGET_NO_SDATA)
9352     return sdata_section;
9353   else
9354     return default_elf_select_rtx_section (mode, x, align);
9355 }
9356 
9357 static unsigned int
9358 ia64_section_type_flags (tree decl, const char *name, int reloc)
9359 {
9360   unsigned int flags = 0;
9361 
9362   if (strcmp (name, ".sdata") == 0
9363       || strncmp (name, ".sdata.", 7) == 0
9364       || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9365       || strncmp (name, ".sdata2.", 8) == 0
9366       || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
9367       || strcmp (name, ".sbss") == 0
9368       || strncmp (name, ".sbss.", 6) == 0
9369       || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9370     flags = SECTION_SMALL;
9371 
9372   flags |= default_section_type_flags (decl, name, reloc);
9373   return flags;
9374 }
9375 
9376 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
9377    structure type and that the address of that type should be passed
9378    in out0, rather than in r8.  */
9379 
9380 static bool
9381 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
9382 {
9383   tree ret_type = TREE_TYPE (fntype);
9384 
9385   /* The Itanium C++ ABI requires that out0, rather than r8, be used
9386      as the structure return address parameter, if the return value
9387      type has a non-trivial copy constructor or destructor.  It is not
9388      clear if this same convention should be used for other
9389      programming languages.  Until G++ 3.4, we incorrectly used r8 for
9390      these return values.  */
9391   return (abi_version_at_least (2)
9392 	  && ret_type
9393 	  && TYPE_MODE (ret_type) == BLKmode
9394 	  && TREE_ADDRESSABLE (ret_type)
9395 	  && strcmp (lang_hooks.name, "GNU C++") == 0);
9396 }
9397 
9398 /* Output the assembler code for a thunk function.  THUNK_DECL is the
9399    declaration for the thunk function itself, FUNCTION is the decl for
9400    the target function.  DELTA is an immediate constant offset to be
9401    added to THIS.  If VCALL_OFFSET is nonzero, the word at
9402    *(*this + vcall_offset) should be added to THIS.  */
9403 
9404 static void
9405 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
9406 		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9407 		      tree function)
9408 {
9409   rtx this, insn, funexp;
9410   unsigned int this_parmno;
9411   unsigned int this_regno;
9412 
9413   reload_completed = 1;
9414   epilogue_completed = 1;
9415   no_new_pseudos = 1;
9416   reset_block_changes ();
9417 
9418   /* Set things up as ia64_expand_prologue might.  */
9419   last_scratch_gr_reg = 15;
9420 
9421   memset (&current_frame_info, 0, sizeof (current_frame_info));
9422   current_frame_info.spill_cfa_off = -16;
9423   current_frame_info.n_input_regs = 1;
9424   current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
9425 
9426   /* Mark the end of the (empty) prologue.  */
9427   emit_note (NOTE_INSN_PROLOGUE_END);
9428 
9429   /* Figure out whether "this" will be the first parameter (the
9430      typical case) or the second parameter (as happens when the
9431      virtual function returns certain class objects).  */
9432   this_parmno
9433     = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
9434        ? 1 : 0);
9435   this_regno = IN_REG (this_parmno);
9436   if (!TARGET_REG_NAMES)
9437     reg_names[this_regno] = ia64_reg_numbers[this_parmno];
9438 
9439   this = gen_rtx_REG (Pmode, this_regno);
9440   if (TARGET_ILP32)
9441     {
9442       rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
9443       REG_POINTER (tmp) = 1;
9444       if (delta && CONST_OK_FOR_I (delta))
9445 	{
9446 	  emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
9447 	  delta = 0;
9448 	}
9449       else
9450 	emit_insn (gen_ptr_extend (this, tmp));
9451     }
9452 
9453   /* Apply the constant offset, if required.  */
9454   if (delta)
9455     {
9456       rtx delta_rtx = GEN_INT (delta);
9457 
9458       if (!CONST_OK_FOR_I (delta))
9459 	{
9460 	  rtx tmp = gen_rtx_REG (Pmode, 2);
9461 	  emit_move_insn (tmp, delta_rtx);
9462 	  delta_rtx = tmp;
9463 	}
9464       emit_insn (gen_adddi3 (this, this, delta_rtx));
9465     }
9466 
9467   /* Apply the offset from the vtable, if required.  */
9468   if (vcall_offset)
9469     {
9470       rtx vcall_offset_rtx = GEN_INT (vcall_offset);
9471       rtx tmp = gen_rtx_REG (Pmode, 2);
9472 
9473       if (TARGET_ILP32)
9474 	{
9475 	  rtx t = gen_rtx_REG (ptr_mode, 2);
9476 	  REG_POINTER (t) = 1;
9477 	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
9478 	  if (CONST_OK_FOR_I (vcall_offset))
9479 	    {
9480 	      emit_insn (gen_ptr_extend_plus_imm (tmp, t,
9481 						  vcall_offset_rtx));
9482 	      vcall_offset = 0;
9483 	    }
9484 	  else
9485 	    emit_insn (gen_ptr_extend (tmp, t));
9486 	}
9487       else
9488 	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
9489 
9490       if (vcall_offset)
9491 	{
9492 	  if (!CONST_OK_FOR_J (vcall_offset))
9493 	    {
9494 	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
9495 	      emit_move_insn (tmp2, vcall_offset_rtx);
9496 	      vcall_offset_rtx = tmp2;
9497 	    }
9498 	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
9499 	}
9500 
9501       if (TARGET_ILP32)
9502 	emit_move_insn (gen_rtx_REG (ptr_mode, 2),
9503 			gen_rtx_MEM (ptr_mode, tmp));
9504       else
9505 	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
9506 
9507       emit_insn (gen_adddi3 (this, this, tmp));
9508     }
9509 
9510   /* Generate a tail call to the target function.  */
9511   if (! TREE_USED (function))
9512     {
9513       assemble_external (function);
9514       TREE_USED (function) = 1;
9515     }
9516   funexp = XEXP (DECL_RTL (function), 0);
9517   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
9518   ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
9519   insn = get_last_insn ();
9520   SIBLING_CALL_P (insn) = 1;
9521 
9522   /* Code generation for calls relies on splitting.  */
9523   reload_completed = 1;
9524   epilogue_completed = 1;
9525   try_split (PATTERN (insn), insn, 0);
9526 
9527   emit_barrier ();
9528 
9529   /* Run just enough of rest_of_compilation to get the insns emitted.
9530      There's not really enough bulk here to make other passes such as
9531      instruction scheduling worth while.  Note that use_thunk calls
9532      assemble_start_function and assemble_end_function.  */
9533 
9534   insn_locators_initialize ();
9535   emit_all_insn_group_barriers (NULL);
9536   insn = get_insns ();
9537   shorten_branches (insn);
9538   final_start_function (insn, file, 1);
9539   final (insn, file, 1);
9540   final_end_function ();
9541 
9542   reload_completed = 0;
9543   epilogue_completed = 0;
9544   no_new_pseudos = 0;
9545 }
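/* Editor's illustrative note (not part of the original source): the thunk
   emitted above is conceptually just

       this += delta;
       if (vcall_offset)
         this += *(*this + vcall_offset);
       tail-call FUNCTION with the adjusted this;

   i.e. a `this'-pointer adjustment followed by a sibcall, which is why
   only splitting and the final pass need to be run for it.  */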
9546 
9547 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9548 
9549 static rtx
9550 ia64_struct_value_rtx (tree fntype,
9551 		       int incoming ATTRIBUTE_UNUSED)
9552 {
9553   if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
9554     return NULL_RTX;
9555   return gen_rtx_REG (Pmode, GR_REG (8));
9556 }
9557 
9558 static bool
9559 ia64_scalar_mode_supported_p (enum machine_mode mode)
9560 {
9561   switch (mode)
9562     {
9563     case QImode:
9564     case HImode:
9565     case SImode:
9566     case DImode:
9567     case TImode:
9568       return true;
9569 
9570     case SFmode:
9571     case DFmode:
9572     case XFmode:
9573     case RFmode:
9574       return true;
9575 
9576     case TFmode:
9577       return TARGET_HPUX;
9578 
9579     default:
9580       return false;
9581     }
9582 }
9583 
9584 static bool
9585 ia64_vector_mode_supported_p (enum machine_mode mode)
9586 {
9587   switch (mode)
9588     {
9589     case V8QImode:
9590     case V4HImode:
9591     case V2SImode:
9592       return true;
9593 
9594     case V2SFmode:
9595       return true;
9596 
9597     default:
9598       return false;
9599     }
9600 }
9601 
9602 /* Implement the FUNCTION_PROFILER macro.  */
9603 
9604 void
9605 ia64_output_function_profiler (FILE *file, int labelno)
9606 {
9607   bool indirect_call;
9608 
9609   /* If the function needs a static chain and the static chain
9610      register is r15, we use an indirect call so as to bypass
9611      the PLT stub in case the executable is dynamically linked,
9612      because the stub clobbers r15 as per 5.3.6 of the psABI.
9613      We don't need to do that in non canonical PIC mode.  */
9614 
9615   if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
9616     {
9617       gcc_assert (STATIC_CHAIN_REGNUM == 15);
9618       indirect_call = true;
9619     }
9620   else
9621     indirect_call = false;
9622 
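  /* Emit the unwind directives for the profiling code and allocate a
     new register frame with 8 input, 0 local and 4 output registers;
     ar.pfs is saved in out0 (r40).  */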
9623   if (TARGET_GNU_AS)
9624     fputs ("\t.prologue 4, r40\n", file);
9625   else
9626     fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
9627   fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
9628 
9629   if (NO_PROFILE_COUNTERS)
9630     fputs ("\tmov out3 = r0\n", file);
9631   else
9632     {
9633       char buf[20];
9634       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9635 
9636       if (TARGET_AUTO_PIC)
9637 	fputs ("\tmovl out3 = @gprel(", file);
9638       else
9639 	fputs ("\taddl out3 = @ltoff(", file);
9640       assemble_name (file, buf);
9641       if (TARGET_AUTO_PIC)
9642 	fputs (")\n", file);
9643       else
9644 	fputs ("), r1\n", file);
9645     }
9646 
9647   if (indirect_call)
9648     fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
9649   fputs ("\t;;\n", file);
9650 
9651   fputs ("\t.save rp, r42\n", file);
9652   fputs ("\tmov out2 = b0\n", file);
9653   if (indirect_call)
9654     fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
9655   fputs ("\t.body\n", file);
9656   fputs ("\tmov out1 = r1\n", file);
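  /* For the indirect call, load the function descriptor of _mcount:
     its entry point goes into b6 and its gp into r1, and branching
     through b6 bypasses the r15-clobbering PLT stub.  */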
9657   if (indirect_call)
9658     {
9659       fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
9660       fputs ("\tmov b6 = r16\n", file);
9661       fputs ("\tld8 r1 = [r14]\n", file);
9662       fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
9663     }
9664   else
9665     fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
9666 }
9667 
9668 static GTY(()) rtx mcount_func_rtx;
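/* Return an rtx for the _mcount library function, creating it on
   first use and caching it for subsequent calls.  */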
9669 static rtx
9670 gen_mcount_func_rtx (void)
9671 {
9672   if (!mcount_func_rtx)
9673     mcount_func_rtx = init_one_libfunc ("_mcount");
9674   return mcount_func_rtx;
9675 }
9676 
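/* Emit rtl to call _mcount for profiling: pass the return branch
   register (b0), the current instruction pointer, and the address of
   the counter label for LABELNO (or zero if NO_PROFILE_COUNTERS).  */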
9677 void
9678 ia64_profile_hook (int labelno)
9679 {
9680   rtx label, ip;
9681 
9682   if (NO_PROFILE_COUNTERS)
9683     label = const0_rtx;
9684   else
9685     {
9686       char buf[30];
9687       const char *label_name;
9688       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9689       label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
9690       label = gen_rtx_SYMBOL_REF (Pmode, label_name);
9691       SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
9692     }
9693   ip = gen_reg_rtx (Pmode);
9694   emit_insn (gen_ip_value (ip));
9695   emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
9696                      VOIDmode, 3,
9697 		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
9698 		     ip, Pmode,
9699 		     label, Pmode);
9700 }
9701 
9702 /* Return the mangling of TYPE if it is an extended fundamental type.  */
9703 
9704 static const char *
9705 ia64_mangle_fundamental_type (tree type)
9706 {
9707   /* On HP-UX, "long double" is mangled as "e" so __float128 is
9708      mangled as "e".  */
9709   if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
9710     return "g";
9711   /* On HP-UX, "e" is not available as a mangling of __float80 so use
9712      an extended mangling.  Elsewhere, "e" is available since long
9713      double is 80 bits.  */
9714   if (TYPE_MODE (type) == XFmode)
9715     return TARGET_HPUX ? "u9__float80" : "e";
9716   if (TYPE_MODE (type) == RFmode)
9717     return "u7__fpreg";
9718   return NULL;
9719 }
9720 
9721 /* Return the diagnostic message string if conversion from FROMTYPE to
9722    TOTYPE is not allowed, NULL otherwise.  */
9723 static const char *
9724 ia64_invalid_conversion (tree fromtype, tree totype)
9725 {
9726   /* Reject nontrivial conversion to or from __fpreg.  */
9727   if (TYPE_MODE (fromtype) == RFmode
9728       && TYPE_MODE (totype) != RFmode
9729       && TYPE_MODE (totype) != VOIDmode)
9730     return N_("invalid conversion from %<__fpreg%>");
9731   if (TYPE_MODE (totype) == RFmode
9732       && TYPE_MODE (fromtype) != RFmode)
9733     return N_("invalid conversion to %<__fpreg%>");
9734   return NULL;
9735 }
9736 
9737 /* Return the diagnostic message string if the unary operation OP is
9738    not permitted on TYPE, NULL otherwise.  */
9739 static const char *
9740 ia64_invalid_unary_op (int op, tree type)
9741 {
9742   /* Reject operations on __fpreg other than unary + or &.  */
9743   if (TYPE_MODE (type) == RFmode
9744       && op != CONVERT_EXPR
9745       && op != ADDR_EXPR)
9746     return N_("invalid operation on %<__fpreg%>");
9747   return NULL;
9748 }
9749 
9750 /* Return the diagnostic message string if the binary operation OP is
9751    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
9752 static const char *
9753 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, tree type1, tree type2)
9754 {
9755   /* Reject operations on __fpreg.  */
9756   if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
9757     return N_("invalid operation on %<__fpreg%>");
9758   return NULL;
9759 }
9760 
9761 /* Implement overriding of the optimization options.  */
9762 void
9763 ia64_optimization_options (int level ATTRIBUTE_UNUSED,
9764                            int size ATTRIBUTE_UNUSED)
9765 {
9766   /* Let the scheduler form additional regions.  */
9767   set_param_value ("max-sched-extend-regions-iters", 2);
9768 }
9769 
9770 #include "gt-ia64.h"
9771