1 /* Definitions of target machine for GNU compiler. 2 Copyright (C) 1999-2017 Free Software Foundation, Inc. 3 Contributed by James E. Wilson <wilson@cygnus.com> and 4 David Mosberger <davidm@hpl.hp.com>. 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3, or (at your option) 11 any later version. 12 13 GCC is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include "config.h" 23 #include "system.h" 24 #include "coretypes.h" 25 #include "backend.h" 26 #include "target.h" 27 #include "rtl.h" 28 #include "tree.h" 29 #include "memmodel.h" 30 #include "cfghooks.h" 31 #include "df.h" 32 #include "tm_p.h" 33 #include "stringpool.h" 34 #include "optabs.h" 35 #include "regs.h" 36 #include "emit-rtl.h" 37 #include "recog.h" 38 #include "diagnostic-core.h" 39 #include "alias.h" 40 #include "fold-const.h" 41 #include "stor-layout.h" 42 #include "calls.h" 43 #include "varasm.h" 44 #include "output.h" 45 #include "insn-attr.h" 46 #include "flags.h" 47 #include "explow.h" 48 #include "expr.h" 49 #include "cfgrtl.h" 50 #include "libfuncs.h" 51 #include "sched-int.h" 52 #include "common/common-target.h" 53 #include "langhooks.h" 54 #include "gimplify.h" 55 #include "intl.h" 56 #include "debug.h" 57 #include "params.h" 58 #include "dbgcnt.h" 59 #include "tm-constrs.h" 60 #include "sel-sched.h" 61 #include "reload.h" 62 #include "opts.h" 63 #include "dumpfile.h" 64 #include "builtins.h" 65 66 /* This file should be included last. */ 67 #include "target-def.h" 68 69 /* This is used for communication between ASM_OUTPUT_LABEL and 70 ASM_OUTPUT_LABELREF. */ 71 int ia64_asm_output_label = 0; 72 73 /* Register names for ia64_expand_prologue. */ 74 static const char * const ia64_reg_numbers[96] = 75 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", 76 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", 77 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", 78 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", 79 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71", 80 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79", 81 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87", 82 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95", 83 "r96", "r97", "r98", "r99", "r100","r101","r102","r103", 84 "r104","r105","r106","r107","r108","r109","r110","r111", 85 "r112","r113","r114","r115","r116","r117","r118","r119", 86 "r120","r121","r122","r123","r124","r125","r126","r127"}; 87 88 /* ??? These strings could be shared with REGISTER_NAMES. */ 89 static const char * const ia64_input_reg_names[8] = 90 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" }; 91 92 /* ??? These strings could be shared with REGISTER_NAMES. 
*/ 93 static const char * const ia64_local_reg_names[80] = 94 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", 95 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", 96 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", 97 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", 98 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", 99 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", 100 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", 101 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", 102 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", 103 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" }; 104 105 /* ??? These strings could be shared with REGISTER_NAMES. */ 106 static const char * const ia64_output_reg_names[8] = 107 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" }; 108 109 /* Variables which are this size or smaller are put in the sdata/sbss 110 sections. */ 111 112 unsigned int ia64_section_threshold; 113 114 /* The following variable is used by the DFA insn scheduler. The value is 115 TRUE if we do insn bundling instead of insn scheduling. */ 116 int bundling_p = 0; 117 118 enum ia64_frame_regs 119 { 120 reg_fp, 121 reg_save_b0, 122 reg_save_pr, 123 reg_save_ar_pfs, 124 reg_save_ar_unat, 125 reg_save_ar_lc, 126 reg_save_gp, 127 number_of_ia64_frame_regs 128 }; 129 130 /* Structure to be filled in by ia64_compute_frame_size with register 131 save masks and offsets for the current function. */ 132 133 struct ia64_frame_info 134 { 135 HOST_WIDE_INT total_size; /* size of the stack frame, not including 136 the caller's scratch area. */ 137 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */ 138 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */ 139 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */ 140 HARD_REG_SET mask; /* mask of saved registers. */ 141 unsigned int gr_used_mask; /* mask of registers in use as gr spill 142 registers or long-term scratches. */ 143 int n_spilled; /* number of spilled registers. */ 144 int r[number_of_ia64_frame_regs]; /* Frame related registers. */ 145 int n_input_regs; /* number of input registers used. */ 146 int n_local_regs; /* number of local registers used. */ 147 int n_output_regs; /* number of output registers used. */ 148 int n_rotate_regs; /* number of rotating registers used. */ 149 150 char need_regstk; /* true if a .regstk directive needed. */ 151 char initialized; /* true if the data is finalized. */ 152 }; 153 154 /* Current frame information calculated by ia64_compute_frame_size. */ 155 static struct ia64_frame_info current_frame_info; 156 /* The actual registers that are emitted. 
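   These are indexed by the ia64_frame_regs values above.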
*/ 157 static int emitted_frame_related_regs[number_of_ia64_frame_regs]; 158 159 static int ia64_first_cycle_multipass_dfa_lookahead (void); 160 static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *); 161 static void ia64_init_dfa_pre_cycle_insn (void); 162 static rtx ia64_dfa_pre_cycle_insn (void); 163 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int); 164 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *); 165 static void ia64_h_i_d_extended (void); 166 static void * ia64_alloc_sched_context (void); 167 static void ia64_init_sched_context (void *, bool); 168 static void ia64_set_sched_context (void *); 169 static void ia64_clear_sched_context (void *); 170 static void ia64_free_sched_context (void *); 171 static int ia64_mode_to_int (machine_mode); 172 static void ia64_set_sched_flags (spec_info_t); 173 static ds_t ia64_get_insn_spec_ds (rtx_insn *); 174 static ds_t ia64_get_insn_checked_ds (rtx_insn *); 175 static bool ia64_skip_rtx_p (const_rtx); 176 static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *); 177 static bool ia64_needs_block_p (ds_t); 178 static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t); 179 static int ia64_spec_check_p (rtx); 180 static int ia64_spec_check_src_p (rtx); 181 static rtx gen_tls_get_addr (void); 182 static rtx gen_thread_pointer (void); 183 static int find_gr_spill (enum ia64_frame_regs, int); 184 static int next_scratch_gr_reg (void); 185 static void mark_reg_gr_used_mask (rtx, void *); 186 static void ia64_compute_frame_size (HOST_WIDE_INT); 187 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT); 188 static void finish_spill_pointers (void); 189 static rtx spill_restore_mem (rtx, HOST_WIDE_INT); 190 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx); 191 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT); 192 static rtx gen_movdi_x (rtx, rtx, rtx); 193 static rtx gen_fr_spill_x (rtx, rtx, rtx); 194 static rtx gen_fr_restore_x (rtx, rtx, rtx); 195 196 static void ia64_option_override (void); 197 static bool ia64_can_eliminate (const int, const int); 198 static machine_mode hfa_element_mode (const_tree, bool); 199 static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode, 200 tree, int *, int); 201 static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode, 202 tree, bool); 203 static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode, 204 const_tree, bool, bool); 205 static rtx ia64_function_arg (cumulative_args_t, machine_mode, 206 const_tree, bool); 207 static rtx ia64_function_incoming_arg (cumulative_args_t, 208 machine_mode, const_tree, bool); 209 static void ia64_function_arg_advance (cumulative_args_t, machine_mode, 210 const_tree, bool); 211 static unsigned int ia64_function_arg_boundary (machine_mode, 212 const_tree); 213 static bool ia64_function_ok_for_sibcall (tree, tree); 214 static bool ia64_return_in_memory (const_tree, const_tree); 215 static rtx ia64_function_value (const_tree, const_tree, bool); 216 static rtx ia64_libcall_value (machine_mode, const_rtx); 217 static bool ia64_function_value_regno_p (const unsigned int); 218 static int ia64_register_move_cost (machine_mode, reg_class_t, 219 reg_class_t); 220 static int ia64_memory_move_cost (machine_mode mode, reg_class_t, 221 bool); 222 static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool); 223 static int ia64_unspec_may_trap_p (const_rtx, unsigned); 224 static void fix_range (const char *); 225 static struct 
machine_function * ia64_init_machine_status (void); 226 static void emit_insn_group_barriers (FILE *); 227 static void emit_all_insn_group_barriers (FILE *); 228 static void final_emit_insn_group_barriers (FILE *); 229 static void emit_predicate_relation_info (void); 230 static void ia64_reorg (void); 231 static bool ia64_in_small_data_p (const_tree); 232 static void process_epilogue (FILE *, rtx, bool, bool); 233 234 static bool ia64_assemble_integer (rtx, unsigned int, int); 235 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT); 236 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT); 237 static void ia64_output_function_end_prologue (FILE *); 238 239 static void ia64_print_operand (FILE *, rtx, int); 240 static void ia64_print_operand_address (FILE *, machine_mode, rtx); 241 static bool ia64_print_operand_punct_valid_p (unsigned char code); 242 243 static int ia64_issue_rate (void); 244 static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t); 245 static void ia64_sched_init (FILE *, int, int); 246 static void ia64_sched_init_global (FILE *, int, int); 247 static void ia64_sched_finish_global (FILE *, int); 248 static void ia64_sched_finish (FILE *, int); 249 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int); 250 static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int); 251 static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int); 252 static int ia64_variable_issue (FILE *, int, rtx_insn *, int); 253 254 static void ia64_asm_unwind_emit (FILE *, rtx_insn *); 255 static void ia64_asm_emit_except_personality (rtx); 256 static void ia64_asm_init_sections (void); 257 258 static enum unwind_info_type ia64_debug_unwind_info (void); 259 260 static struct bundle_state *get_free_bundle_state (void); 261 static void free_bundle_state (struct bundle_state *); 262 static void initiate_bundle_states (void); 263 static void finish_bundle_states (void); 264 static int insert_bundle_state (struct bundle_state *); 265 static void initiate_bundle_state_table (void); 266 static void finish_bundle_state_table (void); 267 static int try_issue_nops (struct bundle_state *, int); 268 static int try_issue_insn (struct bundle_state *, rtx); 269 static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *, 270 int, int); 271 static int get_max_pos (state_t); 272 static int get_template (state_t, int); 273 274 static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *); 275 static bool important_for_bundling_p (rtx_insn *); 276 static bool unknown_for_bundling_p (rtx_insn *); 277 static void bundling (FILE *, int, rtx_insn *, rtx_insn *); 278 279 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, 280 HOST_WIDE_INT, tree); 281 static void ia64_file_start (void); 282 static void ia64_globalize_decl_name (FILE *, tree); 283 284 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED; 285 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED; 286 static section *ia64_select_rtx_section (machine_mode, rtx, 287 unsigned HOST_WIDE_INT); 288 static void ia64_output_dwarf_dtprel (FILE *, int, rtx) 289 ATTRIBUTE_UNUSED; 290 static unsigned int ia64_section_type_flags (tree, const char *, int); 291 static void ia64_init_libfuncs (void) 292 ATTRIBUTE_UNUSED; 293 static void ia64_hpux_init_libfuncs (void) 294 ATTRIBUTE_UNUSED; 295 static void ia64_sysv4_init_libfuncs (void) 296 ATTRIBUTE_UNUSED; 297 static void ia64_vms_init_libfuncs (void) 298 ATTRIBUTE_UNUSED; 299 static void 
ia64_soft_fp_init_libfuncs (void) 300 ATTRIBUTE_UNUSED; 301 static bool ia64_vms_valid_pointer_mode (machine_mode mode) 302 ATTRIBUTE_UNUSED; 303 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *) 304 ATTRIBUTE_UNUSED; 305 306 static bool ia64_attribute_takes_identifier_p (const_tree); 307 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *); 308 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *); 309 static void ia64_encode_section_info (tree, rtx, int); 310 static rtx ia64_struct_value_rtx (tree, int); 311 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *); 312 static bool ia64_scalar_mode_supported_p (machine_mode mode); 313 static bool ia64_vector_mode_supported_p (machine_mode mode); 314 static bool ia64_legitimate_constant_p (machine_mode, rtx); 315 static bool ia64_legitimate_address_p (machine_mode, rtx, bool); 316 static bool ia64_cannot_force_const_mem (machine_mode, rtx); 317 static const char *ia64_mangle_type (const_tree); 318 static const char *ia64_invalid_conversion (const_tree, const_tree); 319 static const char *ia64_invalid_unary_op (int, const_tree); 320 static const char *ia64_invalid_binary_op (int, const_tree, const_tree); 321 static machine_mode ia64_c_mode_for_suffix (char); 322 static void ia64_trampoline_init (rtx, tree, rtx); 323 static void ia64_override_options_after_change (void); 324 static bool ia64_member_type_forces_blk (const_tree, machine_mode); 325 326 static tree ia64_fold_builtin (tree, int, tree *, bool); 327 static tree ia64_builtin_decl (unsigned, bool); 328 329 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t); 330 static machine_mode ia64_get_reg_raw_mode (int regno); 331 static section * ia64_hpux_function_section (tree, enum node_frequency, 332 bool, bool); 333 334 static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode, 335 const unsigned char *sel); 336 337 #define MAX_VECT_LEN 8 338 339 struct expand_vec_perm_d 340 { 341 rtx target, op0, op1; 342 unsigned char perm[MAX_VECT_LEN]; 343 machine_mode vmode; 344 unsigned char nelt; 345 bool one_operand_p; 346 bool testing_p; 347 }; 348 349 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d); 350 351 352 /* Table of valid machine attributes. */ 353 static const struct attribute_spec ia64_attribute_table[] = 354 { 355 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, 356 affects_type_identity } */ 357 { "syscall_linkage", 0, 0, false, true, true, NULL, false }, 358 { "model", 1, 1, true, false, false, ia64_handle_model_attribute, 359 false }, 360 #if TARGET_ABI_OPEN_VMS 361 { "common_object", 1, 1, true, false, false, 362 ia64_vms_common_object_attribute, false }, 363 #endif 364 { "version_id", 1, 1, true, false, false, 365 ia64_handle_version_id_attribute, false }, 366 { NULL, 0, 0, false, false, false, NULL, false } 367 }; 368 369 /* Initialize the GCC target structure. 
*/ 370 #undef TARGET_ATTRIBUTE_TABLE 371 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table 372 373 #undef TARGET_INIT_BUILTINS 374 #define TARGET_INIT_BUILTINS ia64_init_builtins 375 376 #undef TARGET_FOLD_BUILTIN 377 #define TARGET_FOLD_BUILTIN ia64_fold_builtin 378 379 #undef TARGET_EXPAND_BUILTIN 380 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin 381 382 #undef TARGET_BUILTIN_DECL 383 #define TARGET_BUILTIN_DECL ia64_builtin_decl 384 385 #undef TARGET_ASM_BYTE_OP 386 #define TARGET_ASM_BYTE_OP "\tdata1\t" 387 #undef TARGET_ASM_ALIGNED_HI_OP 388 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t" 389 #undef TARGET_ASM_ALIGNED_SI_OP 390 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t" 391 #undef TARGET_ASM_ALIGNED_DI_OP 392 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t" 393 #undef TARGET_ASM_UNALIGNED_HI_OP 394 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t" 395 #undef TARGET_ASM_UNALIGNED_SI_OP 396 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t" 397 #undef TARGET_ASM_UNALIGNED_DI_OP 398 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t" 399 #undef TARGET_ASM_INTEGER 400 #define TARGET_ASM_INTEGER ia64_assemble_integer 401 402 #undef TARGET_OPTION_OVERRIDE 403 #define TARGET_OPTION_OVERRIDE ia64_option_override 404 405 #undef TARGET_ASM_FUNCTION_PROLOGUE 406 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue 407 #undef TARGET_ASM_FUNCTION_END_PROLOGUE 408 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue 409 #undef TARGET_ASM_FUNCTION_EPILOGUE 410 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue 411 412 #undef TARGET_PRINT_OPERAND 413 #define TARGET_PRINT_OPERAND ia64_print_operand 414 #undef TARGET_PRINT_OPERAND_ADDRESS 415 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address 416 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P 417 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p 418 419 #undef TARGET_IN_SMALL_DATA_P 420 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p 421 422 #undef TARGET_SCHED_ADJUST_COST 423 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost 424 #undef TARGET_SCHED_ISSUE_RATE 425 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate 426 #undef TARGET_SCHED_VARIABLE_ISSUE 427 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue 428 #undef TARGET_SCHED_INIT 429 #define TARGET_SCHED_INIT ia64_sched_init 430 #undef TARGET_SCHED_FINISH 431 #define TARGET_SCHED_FINISH ia64_sched_finish 432 #undef TARGET_SCHED_INIT_GLOBAL 433 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global 434 #undef TARGET_SCHED_FINISH_GLOBAL 435 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global 436 #undef TARGET_SCHED_REORDER 437 #define TARGET_SCHED_REORDER ia64_sched_reorder 438 #undef TARGET_SCHED_REORDER2 439 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2 440 441 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK 442 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook 443 444 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 445 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead 446 447 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN 448 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn 449 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN 450 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn 451 452 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD 453 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\ 454 ia64_first_cycle_multipass_dfa_lookahead_guard 455 456 #undef 
TARGET_SCHED_DFA_NEW_CYCLE 457 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle 458 459 #undef TARGET_SCHED_H_I_D_EXTENDED 460 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended 461 462 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT 463 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context 464 465 #undef TARGET_SCHED_INIT_SCHED_CONTEXT 466 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context 467 468 #undef TARGET_SCHED_SET_SCHED_CONTEXT 469 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context 470 471 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT 472 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context 473 474 #undef TARGET_SCHED_FREE_SCHED_CONTEXT 475 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context 476 477 #undef TARGET_SCHED_SET_SCHED_FLAGS 478 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags 479 480 #undef TARGET_SCHED_GET_INSN_SPEC_DS 481 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds 482 483 #undef TARGET_SCHED_GET_INSN_CHECKED_DS 484 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds 485 486 #undef TARGET_SCHED_SPECULATE_INSN 487 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn 488 489 #undef TARGET_SCHED_NEEDS_BLOCK_P 490 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p 491 492 #undef TARGET_SCHED_GEN_SPEC_CHECK 493 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check 494 495 #undef TARGET_SCHED_SKIP_RTX_P 496 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p 497 498 #undef TARGET_FUNCTION_OK_FOR_SIBCALL 499 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall 500 #undef TARGET_ARG_PARTIAL_BYTES 501 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes 502 #undef TARGET_FUNCTION_ARG 503 #define TARGET_FUNCTION_ARG ia64_function_arg 504 #undef TARGET_FUNCTION_INCOMING_ARG 505 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg 506 #undef TARGET_FUNCTION_ARG_ADVANCE 507 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance 508 #undef TARGET_FUNCTION_ARG_BOUNDARY 509 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary 510 511 #undef TARGET_ASM_OUTPUT_MI_THUNK 512 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk 513 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 514 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true 515 516 #undef TARGET_ASM_FILE_START 517 #define TARGET_ASM_FILE_START ia64_file_start 518 519 #undef TARGET_ASM_GLOBALIZE_DECL_NAME 520 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name 521 522 #undef TARGET_REGISTER_MOVE_COST 523 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost 524 #undef TARGET_MEMORY_MOVE_COST 525 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost 526 #undef TARGET_RTX_COSTS 527 #define TARGET_RTX_COSTS ia64_rtx_costs 528 #undef TARGET_ADDRESS_COST 529 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 530 531 #undef TARGET_UNSPEC_MAY_TRAP_P 532 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p 533 534 #undef TARGET_MACHINE_DEPENDENT_REORG 535 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg 536 537 #undef TARGET_ENCODE_SECTION_INFO 538 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info 539 540 #undef TARGET_SECTION_TYPE_FLAGS 541 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags 542 543 #ifdef HAVE_AS_TLS 544 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL 545 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel 546 #endif 547 548 /* ??? Investigate. 
*/ 549 #if 0 550 #undef TARGET_PROMOTE_PROTOTYPES 551 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true 552 #endif 553 554 #undef TARGET_FUNCTION_VALUE 555 #define TARGET_FUNCTION_VALUE ia64_function_value 556 #undef TARGET_LIBCALL_VALUE 557 #define TARGET_LIBCALL_VALUE ia64_libcall_value 558 #undef TARGET_FUNCTION_VALUE_REGNO_P 559 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p 560 561 #undef TARGET_STRUCT_VALUE_RTX 562 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx 563 #undef TARGET_RETURN_IN_MEMORY 564 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory 565 #undef TARGET_SETUP_INCOMING_VARARGS 566 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs 567 #undef TARGET_STRICT_ARGUMENT_NAMING 568 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true 569 #undef TARGET_MUST_PASS_IN_STACK 570 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size 571 #undef TARGET_GET_RAW_RESULT_MODE 572 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode 573 #undef TARGET_GET_RAW_ARG_MODE 574 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode 575 576 #undef TARGET_MEMBER_TYPE_FORCES_BLK 577 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk 578 579 #undef TARGET_GIMPLIFY_VA_ARG_EXPR 580 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg 581 582 #undef TARGET_ASM_UNWIND_EMIT 583 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit 584 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY 585 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality 586 #undef TARGET_ASM_INIT_SECTIONS 587 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections 588 589 #undef TARGET_DEBUG_UNWIND_INFO 590 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info 591 592 #undef TARGET_SCALAR_MODE_SUPPORTED_P 593 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p 594 #undef TARGET_VECTOR_MODE_SUPPORTED_P 595 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p 596 597 #undef TARGET_LEGITIMATE_CONSTANT_P 598 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p 599 #undef TARGET_LEGITIMATE_ADDRESS_P 600 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p 601 602 #undef TARGET_LRA_P 603 #define TARGET_LRA_P hook_bool_void_false 604 605 #undef TARGET_CANNOT_FORCE_CONST_MEM 606 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem 607 608 #undef TARGET_MANGLE_TYPE 609 #define TARGET_MANGLE_TYPE ia64_mangle_type 610 611 #undef TARGET_INVALID_CONVERSION 612 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion 613 #undef TARGET_INVALID_UNARY_OP 614 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op 615 #undef TARGET_INVALID_BINARY_OP 616 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op 617 618 #undef TARGET_C_MODE_FOR_SUFFIX 619 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix 620 621 #undef TARGET_CAN_ELIMINATE 622 #define TARGET_CAN_ELIMINATE ia64_can_eliminate 623 624 #undef TARGET_TRAMPOLINE_INIT 625 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init 626 627 #undef TARGET_CAN_USE_DOLOOP_P 628 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost 629 #undef TARGET_INVALID_WITHIN_DOLOOP 630 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null 631 632 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE 633 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change 634 635 #undef TARGET_PREFERRED_RELOAD_CLASS 636 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class 637 638 #undef TARGET_DELAY_SCHED2 
639 #define TARGET_DELAY_SCHED2 true 640 641 /* Variable tracking should be run after all optimizations which 642 change order of insns. It also needs a valid CFG. */ 643 #undef TARGET_DELAY_VARTRACK 644 #define TARGET_DELAY_VARTRACK true 645 646 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK 647 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok 648 649 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P 650 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p 651 652 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS 653 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0 654 655 struct gcc_target targetm = TARGET_INITIALIZER; 656 657 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain 658 identifier as an argument, so the front end shouldn't look it up. */ 659 660 static bool 661 ia64_attribute_takes_identifier_p (const_tree attr_id) 662 { 663 if (is_attribute_p ("model", attr_id)) 664 return true; 665 #if TARGET_ABI_OPEN_VMS 666 if (is_attribute_p ("common_object", attr_id)) 667 return true; 668 #endif 669 return false; 670 } 671 672 typedef enum 673 { 674 ADDR_AREA_NORMAL, /* normal address area */ 675 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */ 676 } 677 ia64_addr_area; 678 679 static GTY(()) tree small_ident1; 680 static GTY(()) tree small_ident2; 681 682 static void 683 init_idents (void) 684 { 685 if (small_ident1 == 0) 686 { 687 small_ident1 = get_identifier ("small"); 688 small_ident2 = get_identifier ("__small__"); 689 } 690 } 691 692 /* Retrieve the address area that has been chosen for the given decl. */ 693 694 static ia64_addr_area 695 ia64_get_addr_area (tree decl) 696 { 697 tree model_attr; 698 699 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl)); 700 if (model_attr) 701 { 702 tree id; 703 704 init_idents (); 705 id = TREE_VALUE (TREE_VALUE (model_attr)); 706 if (id == small_ident1 || id == small_ident2) 707 return ADDR_AREA_SMALL; 708 } 709 return ADDR_AREA_NORMAL; 710 } 711 712 static tree 713 ia64_handle_model_attribute (tree *node, tree name, tree args, 714 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 715 { 716 ia64_addr_area addr_area = ADDR_AREA_NORMAL; 717 ia64_addr_area area; 718 tree arg, decl = *node; 719 720 init_idents (); 721 arg = TREE_VALUE (args); 722 if (arg == small_ident1 || arg == small_ident2) 723 { 724 addr_area = ADDR_AREA_SMALL; 725 } 726 else 727 { 728 warning (OPT_Wattributes, "invalid argument of %qE attribute", 729 name); 730 *no_add_attrs = true; 731 } 732 733 switch (TREE_CODE (decl)) 734 { 735 case VAR_DECL: 736 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl)) 737 == FUNCTION_DECL) 738 && !TREE_STATIC (decl)) 739 { 740 error_at (DECL_SOURCE_LOCATION (decl), 741 "an address area attribute cannot be specified for " 742 "local variables"); 743 *no_add_attrs = true; 744 } 745 area = ia64_get_addr_area (decl); 746 if (area != ADDR_AREA_NORMAL && addr_area != area) 747 { 748 error ("address area of %q+D conflicts with previous " 749 "declaration", decl); 750 *no_add_attrs = true; 751 } 752 break; 753 754 case FUNCTION_DECL: 755 error_at (DECL_SOURCE_LOCATION (decl), 756 "address area attribute cannot be specified for " 757 "functions"); 758 *no_add_attrs = true; 759 break; 760 761 default: 762 warning (OPT_Wattributes, "%qE attribute ignored", 763 name); 764 *no_add_attrs = true; 765 break; 766 } 767 768 return NULL_TREE; 769 } 770 771 /* Part of the low level implementation of DEC Ada pragma Common_Object which 772 enables the shared use of variables 
stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree decl = *node;
  tree id;

  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
    {
      error ("%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}

/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
                                     unsigned HOST_WIDE_INT size,
                                     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  if (attr)
    attr = lookup_attribute ("common_object", attr);
  if (attr)
    {
      tree id = TREE_VALUE (TREE_VALUE (attr));
      const char *name;

      if (TREE_CODE (id) == IDENTIFIER_NODE)
        name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
        name = TREE_STRING_POINTER (id);
      else
        abort ();

      fprintf (file, "\t.vms_common\t\"%s\",", name);
    }
  else
    fprintf (file, "%s", COMMON_ASM_OP);

  /* Code from elfos.h.  */
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
           size, align / BITS_PER_UNIT);

  fputc ('\n', file);
}

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and it must be either 0 or 0.0
     or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}

/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
     also return false for it.
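
     Aside from that thinko, the checks below require DST to be a pair of
     consecutive FP registers and SRC a non-volatile memory reference whose
     address is a plain register, a POST_INC, or a POST_MODIFY that steps
     by exactly the mode size.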
     */
  if (GET_CODE (dst) != REG
      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
        rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

        if (GET_CODE (adjust) != CONST_INT
            || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
          return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  */

static inline bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    return true;
  else if (!strict
           && (GENERAL_REGNO_P (REGNO (reg))
               || !HARD_REGISTER_P (reg)))
    return true;
  else
    return false;
}

static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
          && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
    return true;

  return false;
}

static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) == PLUS
      && rtx_equal_p (reg, XEXP (disp, 0))
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
          || (CONST_INT_P (XEXP (disp, 1))
              && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
    return true;

  return false;
}

/* Implement TARGET_LEGITIMATE_ADDRESS_P.
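
   IA-64 has no base+displacement or indexed addressing modes, so apart
   from SUBREGs of registers the only forms accepted below are:

     (reg)
     (post_inc (reg))
     (post_dec (reg))
     (post_modify (reg) (plus (reg) (reg)))
     (post_modify (reg) (plus (reg) (const_int -256..255)))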
   */

static bool
ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
                           rtx x, bool strict)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
           && ia64_legitimate_address_reg (XEXP (x, 0), strict)
           && XEXP (x, 0) != arg_pointer_rtx)
    return true;
  else if (GET_CODE (x) == POST_MODIFY
           && ia64_legitimate_address_reg (XEXP (x, 0), strict)
           && XEXP (x, 0) != arg_pointer_rtx
           && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
    return true;
  else
    return false;
}

/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
        return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
         match the code in ia64_expand_move and move_operand, even though they
         are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
        {
          HOST_WIDE_INT addend = 0;
          rtx op = x;

          if (GET_CODE (op) == CONST
              && GET_CODE (XEXP (op, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
            {
              addend = INTVAL (XEXP (XEXP (op, 0), 1));
              op = XEXP (XEXP (op, 0), 0);
            }

          if (any_offset_symbol_operand (op, mode)
              || function_operand (op, mode))
            return true;
          if (aligned_offset_symbol_operand (op, mode))
            return (addend & 0x3fff) == 0;
          return false;
        }
      return false;

    case CONST_VECTOR:
      if (mode == V2SFmode)
        return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
              && GET_MODE_SIZE (mode) <= 8);

    default:
      return false;
    }
}

/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (machine_mode mode, rtx x)
{
  if (mode == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}

/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.
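
     The insn emitted below depends on how SRC can be addressed: @gprel
     for TARGET_AUTO_PIC and for 64-bit local symbols, a function
     descriptor load for function symbols, a short @gprel load for sdata
     symbols, and otherwise an @ltoff HIGH/LO_SUM pair through the GOT,
     with any large constant offset split off and added back at the end.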
     */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
                               byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else if (local_symbolic_operand64 (src, VOIDmode))
    {
      /* We want to use @gprel rather than @ltoff relocations for local
         symbols:
          - @gprel does not require dynamic linker
          - and does not use .sdata section
         https://gcc.gnu.org/bugzilla/60465 */
      emit_insn (gen_load_gprel64 (dest, src));
    }
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
         to keep them split in move_operand, but we also allowed reload to
         rematerialize arbitrary constants rather than spill the value to
         the stack and reload it.  So we have to be prepared here to split
         them apart again.  */
      if (GET_CODE (src) == CONST)
        {
          HOST_WIDE_INT hi, lo;

          hi = INTVAL (XEXP (XEXP (src, 0), 1));
          lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
          hi = hi - lo;

          if (lo != 0)
            {
              addend = lo;
              src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
            }
        }

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (dest, tmp));

      if (addend)
        {
          tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
          emit_insn (gen_rtx_SET (dest, tmp));
        }
    }

  return true;
}

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
                         rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
  rtx_insn *insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
        op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ???
This isn't the completely proper way to do local-dynamic 1217 If the call to __tls_get_addr is used only by a single symbol, 1218 then we should (somehow) move the dtprel to the second arg 1219 to avoid the extra add. */ 1220 start_sequence (); 1221 1222 tga_op1 = gen_reg_rtx (Pmode); 1223 emit_insn (gen_load_dtpmod (tga_op1, op1)); 1224 1225 tga_op2 = const0_rtx; 1226 1227 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX, 1228 LCT_CONST, Pmode, 2, tga_op1, 1229 Pmode, tga_op2, Pmode); 1230 1231 insns = get_insns (); 1232 end_sequence (); 1233 1234 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), 1235 UNSPEC_LD_BASE); 1236 tmp = gen_reg_rtx (Pmode); 1237 emit_libcall_block (insns, tmp, tga_ret, tga_eqv); 1238 1239 if (!register_operand (op0, Pmode)) 1240 op0 = gen_reg_rtx (Pmode); 1241 if (TARGET_TLS64) 1242 { 1243 emit_insn (gen_load_dtprel (op0, op1)); 1244 emit_insn (gen_adddi3 (op0, tmp, op0)); 1245 } 1246 else 1247 emit_insn (gen_add_dtprel (op0, op1, tmp)); 1248 break; 1249 1250 case TLS_MODEL_INITIAL_EXEC: 1251 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000; 1252 addend_hi = addend - addend_lo; 1253 1254 op1 = plus_constant (Pmode, op1, addend_hi); 1255 addend = addend_lo; 1256 1257 tmp = gen_reg_rtx (Pmode); 1258 emit_insn (gen_load_tprel (tmp, op1)); 1259 1260 if (!register_operand (op0, Pmode)) 1261 op0 = gen_reg_rtx (Pmode); 1262 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ())); 1263 break; 1264 1265 case TLS_MODEL_LOCAL_EXEC: 1266 if (!register_operand (op0, Pmode)) 1267 op0 = gen_reg_rtx (Pmode); 1268 1269 op1 = orig_op1; 1270 addend = 0; 1271 if (TARGET_TLS64) 1272 { 1273 emit_insn (gen_load_tprel (op0, op1)); 1274 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ())); 1275 } 1276 else 1277 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ())); 1278 break; 1279 1280 default: 1281 gcc_unreachable (); 1282 } 1283 1284 if (addend) 1285 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend), 1286 orig_op0, 1, OPTAB_DIRECT); 1287 if (orig_op0 == op0) 1288 return NULL_RTX; 1289 if (GET_MODE (orig_op0) == Pmode) 1290 return op0; 1291 return gen_lowpart (GET_MODE (orig_op0), op0); 1292 } 1293 1294 rtx 1295 ia64_expand_move (rtx op0, rtx op1) 1296 { 1297 machine_mode mode = GET_MODE (op0); 1298 1299 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1)) 1300 op1 = force_reg (mode, op1); 1301 1302 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode)) 1303 { 1304 HOST_WIDE_INT addend = 0; 1305 enum tls_model tls_kind; 1306 rtx sym = op1; 1307 1308 if (GET_CODE (op1) == CONST 1309 && GET_CODE (XEXP (op1, 0)) == PLUS 1310 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT) 1311 { 1312 addend = INTVAL (XEXP (XEXP (op1, 0), 1)); 1313 sym = XEXP (XEXP (op1, 0), 0); 1314 } 1315 1316 tls_kind = tls_symbolic_operand_type (sym); 1317 if (tls_kind) 1318 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend); 1319 1320 if (any_offset_symbol_operand (sym, mode)) 1321 addend = 0; 1322 else if (aligned_offset_symbol_operand (sym, mode)) 1323 { 1324 HOST_WIDE_INT addend_lo, addend_hi; 1325 1326 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000; 1327 addend_hi = addend - addend_lo; 1328 1329 if (addend_lo != 0) 1330 { 1331 op1 = plus_constant (mode, sym, addend_hi); 1332 addend = addend_lo; 1333 } 1334 else 1335 addend = 0; 1336 } 1337 else 1338 op1 = sym; 1339 1340 if (reload_completed) 1341 { 1342 /* We really should have taken care of this offset earlier. 
*/ 1343 gcc_assert (addend == 0); 1344 if (ia64_expand_load_address (op0, op1)) 1345 return NULL_RTX; 1346 } 1347 1348 if (addend) 1349 { 1350 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode); 1351 1352 emit_insn (gen_rtx_SET (subtarget, op1)); 1353 1354 op1 = expand_simple_binop (mode, PLUS, subtarget, 1355 GEN_INT (addend), op0, 1, OPTAB_DIRECT); 1356 if (op0 == op1) 1357 return NULL_RTX; 1358 } 1359 } 1360 1361 return op1; 1362 } 1363 1364 /* Split a move from OP1 to OP0 conditional on COND. */ 1365 1366 void 1367 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond) 1368 { 1369 rtx_insn *insn, *first = get_last_insn (); 1370 1371 emit_move_insn (op0, op1); 1372 1373 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn)) 1374 if (INSN_P (insn)) 1375 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), 1376 PATTERN (insn)); 1377 } 1378 1379 /* Split a post-reload TImode or TFmode reference into two DImode 1380 components. This is made extra difficult by the fact that we do 1381 not get any scratch registers to work with, because reload cannot 1382 be prevented from giving us a scratch that overlaps the register 1383 pair involved. So instead, when addressing memory, we tweak the 1384 pointer register up and back down with POST_INCs. Or up and not 1385 back down when we can get away with it. 1386 1387 REVERSED is true when the loads must be done in reversed order 1388 (high word first) for correctness. DEAD is true when the pointer 1389 dies with the second insn we generate and therefore the second 1390 address must not carry a postmodify. 1391 1392 May return an insn which is to be emitted after the moves. */ 1393 1394 static rtx 1395 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead) 1396 { 1397 rtx fixup = 0; 1398 1399 switch (GET_CODE (in)) 1400 { 1401 case REG: 1402 out[reversed] = gen_rtx_REG (DImode, REGNO (in)); 1403 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1); 1404 break; 1405 1406 case CONST_INT: 1407 case CONST_DOUBLE: 1408 /* Cannot occur reversed. */ 1409 gcc_assert (!reversed); 1410 1411 if (GET_MODE (in) != TFmode) 1412 split_double (in, &out[0], &out[1]); 1413 else 1414 /* split_double does not understand how to split a TFmode 1415 quantity into a pair of DImode constants. */ 1416 { 1417 unsigned HOST_WIDE_INT p[2]; 1418 long l[4]; /* TFmode is 128 bits */ 1419 1420 real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode); 1421 1422 if (FLOAT_WORDS_BIG_ENDIAN) 1423 { 1424 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1]; 1425 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3]; 1426 } 1427 else 1428 { 1429 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0]; 1430 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2]; 1431 } 1432 out[0] = GEN_INT (p[0]); 1433 out[1] = GEN_INT (p[1]); 1434 } 1435 break; 1436 1437 case MEM: 1438 { 1439 rtx base = XEXP (in, 0); 1440 rtx offset; 1441 1442 switch (GET_CODE (base)) 1443 { 1444 case REG: 1445 if (!reversed) 1446 { 1447 out[0] = adjust_automodify_address 1448 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); 1449 out[1] = adjust_automodify_address 1450 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8); 1451 } 1452 else 1453 { 1454 /* Reversal requires a pre-increment, which can only 1455 be done as a separate insn. 
*/ 1456 emit_insn (gen_adddi3 (base, base, GEN_INT (8))); 1457 out[0] = adjust_automodify_address 1458 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8); 1459 out[1] = adjust_address (in, DImode, 0); 1460 } 1461 break; 1462 1463 case POST_INC: 1464 gcc_assert (!reversed && !dead); 1465 1466 /* Just do the increment in two steps. */ 1467 out[0] = adjust_automodify_address (in, DImode, 0, 0); 1468 out[1] = adjust_automodify_address (in, DImode, 0, 8); 1469 break; 1470 1471 case POST_DEC: 1472 gcc_assert (!reversed && !dead); 1473 1474 /* Add 8, subtract 24. */ 1475 base = XEXP (base, 0); 1476 out[0] = adjust_automodify_address 1477 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); 1478 out[1] = adjust_automodify_address 1479 (in, DImode, 1480 gen_rtx_POST_MODIFY (Pmode, base, 1481 plus_constant (Pmode, base, -24)), 1482 8); 1483 break; 1484 1485 case POST_MODIFY: 1486 gcc_assert (!reversed && !dead); 1487 1488 /* Extract and adjust the modification. This case is 1489 trickier than the others, because we might have an 1490 index register, or we might have a combined offset that 1491 doesn't fit a signed 9-bit displacement field. We can 1492 assume the incoming expression is already legitimate. */ 1493 offset = XEXP (base, 1); 1494 base = XEXP (base, 0); 1495 1496 out[0] = adjust_automodify_address 1497 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); 1498 1499 if (GET_CODE (XEXP (offset, 1)) == REG) 1500 { 1501 /* Can't adjust the postmodify to match. Emit the 1502 original, then a separate addition insn. */ 1503 out[1] = adjust_automodify_address (in, DImode, 0, 8); 1504 fixup = gen_adddi3 (base, base, GEN_INT (-8)); 1505 } 1506 else 1507 { 1508 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT); 1509 if (INTVAL (XEXP (offset, 1)) < -256 + 8) 1510 { 1511 /* Again the postmodify cannot be made to match, 1512 but in this case it's more efficient to get rid 1513 of the postmodify entirely and fix up with an 1514 add insn. */ 1515 out[1] = adjust_automodify_address (in, DImode, base, 8); 1516 fixup = gen_adddi3 1517 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8)); 1518 } 1519 else 1520 { 1521 /* Combined offset still fits in the displacement field. 1522 (We cannot overflow it at the high end.) */ 1523 out[1] = adjust_automodify_address 1524 (in, DImode, gen_rtx_POST_MODIFY 1525 (Pmode, base, gen_rtx_PLUS 1526 (Pmode, base, 1527 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))), 1528 8); 1529 } 1530 } 1531 break; 1532 1533 default: 1534 gcc_unreachable (); 1535 } 1536 break; 1537 } 1538 1539 default: 1540 gcc_unreachable (); 1541 } 1542 1543 return fixup; 1544 } 1545 1546 /* Split a TImode or TFmode move instruction after reload. 1547 This is used by *movtf_internal and *movti_internal. */ 1548 void 1549 ia64_split_tmode_move (rtx operands[]) 1550 { 1551 rtx in[2], out[2], insn; 1552 rtx fixup[2]; 1553 bool dead = false; 1554 bool reversed = false; 1555 1556 /* It is possible for reload to decide to overwrite a pointer with 1557 the value it points to. In that case we have to do the loads in 1558 the appropriate order so that the pointer is not destroyed too 1559 early. Also we must not generate a postmodify for that second 1560 load, or rws_access_regno will die. And we must not generate a 1561 postmodify for the second load if the destination register 1562 overlaps with the base register. 
*/ 1563 if (GET_CODE (operands[1]) == MEM 1564 && reg_overlap_mentioned_p (operands[0], operands[1])) 1565 { 1566 rtx base = XEXP (operands[1], 0); 1567 while (GET_CODE (base) != REG) 1568 base = XEXP (base, 0); 1569 1570 if (REGNO (base) == REGNO (operands[0])) 1571 reversed = true; 1572 1573 if (refers_to_regno_p (REGNO (operands[0]), 1574 REGNO (operands[0])+2, 1575 base, 0)) 1576 dead = true; 1577 } 1578 /* Another reason to do the moves in reversed order is if the first 1579 element of the target register pair is also the second element of 1580 the source register pair. */ 1581 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG 1582 && REGNO (operands[0]) == REGNO (operands[1]) + 1) 1583 reversed = true; 1584 1585 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead); 1586 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead); 1587 1588 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \ 1589 if (GET_CODE (EXP) == MEM \ 1590 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \ 1591 || GET_CODE (XEXP (EXP, 0)) == POST_INC \ 1592 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \ 1593 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0)) 1594 1595 insn = emit_insn (gen_rtx_SET (out[0], in[0])); 1596 MAYBE_ADD_REG_INC_NOTE (insn, in[0]); 1597 MAYBE_ADD_REG_INC_NOTE (insn, out[0]); 1598 1599 insn = emit_insn (gen_rtx_SET (out[1], in[1])); 1600 MAYBE_ADD_REG_INC_NOTE (insn, in[1]); 1601 MAYBE_ADD_REG_INC_NOTE (insn, out[1]); 1602 1603 if (fixup[0]) 1604 emit_insn (fixup[0]); 1605 if (fixup[1]) 1606 emit_insn (fixup[1]); 1607 1608 #undef MAYBE_ADD_REG_INC_NOTE 1609 } 1610 1611 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go 1612 through memory plus an extra GR scratch register. Except that you can 1613 either get the first from SECONDARY_MEMORY_NEEDED or the second from 1614 SECONDARY_RELOAD_CLASS, but not both. 1615 1616 We got into problems in the first place by allowing a construct like 1617 (subreg:XF (reg:TI)), which we got from a union containing a long double. 1618 This solution attempts to prevent this situation from occurring. When 1619 we see something like the above, we spill the inner register to memory. */ 1620 1621 static rtx 1622 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode) 1623 { 1624 if (GET_CODE (in) == SUBREG 1625 && GET_MODE (SUBREG_REG (in)) == TImode 1626 && GET_CODE (SUBREG_REG (in)) == REG) 1627 { 1628 rtx memt = assign_stack_temp (TImode, 16); 1629 emit_move_insn (memt, SUBREG_REG (in)); 1630 return adjust_address (memt, mode, 0); 1631 } 1632 else if (force && GET_CODE (in) == REG) 1633 { 1634 rtx memx = assign_stack_temp (mode, 16); 1635 emit_move_insn (memx, in); 1636 return memx; 1637 } 1638 else 1639 return in; 1640 } 1641 1642 /* Expand the movxf or movrf pattern (MODE says which) with the given 1643 OPERANDS, returning true if the pattern should then invoke 1644 DONE. */ 1645 1646 bool 1647 ia64_expand_movxf_movrf (machine_mode mode, rtx operands[]) 1648 { 1649 rtx op0 = operands[0]; 1650 1651 if (GET_CODE (op0) == SUBREG) 1652 op0 = SUBREG_REG (op0); 1653 1654 /* We must support XFmode loads into general registers for stdarg/vararg, 1655 unprototyped calls, and a rare case where a long double is passed as 1656 an argument after a float HFA fills the FP registers. We split them into 1657 DImode loads for convenience. We also need to support XFmode stores 1658 for the last case. This case does not happen for stdarg/vararg routines, 1659 because we do a block store to memory of unnamed arguments. 
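
     In each of the GR cases below the value is moved either as a single
     TImode register move or as two DImode halves at offsets 0 and 8,
     without any word swapping.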
*/ 1660 1661 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0))) 1662 { 1663 rtx out[2]; 1664 1665 /* We're hoping to transform everything that deals with XFmode 1666 quantities and GR registers early in the compiler. */ 1667 gcc_assert (can_create_pseudo_p ()); 1668 1669 /* Struct to register can just use TImode instead. */ 1670 if ((GET_CODE (operands[1]) == SUBREG 1671 && GET_MODE (SUBREG_REG (operands[1])) == TImode) 1672 || (GET_CODE (operands[1]) == REG 1673 && GR_REGNO_P (REGNO (operands[1])))) 1674 { 1675 rtx op1 = operands[1]; 1676 1677 if (GET_CODE (op1) == SUBREG) 1678 op1 = SUBREG_REG (op1); 1679 else 1680 op1 = gen_rtx_REG (TImode, REGNO (op1)); 1681 1682 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1); 1683 return true; 1684 } 1685 1686 if (GET_CODE (operands[1]) == CONST_DOUBLE) 1687 { 1688 /* Don't word-swap when reading in the constant. */ 1689 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)), 1690 operand_subword (operands[1], WORDS_BIG_ENDIAN, 1691 0, mode)); 1692 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1), 1693 operand_subword (operands[1], !WORDS_BIG_ENDIAN, 1694 0, mode)); 1695 return true; 1696 } 1697 1698 /* If the quantity is in a register not known to be GR, spill it. */ 1699 if (register_operand (operands[1], mode)) 1700 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode); 1701 1702 gcc_assert (GET_CODE (operands[1]) == MEM); 1703 1704 /* Don't word-swap when reading in the value. */ 1705 out[0] = gen_rtx_REG (DImode, REGNO (op0)); 1706 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1); 1707 1708 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0)); 1709 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8)); 1710 return true; 1711 } 1712 1713 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1]))) 1714 { 1715 /* We're hoping to transform everything that deals with XFmode 1716 quantities and GR registers early in the compiler. */ 1717 gcc_assert (can_create_pseudo_p ()); 1718 1719 /* Op0 can't be a GR_REG here, as that case is handled above. 1720 If op0 is a register, then we spill op1, so that we now have a 1721 MEM operand. This requires creating an XFmode subreg of a TImode reg 1722 to force the spill. */ 1723 if (register_operand (operands[0], mode)) 1724 { 1725 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1])); 1726 op1 = gen_rtx_SUBREG (mode, op1, 0); 1727 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode); 1728 } 1729 1730 else 1731 { 1732 rtx in[2]; 1733 1734 gcc_assert (GET_CODE (operands[0]) == MEM); 1735 1736 /* Don't word-swap when writing out the value. 
*/ 1737 in[0] = gen_rtx_REG (DImode, REGNO (operands[1])); 1738 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); 1739 1740 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]); 1741 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]); 1742 return true; 1743 } 1744 } 1745 1746 if (!reload_in_progress && !reload_completed) 1747 { 1748 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode); 1749 1750 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG) 1751 { 1752 rtx memt, memx, in = operands[1]; 1753 if (CONSTANT_P (in)) 1754 in = validize_mem (force_const_mem (mode, in)); 1755 if (GET_CODE (in) == MEM) 1756 memt = adjust_address (in, TImode, 0); 1757 else 1758 { 1759 memt = assign_stack_temp (TImode, 16); 1760 memx = adjust_address (memt, mode, 0); 1761 emit_move_insn (memx, in); 1762 } 1763 emit_move_insn (op0, memt); 1764 return true; 1765 } 1766 1767 if (!ia64_move_ok (operands[0], operands[1])) 1768 operands[1] = force_reg (mode, operands[1]); 1769 } 1770 1771 return false; 1772 } 1773 1774 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1 1775 with the expression that holds the compare result (in VOIDmode). */ 1776 1777 static GTY(()) rtx cmptf_libfunc; 1778 1779 void 1780 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1) 1781 { 1782 enum rtx_code code = GET_CODE (*expr); 1783 rtx cmp; 1784 1785 /* If we have a BImode input, then we already have a compare result, and 1786 do not need to emit another comparison. */ 1787 if (GET_MODE (*op0) == BImode) 1788 { 1789 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx); 1790 cmp = *op0; 1791 } 1792 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a 1793 magic number as its third argument, that indicates what to do. 1794 The return value is an integer to be compared against zero. */ 1795 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode) 1796 { 1797 enum qfcmp_magic { 1798 QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */ 1799 QCMP_UNORD = 2, 1800 QCMP_EQ = 4, 1801 QCMP_LT = 8, 1802 QCMP_GT = 16 1803 }; 1804 int magic; 1805 enum rtx_code ncode; 1806 rtx ret; 1807 1808 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode); 1809 switch (code) 1810 { 1811 /* 1 = equal, 0 = not equal. Equality operators do 1812 not raise FP_INVALID when given a NaN operand. */ 1813 case EQ: magic = QCMP_EQ; ncode = NE; break; 1814 case NE: magic = QCMP_EQ; ncode = EQ; break; 1815 /* isunordered() from C99. */ 1816 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break; 1817 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break; 1818 /* Relational operators raise FP_INVALID when given 1819 a NaN operand. */ 1820 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break; 1821 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break; 1822 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break; 1823 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break; 1824 /* Unordered relational operators do not raise FP_INVALID 1825 when given a NaN operand. */ 1826 case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break; 1827 case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break; 1828 case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break; 1829 case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break; 1830 /* Not supported. 
*/ 1831 case UNEQ: 1832 case LTGT: 1833 default: gcc_unreachable (); 1834 } 1835 1836 start_sequence (); 1837 1838 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3, 1839 *op0, TFmode, *op1, TFmode, 1840 GEN_INT (magic), DImode); 1841 cmp = gen_reg_rtx (BImode); 1842 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode, 1843 ret, const0_rtx))); 1844 1845 rtx_insn *insns = get_insns (); 1846 end_sequence (); 1847 1848 emit_libcall_block (insns, cmp, cmp, 1849 gen_rtx_fmt_ee (code, BImode, *op0, *op1)); 1850 code = NE; 1851 } 1852 else 1853 { 1854 cmp = gen_reg_rtx (BImode); 1855 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1))); 1856 code = NE; 1857 } 1858 1859 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx); 1860 *op0 = cmp; 1861 *op1 = const0_rtx; 1862 } 1863 1864 /* Generate an integral vector comparison. Return true if the condition has 1865 been reversed, and so the sense of the comparison should be inverted. */ 1866 1867 static bool 1868 ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode, 1869 rtx dest, rtx op0, rtx op1) 1870 { 1871 bool negate = false; 1872 rtx x; 1873 1874 /* Canonicalize the comparison to EQ, GT, GTU. */ 1875 switch (code) 1876 { 1877 case EQ: 1878 case GT: 1879 case GTU: 1880 break; 1881 1882 case NE: 1883 case LE: 1884 case LEU: 1885 code = reverse_condition (code); 1886 negate = true; 1887 break; 1888 1889 case GE: 1890 case GEU: 1891 code = reverse_condition (code); 1892 negate = true; 1893 /* FALLTHRU */ 1894 1895 case LT: 1896 case LTU: 1897 code = swap_condition (code); 1898 x = op0, op0 = op1, op1 = x; 1899 break; 1900 1901 default: 1902 gcc_unreachable (); 1903 } 1904 1905 /* Unsigned parallel compare is not supported by the hardware. Play some 1906 tricks to turn this into a signed comparison against 0. */ 1907 if (code == GTU) 1908 { 1909 switch (mode) 1910 { 1911 case V2SImode: 1912 { 1913 rtx t1, t2, mask; 1914 1915 /* Subtract (-(INT MAX) - 1) from both operands to make 1916 them signed. */ 1917 mask = gen_int_mode (0x80000000, SImode); 1918 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask)); 1919 mask = force_reg (mode, mask); 1920 t1 = gen_reg_rtx (mode); 1921 emit_insn (gen_subv2si3 (t1, op0, mask)); 1922 t2 = gen_reg_rtx (mode); 1923 emit_insn (gen_subv2si3 (t2, op1, mask)); 1924 op0 = t1; 1925 op1 = t2; 1926 code = GT; 1927 } 1928 break; 1929 1930 case V8QImode: 1931 case V4HImode: 1932 /* Perform a parallel unsigned saturating subtraction. */ 1933 x = gen_reg_rtx (mode); 1934 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1))); 1935 1936 code = EQ; 1937 op0 = x; 1938 op1 = CONST0_RTX (mode); 1939 negate = !negate; 1940 break; 1941 1942 default: 1943 gcc_unreachable (); 1944 } 1945 } 1946 1947 x = gen_rtx_fmt_ee (code, mode, op0, op1); 1948 emit_insn (gen_rtx_SET (dest, x)); 1949 1950 return negate; 1951 } 1952 1953 /* Emit an integral vector conditional move. 
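   The operands are laid out as: operands[0] the destination, operands[1]
   and operands[2] the two value arms, and operands[3] a comparison of
   operands[4] with operands[5].  In the general case the result is built
   with the usual mask identity

     dest = (cmp & t) | (~cmp & f)

   while the two special cases below skip half of that work when one arm
   is the zero vector.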
*/ 1954 1955 void 1956 ia64_expand_vecint_cmov (rtx operands[]) 1957 { 1958 machine_mode mode = GET_MODE (operands[0]); 1959 enum rtx_code code = GET_CODE (operands[3]); 1960 bool negate; 1961 rtx cmp, x, ot, of; 1962 1963 cmp = gen_reg_rtx (mode); 1964 negate = ia64_expand_vecint_compare (code, mode, cmp, 1965 operands[4], operands[5]); 1966 1967 ot = operands[1+negate]; 1968 of = operands[2-negate]; 1969 1970 if (ot == CONST0_RTX (mode)) 1971 { 1972 if (of == CONST0_RTX (mode)) 1973 { 1974 emit_move_insn (operands[0], ot); 1975 return; 1976 } 1977 1978 x = gen_rtx_NOT (mode, cmp); 1979 x = gen_rtx_AND (mode, x, of); 1980 emit_insn (gen_rtx_SET (operands[0], x)); 1981 } 1982 else if (of == CONST0_RTX (mode)) 1983 { 1984 x = gen_rtx_AND (mode, cmp, ot); 1985 emit_insn (gen_rtx_SET (operands[0], x)); 1986 } 1987 else 1988 { 1989 rtx t, f; 1990 1991 t = gen_reg_rtx (mode); 1992 x = gen_rtx_AND (mode, cmp, operands[1+negate]); 1993 emit_insn (gen_rtx_SET (t, x)); 1994 1995 f = gen_reg_rtx (mode); 1996 x = gen_rtx_NOT (mode, cmp); 1997 x = gen_rtx_AND (mode, x, operands[2-negate]); 1998 emit_insn (gen_rtx_SET (f, x)); 1999 2000 x = gen_rtx_IOR (mode, t, f); 2001 emit_insn (gen_rtx_SET (operands[0], x)); 2002 } 2003 } 2004 2005 /* Emit an integral vector min or max operation. Return true if all done. */ 2006 2007 bool 2008 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode, 2009 rtx operands[]) 2010 { 2011 rtx xops[6]; 2012 2013 /* These four combinations are supported directly. */ 2014 if (mode == V8QImode && (code == UMIN || code == UMAX)) 2015 return false; 2016 if (mode == V4HImode && (code == SMIN || code == SMAX)) 2017 return false; 2018 2019 /* This combination can be implemented with only saturating subtraction. */ 2020 if (mode == V4HImode && code == UMAX) 2021 { 2022 rtx x, tmp = gen_reg_rtx (mode); 2023 2024 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]); 2025 emit_insn (gen_rtx_SET (tmp, x)); 2026 2027 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2])); 2028 return true; 2029 } 2030 2031 /* Everything else implemented via vector comparisons. */ 2032 xops[0] = operands[0]; 2033 xops[4] = xops[1] = operands[1]; 2034 xops[5] = xops[2] = operands[2]; 2035 2036 switch (code) 2037 { 2038 case UMIN: 2039 code = LTU; 2040 break; 2041 case UMAX: 2042 code = GTU; 2043 break; 2044 case SMIN: 2045 code = LT; 2046 break; 2047 case SMAX: 2048 code = GT; 2049 break; 2050 default: 2051 gcc_unreachable (); 2052 } 2053 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]); 2054 2055 ia64_expand_vecint_cmov (xops); 2056 return true; 2057 } 2058 2059 /* The vectors LO and HI each contain N halves of a double-wide vector. 2060 Reassemble either the first N/2 or the second N/2 elements. */ 2061 2062 void 2063 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp) 2064 { 2065 machine_mode vmode = GET_MODE (lo); 2066 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode); 2067 struct expand_vec_perm_d d; 2068 bool ok; 2069 2070 d.target = gen_lowpart (vmode, out); 2071 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo); 2072 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi); 2073 d.vmode = vmode; 2074 d.nelt = nelt; 2075 d.one_operand_p = false; 2076 d.testing_p = false; 2077 2078 high = (highp ? nelt / 2 : 0); 2079 for (i = 0; i < nelt / 2; ++i) 2080 { 2081 d.perm[i * 2] = i + high; 2082 d.perm[i * 2 + 1] = i + high + nelt; 2083 } 2084 2085 ok = ia64_expand_vec_perm_const_1 (&d); 2086 gcc_assert (ok); 2087 } 2088 2089 /* Return a vector of the sign-extension of VEC. 
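   That is, return a vector whose elements are all ones where the
   corresponding element of VEC is negative and zero elsewhere; for the
   unsigned case this is simply the zero vector.  For example (values
   purely illustrative), a V4HImode input of { -1, 2, -3, 4 } yields
   { 0xffff, 0, 0xffff, 0 }.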
*/ 2090 2091 static rtx 2092 ia64_unpack_sign (rtx vec, bool unsignedp) 2093 { 2094 machine_mode mode = GET_MODE (vec); 2095 rtx zero = CONST0_RTX (mode); 2096 2097 if (unsignedp) 2098 return zero; 2099 else 2100 { 2101 rtx sign = gen_reg_rtx (mode); 2102 bool neg; 2103 2104 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero); 2105 gcc_assert (!neg); 2106 2107 return sign; 2108 } 2109 } 2110 2111 /* Emit an integral vector unpack operation. */ 2112 2113 void 2114 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp) 2115 { 2116 rtx sign = ia64_unpack_sign (operands[1], unsignedp); 2117 ia64_unpack_assemble (operands[0], operands[1], sign, highp); 2118 } 2119 2120 /* Emit an integral vector widening sum operations. */ 2121 2122 void 2123 ia64_expand_widen_sum (rtx operands[3], bool unsignedp) 2124 { 2125 machine_mode wmode; 2126 rtx l, h, t, sign; 2127 2128 sign = ia64_unpack_sign (operands[1], unsignedp); 2129 2130 wmode = GET_MODE (operands[0]); 2131 l = gen_reg_rtx (wmode); 2132 h = gen_reg_rtx (wmode); 2133 2134 ia64_unpack_assemble (l, operands[1], sign, false); 2135 ia64_unpack_assemble (h, operands[1], sign, true); 2136 2137 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT); 2138 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT); 2139 if (t != operands[0]) 2140 emit_move_insn (operands[0], t); 2141 } 2142 2143 /* Emit the appropriate sequence for a call. */ 2144 2145 void 2146 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED, 2147 int sibcall_p) 2148 { 2149 rtx insn, b0; 2150 2151 addr = XEXP (addr, 0); 2152 addr = convert_memory_address (DImode, addr); 2153 b0 = gen_rtx_REG (DImode, R_BR (0)); 2154 2155 /* ??? Should do this for functions known to bind local too. */ 2156 if (TARGET_NO_PIC || TARGET_AUTO_PIC) 2157 { 2158 if (sibcall_p) 2159 insn = gen_sibcall_nogp (addr); 2160 else if (! retval) 2161 insn = gen_call_nogp (addr, b0); 2162 else 2163 insn = gen_call_value_nogp (retval, addr, b0); 2164 insn = emit_call_insn (insn); 2165 } 2166 else 2167 { 2168 if (sibcall_p) 2169 insn = gen_sibcall_gp (addr); 2170 else if (! 
retval) 2171 insn = gen_call_gp (addr, b0); 2172 else 2173 insn = gen_call_value_gp (retval, addr, b0); 2174 insn = emit_call_insn (insn); 2175 2176 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); 2177 } 2178 2179 if (sibcall_p) 2180 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0); 2181 2182 if (TARGET_ABI_OPEN_VMS) 2183 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), 2184 gen_rtx_REG (DImode, GR_REG (25))); 2185 } 2186 2187 static void 2188 reg_emitted (enum ia64_frame_regs r) 2189 { 2190 if (emitted_frame_related_regs[r] == 0) 2191 emitted_frame_related_regs[r] = current_frame_info.r[r]; 2192 else 2193 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]); 2194 } 2195 2196 static int 2197 get_reg (enum ia64_frame_regs r) 2198 { 2199 reg_emitted (r); 2200 return current_frame_info.r[r]; 2201 } 2202 2203 static bool 2204 is_emitted (int regno) 2205 { 2206 unsigned int r; 2207 2208 for (r = reg_fp; r < number_of_ia64_frame_regs; r++) 2209 if (emitted_frame_related_regs[r] == regno) 2210 return true; 2211 return false; 2212 } 2213 2214 void 2215 ia64_reload_gp (void) 2216 { 2217 rtx tmp; 2218 2219 if (current_frame_info.r[reg_save_gp]) 2220 { 2221 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp)); 2222 } 2223 else 2224 { 2225 HOST_WIDE_INT offset; 2226 rtx offset_r; 2227 2228 offset = (current_frame_info.spill_cfa_off 2229 + current_frame_info.spill_size); 2230 if (frame_pointer_needed) 2231 { 2232 tmp = hard_frame_pointer_rtx; 2233 offset = -offset; 2234 } 2235 else 2236 { 2237 tmp = stack_pointer_rtx; 2238 offset = current_frame_info.total_size - offset; 2239 } 2240 2241 offset_r = GEN_INT (offset); 2242 if (satisfies_constraint_I (offset_r)) 2243 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r)); 2244 else 2245 { 2246 emit_move_insn (pic_offset_table_rtx, offset_r); 2247 emit_insn (gen_adddi3 (pic_offset_table_rtx, 2248 pic_offset_table_rtx, tmp)); 2249 } 2250 2251 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx); 2252 } 2253 2254 emit_move_insn (pic_offset_table_rtx, tmp); 2255 } 2256 2257 void 2258 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r, 2259 rtx scratch_b, int noreturn_p, int sibcall_p) 2260 { 2261 rtx insn; 2262 bool is_desc = false; 2263 2264 /* If we find we're calling through a register, then we're actually 2265 calling through a descriptor, so load up the values. */ 2266 if (REG_P (addr) && GR_REGNO_P (REGNO (addr))) 2267 { 2268 rtx tmp; 2269 bool addr_dead_p; 2270 2271 /* ??? We are currently constrained to *not* use peep2, because 2272 we can legitimately change the global lifetime of the GP 2273 (in the form of killing where previously live). This is 2274 because a call through a descriptor doesn't use the previous 2275 value of the GP, while a direct call does, and we do not 2276 commit to either form until the split here. 2277 2278 That said, this means that we lack precise life info for 2279 whether ADDR is dead after this call. This is not terribly 2280 important, since we can fix things up essentially for free 2281 with the POST_DEC below, but it's nice to not use it when we 2282 can immediately tell it's not necessary. */ 2283 addr_dead_p = ((noreturn_p || sibcall_p 2284 || TEST_HARD_REG_BIT (regs_invalidated_by_call, 2285 REGNO (addr))) 2286 && !FUNCTION_ARG_REGNO_P (REGNO (addr))); 2287 2288 /* Load the code address into scratch_b. 
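   An IA-64 function descriptor is a pair of 8-byte words, the entry point
   address followed by the callee's gp value, so the post-increment load
   below leaves ADDR pointing at the gp word that is read next.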
*/ 2289 tmp = gen_rtx_POST_INC (Pmode, addr); 2290 tmp = gen_rtx_MEM (Pmode, tmp); 2291 emit_move_insn (scratch_r, tmp); 2292 emit_move_insn (scratch_b, scratch_r); 2293 2294 /* Load the GP address. If ADDR is not dead here, then we must 2295 revert the change made above via the POST_INCREMENT. */ 2296 if (!addr_dead_p) 2297 tmp = gen_rtx_POST_DEC (Pmode, addr); 2298 else 2299 tmp = addr; 2300 tmp = gen_rtx_MEM (Pmode, tmp); 2301 emit_move_insn (pic_offset_table_rtx, tmp); 2302 2303 is_desc = true; 2304 addr = scratch_b; 2305 } 2306 2307 if (sibcall_p) 2308 insn = gen_sibcall_nogp (addr); 2309 else if (retval) 2310 insn = gen_call_value_nogp (retval, addr, retaddr); 2311 else 2312 insn = gen_call_nogp (addr, retaddr); 2313 emit_call_insn (insn); 2314 2315 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p) 2316 ia64_reload_gp (); 2317 } 2318 2319 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically. 2320 2321 This differs from the generic code in that we know about the zero-extending 2322 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We 2323 also know that ld.acq+cmpxchg.rel equals a full barrier. 2324 2325 The loop we want to generate looks like 2326 2327 cmp_reg = mem; 2328 label: 2329 old_reg = cmp_reg; 2330 new_reg = cmp_reg op val; 2331 cmp_reg = compare-and-swap(mem, old_reg, new_reg) 2332 if (cmp_reg != old_reg) 2333 goto label; 2334 2335 Note that we only do the plain load from memory once. Subsequent 2336 iterations use the value loaded by the compare-and-swap pattern. */ 2337 2338 void 2339 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, 2340 rtx old_dst, rtx new_dst, enum memmodel model) 2341 { 2342 machine_mode mode = GET_MODE (mem); 2343 rtx old_reg, new_reg, cmp_reg, ar_ccv, label; 2344 enum insn_code icode; 2345 2346 /* Special case for using fetchadd. */ 2347 if ((mode == SImode || mode == DImode) 2348 && (code == PLUS || code == MINUS) 2349 && fetchadd_operand (val, mode)) 2350 { 2351 if (code == MINUS) 2352 val = GEN_INT (-INTVAL (val)); 2353 2354 if (!old_dst) 2355 old_dst = gen_reg_rtx (mode); 2356 2357 switch (model) 2358 { 2359 case MEMMODEL_ACQ_REL: 2360 case MEMMODEL_SEQ_CST: 2361 case MEMMODEL_SYNC_SEQ_CST: 2362 emit_insn (gen_memory_barrier ()); 2363 /* FALLTHRU */ 2364 case MEMMODEL_RELAXED: 2365 case MEMMODEL_ACQUIRE: 2366 case MEMMODEL_SYNC_ACQUIRE: 2367 case MEMMODEL_CONSUME: 2368 if (mode == SImode) 2369 icode = CODE_FOR_fetchadd_acq_si; 2370 else 2371 icode = CODE_FOR_fetchadd_acq_di; 2372 break; 2373 case MEMMODEL_RELEASE: 2374 case MEMMODEL_SYNC_RELEASE: 2375 if (mode == SImode) 2376 icode = CODE_FOR_fetchadd_rel_si; 2377 else 2378 icode = CODE_FOR_fetchadd_rel_di; 2379 break; 2380 2381 default: 2382 gcc_unreachable (); 2383 } 2384 2385 emit_insn (GEN_FCN (icode) (old_dst, mem, val)); 2386 2387 if (new_dst) 2388 { 2389 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst, 2390 true, OPTAB_WIDEN); 2391 if (new_reg != new_dst) 2392 emit_move_insn (new_dst, new_reg); 2393 } 2394 return; 2395 } 2396 2397 /* Because of the volatile mem read, we get an ld.acq, which is the 2398 front half of the full barrier. The end half is the cmpxchg.rel. 2399 For relaxed and release memory models, we don't need this. But we 2400 also don't bother trying to prevent it either. 
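   As a rough illustration (the variable name counter is hypothetical), a
   source-level

     __sync_fetch_and_add (&counter, 1);

   on an int or long object is expected to take the fetchadd fast path
   above, since the addend is one of the immediates fetchadd accepts,
   while the same operation on a char or short, or with a non-immediate
   addend, falls through to the ld.acq/cmpxchg loop expanded below.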
*/ 2401 gcc_assert (is_mm_relaxed (model) || is_mm_release (model) 2402 || MEM_VOLATILE_P (mem)); 2403 2404 old_reg = gen_reg_rtx (DImode); 2405 cmp_reg = gen_reg_rtx (DImode); 2406 label = gen_label_rtx (); 2407 2408 if (mode != DImode) 2409 { 2410 val = simplify_gen_subreg (DImode, val, mode, 0); 2411 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1)); 2412 } 2413 else 2414 emit_move_insn (cmp_reg, mem); 2415 2416 emit_label (label); 2417 2418 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); 2419 emit_move_insn (old_reg, cmp_reg); 2420 emit_move_insn (ar_ccv, cmp_reg); 2421 2422 if (old_dst) 2423 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg)); 2424 2425 new_reg = cmp_reg; 2426 if (code == NOT) 2427 { 2428 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX, 2429 true, OPTAB_DIRECT); 2430 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true); 2431 } 2432 else 2433 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX, 2434 true, OPTAB_DIRECT); 2435 2436 if (mode != DImode) 2437 new_reg = gen_lowpart (mode, new_reg); 2438 if (new_dst) 2439 emit_move_insn (new_dst, new_reg); 2440 2441 switch (model) 2442 { 2443 case MEMMODEL_RELAXED: 2444 case MEMMODEL_ACQUIRE: 2445 case MEMMODEL_SYNC_ACQUIRE: 2446 case MEMMODEL_CONSUME: 2447 switch (mode) 2448 { 2449 case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break; 2450 case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break; 2451 case SImode: icode = CODE_FOR_cmpxchg_acq_si; break; 2452 case DImode: icode = CODE_FOR_cmpxchg_acq_di; break; 2453 default: 2454 gcc_unreachable (); 2455 } 2456 break; 2457 2458 case MEMMODEL_RELEASE: 2459 case MEMMODEL_SYNC_RELEASE: 2460 case MEMMODEL_ACQ_REL: 2461 case MEMMODEL_SEQ_CST: 2462 case MEMMODEL_SYNC_SEQ_CST: 2463 switch (mode) 2464 { 2465 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break; 2466 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break; 2467 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break; 2468 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break; 2469 default: 2470 gcc_unreachable (); 2471 } 2472 break; 2473 2474 default: 2475 gcc_unreachable (); 2476 } 2477 2478 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg)); 2479 2480 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label); 2481 } 2482 2483 /* Begin the assembly file. */ 2484 2485 static void 2486 ia64_file_start (void) 2487 { 2488 default_file_start (); 2489 emit_safe_across_calls (); 2490 } 2491 2492 void 2493 emit_safe_across_calls (void) 2494 { 2495 unsigned int rs, re; 2496 int out_state; 2497 2498 rs = 1; 2499 out_state = 0; 2500 while (1) 2501 { 2502 while (rs < 64 && call_used_regs[PR_REG (rs)]) 2503 rs++; 2504 if (rs >= 64) 2505 break; 2506 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++) 2507 continue; 2508 if (out_state == 0) 2509 { 2510 fputs ("\t.pred.safe_across_calls ", asm_out_file); 2511 out_state = 1; 2512 } 2513 else 2514 fputc (',', asm_out_file); 2515 if (re == rs + 1) 2516 fprintf (asm_out_file, "p%u", rs); 2517 else 2518 fprintf (asm_out_file, "p%u-p%u", rs, re - 1); 2519 rs = re + 1; 2520 } 2521 if (out_state) 2522 fputc ('\n', asm_out_file); 2523 } 2524 2525 /* Globalize a declaration. 
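   For a declaration carrying the ia64 "version_id" attribute this also
   emits an .alias directive ahead of the .global and .type output; e.g.
   a (purely illustrative)

     extern int foo (void) __attribute__ ((version_id ("20040821")));

   would produce

     .alias foo#, "foo{20040821}"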
*/ 2526 2527 static void 2528 ia64_globalize_decl_name (FILE * stream, tree decl) 2529 { 2530 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0); 2531 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl)); 2532 if (version_attr) 2533 { 2534 tree v = TREE_VALUE (TREE_VALUE (version_attr)); 2535 const char *p = TREE_STRING_POINTER (v); 2536 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p); 2537 } 2538 targetm.asm_out.globalize_label (stream, name); 2539 if (TREE_CODE (decl) == FUNCTION_DECL) 2540 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function"); 2541 } 2542 2543 /* Helper function for ia64_compute_frame_size: find an appropriate general 2544 register to spill some special register to. SPECIAL_SPILL_MASK contains 2545 bits in GR0 to GR31 that have already been allocated by this routine. 2546 TRY_LOCALS is true if we should attempt to locate a local regnum. */ 2547 2548 static int 2549 find_gr_spill (enum ia64_frame_regs r, int try_locals) 2550 { 2551 int regno; 2552 2553 if (emitted_frame_related_regs[r] != 0) 2554 { 2555 regno = emitted_frame_related_regs[r]; 2556 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed) 2557 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1) 2558 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1; 2559 else if (crtl->is_leaf 2560 && regno >= GR_REG (1) && regno <= GR_REG (31)) 2561 current_frame_info.gr_used_mask |= 1 << regno; 2562 2563 return regno; 2564 } 2565 2566 /* If this is a leaf function, first try an otherwise unused 2567 call-clobbered register. */ 2568 if (crtl->is_leaf) 2569 { 2570 for (regno = GR_REG (1); regno <= GR_REG (31); regno++) 2571 if (! df_regs_ever_live_p (regno) 2572 && call_used_regs[regno] 2573 && ! fixed_regs[regno] 2574 && ! global_regs[regno] 2575 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0 2576 && ! is_emitted (regno)) 2577 { 2578 current_frame_info.gr_used_mask |= 1 << regno; 2579 return regno; 2580 } 2581 } 2582 2583 if (try_locals) 2584 { 2585 regno = current_frame_info.n_local_regs; 2586 /* If there is a frame pointer, then we can't use loc79, because 2587 that is HARD_FRAME_POINTER_REGNUM. In particular, see the 2588 reg_name switching code in ia64_expand_prologue. */ 2589 while (regno < (80 - frame_pointer_needed)) 2590 if (! is_emitted (LOC_REG (regno++))) 2591 { 2592 current_frame_info.n_local_regs = regno; 2593 return LOC_REG (regno - 1); 2594 } 2595 } 2596 2597 /* Failed to find a general register to spill to. Must use stack. */ 2598 return 0; 2599 } 2600 2601 /* In order to make for nice schedules, we try to allocate every temporary 2602 to a different register. We must of course stay away from call-saved, 2603 fixed, and global registers. We must also stay away from registers 2604 allocated in current_frame_info.gr_used_mask, since those include regs 2605 used all through the prologue. 2606 2607 Any register allocated here must be used immediately. The idea is to 2608 aid scheduling, not to solve data flow problems. */ 2609 2610 static int last_scratch_gr_reg; 2611 2612 static int 2613 next_scratch_gr_reg (void) 2614 { 2615 int i, regno; 2616 2617 for (i = 0; i < 32; ++i) 2618 { 2619 regno = (last_scratch_gr_reg + i + 1) & 31; 2620 if (call_used_regs[regno] 2621 && ! fixed_regs[regno] 2622 && ! global_regs[regno] 2623 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0) 2624 { 2625 last_scratch_gr_reg = regno; 2626 return regno; 2627 } 2628 } 2629 2630 /* There must be _something_ available. 
*/ 2631 gcc_unreachable (); 2632 } 2633 2634 /* Helper function for ia64_compute_frame_size, called through 2635 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */ 2636 2637 static void 2638 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED) 2639 { 2640 unsigned int regno = REGNO (reg); 2641 if (regno < 32) 2642 { 2643 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)]; 2644 for (i = 0; i < n; ++i) 2645 current_frame_info.gr_used_mask |= 1 << (regno + i); 2646 } 2647 } 2648 2649 2650 /* Returns the number of bytes offset between the frame pointer and the stack 2651 pointer for the current function. SIZE is the number of bytes of space 2652 needed for local variables. */ 2653 2654 static void 2655 ia64_compute_frame_size (HOST_WIDE_INT size) 2656 { 2657 HOST_WIDE_INT total_size; 2658 HOST_WIDE_INT spill_size = 0; 2659 HOST_WIDE_INT extra_spill_size = 0; 2660 HOST_WIDE_INT pretend_args_size; 2661 HARD_REG_SET mask; 2662 int n_spilled = 0; 2663 int spilled_gr_p = 0; 2664 int spilled_fr_p = 0; 2665 unsigned int regno; 2666 int min_regno; 2667 int max_regno; 2668 int i; 2669 2670 if (current_frame_info.initialized) 2671 return; 2672 2673 memset (&current_frame_info, 0, sizeof current_frame_info); 2674 CLEAR_HARD_REG_SET (mask); 2675 2676 /* Don't allocate scratches to the return register. */ 2677 diddle_return_value (mark_reg_gr_used_mask, NULL); 2678 2679 /* Don't allocate scratches to the EH scratch registers. */ 2680 if (cfun->machine->ia64_eh_epilogue_sp) 2681 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL); 2682 if (cfun->machine->ia64_eh_epilogue_bsp) 2683 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL); 2684 2685 /* Static stack checking uses r2 and r3. */ 2686 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) 2687 current_frame_info.gr_used_mask |= 0xc; 2688 2689 /* Find the size of the register stack frame. We have only 80 local 2690 registers, because we reserve 8 for the inputs and 8 for the 2691 outputs. */ 2692 2693 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed, 2694 since we'll be adjusting that down later. */ 2695 regno = LOC_REG (78) + ! frame_pointer_needed; 2696 for (; regno >= LOC_REG (0); regno--) 2697 if (df_regs_ever_live_p (regno) && !is_emitted (regno)) 2698 break; 2699 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1; 2700 2701 /* For functions marked with the syscall_linkage attribute, we must mark 2702 all eight input registers as in use, so that locals aren't visible to 2703 the caller. */ 2704 2705 if (cfun->machine->n_varargs > 0 2706 || lookup_attribute ("syscall_linkage", 2707 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) 2708 current_frame_info.n_input_regs = 8; 2709 else 2710 { 2711 for (regno = IN_REG (7); regno >= IN_REG (0); regno--) 2712 if (df_regs_ever_live_p (regno)) 2713 break; 2714 current_frame_info.n_input_regs = regno - IN_REG (0) + 1; 2715 } 2716 2717 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--) 2718 if (df_regs_ever_live_p (regno)) 2719 break; 2720 i = regno - OUT_REG (0) + 1; 2721 2722 #ifndef PROFILE_HOOK 2723 /* When -p profiling, we need one output register for the mcount argument. 2724 Likewise for -a profiling for the bb_init_func argument. For -ax 2725 profiling, we need two output registers for the two bb_init_trace_func 2726 arguments. */ 2727 if (crtl->profile) 2728 i = MAX (i, 1); 2729 #endif 2730 current_frame_info.n_output_regs = i; 2731 2732 /* ??? No rotating register support yet.
*/ 2733 current_frame_info.n_rotate_regs = 0; 2734 2735 /* Discover which registers need spilling, and how much room that 2736 will take. Begin with floating point and general registers, 2737 which will always wind up on the stack. */ 2738 2739 for (regno = FR_REG (2); regno <= FR_REG (127); regno++) 2740 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) 2741 { 2742 SET_HARD_REG_BIT (mask, regno); 2743 spill_size += 16; 2744 n_spilled += 1; 2745 spilled_fr_p = 1; 2746 } 2747 2748 for (regno = GR_REG (1); regno <= GR_REG (31); regno++) 2749 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) 2750 { 2751 SET_HARD_REG_BIT (mask, regno); 2752 spill_size += 8; 2753 n_spilled += 1; 2754 spilled_gr_p = 1; 2755 } 2756 2757 for (regno = BR_REG (1); regno <= BR_REG (7); regno++) 2758 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) 2759 { 2760 SET_HARD_REG_BIT (mask, regno); 2761 spill_size += 8; 2762 n_spilled += 1; 2763 } 2764 2765 /* Now come all special registers that might get saved in other 2766 general registers. */ 2767 2768 if (frame_pointer_needed) 2769 { 2770 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1); 2771 /* If we did not get a register, then we take LOC79. This is guaranteed 2772 to be free, even if regs_ever_live is already set, because this is 2773 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs, 2774 as we don't count loc79 above. */ 2775 if (current_frame_info.r[reg_fp] == 0) 2776 { 2777 current_frame_info.r[reg_fp] = LOC_REG (79); 2778 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1; 2779 } 2780 } 2781 2782 if (! crtl->is_leaf) 2783 { 2784 /* Emit a save of BR0 if we call other functions. Do this even 2785 if this function doesn't return, as EH depends on this to be 2786 able to unwind the stack. */ 2787 SET_HARD_REG_BIT (mask, BR_REG (0)); 2788 2789 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1); 2790 if (current_frame_info.r[reg_save_b0] == 0) 2791 { 2792 extra_spill_size += 8; 2793 n_spilled += 1; 2794 } 2795 2796 /* Similarly for ar.pfs. */ 2797 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); 2798 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1); 2799 if (current_frame_info.r[reg_save_ar_pfs] == 0) 2800 { 2801 extra_spill_size += 8; 2802 n_spilled += 1; 2803 } 2804 2805 /* Similarly for gp. Note that if we're calling setjmp, the stacked 2806 registers are clobbered, so we fall back to the stack. */ 2807 current_frame_info.r[reg_save_gp] 2808 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1)); 2809 if (current_frame_info.r[reg_save_gp] == 0) 2810 { 2811 SET_HARD_REG_BIT (mask, GR_REG (1)); 2812 spill_size += 8; 2813 n_spilled += 1; 2814 } 2815 } 2816 else 2817 { 2818 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)]) 2819 { 2820 SET_HARD_REG_BIT (mask, BR_REG (0)); 2821 extra_spill_size += 8; 2822 n_spilled += 1; 2823 } 2824 2825 if (df_regs_ever_live_p (AR_PFS_REGNUM)) 2826 { 2827 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); 2828 current_frame_info.r[reg_save_ar_pfs] 2829 = find_gr_spill (reg_save_ar_pfs, 1); 2830 if (current_frame_info.r[reg_save_ar_pfs] == 0) 2831 { 2832 extra_spill_size += 8; 2833 n_spilled += 1; 2834 } 2835 } 2836 } 2837 2838 /* Unwind descriptor hackery: things are most efficient if we allocate 2839 consecutive GR save registers for RP, PFS, FP in that order. However, 2840 it is absolutely critical that FP get the only hard register that's 2841 guaranteed to be free, so we allocated it first. 
If all three did 2842 happen to be allocated hard regs, and are consecutive, rearrange them 2843 into the preferred order now. 2844 2845 If we have already emitted code for any of those registers, 2846 then it's already too late to change. */ 2847 min_regno = MIN (current_frame_info.r[reg_fp], 2848 MIN (current_frame_info.r[reg_save_b0], 2849 current_frame_info.r[reg_save_ar_pfs])); 2850 max_regno = MAX (current_frame_info.r[reg_fp], 2851 MAX (current_frame_info.r[reg_save_b0], 2852 current_frame_info.r[reg_save_ar_pfs])); 2853 if (min_regno > 0 2854 && min_regno + 2 == max_regno 2855 && (current_frame_info.r[reg_fp] == min_regno + 1 2856 || current_frame_info.r[reg_save_b0] == min_regno + 1 2857 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1) 2858 && (emitted_frame_related_regs[reg_save_b0] == 0 2859 || emitted_frame_related_regs[reg_save_b0] == min_regno) 2860 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0 2861 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1) 2862 && (emitted_frame_related_regs[reg_fp] == 0 2863 || emitted_frame_related_regs[reg_fp] == min_regno + 2)) 2864 { 2865 current_frame_info.r[reg_save_b0] = min_regno; 2866 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1; 2867 current_frame_info.r[reg_fp] = min_regno + 2; 2868 } 2869 2870 /* See if we need to store the predicate register block. */ 2871 for (regno = PR_REG (0); regno <= PR_REG (63); regno++) 2872 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) 2873 break; 2874 if (regno <= PR_REG (63)) 2875 { 2876 SET_HARD_REG_BIT (mask, PR_REG (0)); 2877 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1); 2878 if (current_frame_info.r[reg_save_pr] == 0) 2879 { 2880 extra_spill_size += 8; 2881 n_spilled += 1; 2882 } 2883 2884 /* ??? Mark them all as used so that register renaming and such 2885 are free to use them. */ 2886 for (regno = PR_REG (0); regno <= PR_REG (63); regno++) 2887 df_set_regs_ever_live (regno, true); 2888 } 2889 2890 /* If we're forced to use st8.spill, we're forced to save and restore 2891 ar.unat as well. The check for existing liveness allows inline asm 2892 to touch ar.unat. */ 2893 if (spilled_gr_p || cfun->machine->n_varargs 2894 || df_regs_ever_live_p (AR_UNAT_REGNUM)) 2895 { 2896 df_set_regs_ever_live (AR_UNAT_REGNUM, true); 2897 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM); 2898 current_frame_info.r[reg_save_ar_unat] 2899 = find_gr_spill (reg_save_ar_unat, spill_size == 0); 2900 if (current_frame_info.r[reg_save_ar_unat] == 0) 2901 { 2902 extra_spill_size += 8; 2903 n_spilled += 1; 2904 } 2905 } 2906 2907 if (df_regs_ever_live_p (AR_LC_REGNUM)) 2908 { 2909 SET_HARD_REG_BIT (mask, AR_LC_REGNUM); 2910 current_frame_info.r[reg_save_ar_lc] 2911 = find_gr_spill (reg_save_ar_lc, spill_size == 0); 2912 if (current_frame_info.r[reg_save_ar_lc] == 0) 2913 { 2914 extra_spill_size += 8; 2915 n_spilled += 1; 2916 } 2917 } 2918 2919 /* If we have an odd number of words of pretend arguments written to 2920 the stack, then the FR save area will be unaligned. We round the 2921 size of this area up to keep things 16 byte aligned. 
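   (Each FR spill slot is 16 bytes, as accounted for by the spill_size
   increments above, so an odd number of 8-byte pretend-argument words
   would otherwise leave the FR save area starting on an 8-byte rather
   than a 16-byte boundary.)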
*/ 2922 if (spilled_fr_p) 2923 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size); 2924 else 2925 pretend_args_size = crtl->args.pretend_args_size; 2926 2927 total_size = (spill_size + extra_spill_size + size + pretend_args_size 2928 + crtl->outgoing_args_size); 2929 total_size = IA64_STACK_ALIGN (total_size); 2930 2931 /* We always use the 16-byte scratch area provided by the caller, but 2932 if we are a leaf function, there's no one to which we need to provide 2933 a scratch area. However, if the function allocates dynamic stack space, 2934 the dynamic offset is computed early and contains STACK_POINTER_OFFSET, 2935 so we need to cope. */ 2936 if (crtl->is_leaf && !cfun->calls_alloca) 2937 total_size = MAX (0, total_size - 16); 2938 2939 current_frame_info.total_size = total_size; 2940 current_frame_info.spill_cfa_off = pretend_args_size - 16; 2941 current_frame_info.spill_size = spill_size; 2942 current_frame_info.extra_spill_size = extra_spill_size; 2943 COPY_HARD_REG_SET (current_frame_info.mask, mask); 2944 current_frame_info.n_spilled = n_spilled; 2945 current_frame_info.initialized = reload_completed; 2946 } 2947 2948 /* Worker function for TARGET_CAN_ELIMINATE. */ 2949 2950 bool 2951 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) 2952 { 2953 return (to == BR_REG (0) ? crtl->is_leaf : true); 2954 } 2955 2956 /* Compute the initial difference between the specified pair of registers. */ 2957 2958 HOST_WIDE_INT 2959 ia64_initial_elimination_offset (int from, int to) 2960 { 2961 HOST_WIDE_INT offset; 2962 2963 ia64_compute_frame_size (get_frame_size ()); 2964 switch (from) 2965 { 2966 case FRAME_POINTER_REGNUM: 2967 switch (to) 2968 { 2969 case HARD_FRAME_POINTER_REGNUM: 2970 offset = -current_frame_info.total_size; 2971 if (!crtl->is_leaf || cfun->calls_alloca) 2972 offset += 16 + crtl->outgoing_args_size; 2973 break; 2974 2975 case STACK_POINTER_REGNUM: 2976 offset = 0; 2977 if (!crtl->is_leaf || cfun->calls_alloca) 2978 offset += 16 + crtl->outgoing_args_size; 2979 break; 2980 2981 default: 2982 gcc_unreachable (); 2983 } 2984 break; 2985 2986 case ARG_POINTER_REGNUM: 2987 /* Arguments start above the 16 byte save area, unless stdarg 2988 in which case we store through the 16 byte save area. */ 2989 switch (to) 2990 { 2991 case HARD_FRAME_POINTER_REGNUM: 2992 offset = 16 - crtl->args.pretend_args_size; 2993 break; 2994 2995 case STACK_POINTER_REGNUM: 2996 offset = (current_frame_info.total_size 2997 + 16 - crtl->args.pretend_args_size); 2998 break; 2999 3000 default: 3001 gcc_unreachable (); 3002 } 3003 break; 3004 3005 default: 3006 gcc_unreachable (); 3007 } 3008 3009 return offset; 3010 } 3011 3012 /* If there are more than a trivial number of register spills, we use 3013 two interleaved iterators so that we can get two memory references 3014 per insn group. 3015 3016 In order to simplify things in the prologue and epilogue expanders, 3017 we use helper functions to fix up the memory references after the 3018 fact with the appropriate offsets to a POST_MODIFY memory mode. 3019 The following data structure tracks the state of the two iterators 3020 while insns are being emitted. 
*/ 3021 3022 struct spill_fill_data 3023 { 3024 rtx_insn *init_after; /* point at which to emit initializations */ 3025 rtx init_reg[2]; /* initial base register */ 3026 rtx iter_reg[2]; /* the iterator registers */ 3027 rtx *prev_addr[2]; /* address of last memory use */ 3028 rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */ 3029 HOST_WIDE_INT prev_off[2]; /* last offset */ 3030 int n_iter; /* number of iterators in use */ 3031 int next_iter; /* next iterator to use */ 3032 unsigned int save_gr_used_mask; 3033 }; 3034 3035 static struct spill_fill_data spill_fill_data; 3036 3037 static void 3038 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off) 3039 { 3040 int i; 3041 3042 spill_fill_data.init_after = get_last_insn (); 3043 spill_fill_data.init_reg[0] = init_reg; 3044 spill_fill_data.init_reg[1] = init_reg; 3045 spill_fill_data.prev_addr[0] = NULL; 3046 spill_fill_data.prev_addr[1] = NULL; 3047 spill_fill_data.prev_insn[0] = NULL; 3048 spill_fill_data.prev_insn[1] = NULL; 3049 spill_fill_data.prev_off[0] = cfa_off; 3050 spill_fill_data.prev_off[1] = cfa_off; 3051 spill_fill_data.next_iter = 0; 3052 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask; 3053 3054 spill_fill_data.n_iter = 1 + (n_spills > 2); 3055 for (i = 0; i < spill_fill_data.n_iter; ++i) 3056 { 3057 int regno = next_scratch_gr_reg (); 3058 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno); 3059 current_frame_info.gr_used_mask |= 1 << regno; 3060 } 3061 } 3062 3063 static void 3064 finish_spill_pointers (void) 3065 { 3066 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask; 3067 } 3068 3069 static rtx 3070 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off) 3071 { 3072 int iter = spill_fill_data.next_iter; 3073 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off; 3074 rtx disp_rtx = GEN_INT (disp); 3075 rtx mem; 3076 3077 if (spill_fill_data.prev_addr[iter]) 3078 { 3079 if (satisfies_constraint_N (disp_rtx)) 3080 { 3081 *spill_fill_data.prev_addr[iter] 3082 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter], 3083 gen_rtx_PLUS (DImode, 3084 spill_fill_data.iter_reg[iter], 3085 disp_rtx)); 3086 add_reg_note (spill_fill_data.prev_insn[iter], 3087 REG_INC, spill_fill_data.iter_reg[iter]); 3088 } 3089 else 3090 { 3091 /* ??? Could use register post_modify for loads. */ 3092 if (!satisfies_constraint_I (disp_rtx)) 3093 { 3094 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); 3095 emit_move_insn (tmp, disp_rtx); 3096 disp_rtx = tmp; 3097 } 3098 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], 3099 spill_fill_data.iter_reg[iter], disp_rtx)); 3100 } 3101 } 3102 /* Micro-optimization: if we've created a frame pointer, it's at 3103 CFA 0, which may allow the real iterator to be initialized lower, 3104 slightly increasing parallelism. Also, if there are few saves 3105 it may eliminate the iterator entirely. 
*/ 3106 else if (disp == 0 3107 && spill_fill_data.init_reg[iter] == stack_pointer_rtx 3108 && frame_pointer_needed) 3109 { 3110 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx); 3111 set_mem_alias_set (mem, get_varargs_alias_set ()); 3112 return mem; 3113 } 3114 else 3115 { 3116 rtx seq; 3117 rtx_insn *insn; 3118 3119 if (disp == 0) 3120 seq = gen_movdi (spill_fill_data.iter_reg[iter], 3121 spill_fill_data.init_reg[iter]); 3122 else 3123 { 3124 start_sequence (); 3125 3126 if (!satisfies_constraint_I (disp_rtx)) 3127 { 3128 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); 3129 emit_move_insn (tmp, disp_rtx); 3130 disp_rtx = tmp; 3131 } 3132 3133 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], 3134 spill_fill_data.init_reg[iter], 3135 disp_rtx)); 3136 3137 seq = get_insns (); 3138 end_sequence (); 3139 } 3140 3141 /* Careful for being the first insn in a sequence. */ 3142 if (spill_fill_data.init_after) 3143 insn = emit_insn_after (seq, spill_fill_data.init_after); 3144 else 3145 { 3146 rtx_insn *first = get_insns (); 3147 if (first) 3148 insn = emit_insn_before (seq, first); 3149 else 3150 insn = emit_insn (seq); 3151 } 3152 spill_fill_data.init_after = insn; 3153 } 3154 3155 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]); 3156 3157 /* ??? Not all of the spills are for varargs, but some of them are. 3158 The rest of the spills belong in an alias set of their own. But 3159 it doesn't actually hurt to include them here. */ 3160 set_mem_alias_set (mem, get_varargs_alias_set ()); 3161 3162 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0); 3163 spill_fill_data.prev_off[iter] = cfa_off; 3164 3165 if (++iter >= spill_fill_data.n_iter) 3166 iter = 0; 3167 spill_fill_data.next_iter = iter; 3168 3169 return mem; 3170 } 3171 3172 static void 3173 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off, 3174 rtx frame_reg) 3175 { 3176 int iter = spill_fill_data.next_iter; 3177 rtx mem; 3178 rtx_insn *insn; 3179 3180 mem = spill_restore_mem (reg, cfa_off); 3181 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off))); 3182 spill_fill_data.prev_insn[iter] = insn; 3183 3184 if (frame_reg) 3185 { 3186 rtx base; 3187 HOST_WIDE_INT off; 3188 3189 RTX_FRAME_RELATED_P (insn) = 1; 3190 3191 /* Don't even pretend that the unwind code can intuit its way 3192 through a pair of interleaved post_modify iterators. Just 3193 provide the correct answer. */ 3194 3195 if (frame_pointer_needed) 3196 { 3197 base = hard_frame_pointer_rtx; 3198 off = - cfa_off; 3199 } 3200 else 3201 { 3202 base = stack_pointer_rtx; 3203 off = current_frame_info.total_size - cfa_off; 3204 } 3205 3206 add_reg_note (insn, REG_CFA_OFFSET, 3207 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg), 3208 plus_constant (Pmode, 3209 base, off)), 3210 frame_reg)); 3211 } 3212 } 3213 3214 static void 3215 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off) 3216 { 3217 int iter = spill_fill_data.next_iter; 3218 rtx_insn *insn; 3219 3220 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off), 3221 GEN_INT (cfa_off))); 3222 spill_fill_data.prev_insn[iter] = insn; 3223 } 3224 3225 /* Wrapper functions that discards the CONST_INT spill offset. These 3226 exist so that we can give gr_spill/gr_fill the offset they need and 3227 use a consistent function interface. 
*/ 3228 3229 static rtx 3230 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) 3231 { 3232 return gen_movdi (dest, src); 3233 } 3234 3235 static rtx 3236 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) 3237 { 3238 return gen_fr_spill (dest, src); 3239 } 3240 3241 static rtx 3242 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) 3243 { 3244 return gen_fr_restore (dest, src); 3245 } 3246 3247 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) 3248 3249 /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */ 3250 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0) 3251 3252 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, 3253 inclusive. These are offsets from the current stack pointer. BS_SIZE 3254 is the size of the backing store. ??? This clobbers r2 and r3. */ 3255 3256 static void 3257 ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, 3258 int bs_size) 3259 { 3260 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2)); 3261 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3)); 3262 rtx p6 = gen_rtx_REG (BImode, PR_REG (6)); 3263 3264 /* On the IA-64 there is a second stack in memory, namely the Backing Store 3265 of the Register Stack Engine. We also need to probe it after checking 3266 that the 2 stacks don't overlap. */ 3267 emit_insn (gen_bsp_value (r3)); 3268 emit_move_insn (r2, GEN_INT (-(first + size))); 3269 3270 /* Compare current value of BSP and SP registers. */ 3271 emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode, 3272 r3, stack_pointer_rtx))); 3273 3274 /* Compute the address of the probe for the Backing Store (which grows 3275 towards higher addresses). We probe only at the first offset of 3276 the next page because some OS (eg Linux/ia64) only extend the 3277 backing store when this specific address is hit (but generate a SEGV 3278 on other address). Page size is the worst case (4KB). The reserve 3279 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough. 3280 Also compute the address of the last probe for the memory stack 3281 (which grows towards lower addresses). */ 3282 emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095))); 3283 emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2))); 3284 3285 /* Compare them and raise SEGV if the former has topped the latter. */ 3286 emit_insn (gen_rtx_COND_EXEC (VOIDmode, 3287 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx), 3288 gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode, 3289 r3, r2)))); 3290 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12), 3291 const0_rtx), 3292 const0_rtx)); 3293 emit_insn (gen_rtx_COND_EXEC (VOIDmode, 3294 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx), 3295 gen_rtx_TRAP_IF (VOIDmode, const1_rtx, 3296 GEN_INT (11)))); 3297 3298 /* Probe the Backing Store if necessary. */ 3299 if (bs_size > 0) 3300 emit_stack_probe (r3); 3301 3302 /* Probe the memory stack if necessary. */ 3303 if (size == 0) 3304 ; 3305 3306 /* See if we have a constant small number of probes to generate. If so, 3307 that's the easy case. */ 3308 else if (size <= PROBE_INTERVAL) 3309 emit_stack_probe (r2); 3310 3311 /* The run-time loop is made up of 9 insns in the generic case while this 3312 compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. 
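   For example, assuming the default 4 KB probe interval, a 12 KB range is
   n = 3 intervals and unrolls to 5 + 2*1 = 7 insns, and even the n = 4
   cutoff case costs 5 + 2*2 = 9 insns, i.e. never more than the run-time
   loop, which is why the unrolled form is used up to 4*PROBE_INTERVAL.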
*/ 3313 else if (size <= 4 * PROBE_INTERVAL) 3314 { 3315 HOST_WIDE_INT i; 3316 3317 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL))); 3318 emit_insn (gen_rtx_SET (r2, 3319 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2))); 3320 emit_stack_probe (r2); 3321 3322 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until 3323 it exceeds SIZE. If only two probes are needed, this will not 3324 generate any code. Then probe at FIRST + SIZE. */ 3325 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) 3326 { 3327 emit_insn (gen_rtx_SET (r2, 3328 plus_constant (Pmode, r2, -PROBE_INTERVAL))); 3329 emit_stack_probe (r2); 3330 } 3331 3332 emit_insn (gen_rtx_SET (r2, 3333 plus_constant (Pmode, r2, 3334 (i - PROBE_INTERVAL) - size))); 3335 emit_stack_probe (r2); 3336 } 3337 3338 /* Otherwise, do the same as above, but in a loop. Note that we must be 3339 extra careful with variables wrapping around because we might be at 3340 the very top (or the very bottom) of the address space and we have 3341 to be able to handle this case properly; in particular, we use an 3342 equality test for the loop condition. */ 3343 else 3344 { 3345 HOST_WIDE_INT rounded_size; 3346 3347 emit_move_insn (r2, GEN_INT (-first)); 3348 3349 3350 /* Step 1: round SIZE to the previous multiple of the interval. */ 3351 3352 rounded_size = size & -PROBE_INTERVAL; 3353 3354 3355 /* Step 2: compute initial and final value of the loop counter. */ 3356 3357 /* TEST_ADDR = SP + FIRST. */ 3358 emit_insn (gen_rtx_SET (r2, 3359 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2))); 3360 3361 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ 3362 if (rounded_size > (1 << 21)) 3363 { 3364 emit_move_insn (r3, GEN_INT (-rounded_size)); 3365 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3))); 3366 } 3367 else 3368 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, 3369 GEN_INT (-rounded_size)))); 3370 3371 3372 /* Step 3: the loop 3373 3374 do 3375 { 3376 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL 3377 probe at TEST_ADDR 3378 } 3379 while (TEST_ADDR != LAST_ADDR) 3380 3381 probes at FIRST + N * PROBE_INTERVAL for values of N from 1 3382 until it is equal to ROUNDED_SIZE. */ 3383 3384 emit_insn (gen_probe_stack_range (r2, r2, r3)); 3385 3386 3387 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time 3388 that SIZE is equal to ROUNDED_SIZE. */ 3389 3390 /* TEMP = SIZE - ROUNDED_SIZE. */ 3391 if (size != rounded_size) 3392 { 3393 emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2, 3394 rounded_size - size))); 3395 emit_stack_probe (r2); 3396 } 3397 } 3398 3399 /* Make sure nothing is scheduled before we are done. */ 3400 emit_insn (gen_blockage ()); 3401 } 3402 3403 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are 3404 absolute addresses. */ 3405 3406 const char * 3407 output_probe_stack_range (rtx reg1, rtx reg2) 3408 { 3409 static int labelno = 0; 3410 char loop_lab[32]; 3411 rtx xops[3]; 3412 3413 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); 3414 3415 /* Loop. */ 3416 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); 3417 3418 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ 3419 xops[0] = reg1; 3420 xops[1] = GEN_INT (-PROBE_INTERVAL); 3421 output_asm_insn ("addl %0 = %1, %0", xops); 3422 fputs ("\t;;\n", asm_out_file); 3423 3424 /* Probe at TEST_ADDR. */ 3425 output_asm_insn ("probe.w.fault %0, 0", xops); 3426 3427 /* Test if TEST_ADDR == LAST_ADDR. 
*/ 3428 xops[1] = reg2; 3429 xops[2] = gen_rtx_REG (BImode, PR_REG (6)); 3430 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops); 3431 3432 /* Branch. */ 3433 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]); 3434 assemble_name_raw (asm_out_file, loop_lab); 3435 fputc ('\n', asm_out_file); 3436 3437 return ""; 3438 } 3439 3440 /* Called after register allocation to add any instructions needed for the 3441 prologue. Using a prologue insn is favored compared to putting all of the 3442 instructions in output_function_prologue(), since it allows the scheduler 3443 to intermix instructions with the saves of the caller saved registers. In 3444 some cases, it might be necessary to emit a barrier instruction as the last 3445 insn to prevent such scheduling. 3446 3447 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1 3448 so that the debug info generation code can handle them properly. 3449 3450 The register save area is laid out like so: 3451 cfa+16 3452 [ varargs spill area ] 3453 [ fr register spill area ] 3454 [ br register spill area ] 3455 [ ar register spill area ] 3456 [ pr register spill area ] 3457 [ gr register spill area ] */ 3458 3459 /* ??? Get inefficient code when the frame size is larger than can fit in an 3460 adds instruction. */ 3461 3462 void 3463 ia64_expand_prologue (void) 3464 { 3465 rtx_insn *insn; 3466 rtx ar_pfs_save_reg, ar_unat_save_reg; 3467 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs; 3468 rtx reg, alt_reg; 3469 3470 ia64_compute_frame_size (get_frame_size ()); 3471 last_scratch_gr_reg = 15; 3472 3473 if (flag_stack_usage_info) 3474 current_function_static_stack_size = current_frame_info.total_size; 3475 3476 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) 3477 { 3478 HOST_WIDE_INT size = current_frame_info.total_size; 3479 int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs 3480 + current_frame_info.n_local_regs); 3481 3482 if (crtl->is_leaf && !cfun->calls_alloca) 3483 { 3484 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) 3485 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 3486 size - STACK_CHECK_PROTECT, 3487 bs_size); 3488 else if (size + bs_size > STACK_CHECK_PROTECT) 3489 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size); 3490 } 3491 else if (size + bs_size > 0) 3492 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size); 3493 } 3494 3495 if (dump_file) 3496 { 3497 fprintf (dump_file, "ia64 frame related registers " 3498 "recorded in current_frame_info.r[]:\n"); 3499 #define PRINTREG(a) if (current_frame_info.r[a]) \ 3500 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a]) 3501 PRINTREG(reg_fp); 3502 PRINTREG(reg_save_b0); 3503 PRINTREG(reg_save_pr); 3504 PRINTREG(reg_save_ar_pfs); 3505 PRINTREG(reg_save_ar_unat); 3506 PRINTREG(reg_save_ar_lc); 3507 PRINTREG(reg_save_gp); 3508 #undef PRINTREG 3509 } 3510 3511 /* If there is no epilogue, then we don't need some prologue insns. 3512 We need to avoid emitting the dead prologue insns, because flow 3513 will complain about them. */ 3514 if (optimize) 3515 { 3516 edge e; 3517 edge_iterator ei; 3518 3519 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) 3520 if ((e->flags & EDGE_FAKE) == 0 3521 && (e->flags & EDGE_FALLTHRU) != 0) 3522 break; 3523 epilogue_p = (e != NULL); 3524 } 3525 else 3526 epilogue_p = 1; 3527 3528 /* Set the local, input, and output register names. We need to do this 3529 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in 3530 half. 
If we use in/loc/out register names, then we get assembler errors 3531 in crtn.S because there is no alloc insn or regstk directive in there. */ 3532 if (! TARGET_REG_NAMES) 3533 { 3534 int inputs = current_frame_info.n_input_regs; 3535 int locals = current_frame_info.n_local_regs; 3536 int outputs = current_frame_info.n_output_regs; 3537 3538 for (i = 0; i < inputs; i++) 3539 reg_names[IN_REG (i)] = ia64_reg_numbers[i]; 3540 for (i = 0; i < locals; i++) 3541 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i]; 3542 for (i = 0; i < outputs; i++) 3543 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i]; 3544 } 3545 3546 /* Set the frame pointer register name. The regnum is logically loc79, 3547 but of course we'll not have allocated that many locals. Rather than 3548 worrying about renumbering the existing rtxs, we adjust the name. */ 3549 /* ??? This code means that we can never use one local register when 3550 there is a frame pointer. loc79 gets wasted in this case, as it is 3551 renamed to a register that will never be used. See also the try_locals 3552 code in find_gr_spill. */ 3553 if (current_frame_info.r[reg_fp]) 3554 { 3555 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; 3556 reg_names[HARD_FRAME_POINTER_REGNUM] 3557 = reg_names[current_frame_info.r[reg_fp]]; 3558 reg_names[current_frame_info.r[reg_fp]] = tmp; 3559 } 3560 3561 /* We don't need an alloc instruction if we've used no outputs or locals. */ 3562 if (current_frame_info.n_local_regs == 0 3563 && current_frame_info.n_output_regs == 0 3564 && current_frame_info.n_input_regs <= crtl->args.info.int_regs 3565 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) 3566 { 3567 /* If there is no alloc, but there are input registers used, then we 3568 need a .regstk directive. */ 3569 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); 3570 ar_pfs_save_reg = NULL_RTX; 3571 } 3572 else 3573 { 3574 current_frame_info.need_regstk = 0; 3575 3576 if (current_frame_info.r[reg_save_ar_pfs]) 3577 { 3578 regno = current_frame_info.r[reg_save_ar_pfs]; 3579 reg_emitted (reg_save_ar_pfs); 3580 } 3581 else 3582 regno = next_scratch_gr_reg (); 3583 ar_pfs_save_reg = gen_rtx_REG (DImode, regno); 3584 3585 insn = emit_insn (gen_alloc (ar_pfs_save_reg, 3586 GEN_INT (current_frame_info.n_input_regs), 3587 GEN_INT (current_frame_info.n_local_regs), 3588 GEN_INT (current_frame_info.n_output_regs), 3589 GEN_INT (current_frame_info.n_rotate_regs))); 3590 if (current_frame_info.r[reg_save_ar_pfs]) 3591 { 3592 RTX_FRAME_RELATED_P (insn) = 1; 3593 add_reg_note (insn, REG_CFA_REGISTER, 3594 gen_rtx_SET (ar_pfs_save_reg, 3595 gen_rtx_REG (DImode, AR_PFS_REGNUM))); 3596 } 3597 } 3598 3599 /* Set up frame pointer, stack pointer, and spill iterators. */ 3600 3601 n_varargs = cfun->machine->n_varargs; 3602 setup_spill_pointers (current_frame_info.n_spilled + n_varargs, 3603 stack_pointer_rtx, 0); 3604 3605 if (frame_pointer_needed) 3606 { 3607 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 3608 RTX_FRAME_RELATED_P (insn) = 1; 3609 3610 /* Force the unwind info to recognize this as defining a new CFA, 3611 rather than some temp register setup. 
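   (With no explicit expression in the note, the CFA adjustment is taken
   from the insn's own set, i.e. the hard frame pointer now holding the
   old stack pointer value.)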
*/ 3612 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX); 3613 } 3614 3615 if (current_frame_info.total_size != 0) 3616 { 3617 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size); 3618 rtx offset; 3619 3620 if (satisfies_constraint_I (frame_size_rtx)) 3621 offset = frame_size_rtx; 3622 else 3623 { 3624 regno = next_scratch_gr_reg (); 3625 offset = gen_rtx_REG (DImode, regno); 3626 emit_move_insn (offset, frame_size_rtx); 3627 } 3628 3629 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, 3630 stack_pointer_rtx, offset)); 3631 3632 if (! frame_pointer_needed) 3633 { 3634 RTX_FRAME_RELATED_P (insn) = 1; 3635 add_reg_note (insn, REG_CFA_ADJUST_CFA, 3636 gen_rtx_SET (stack_pointer_rtx, 3637 gen_rtx_PLUS (DImode, 3638 stack_pointer_rtx, 3639 frame_size_rtx))); 3640 } 3641 3642 /* ??? At this point we must generate a magic insn that appears to 3643 modify the stack pointer, the frame pointer, and all spill 3644 iterators. This would allow the most scheduling freedom. For 3645 now, just hard stop. */ 3646 emit_insn (gen_blockage ()); 3647 } 3648 3649 /* Must copy out ar.unat before doing any integer spills. */ 3650 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 3651 { 3652 if (current_frame_info.r[reg_save_ar_unat]) 3653 { 3654 ar_unat_save_reg 3655 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]); 3656 reg_emitted (reg_save_ar_unat); 3657 } 3658 else 3659 { 3660 alt_regno = next_scratch_gr_reg (); 3661 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 3662 current_frame_info.gr_used_mask |= 1 << alt_regno; 3663 } 3664 3665 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 3666 insn = emit_move_insn (ar_unat_save_reg, reg); 3667 if (current_frame_info.r[reg_save_ar_unat]) 3668 { 3669 RTX_FRAME_RELATED_P (insn) = 1; 3670 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); 3671 } 3672 3673 /* Even if we're not going to generate an epilogue, we still 3674 need to save the register so that EH works. */ 3675 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat]) 3676 emit_insn (gen_prologue_use (ar_unat_save_reg)); 3677 } 3678 else 3679 ar_unat_save_reg = NULL_RTX; 3680 3681 /* Spill all varargs registers. Do this before spilling any GR registers, 3682 since we want the UNAT bits for the GR registers to override the UNAT 3683 bits from varargs, which we don't care about. */ 3684 3685 cfa_off = -16; 3686 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno) 3687 { 3688 reg = gen_rtx_REG (DImode, regno); 3689 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX); 3690 } 3691 3692 /* Locate the bottom of the register save area. */ 3693 cfa_off = (current_frame_info.spill_cfa_off 3694 + current_frame_info.spill_size 3695 + current_frame_info.extra_spill_size); 3696 3697 /* Save the predicate register block either in a register or in memory. */ 3698 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 3699 { 3700 reg = gen_rtx_REG (DImode, PR_REG (0)); 3701 if (current_frame_info.r[reg_save_pr] != 0) 3702 { 3703 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]); 3704 reg_emitted (reg_save_pr); 3705 insn = emit_move_insn (alt_reg, reg); 3706 3707 /* ??? Denote pr spill/fill by a DImode move that modifies all 3708 64 hard registers. */ 3709 RTX_FRAME_RELATED_P (insn) = 1; 3710 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); 3711 3712 /* Even if we're not going to generate an epilogue, we still 3713 need to save the register so that EH works. */ 3714 if (! 
epilogue_p) 3715 emit_insn (gen_prologue_use (alt_reg)); 3716 } 3717 else 3718 { 3719 alt_regno = next_scratch_gr_reg (); 3720 alt_reg = gen_rtx_REG (DImode, alt_regno); 3721 insn = emit_move_insn (alt_reg, reg); 3722 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 3723 cfa_off -= 8; 3724 } 3725 } 3726 3727 /* Handle AR regs in numerical order. All of them get special handling. */ 3728 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM) 3729 && current_frame_info.r[reg_save_ar_unat] == 0) 3730 { 3731 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 3732 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg); 3733 cfa_off -= 8; 3734 } 3735 3736 /* The alloc insn already copied ar.pfs into a general register. The 3737 only thing we have to do now is copy that register to a stack slot 3738 if we'd not allocated a local register for the job. */ 3739 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM) 3740 && current_frame_info.r[reg_save_ar_pfs] == 0) 3741 { 3742 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 3743 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg); 3744 cfa_off -= 8; 3745 } 3746 3747 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 3748 { 3749 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 3750 if (current_frame_info.r[reg_save_ar_lc] != 0) 3751 { 3752 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]); 3753 reg_emitted (reg_save_ar_lc); 3754 insn = emit_move_insn (alt_reg, reg); 3755 RTX_FRAME_RELATED_P (insn) = 1; 3756 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); 3757 3758 /* Even if we're not going to generate an epilogue, we still 3759 need to save the register so that EH works. */ 3760 if (! epilogue_p) 3761 emit_insn (gen_prologue_use (alt_reg)); 3762 } 3763 else 3764 { 3765 alt_regno = next_scratch_gr_reg (); 3766 alt_reg = gen_rtx_REG (DImode, alt_regno); 3767 emit_move_insn (alt_reg, reg); 3768 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 3769 cfa_off -= 8; 3770 } 3771 } 3772 3773 /* Save the return pointer. */ 3774 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 3775 { 3776 reg = gen_rtx_REG (DImode, BR_REG (0)); 3777 if (current_frame_info.r[reg_save_b0] != 0) 3778 { 3779 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); 3780 reg_emitted (reg_save_b0); 3781 insn = emit_move_insn (alt_reg, reg); 3782 RTX_FRAME_RELATED_P (insn) = 1; 3783 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx)); 3784 3785 /* Even if we're not going to generate an epilogue, we still 3786 need to save the register so that EH works. */ 3787 if (! epilogue_p) 3788 emit_insn (gen_prologue_use (alt_reg)); 3789 } 3790 else 3791 { 3792 alt_regno = next_scratch_gr_reg (); 3793 alt_reg = gen_rtx_REG (DImode, alt_regno); 3794 emit_move_insn (alt_reg, reg); 3795 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 3796 cfa_off -= 8; 3797 } 3798 } 3799 3800 if (current_frame_info.r[reg_save_gp]) 3801 { 3802 reg_emitted (reg_save_gp); 3803 insn = emit_move_insn (gen_rtx_REG (DImode, 3804 current_frame_info.r[reg_save_gp]), 3805 pic_offset_table_rtx); 3806 } 3807 3808 /* We should now be at the base of the gr/br/fr spill area. */ 3809 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off 3810 + current_frame_info.spill_size)); 3811 3812 /* Spill all general registers. 
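   Each one is stored with an st8.spill (gen_gr_spill), so its NaT bit lands in
   ar.unat, which was copied out above before any integer spills.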
*/ 3813 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) 3814 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 3815 { 3816 reg = gen_rtx_REG (DImode, regno); 3817 do_spill (gen_gr_spill, reg, cfa_off, reg); 3818 cfa_off -= 8; 3819 } 3820 3821 /* Spill the rest of the BR registers. */ 3822 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) 3823 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 3824 { 3825 alt_regno = next_scratch_gr_reg (); 3826 alt_reg = gen_rtx_REG (DImode, alt_regno); 3827 reg = gen_rtx_REG (DImode, regno); 3828 emit_move_insn (alt_reg, reg); 3829 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 3830 cfa_off -= 8; 3831 } 3832 3833 /* Align the frame and spill all FR registers. */ 3834 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) 3835 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 3836 { 3837 gcc_assert (!(cfa_off & 15)); 3838 reg = gen_rtx_REG (XFmode, regno); 3839 do_spill (gen_fr_spill_x, reg, cfa_off, reg); 3840 cfa_off -= 16; 3841 } 3842 3843 gcc_assert (cfa_off == current_frame_info.spill_cfa_off); 3844 3845 finish_spill_pointers (); 3846 } 3847 3848 /* Output the textual info surrounding the prologue. */ 3849 3850 void 3851 ia64_start_function (FILE *file, const char *fnname, 3852 tree decl ATTRIBUTE_UNUSED) 3853 { 3854 #if TARGET_ABI_OPEN_VMS 3855 vms_start_function (fnname); 3856 #endif 3857 3858 fputs ("\t.proc ", file); 3859 assemble_name (file, fnname); 3860 fputc ('\n', file); 3861 ASM_OUTPUT_LABEL (file, fnname); 3862 } 3863 3864 /* Called after register allocation to add any instructions needed for the 3865 epilogue. Using an epilogue insn is favored compared to putting all of the 3866 instructions in output_function_prologue(), since it allows the scheduler 3867 to intermix instructions with the saves of the caller saved registers. In 3868 some cases, it might be necessary to emit a barrier instruction as the last 3869 insn to prevent such scheduling. */ 3870 3871 void 3872 ia64_expand_epilogue (int sibcall_p) 3873 { 3874 rtx_insn *insn; 3875 rtx reg, alt_reg, ar_unat_save_reg; 3876 int regno, alt_regno, cfa_off; 3877 3878 ia64_compute_frame_size (get_frame_size ()); 3879 3880 /* If there is a frame pointer, then we use it instead of the stack 3881 pointer, so that the stack pointer does not need to be valid when 3882 the epilogue starts. See EXIT_IGNORE_STACK. */ 3883 if (frame_pointer_needed) 3884 setup_spill_pointers (current_frame_info.n_spilled, 3885 hard_frame_pointer_rtx, 0); 3886 else 3887 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx, 3888 current_frame_info.total_size); 3889 3890 if (current_frame_info.total_size != 0) 3891 { 3892 /* ??? At this point we must generate a magic insn that appears to 3893 modify the spill iterators and the frame pointer. This would 3894 allow the most scheduling freedom. For now, just hard stop. */ 3895 emit_insn (gen_blockage ()); 3896 } 3897 3898 /* Locate the bottom of the register save area. */ 3899 cfa_off = (current_frame_info.spill_cfa_off 3900 + current_frame_info.spill_size 3901 + current_frame_info.extra_spill_size); 3902 3903 /* Restore the predicate registers. 
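   All 64 predicate bits were saved as one DImode value, so restoring them is a
   single move into PR_REG (0), either from the register they were kept in or
   from their stack slot.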
*/ 3904 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 3905 { 3906 if (current_frame_info.r[reg_save_pr] != 0) 3907 { 3908 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]); 3909 reg_emitted (reg_save_pr); 3910 } 3911 else 3912 { 3913 alt_regno = next_scratch_gr_reg (); 3914 alt_reg = gen_rtx_REG (DImode, alt_regno); 3915 do_restore (gen_movdi_x, alt_reg, cfa_off); 3916 cfa_off -= 8; 3917 } 3918 reg = gen_rtx_REG (DImode, PR_REG (0)); 3919 emit_move_insn (reg, alt_reg); 3920 } 3921 3922 /* Restore the application registers. */ 3923 3924 /* Load the saved unat from the stack, but do not restore it until 3925 after the GRs have been restored. */ 3926 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 3927 { 3928 if (current_frame_info.r[reg_save_ar_unat] != 0) 3929 { 3930 ar_unat_save_reg 3931 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]); 3932 reg_emitted (reg_save_ar_unat); 3933 } 3934 else 3935 { 3936 alt_regno = next_scratch_gr_reg (); 3937 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 3938 current_frame_info.gr_used_mask |= 1 << alt_regno; 3939 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off); 3940 cfa_off -= 8; 3941 } 3942 } 3943 else 3944 ar_unat_save_reg = NULL_RTX; 3945 3946 if (current_frame_info.r[reg_save_ar_pfs] != 0) 3947 { 3948 reg_emitted (reg_save_ar_pfs); 3949 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]); 3950 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 3951 emit_move_insn (reg, alt_reg); 3952 } 3953 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) 3954 { 3955 alt_regno = next_scratch_gr_reg (); 3956 alt_reg = gen_rtx_REG (DImode, alt_regno); 3957 do_restore (gen_movdi_x, alt_reg, cfa_off); 3958 cfa_off -= 8; 3959 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 3960 emit_move_insn (reg, alt_reg); 3961 } 3962 3963 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 3964 { 3965 if (current_frame_info.r[reg_save_ar_lc] != 0) 3966 { 3967 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]); 3968 reg_emitted (reg_save_ar_lc); 3969 } 3970 else 3971 { 3972 alt_regno = next_scratch_gr_reg (); 3973 alt_reg = gen_rtx_REG (DImode, alt_regno); 3974 do_restore (gen_movdi_x, alt_reg, cfa_off); 3975 cfa_off -= 8; 3976 } 3977 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 3978 emit_move_insn (reg, alt_reg); 3979 } 3980 3981 /* Restore the return pointer. */ 3982 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 3983 { 3984 if (current_frame_info.r[reg_save_b0] != 0) 3985 { 3986 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); 3987 reg_emitted (reg_save_b0); 3988 } 3989 else 3990 { 3991 alt_regno = next_scratch_gr_reg (); 3992 alt_reg = gen_rtx_REG (DImode, alt_regno); 3993 do_restore (gen_movdi_x, alt_reg, cfa_off); 3994 cfa_off -= 8; 3995 } 3996 reg = gen_rtx_REG (DImode, BR_REG (0)); 3997 emit_move_insn (reg, alt_reg); 3998 } 3999 4000 /* We should now be at the base of the gr/br/fr spill area. */ 4001 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off 4002 + current_frame_info.spill_size)); 4003 4004 /* The GP may be stored on the stack in the prologue, but it's 4005 never restored in the epilogue. Skip the stack slot. */ 4006 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1))) 4007 cfa_off -= 8; 4008 4009 /* Restore all general registers. 
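   The loop below starts at r2 because r1 (the GP) is never restored here, and
   each restore is an ld8.fill (gen_gr_restore) so the NaT bits are reloaded
   from ar.unat, which itself is put back only afterwards.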
*/ 4010 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno) 4011 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 4012 { 4013 reg = gen_rtx_REG (DImode, regno); 4014 do_restore (gen_gr_restore, reg, cfa_off); 4015 cfa_off -= 8; 4016 } 4017 4018 /* Restore the branch registers. */ 4019 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) 4020 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 4021 { 4022 alt_regno = next_scratch_gr_reg (); 4023 alt_reg = gen_rtx_REG (DImode, alt_regno); 4024 do_restore (gen_movdi_x, alt_reg, cfa_off); 4025 cfa_off -= 8; 4026 reg = gen_rtx_REG (DImode, regno); 4027 emit_move_insn (reg, alt_reg); 4028 } 4029 4030 /* Restore floating point registers. */ 4031 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) 4032 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 4033 { 4034 gcc_assert (!(cfa_off & 15)); 4035 reg = gen_rtx_REG (XFmode, regno); 4036 do_restore (gen_fr_restore_x, reg, cfa_off); 4037 cfa_off -= 16; 4038 } 4039 4040 /* Restore ar.unat for real. */ 4041 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 4042 { 4043 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 4044 emit_move_insn (reg, ar_unat_save_reg); 4045 } 4046 4047 gcc_assert (cfa_off == current_frame_info.spill_cfa_off); 4048 4049 finish_spill_pointers (); 4050 4051 if (current_frame_info.total_size 4052 || cfun->machine->ia64_eh_epilogue_sp 4053 || frame_pointer_needed) 4054 { 4055 /* ??? At this point we must generate a magic insn that appears to 4056 modify the spill iterators, the stack pointer, and the frame 4057 pointer. This would allow the most scheduling freedom. For now, 4058 just hard stop. */ 4059 emit_insn (gen_blockage ()); 4060 } 4061 4062 if (cfun->machine->ia64_eh_epilogue_sp) 4063 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp); 4064 else if (frame_pointer_needed) 4065 { 4066 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); 4067 RTX_FRAME_RELATED_P (insn) = 1; 4068 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); 4069 } 4070 else if (current_frame_info.total_size) 4071 { 4072 rtx offset, frame_size_rtx; 4073 4074 frame_size_rtx = GEN_INT (current_frame_info.total_size); 4075 if (satisfies_constraint_I (frame_size_rtx)) 4076 offset = frame_size_rtx; 4077 else 4078 { 4079 regno = next_scratch_gr_reg (); 4080 offset = gen_rtx_REG (DImode, regno); 4081 emit_move_insn (offset, frame_size_rtx); 4082 } 4083 4084 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, 4085 offset)); 4086 4087 RTX_FRAME_RELATED_P (insn) = 1; 4088 add_reg_note (insn, REG_CFA_ADJUST_CFA, 4089 gen_rtx_SET (stack_pointer_rtx, 4090 gen_rtx_PLUS (DImode, 4091 stack_pointer_rtx, 4092 frame_size_rtx))); 4093 } 4094 4095 if (cfun->machine->ia64_eh_epilogue_bsp) 4096 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp)); 4097 4098 if (! sibcall_p) 4099 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); 4100 else 4101 { 4102 int fp = GR_REG (2); 4103 /* We need a throw away register here, r0 and r1 are reserved, 4104 so r2 is the first available call clobbered register. If 4105 there was a frame_pointer register, we may have swapped the 4106 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make 4107 sure we're using the string "r2" when emitting the register 4108 name for the assembler. 
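   In that case HARD_FRAME_POINTER_REGNUM is the regno whose reg_names entry
   currently reads "r2", so we use it instead of GR_REG (2).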
*/ 4109 if (current_frame_info.r[reg_fp] 4110 && current_frame_info.r[reg_fp] == GR_REG (2)) 4111 fp = HARD_FRAME_POINTER_REGNUM; 4112 4113 /* We must emit an alloc to force the input registers to become output 4114 registers. Otherwise, if the callee tries to pass its parameters 4115 through to another call without an intervening alloc, then these 4116 values get lost. */ 4117 /* ??? We don't need to preserve all input registers. We only need to 4118 preserve those input registers used as arguments to the sibling call. 4119 It is unclear how to compute that number here. */ 4120 if (current_frame_info.n_input_regs != 0) 4121 { 4122 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs); 4123 4124 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp), 4125 const0_rtx, const0_rtx, 4126 n_inputs, const0_rtx)); 4127 RTX_FRAME_RELATED_P (insn) = 1; 4128 4129 /* ??? We need to mark the alloc as frame-related so that it gets 4130 passed into ia64_asm_unwind_emit for ia64-specific unwinding. 4131 But there's nothing dwarf2 related to be done wrt the register 4132 windows. If we do nothing, dwarf2out will abort on the UNSPEC; 4133 the empty parallel means dwarf2out will not see anything. */ 4134 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 4135 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0))); 4136 } 4137 } 4138 } 4139 4140 /* Return 1 if br.ret can do all the work required to return from a 4141 function. */ 4142 4143 int 4144 ia64_direct_return (void) 4145 { 4146 if (reload_completed && ! frame_pointer_needed) 4147 { 4148 ia64_compute_frame_size (get_frame_size ()); 4149 4150 return (current_frame_info.total_size == 0 4151 && current_frame_info.n_spilled == 0 4152 && current_frame_info.r[reg_save_b0] == 0 4153 && current_frame_info.r[reg_save_pr] == 0 4154 && current_frame_info.r[reg_save_ar_pfs] == 0 4155 && current_frame_info.r[reg_save_ar_unat] == 0 4156 && current_frame_info.r[reg_save_ar_lc] == 0); 4157 } 4158 return 0; 4159 } 4160 4161 /* Return the magic cookie that we use to hold the return address 4162 during early compilation. */ 4163 4164 rtx 4165 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED) 4166 { 4167 if (count != 0) 4168 return NULL; 4169 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR); 4170 } 4171 4172 /* Split this value after reload, now that we know where the return 4173 address is saved. */ 4174 4175 void 4176 ia64_split_return_addr_rtx (rtx dest) 4177 { 4178 rtx src; 4179 4180 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 4181 { 4182 if (current_frame_info.r[reg_save_b0] != 0) 4183 { 4184 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); 4185 reg_emitted (reg_save_b0); 4186 } 4187 else 4188 { 4189 HOST_WIDE_INT off; 4190 unsigned int regno; 4191 rtx off_r; 4192 4193 /* Compute offset from CFA for BR0. */ 4194 /* ??? Must be kept in sync with ia64_expand_prologue. */ 4195 off = (current_frame_info.spill_cfa_off 4196 + current_frame_info.spill_size); 4197 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) 4198 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 4199 off -= 8; 4200 4201 /* Convert CFA offset to a register based offset. */ 4202 if (frame_pointer_needed) 4203 src = hard_frame_pointer_rtx; 4204 else 4205 { 4206 src = stack_pointer_rtx; 4207 off += current_frame_info.total_size; 4208 } 4209 4210 /* Load address into scratch register. 
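   If the offset fits the 14-bit signed immediate of adds (constraint I), a
   single add does the job; otherwise the constant is loaded into DEST first
   and then added to SRC.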
*/ 4211 off_r = GEN_INT (off); 4212 if (satisfies_constraint_I (off_r)) 4213 emit_insn (gen_adddi3 (dest, src, off_r)); 4214 else 4215 { 4216 emit_move_insn (dest, off_r); 4217 emit_insn (gen_adddi3 (dest, src, dest)); 4218 } 4219 4220 src = gen_rtx_MEM (Pmode, dest); 4221 } 4222 } 4223 else 4224 src = gen_rtx_REG (DImode, BR_REG (0)); 4225 4226 emit_move_insn (dest, src); 4227 } 4228 4229 int 4230 ia64_hard_regno_rename_ok (int from, int to) 4231 { 4232 /* Don't clobber any of the registers we reserved for the prologue. */ 4233 unsigned int r; 4234 4235 for (r = reg_fp; r <= reg_save_ar_lc; r++) 4236 if (to == current_frame_info.r[r] 4237 || from == current_frame_info.r[r] 4238 || to == emitted_frame_related_regs[r] 4239 || from == emitted_frame_related_regs[r]) 4240 return 0; 4241 4242 /* Don't use output registers outside the register frame. */ 4243 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs)) 4244 return 0; 4245 4246 /* Retain even/oddness on predicate register pairs. */ 4247 if (PR_REGNO_P (from) && PR_REGNO_P (to)) 4248 return (from & 1) == (to & 1); 4249 4250 return 1; 4251 } 4252 4253 /* Target hook for assembling integer objects. Handle word-sized 4254 aligned objects and detect the cases when @fptr is needed. */ 4255 4256 static bool 4257 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p) 4258 { 4259 if (size == POINTER_SIZE / BITS_PER_UNIT 4260 && !(TARGET_NO_PIC || TARGET_AUTO_PIC) 4261 && GET_CODE (x) == SYMBOL_REF 4262 && SYMBOL_REF_FUNCTION_P (x)) 4263 { 4264 static const char * const directive[2][2] = { 4265 /* 64-bit pointer */ /* 32-bit pointer */ 4266 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */ 4267 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */ 4268 }; 4269 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file); 4270 output_addr_const (asm_out_file, x); 4271 fputs (")\n", asm_out_file); 4272 return true; 4273 } 4274 return default_assemble_integer (x, size, aligned_p); 4275 } 4276 4277 /* Emit the function prologue. */ 4278 4279 static void 4280 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 4281 { 4282 int mask, grsave, grsave_prev; 4283 4284 if (current_frame_info.need_regstk) 4285 fprintf (file, "\t.regstk %d, %d, %d, %d\n", 4286 current_frame_info.n_input_regs, 4287 current_frame_info.n_local_regs, 4288 current_frame_info.n_output_regs, 4289 current_frame_info.n_rotate_regs); 4290 4291 if (ia64_except_unwind_info (&global_options) != UI_TARGET) 4292 return; 4293 4294 /* Emit the .prologue directive. 
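   MASK says which of rp, ar.pfs, the frame pointer and the predicates were
   saved in consecutive general registers, and GRSAVE names the first of those
   registers; this two-operand form is emitted only for GNU as.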
*/ 4295 4296 mask = 0; 4297 grsave = grsave_prev = 0; 4298 if (current_frame_info.r[reg_save_b0] != 0) 4299 { 4300 mask |= 8; 4301 grsave = grsave_prev = current_frame_info.r[reg_save_b0]; 4302 } 4303 if (current_frame_info.r[reg_save_ar_pfs] != 0 4304 && (grsave_prev == 0 4305 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1)) 4306 { 4307 mask |= 4; 4308 if (grsave_prev == 0) 4309 grsave = current_frame_info.r[reg_save_ar_pfs]; 4310 grsave_prev = current_frame_info.r[reg_save_ar_pfs]; 4311 } 4312 if (current_frame_info.r[reg_fp] != 0 4313 && (grsave_prev == 0 4314 || current_frame_info.r[reg_fp] == grsave_prev + 1)) 4315 { 4316 mask |= 2; 4317 if (grsave_prev == 0) 4318 grsave = HARD_FRAME_POINTER_REGNUM; 4319 grsave_prev = current_frame_info.r[reg_fp]; 4320 } 4321 if (current_frame_info.r[reg_save_pr] != 0 4322 && (grsave_prev == 0 4323 || current_frame_info.r[reg_save_pr] == grsave_prev + 1)) 4324 { 4325 mask |= 1; 4326 if (grsave_prev == 0) 4327 grsave = current_frame_info.r[reg_save_pr]; 4328 } 4329 4330 if (mask && TARGET_GNU_AS) 4331 fprintf (file, "\t.prologue %d, %d\n", mask, 4332 ia64_dbx_register_number (grsave)); 4333 else 4334 fputs ("\t.prologue\n", file); 4335 4336 /* Emit a .spill directive, if necessary, to relocate the base of 4337 the register spill area. */ 4338 if (current_frame_info.spill_cfa_off != -16) 4339 fprintf (file, "\t.spill %ld\n", 4340 (long) (current_frame_info.spill_cfa_off 4341 + current_frame_info.spill_size)); 4342 } 4343 4344 /* Emit the .body directive at the scheduled end of the prologue. */ 4345 4346 static void 4347 ia64_output_function_end_prologue (FILE *file) 4348 { 4349 if (ia64_except_unwind_info (&global_options) != UI_TARGET) 4350 return; 4351 4352 fputs ("\t.body\n", file); 4353 } 4354 4355 /* Emit the function epilogue. */ 4356 4357 static void 4358 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, 4359 HOST_WIDE_INT size ATTRIBUTE_UNUSED) 4360 { 4361 int i; 4362 4363 if (current_frame_info.r[reg_fp]) 4364 { 4365 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; 4366 reg_names[HARD_FRAME_POINTER_REGNUM] 4367 = reg_names[current_frame_info.r[reg_fp]]; 4368 reg_names[current_frame_info.r[reg_fp]] = tmp; 4369 reg_emitted (reg_fp); 4370 } 4371 if (! TARGET_REG_NAMES) 4372 { 4373 for (i = 0; i < current_frame_info.n_input_regs; i++) 4374 reg_names[IN_REG (i)] = ia64_input_reg_names[i]; 4375 for (i = 0; i < current_frame_info.n_local_regs; i++) 4376 reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; 4377 for (i = 0; i < current_frame_info.n_output_regs; i++) 4378 reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; 4379 } 4380 4381 current_frame_info.initialized = 0; 4382 } 4383 4384 int 4385 ia64_dbx_register_number (int regno) 4386 { 4387 /* In ia64_expand_prologue we quite literally renamed the frame pointer 4388 from its home at loc79 to something inside the register frame. We 4389 must perform the same renumbering here for the debug info. 
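   The mapping below is symmetric: asked for HARD_FRAME_POINTER_REGNUM we
   return the register that actually holds the frame pointer, and asked for
   that register we return loc79's number.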
*/ 4390 if (current_frame_info.r[reg_fp]) 4391 { 4392 if (regno == HARD_FRAME_POINTER_REGNUM) 4393 regno = current_frame_info.r[reg_fp]; 4394 else if (regno == current_frame_info.r[reg_fp]) 4395 regno = HARD_FRAME_POINTER_REGNUM; 4396 } 4397 4398 if (IN_REGNO_P (regno)) 4399 return 32 + regno - IN_REG (0); 4400 else if (LOC_REGNO_P (regno)) 4401 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0); 4402 else if (OUT_REGNO_P (regno)) 4403 return (32 + current_frame_info.n_input_regs 4404 + current_frame_info.n_local_regs + regno - OUT_REG (0)); 4405 else 4406 return regno; 4407 } 4408 4409 /* Implement TARGET_TRAMPOLINE_INIT. 4410 4411 The trampoline should set the static chain pointer to value placed 4412 into the trampoline and should branch to the specified routine. 4413 To make the normal indirect-subroutine calling convention work, 4414 the trampoline must look like a function descriptor; the first 4415 word being the target address and the second being the target's 4416 global pointer. 4417 4418 We abuse the concept of a global pointer by arranging for it 4419 to point to the data we need to load. The complete trampoline 4420 has the following form: 4421 4422 +-------------------+ \ 4423 TRAMP: | __ia64_trampoline | | 4424 +-------------------+ > fake function descriptor 4425 | TRAMP+16 | | 4426 +-------------------+ / 4427 | target descriptor | 4428 +-------------------+ 4429 | static link | 4430 +-------------------+ 4431 */ 4432 4433 static void 4434 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain) 4435 { 4436 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 4437 rtx addr, addr_reg, tramp, eight = GEN_INT (8); 4438 4439 /* The Intel assembler requires that the global __ia64_trampoline symbol 4440 be declared explicitly */ 4441 if (!TARGET_GNU_AS) 4442 { 4443 static bool declared_ia64_trampoline = false; 4444 4445 if (!declared_ia64_trampoline) 4446 { 4447 declared_ia64_trampoline = true; 4448 (*targetm.asm_out.globalize_label) (asm_out_file, 4449 "__ia64_trampoline"); 4450 } 4451 } 4452 4453 /* Make sure addresses are Pmode even if we are in ILP32 mode. */ 4454 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0)); 4455 fnaddr = convert_memory_address (Pmode, fnaddr); 4456 static_chain = convert_memory_address (Pmode, static_chain); 4457 4458 /* Load up our iterator. */ 4459 addr_reg = copy_to_reg (addr); 4460 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0); 4461 4462 /* The first two words are the fake descriptor: 4463 __ia64_trampoline, ADDR+16. */ 4464 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"); 4465 if (TARGET_ABI_OPEN_VMS) 4466 { 4467 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity 4468 in the Macro-32 compiler) and changed the semantics of the LTOFF22 4469 relocation against function symbols to make it identical to the 4470 LTOFF_FPTR22 relocation. Emit the latter directly to stay within 4471 strict ELF and dereference to get the bare code address. 
*/
4472       rtx reg = gen_reg_rtx (Pmode);
4473       SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4474       emit_move_insn (reg, tramp);
4475       emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4476       tramp = reg;
4477     }
4478   emit_move_insn (m_tramp, tramp);
4479   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4480   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4481
4482   emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4483   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4484   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4485
4486   /* The third word is the target descriptor.  */
4487   emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4488   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4489   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4490
4491   /* The fourth word is the static chain.  */
4492   emit_move_insn (m_tramp, static_chain);
4493 }
4494
4495 /* Do any needed setup for a variadic function.  CUM has not been updated
4496    for the last named argument, which has type TYPE and mode MODE.
4497
4498    We generate the actual spill instructions during prologue generation.  */
4499
4500 static void
4501 ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4502                              tree type, int * pretend_size,
4503                              int second_time ATTRIBUTE_UNUSED)
4504 {
4505   CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4506
4507   /* Skip the current argument.  */
4508   ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4509
4510   if (next_cum.words < MAX_ARGUMENT_SLOTS)
4511     {
4512       int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4513       *pretend_size = n * UNITS_PER_WORD;
4514       cfun->machine->n_varargs = n;
4515     }
4516 }
4517
4518 /* Check whether TYPE is a homogeneous floating point aggregate (HFA).  If
4519    it is, return the mode of the floating point type that appears in all
4520    of its leaves.  If it is not, return VOIDmode.
4521
4522    An aggregate is a homogeneous floating point aggregate if all of its
4523    fields/elements have the same floating point type (e.g.,
4524    SFmode).  128-bit quad-precision floats are excluded.
4525
4526    Variable sized aggregates should never arrive here, since we should
4527    have already decided to pass them by reference.  Top-level zero-sized
4528    aggregates are excluded because our parallels crash the middle-end.  */
4529
4530 static machine_mode
4531 hfa_element_mode (const_tree type, bool nested)
4532 {
4533   machine_mode element_mode = VOIDmode;
4534   machine_mode mode;
4535   enum tree_code code = TREE_CODE (type);
4536   int know_element_mode = 0;
4537   tree t;
4538
4539   if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4540     return VOIDmode;
4541
4542   switch (code)
4543     {
4544     case VOID_TYPE:     case INTEGER_TYPE:      case ENUMERAL_TYPE:
4545     case BOOLEAN_TYPE:  case POINTER_TYPE:
4546     case OFFSET_TYPE:   case REFERENCE_TYPE:    case METHOD_TYPE:
4547     case LANG_TYPE:     case FUNCTION_TYPE:
4548       return VOIDmode;
4549
4550       /* Fortran complex types are supposed to be HFAs, so we need to handle
4551          gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
4552          types though.  */
4553     case COMPLEX_TYPE:
4554       if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4555           && TYPE_MODE (type) != TCmode)
4556         return GET_MODE_INNER (TYPE_MODE (type));
4557       else
4558         return VOIDmode;
4559
4560     case REAL_TYPE:
4561       /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4562          mode if this is contained within an aggregate.
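         TFmode is excluded here so that a quad-precision field disqualifies
         the aggregate, matching the exclusion above.  For example,
         struct { double x, y; } is an HFA with element mode DFmode, while
         struct { double d; long double q; } is not an HFA.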
*/ 4563 if (nested && TYPE_MODE (type) != TFmode) 4564 return TYPE_MODE (type); 4565 else 4566 return VOIDmode; 4567 4568 case ARRAY_TYPE: 4569 return hfa_element_mode (TREE_TYPE (type), 1); 4570 4571 case RECORD_TYPE: 4572 case UNION_TYPE: 4573 case QUAL_UNION_TYPE: 4574 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t)) 4575 { 4576 if (TREE_CODE (t) != FIELD_DECL) 4577 continue; 4578 4579 mode = hfa_element_mode (TREE_TYPE (t), 1); 4580 if (know_element_mode) 4581 { 4582 if (mode != element_mode) 4583 return VOIDmode; 4584 } 4585 else if (GET_MODE_CLASS (mode) != MODE_FLOAT) 4586 return VOIDmode; 4587 else 4588 { 4589 know_element_mode = 1; 4590 element_mode = mode; 4591 } 4592 } 4593 return element_mode; 4594 4595 default: 4596 /* If we reach here, we probably have some front-end specific type 4597 that the backend doesn't know about. This can happen via the 4598 aggregate_value_p call in init_function_start. All we can do is 4599 ignore unknown tree types. */ 4600 return VOIDmode; 4601 } 4602 4603 return VOIDmode; 4604 } 4605 4606 /* Return the number of words required to hold a quantity of TYPE and MODE 4607 when passed as an argument. */ 4608 static int 4609 ia64_function_arg_words (const_tree type, machine_mode mode) 4610 { 4611 int words; 4612 4613 if (mode == BLKmode) 4614 words = int_size_in_bytes (type); 4615 else 4616 words = GET_MODE_SIZE (mode); 4617 4618 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */ 4619 } 4620 4621 /* Return the number of registers that should be skipped so the current 4622 argument (described by TYPE and WORDS) will be properly aligned. 4623 4624 Integer and float arguments larger than 8 bytes start at the next 4625 even boundary. Aggregates larger than 8 bytes start at the next 4626 even boundary if the aggregate has 16 byte alignment. Note that 4627 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment 4628 but are still to be aligned in registers. 4629 4630 ??? The ABI does not specify how to handle aggregates with 4631 alignment from 9 to 15 bytes, or greater than 16. We handle them 4632 all as if they had 16 byte alignment. Such aggregates can occur 4633 only if gcc extensions are used. */ 4634 static int 4635 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum, 4636 const_tree type, int words) 4637 { 4638 /* No registers are skipped on VMS. */ 4639 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0) 4640 return 0; 4641 4642 if (type 4643 && TREE_CODE (type) != INTEGER_TYPE 4644 && TREE_CODE (type) != REAL_TYPE) 4645 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT; 4646 else 4647 return words > 1; 4648 } 4649 4650 /* Return rtx for register where argument is passed, or zero if it is passed 4651 on the stack. */ 4652 /* ??? 128-bit quad-precision floats are always passed in general 4653 registers. */ 4654 4655 static rtx 4656 ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode, 4657 const_tree type, bool named, bool incoming) 4658 { 4659 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 4660 4661 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); 4662 int words = ia64_function_arg_words (type, mode); 4663 int offset = ia64_function_arg_offset (cum, type, words); 4664 machine_mode hfa_mode = VOIDmode; 4665 4666 /* For OPEN VMS, emit the instruction setting up the argument register here, 4667 when we know this will be together with the other arguments setup related 4668 insns. 
This is not the conceptually best place to do this, but this is 4669 the easiest as we have convenient access to cumulative args info. */ 4670 4671 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node 4672 && named == 1) 4673 { 4674 unsigned HOST_WIDE_INT regval = cum->words; 4675 int i; 4676 4677 for (i = 0; i < 8; i++) 4678 regval |= ((int) cum->atypes[i]) << (i * 3 + 8); 4679 4680 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)), 4681 GEN_INT (regval)); 4682 } 4683 4684 /* If all argument slots are used, then it must go on the stack. */ 4685 if (cum->words + offset >= MAX_ARGUMENT_SLOTS) 4686 return 0; 4687 4688 /* On OpenVMS argument is either in Rn or Fn. */ 4689 if (TARGET_ABI_OPEN_VMS) 4690 { 4691 if (FLOAT_MODE_P (mode)) 4692 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words); 4693 else 4694 return gen_rtx_REG (mode, basereg + cum->words); 4695 } 4696 4697 /* Check for and handle homogeneous FP aggregates. */ 4698 if (type) 4699 hfa_mode = hfa_element_mode (type, 0); 4700 4701 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas 4702 and unprototyped hfas are passed specially. */ 4703 if (hfa_mode != VOIDmode && (! cum->prototype || named)) 4704 { 4705 rtx loc[16]; 4706 int i = 0; 4707 int fp_regs = cum->fp_regs; 4708 int int_regs = cum->words + offset; 4709 int hfa_size = GET_MODE_SIZE (hfa_mode); 4710 int byte_size; 4711 int args_byte_size; 4712 4713 /* If prototyped, pass it in FR regs then GR regs. 4714 If not prototyped, pass it in both FR and GR regs. 4715 4716 If this is an SFmode aggregate, then it is possible to run out of 4717 FR regs while GR regs are still left. In that case, we pass the 4718 remaining part in the GR regs. */ 4719 4720 /* Fill the FP regs. We do this always. We stop if we reach the end 4721 of the argument, the last FP register, or the last argument slot. */ 4722 4723 byte_size = ((mode == BLKmode) 4724 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 4725 args_byte_size = int_regs * UNITS_PER_WORD; 4726 offset = 0; 4727 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS 4728 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) 4729 { 4730 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 4731 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST 4732 + fp_regs)), 4733 GEN_INT (offset)); 4734 offset += hfa_size; 4735 args_byte_size += hfa_size; 4736 fp_regs++; 4737 } 4738 4739 /* If no prototype, then the whole thing must go in GR regs. */ 4740 if (! cum->prototype) 4741 offset = 0; 4742 /* If this is an SFmode aggregate, then we might have some left over 4743 that needs to go in GR regs. */ 4744 else if (byte_size != offset) 4745 int_regs += offset / UNITS_PER_WORD; 4746 4747 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */ 4748 4749 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) 4750 { 4751 machine_mode gr_mode = DImode; 4752 unsigned int gr_size; 4753 4754 /* If we have an odd 4 byte hunk because we ran out of FR regs, 4755 then this goes in a GR reg left adjusted/little endian, right 4756 adjusted/big endian. */ 4757 /* ??? Currently this is handled wrong, because 4-byte hunks are 4758 always right adjusted/little endian. */ 4759 if (offset & 0x4) 4760 gr_mode = SImode; 4761 /* If we have an even 4 byte hunk because the aggregate is a 4762 multiple of 4 bytes in size, then this goes in a GR reg right 4763 adjusted/little endian. 
*/
4764           else if (byte_size - offset == 4)
4765             gr_mode = SImode;
4766
4767           loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4768                                       gen_rtx_REG (gr_mode, (basereg
4769                                                              + int_regs)),
4770                                       GEN_INT (offset));
4771
4772           gr_size = GET_MODE_SIZE (gr_mode);
4773           offset += gr_size;
4774           if (gr_size == UNITS_PER_WORD
4775               || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4776             int_regs++;
4777           else if (gr_size > UNITS_PER_WORD)
4778             int_regs += gr_size / UNITS_PER_WORD;
4779         }
4780       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4781     }
4782
4783   /* Integral values and aggregates go in general registers.  If we have
4784      run out of FR registers, then FP values must also go in general
4785      registers.  This can happen when we have an SFmode HFA.  */
4786   else if (mode == TFmode || mode == TCmode
4787            || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4788     {
4789       int byte_size = ((mode == BLKmode)
4790                        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4791       if (BYTES_BIG_ENDIAN
4792           && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4793           && byte_size < UNITS_PER_WORD
4794           && byte_size > 0)
4795         {
4796           rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4797                                           gen_rtx_REG (DImode,
4798                                                        (basereg + cum->words
4799                                                         + offset)),
4800                                           const0_rtx);
4801           return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4802         }
4803       else
4804         return gen_rtx_REG (mode, basereg + cum->words + offset);
4805
4806     }
4807
4808   /* If there is a prototype, then FP values go in a FR register when
4809      named, and in a GR register when unnamed.  */
4810   else if (cum->prototype)
4811     {
4812       if (named)
4813         return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4814       /* In big-endian mode, an anonymous SFmode value must be represented
4815          as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4816          the value into the high half of the general register.  */
4817       else if (BYTES_BIG_ENDIAN && mode == SFmode)
4818         return gen_rtx_PARALLEL (mode,
4819                  gen_rtvec (1,
4820                    gen_rtx_EXPR_LIST (VOIDmode,
4821                      gen_rtx_REG (DImode, basereg + cum->words + offset),
4822                      const0_rtx)));
4823       else
4824         return gen_rtx_REG (mode, basereg + cum->words + offset);
4825     }
4826   /* If there is no prototype, then FP values go in both FR and GR
4827      registers.  */
4828   else
4829     {
4830       /* See comment above.  */
4831       machine_mode inner_mode =
4832         (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4833
4834       rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4835                                       gen_rtx_REG (mode, (FR_ARG_FIRST
4836                                                           + cum->fp_regs)),
4837                                       const0_rtx);
4838       rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4839                                       gen_rtx_REG (inner_mode,
4840                                                    (basereg + cum->words
4841                                                     + offset)),
4842                                       const0_rtx);
4843
4844       return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4845     }
4846 }
4847
4848 /* Implement TARGET_FUNCTION_ARG target hook.  */
4849
4850 static rtx
4851 ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4852                    const_tree type, bool named)
4853 {
4854   return ia64_function_arg_1 (cum, mode, type, named, false);
4855 }
4856
4857 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */
4858
4859 static rtx
4860 ia64_function_incoming_arg (cumulative_args_t cum,
4861                             machine_mode mode,
4862                             const_tree type, bool named)
4863 {
4864   return ia64_function_arg_1 (cum, mode, type, named, true);
4865 }
4866
4867 /* Return the number of bytes, at the beginning of the argument, that must be
4868    put in registers.  0 if the argument is entirely in registers or entirely
4869    in memory.
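   For example, with UNITS_PER_WORD == 8 and six of the eight argument slots
   already in use, a four-word aggregate yields 16: its first two words occupy
   the last two slots and the remaining two words go on the stack.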
*/ 4870 4871 static int 4872 ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode, 4873 tree type, bool named ATTRIBUTE_UNUSED) 4874 { 4875 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 4876 4877 int words = ia64_function_arg_words (type, mode); 4878 int offset = ia64_function_arg_offset (cum, type, words); 4879 4880 /* If all argument slots are used, then it must go on the stack. */ 4881 if (cum->words + offset >= MAX_ARGUMENT_SLOTS) 4882 return 0; 4883 4884 /* It doesn't matter whether the argument goes in FR or GR regs. If 4885 it fits within the 8 argument slots, then it goes entirely in 4886 registers. If it extends past the last argument slot, then the rest 4887 goes on the stack. */ 4888 4889 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) 4890 return 0; 4891 4892 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD; 4893 } 4894 4895 /* Return ivms_arg_type based on machine_mode. */ 4896 4897 static enum ivms_arg_type 4898 ia64_arg_type (machine_mode mode) 4899 { 4900 switch (mode) 4901 { 4902 case SFmode: 4903 return FS; 4904 case DFmode: 4905 return FT; 4906 default: 4907 return I64; 4908 } 4909 } 4910 4911 /* Update CUM to point after this argument. This is patterned after 4912 ia64_function_arg. */ 4913 4914 static void 4915 ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, 4916 const_tree type, bool named) 4917 { 4918 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 4919 int words = ia64_function_arg_words (type, mode); 4920 int offset = ia64_function_arg_offset (cum, type, words); 4921 machine_mode hfa_mode = VOIDmode; 4922 4923 /* If all arg slots are already full, then there is nothing to do. */ 4924 if (cum->words >= MAX_ARGUMENT_SLOTS) 4925 { 4926 cum->words += words + offset; 4927 return; 4928 } 4929 4930 cum->atypes[cum->words] = ia64_arg_type (mode); 4931 cum->words += words + offset; 4932 4933 /* On OpenVMS argument is either in Rn or Fn. */ 4934 if (TARGET_ABI_OPEN_VMS) 4935 { 4936 cum->int_regs = cum->words; 4937 cum->fp_regs = cum->words; 4938 return; 4939 } 4940 4941 /* Check for and handle homogeneous FP aggregates. */ 4942 if (type) 4943 hfa_mode = hfa_element_mode (type, 0); 4944 4945 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas 4946 and unprototyped hfas are passed specially. */ 4947 if (hfa_mode != VOIDmode && (! cum->prototype || named)) 4948 { 4949 int fp_regs = cum->fp_regs; 4950 /* This is the original value of cum->words + offset. */ 4951 int int_regs = cum->words - words; 4952 int hfa_size = GET_MODE_SIZE (hfa_mode); 4953 int byte_size; 4954 int args_byte_size; 4955 4956 /* If prototyped, pass it in FR regs then GR regs. 4957 If not prototyped, pass it in both FR and GR regs. 4958 4959 If this is an SFmode aggregate, then it is possible to run out of 4960 FR regs while GR regs are still left. In that case, we pass the 4961 remaining part in the GR regs. */ 4962 4963 /* Fill the FP regs. We do this always. We stop if we reach the end 4964 of the argument, the last FP register, or the last argument slot. */ 4965 4966 byte_size = ((mode == BLKmode) 4967 ? 
int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 4968 args_byte_size = int_regs * UNITS_PER_WORD; 4969 offset = 0; 4970 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS 4971 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) 4972 { 4973 offset += hfa_size; 4974 args_byte_size += hfa_size; 4975 fp_regs++; 4976 } 4977 4978 cum->fp_regs = fp_regs; 4979 } 4980 4981 /* Integral and aggregates go in general registers. So do TFmode FP values. 4982 If we have run out of FR registers, then other FP values must also go in 4983 general registers. This can happen when we have a SFmode HFA. */ 4984 else if (mode == TFmode || mode == TCmode 4985 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)) 4986 cum->int_regs = cum->words; 4987 4988 /* If there is a prototype, then FP values go in a FR register when 4989 named, and in a GR register when unnamed. */ 4990 else if (cum->prototype) 4991 { 4992 if (! named) 4993 cum->int_regs = cum->words; 4994 else 4995 /* ??? Complex types should not reach here. */ 4996 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); 4997 } 4998 /* If there is no prototype, then FP values go in both FR and GR 4999 registers. */ 5000 else 5001 { 5002 /* ??? Complex types should not reach here. */ 5003 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); 5004 cum->int_regs = cum->words; 5005 } 5006 } 5007 5008 /* Arguments with alignment larger than 8 bytes start at the next even 5009 boundary. On ILP32 HPUX, TFmode arguments start on next even boundary 5010 even though their normal alignment is 8 bytes. See ia64_function_arg. */ 5011 5012 static unsigned int 5013 ia64_function_arg_boundary (machine_mode mode, const_tree type) 5014 { 5015 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32) 5016 return PARM_BOUNDARY * 2; 5017 5018 if (type) 5019 { 5020 if (TYPE_ALIGN (type) > PARM_BOUNDARY) 5021 return PARM_BOUNDARY * 2; 5022 else 5023 return PARM_BOUNDARY; 5024 } 5025 5026 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY) 5027 return PARM_BOUNDARY * 2; 5028 else 5029 return PARM_BOUNDARY; 5030 } 5031 5032 /* True if it is OK to do sibling call optimization for the specified 5033 call expression EXP. DECL will be the called function, or NULL if 5034 this is an indirect call. */ 5035 static bool 5036 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 5037 { 5038 /* We can't perform a sibcall if the current function has the syscall_linkage 5039 attribute. */ 5040 if (lookup_attribute ("syscall_linkage", 5041 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) 5042 return false; 5043 5044 /* We must always return with our current GP. This means we can 5045 only sibcall to functions defined in the current module unless 5046 TARGET_CONST_GP is set to true. */ 5047 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP; 5048 } 5049 5050 5051 /* Implement va_arg. */ 5052 5053 static tree 5054 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, 5055 gimple_seq *post_p) 5056 { 5057 /* Variable sized types are passed by reference. */ 5058 if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) 5059 { 5060 tree ptrtype = build_pointer_type (type); 5061 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p); 5062 return build_va_arg_indirect_ref (addr); 5063 } 5064 5065 /* Aggregate arguments with alignment larger than 8 bytes start at 5066 the next even boundary. 
Integer and floating point arguments 5067 do so if they are larger than 8 bytes, whether or not they are 5068 also aligned larger than 8 bytes. */ 5069 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE) 5070 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 5071 { 5072 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1); 5073 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, 5074 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD)); 5075 gimplify_assign (unshare_expr (valist), t, pre_p); 5076 } 5077 5078 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 5079 } 5080 5081 /* Return 1 if function return value returned in memory. Return 0 if it is 5082 in a register. */ 5083 5084 static bool 5085 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED) 5086 { 5087 machine_mode mode; 5088 machine_mode hfa_mode; 5089 HOST_WIDE_INT byte_size; 5090 5091 mode = TYPE_MODE (valtype); 5092 byte_size = GET_MODE_SIZE (mode); 5093 if (mode == BLKmode) 5094 { 5095 byte_size = int_size_in_bytes (valtype); 5096 if (byte_size < 0) 5097 return true; 5098 } 5099 5100 /* Hfa's with up to 8 elements are returned in the FP argument registers. */ 5101 5102 hfa_mode = hfa_element_mode (valtype, 0); 5103 if (hfa_mode != VOIDmode) 5104 { 5105 int hfa_size = GET_MODE_SIZE (hfa_mode); 5106 5107 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) 5108 return true; 5109 else 5110 return false; 5111 } 5112 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) 5113 return true; 5114 else 5115 return false; 5116 } 5117 5118 /* Return rtx for register that holds the function return value. */ 5119 5120 static rtx 5121 ia64_function_value (const_tree valtype, 5122 const_tree fn_decl_or_type, 5123 bool outgoing ATTRIBUTE_UNUSED) 5124 { 5125 machine_mode mode; 5126 machine_mode hfa_mode; 5127 int unsignedp; 5128 const_tree func = fn_decl_or_type; 5129 5130 if (fn_decl_or_type 5131 && !DECL_P (fn_decl_or_type)) 5132 func = NULL; 5133 5134 mode = TYPE_MODE (valtype); 5135 hfa_mode = hfa_element_mode (valtype, 0); 5136 5137 if (hfa_mode != VOIDmode) 5138 { 5139 rtx loc[8]; 5140 int i; 5141 int hfa_size; 5142 int byte_size; 5143 int offset; 5144 5145 hfa_size = GET_MODE_SIZE (hfa_mode); 5146 byte_size = ((mode == BLKmode) 5147 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); 5148 offset = 0; 5149 for (i = 0; offset < byte_size; i++) 5150 { 5151 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 5152 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), 5153 GEN_INT (offset)); 5154 offset += hfa_size; 5155 } 5156 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 5157 } 5158 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode) 5159 return gen_rtx_REG (mode, FR_ARG_FIRST); 5160 else 5161 { 5162 bool need_parallel = false; 5163 5164 /* In big-endian mode, we need to manage the layout of aggregates 5165 in the registers so that we get the bits properly aligned in 5166 the highpart of the registers. */ 5167 if (BYTES_BIG_ENDIAN 5168 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype)))) 5169 need_parallel = true; 5170 5171 /* Something like struct S { long double x; char a[0] } is not an 5172 HFA structure, and therefore doesn't go in fp registers. But 5173 the middle-end will give it XFmode anyway, and XFmode values 5174 don't normally fit in integer registers. So we need to smuggle 5175 the value inside a parallel. 
*/ 5176 else if (mode == XFmode || mode == XCmode || mode == RFmode) 5177 need_parallel = true; 5178 5179 if (need_parallel) 5180 { 5181 rtx loc[8]; 5182 int offset; 5183 int bytesize; 5184 int i; 5185 5186 offset = 0; 5187 bytesize = int_size_in_bytes (valtype); 5188 /* An empty PARALLEL is invalid here, but the return value 5189 doesn't matter for empty structs. */ 5190 if (bytesize == 0) 5191 return gen_rtx_REG (mode, GR_RET_FIRST); 5192 for (i = 0; offset < bytesize; i++) 5193 { 5194 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 5195 gen_rtx_REG (DImode, 5196 GR_RET_FIRST + i), 5197 GEN_INT (offset)); 5198 offset += UNITS_PER_WORD; 5199 } 5200 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 5201 } 5202 5203 mode = promote_function_mode (valtype, mode, &unsignedp, 5204 func ? TREE_TYPE (func) : NULL_TREE, 5205 true); 5206 5207 return gen_rtx_REG (mode, GR_RET_FIRST); 5208 } 5209 } 5210 5211 /* Worker function for TARGET_LIBCALL_VALUE. */ 5212 5213 static rtx 5214 ia64_libcall_value (machine_mode mode, 5215 const_rtx fun ATTRIBUTE_UNUSED) 5216 { 5217 return gen_rtx_REG (mode, 5218 (((GET_MODE_CLASS (mode) == MODE_FLOAT 5219 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 5220 && (mode) != TFmode) 5221 ? FR_RET_FIRST : GR_RET_FIRST)); 5222 } 5223 5224 /* Worker function for FUNCTION_VALUE_REGNO_P. */ 5225 5226 static bool 5227 ia64_function_value_regno_p (const unsigned int regno) 5228 { 5229 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST) 5230 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST)); 5231 } 5232 5233 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 5234 We need to emit DTP-relative relocations. */ 5235 5236 static void 5237 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x) 5238 { 5239 gcc_assert (size == 4 || size == 8); 5240 if (size == 4) 5241 fputs ("\tdata4.ua\t@dtprel(", file); 5242 else 5243 fputs ("\tdata8.ua\t@dtprel(", file); 5244 output_addr_const (file, x); 5245 fputs (")", file); 5246 } 5247 5248 /* Print a memory address as an operand to reference that memory location. */ 5249 5250 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps 5251 also call this from ia64_print_operand for memory addresses. */ 5252 5253 static void 5254 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED, 5255 machine_mode /*mode*/, 5256 rtx address ATTRIBUTE_UNUSED) 5257 { 5258 } 5259 5260 /* Print an operand to an assembler instruction. 5261 C Swap and print a comparison operator. 5262 D Print an FP comparison operator. 5263 E Print 32 - constant, for SImode shifts as extract. 5264 e Print 64 - constant, for DImode rotates. 5265 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or 5266 a floating point register emitted normally. 5267 G A floating point constant. 5268 I Invert a predicate register by adding 1. 5269 J Select the proper predicate register for a condition. 5270 j Select the inverse predicate register for a condition. 5271 O Append .acq for volatile load. 5272 P Postincrement of a MEM. 5273 Q Append .rel for volatile store. 5274 R Print .s .d or nothing for a single, double or no truncation. 5275 S Shift amount for shladd instruction. 5276 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number 5277 for Intel assembler. 5278 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number 5279 for Intel assembler. 5280 X A pair of floating point registers. 5281 r Print register name, or constant 0 as r0. HP compatibility for 5282 Linux kernel. 
5283 v Print vector constant value as an 8-byte integer value. */ 5284 5285 static void 5286 ia64_print_operand (FILE * file, rtx x, int code) 5287 { 5288 const char *str; 5289 5290 switch (code) 5291 { 5292 case 0: 5293 /* Handled below. */ 5294 break; 5295 5296 case 'C': 5297 { 5298 enum rtx_code c = swap_condition (GET_CODE (x)); 5299 fputs (GET_RTX_NAME (c), file); 5300 return; 5301 } 5302 5303 case 'D': 5304 switch (GET_CODE (x)) 5305 { 5306 case NE: 5307 str = "neq"; 5308 break; 5309 case UNORDERED: 5310 str = "unord"; 5311 break; 5312 case ORDERED: 5313 str = "ord"; 5314 break; 5315 case UNLT: 5316 str = "nge"; 5317 break; 5318 case UNLE: 5319 str = "ngt"; 5320 break; 5321 case UNGT: 5322 str = "nle"; 5323 break; 5324 case UNGE: 5325 str = "nlt"; 5326 break; 5327 case UNEQ: 5328 case LTGT: 5329 gcc_unreachable (); 5330 default: 5331 str = GET_RTX_NAME (GET_CODE (x)); 5332 break; 5333 } 5334 fputs (str, file); 5335 return; 5336 5337 case 'E': 5338 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); 5339 return; 5340 5341 case 'e': 5342 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x)); 5343 return; 5344 5345 case 'F': 5346 if (x == CONST0_RTX (GET_MODE (x))) 5347 str = reg_names [FR_REG (0)]; 5348 else if (x == CONST1_RTX (GET_MODE (x))) 5349 str = reg_names [FR_REG (1)]; 5350 else 5351 { 5352 gcc_assert (GET_CODE (x) == REG); 5353 str = reg_names [REGNO (x)]; 5354 } 5355 fputs (str, file); 5356 return; 5357 5358 case 'G': 5359 { 5360 long val[4]; 5361 real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x)); 5362 if (GET_MODE (x) == SFmode) 5363 fprintf (file, "0x%08lx", val[0] & 0xffffffff); 5364 else if (GET_MODE (x) == DFmode) 5365 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1]) 5366 & 0xffffffff, 5367 (WORDS_BIG_ENDIAN ? val[1] : val[0]) 5368 & 0xffffffff); 5369 else 5370 output_operand_lossage ("invalid %%G mode"); 5371 } 5372 return; 5373 5374 case 'I': 5375 fputs (reg_names [REGNO (x) + 1], file); 5376 return; 5377 5378 case 'J': 5379 case 'j': 5380 { 5381 unsigned int regno = REGNO (XEXP (x, 0)); 5382 if (GET_CODE (x) == EQ) 5383 regno += 1; 5384 if (code == 'j') 5385 regno ^= 1; 5386 fputs (reg_names [regno], file); 5387 } 5388 return; 5389 5390 case 'O': 5391 if (MEM_VOLATILE_P (x)) 5392 fputs(".acq", file); 5393 return; 5394 5395 case 'P': 5396 { 5397 HOST_WIDE_INT value; 5398 5399 switch (GET_CODE (XEXP (x, 0))) 5400 { 5401 default: 5402 return; 5403 5404 case POST_MODIFY: 5405 x = XEXP (XEXP (XEXP (x, 0), 1), 1); 5406 if (GET_CODE (x) == CONST_INT) 5407 value = INTVAL (x); 5408 else 5409 { 5410 gcc_assert (GET_CODE (x) == REG); 5411 fprintf (file, ", %s", reg_names[REGNO (x)]); 5412 return; 5413 } 5414 break; 5415 5416 case POST_INC: 5417 value = GET_MODE_SIZE (GET_MODE (x)); 5418 break; 5419 5420 case POST_DEC: 5421 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x)); 5422 break; 5423 } 5424 5425 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value); 5426 return; 5427 } 5428 5429 case 'Q': 5430 if (MEM_VOLATILE_P (x)) 5431 fputs(".rel", file); 5432 return; 5433 5434 case 'R': 5435 if (x == CONST0_RTX (GET_MODE (x))) 5436 fputs(".s", file); 5437 else if (x == CONST1_RTX (GET_MODE (x))) 5438 fputs(".d", file); 5439 else if (x == CONST2_RTX (GET_MODE (x))) 5440 ; 5441 else 5442 output_operand_lossage ("invalid %%R value"); 5443 return; 5444 5445 case 'S': 5446 fprintf (file, "%d", exact_log2 (INTVAL (x))); 5447 return; 5448 5449 case 'T': 5450 if (! 
TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 5451 { 5452 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff); 5453 return; 5454 } 5455 break; 5456 5457 case 'U': 5458 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 5459 { 5460 const char *prefix = "0x"; 5461 if (INTVAL (x) & 0x80000000) 5462 { 5463 fprintf (file, "0xffffffff"); 5464 prefix = ""; 5465 } 5466 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff); 5467 return; 5468 } 5469 break; 5470 5471 case 'X': 5472 { 5473 unsigned int regno = REGNO (x); 5474 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]); 5475 } 5476 return; 5477 5478 case 'r': 5479 /* If this operand is the constant zero, write it as register zero. 5480 Any register, zero, or CONST_INT value is OK here. */ 5481 if (GET_CODE (x) == REG) 5482 fputs (reg_names[REGNO (x)], file); 5483 else if (x == CONST0_RTX (GET_MODE (x))) 5484 fputs ("r0", file); 5485 else if (GET_CODE (x) == CONST_INT) 5486 output_addr_const (file, x); 5487 else 5488 output_operand_lossage ("invalid %%r value"); 5489 return; 5490 5491 case 'v': 5492 gcc_assert (GET_CODE (x) == CONST_VECTOR); 5493 x = simplify_subreg (DImode, x, GET_MODE (x), 0); 5494 break; 5495 5496 case '+': 5497 { 5498 const char *which; 5499 5500 /* For conditional branches, returns or calls, substitute 5501 sptk, dptk, dpnt, or spnt for %s. */ 5502 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 5503 if (x) 5504 { 5505 int pred_val = XINT (x, 0); 5506 5507 /* Guess top and bottom 10% statically predicted. */ 5508 if (pred_val < REG_BR_PROB_BASE / 50 5509 && br_prob_note_reliable_p (x)) 5510 which = ".spnt"; 5511 else if (pred_val < REG_BR_PROB_BASE / 2) 5512 which = ".dpnt"; 5513 else if (pred_val < REG_BR_PROB_BASE / 100 * 98 5514 || !br_prob_note_reliable_p (x)) 5515 which = ".dptk"; 5516 else 5517 which = ".sptk"; 5518 } 5519 else if (CALL_P (current_output_insn)) 5520 which = ".sptk"; 5521 else 5522 which = ".dptk"; 5523 5524 fputs (which, file); 5525 return; 5526 } 5527 5528 case ',': 5529 x = current_insn_predicate; 5530 if (x) 5531 { 5532 unsigned int regno = REGNO (XEXP (x, 0)); 5533 if (GET_CODE (x) == EQ) 5534 regno += 1; 5535 fprintf (file, "(%s) ", reg_names [regno]); 5536 } 5537 return; 5538 5539 default: 5540 output_operand_lossage ("ia64_print_operand: unknown code"); 5541 return; 5542 } 5543 5544 switch (GET_CODE (x)) 5545 { 5546 /* This happens for the spill/restore instructions. */ 5547 case POST_INC: 5548 case POST_DEC: 5549 case POST_MODIFY: 5550 x = XEXP (x, 0); 5551 /* fall through */ 5552 5553 case REG: 5554 fputs (reg_names [REGNO (x)], file); 5555 break; 5556 5557 case MEM: 5558 { 5559 rtx addr = XEXP (x, 0); 5560 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC) 5561 addr = XEXP (addr, 0); 5562 fprintf (file, "[%s]", reg_names [REGNO (addr)]); 5563 break; 5564 } 5565 5566 default: 5567 output_addr_const (file, x); 5568 break; 5569 } 5570 5571 return; 5572 } 5573 5574 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ 5575 5576 static bool 5577 ia64_print_operand_punct_valid_p (unsigned char code) 5578 { 5579 return (code == '+' || code == ','); 5580 } 5581 5582 /* Compute a (partial) cost for rtx X. Return true if the complete 5583 cost has been computed, and false if subexpressions should be 5584 scanned. In either case, *TOTAL contains the cost result. */ 5585 /* ??? This is incomplete. 
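A rough reading of the cases handled below: small integer constants that fit the short immediate constraints (I/J/K/L, depending on the outer code) are treated as essentially free while larger ones cost an instruction; symbolic addresses cost about three instructions, presumably reflecting the addl/movl sequences needed to materialize them; integer multiplies wider than HImode are priced at ten instructions because they round-trip through the FP unit; and divides are made very expensive (60 instructions' worth) so that division by a constant gets rewritten as a multiplication.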
*/ 5586 5587 static bool 5588 ia64_rtx_costs (rtx x, machine_mode mode, int outer_code, 5589 int opno ATTRIBUTE_UNUSED, 5590 int *total, bool speed ATTRIBUTE_UNUSED) 5591 { 5592 int code = GET_CODE (x); 5593 5594 switch (code) 5595 { 5596 case CONST_INT: 5597 switch (outer_code) 5598 { 5599 case SET: 5600 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1); 5601 return true; 5602 case PLUS: 5603 if (satisfies_constraint_I (x)) 5604 *total = 0; 5605 else if (satisfies_constraint_J (x)) 5606 *total = 1; 5607 else 5608 *total = COSTS_N_INSNS (1); 5609 return true; 5610 default: 5611 if (satisfies_constraint_K (x) || satisfies_constraint_L (x)) 5612 *total = 0; 5613 else 5614 *total = COSTS_N_INSNS (1); 5615 return true; 5616 } 5617 5618 case CONST_DOUBLE: 5619 *total = COSTS_N_INSNS (1); 5620 return true; 5621 5622 case CONST: 5623 case SYMBOL_REF: 5624 case LABEL_REF: 5625 *total = COSTS_N_INSNS (3); 5626 return true; 5627 5628 case FMA: 5629 *total = COSTS_N_INSNS (4); 5630 return true; 5631 5632 case MULT: 5633 /* For multiplies wider than HImode, we have to go to the FPU, 5634 which normally involves copies. Plus there's the latency 5635 of the multiply itself, and the latency of the instructions to 5636 transfer integer regs to FP regs. */ 5637 if (FLOAT_MODE_P (mode)) 5638 *total = COSTS_N_INSNS (4); 5639 else if (GET_MODE_SIZE (mode) > 2) 5640 *total = COSTS_N_INSNS (10); 5641 else 5642 *total = COSTS_N_INSNS (2); 5643 return true; 5644 5645 case PLUS: 5646 case MINUS: 5647 if (FLOAT_MODE_P (mode)) 5648 { 5649 *total = COSTS_N_INSNS (4); 5650 return true; 5651 } 5652 /* FALLTHRU */ 5653 5654 case ASHIFT: 5655 case ASHIFTRT: 5656 case LSHIFTRT: 5657 *total = COSTS_N_INSNS (1); 5658 return true; 5659 5660 case DIV: 5661 case UDIV: 5662 case MOD: 5663 case UMOD: 5664 /* We make divide expensive, so that divide-by-constant will be 5665 optimized to a multiply. */ 5666 *total = COSTS_N_INSNS (60); 5667 return true; 5668 5669 default: 5670 return false; 5671 } 5672 } 5673 5674 /* Calculate the cost of moving data from a register in class FROM to 5675 one in class TO, using MODE. */ 5676 5677 static int 5678 ia64_register_move_cost (machine_mode mode, reg_class_t from, 5679 reg_class_t to) 5680 { 5681 /* ADDL_REGS is the same as GR_REGS for movement purposes. */ 5682 if (to == ADDL_REGS) 5683 to = GR_REGS; 5684 if (from == ADDL_REGS) 5685 from = GR_REGS; 5686 5687 /* All costs are symmetric, so reduce cases by putting the 5688 lower number class as the destination. */ 5689 if (from < to) 5690 { 5691 reg_class_t tmp = to; 5692 to = from, from = tmp; 5693 } 5694 5695 /* Moving from FR<->GR in XFmode must be more expensive than 2, 5696 so that we get secondary memory reloads. Between FR_REGS, 5697 we have to make this at least as expensive as memory_move_cost 5698 to avoid spectacularly poor register class preferencing. */ 5699 if (mode == XFmode || mode == RFmode) 5700 { 5701 if (to != GR_REGS || from != GR_REGS) 5702 return memory_move_cost (mode, to, false); 5703 else 5704 return 3; 5705 } 5706 5707 switch (to) 5708 { 5709 case PR_REGS: 5710 /* Moving between PR registers takes two insns. */ 5711 if (from == PR_REGS) 5712 return 3; 5713 /* Moving between PR and anything but GR is impossible. */ 5714 if (from != GR_REGS) 5715 return memory_move_cost (mode, to, false); 5716 break; 5717 5718 case BR_REGS: 5719 /* Moving between BR and anything but GR is impossible. 
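The only direct forms the architecture provides are moves between a branch register and a general register (for instance mov b6 = r2 and mov r2 = b6, the register numbers being arbitrary), so a copy from any other class has to be staged through a GR; charging it like a memory move keeps the register allocator away from such copies.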
*/ 5720 if (from != GR_REGS && from != GR_AND_BR_REGS) 5721 return memory_move_cost (mode, to, false); 5722 break; 5723 5724 case AR_I_REGS: 5725 case AR_M_REGS: 5726 /* Moving between AR and anything but GR is impossible. */ 5727 if (from != GR_REGS) 5728 return memory_move_cost (mode, to, false); 5729 break; 5730 5731 case GR_REGS: 5732 case FR_REGS: 5733 case FP_REGS: 5734 case GR_AND_FR_REGS: 5735 case GR_AND_BR_REGS: 5736 case ALL_REGS: 5737 break; 5738 5739 default: 5740 gcc_unreachable (); 5741 } 5742 5743 return 2; 5744 } 5745 5746 /* Calculate the cost of moving data of MODE from a register to or from 5747 memory. */ 5748 5749 static int 5750 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, 5751 reg_class_t rclass, 5752 bool in ATTRIBUTE_UNUSED) 5753 { 5754 if (rclass == GENERAL_REGS 5755 || rclass == FR_REGS 5756 || rclass == FP_REGS 5757 || rclass == GR_AND_FR_REGS) 5758 return 4; 5759 else 5760 return 10; 5761 } 5762 5763 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions 5764 on RCLASS to use when copying X into that class. */ 5765 5766 static reg_class_t 5767 ia64_preferred_reload_class (rtx x, reg_class_t rclass) 5768 { 5769 switch (rclass) 5770 { 5771 case FR_REGS: 5772 case FP_REGS: 5773 /* Don't allow volatile mem reloads into floating point registers. 5774 This is defined to force reload to choose the r/m case instead 5775 of the f/f case when reloading (set (reg fX) (mem/v)). */ 5776 if (MEM_P (x) && MEM_VOLATILE_P (x)) 5777 return NO_REGS; 5778 5779 /* Force all unrecognized constants into the constant pool. */ 5780 if (CONSTANT_P (x)) 5781 return NO_REGS; 5782 break; 5783 5784 case AR_M_REGS: 5785 case AR_I_REGS: 5786 if (!OBJECT_P (x)) 5787 return NO_REGS; 5788 break; 5789 5790 default: 5791 break; 5792 } 5793 5794 return rclass; 5795 } 5796 5797 /* This function returns the register class required for a secondary 5798 register when copying between one of the registers in RCLASS, and X, 5799 using MODE. A return value of NO_REGS means that no secondary register 5800 is required. */ 5801 5802 enum reg_class 5803 ia64_secondary_reload_class (enum reg_class rclass, 5804 machine_mode mode ATTRIBUTE_UNUSED, rtx x) 5805 { 5806 int regno = -1; 5807 5808 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 5809 regno = true_regnum (x); 5810 5811 switch (rclass) 5812 { 5813 case BR_REGS: 5814 case AR_M_REGS: 5815 case AR_I_REGS: 5816 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global 5817 interaction. We end up with two pseudos with overlapping lifetimes 5818 both of which are equiv to the same constant, and both which need 5819 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end 5820 changes depending on the path length, which means the qty_first_reg 5821 check in make_regs_eqv can give different answers at different times. 5822 At some point I'll probably need a reload_indi pattern to handle 5823 this. 5824 5825 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we 5826 wound up with a FP register from GR_AND_FR_REGS. Extend that to all 5827 non-general registers for good measure. */ 5828 if (regno >= 0 && ! GENERAL_REGNO_P (regno)) 5829 return GR_REGS; 5830 5831 /* This is needed if a pseudo used as a call_operand gets spilled to a 5832 stack slot. */ 5833 if (GET_CODE (x) == MEM) 5834 return GR_REGS; 5835 break; 5836 5837 case FR_REGS: 5838 case FP_REGS: 5839 /* Need to go through general registers to get to other class regs. */ 5840 if (regno >= 0 && ! 
(FR_REGNO_P (regno) || GENERAL_REGNO_P (regno))) 5841 return GR_REGS; 5842 5843 /* This can happen when a paradoxical subreg is an operand to the 5844 muldi3 pattern. */ 5845 /* ??? This shouldn't be necessary after instruction scheduling is 5846 enabled, because paradoxical subregs are not accepted by 5847 register_operand when INSN_SCHEDULING is defined. Or alternatively, 5848 stop the paradoxical subreg stupidity in the *_operand functions 5849 in recog.c. */ 5850 if (GET_CODE (x) == MEM 5851 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode 5852 || GET_MODE (x) == QImode)) 5853 return GR_REGS; 5854 5855 /* This can happen because of the ior/and/etc patterns that accept FP 5856 registers as operands. If the third operand is a constant, then it 5857 needs to be reloaded into a FP register. */ 5858 if (GET_CODE (x) == CONST_INT) 5859 return GR_REGS; 5860 5861 /* This can happen because of register elimination in a muldi3 insn. 5862 E.g. `26107 * (unsigned long)&u'. */ 5863 if (GET_CODE (x) == PLUS) 5864 return GR_REGS; 5865 break; 5866 5867 case PR_REGS: 5868 /* ??? This happens if we cse/gcse a BImode value across a call, 5869 and the function has a nonlocal goto. This is because global 5870 does not allocate call crossing pseudos to hard registers when 5871 crtl->has_nonlocal_goto is true. This is relatively 5872 common for C++ programs that use exceptions. To reproduce, 5873 return NO_REGS and compile libstdc++. */ 5874 if (GET_CODE (x) == MEM) 5875 return GR_REGS; 5876 5877 /* This can happen when we take a BImode subreg of a DImode value, 5878 and that DImode value winds up in some non-GR register. */ 5879 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno)) 5880 return GR_REGS; 5881 break; 5882 5883 default: 5884 break; 5885 } 5886 5887 return NO_REGS; 5888 } 5889 5890 5891 /* Implement targetm.unspec_may_trap_p hook. */ 5892 static int 5893 ia64_unspec_may_trap_p (const_rtx x, unsigned flags) 5894 { 5895 switch (XINT (x, 1)) 5896 { 5897 case UNSPEC_LDA: 5898 case UNSPEC_LDS: 5899 case UNSPEC_LDSA: 5900 case UNSPEC_LDCCLR: 5901 case UNSPEC_CHKACLR: 5902 case UNSPEC_CHKS: 5903 /* These unspecs are just wrappers. */ 5904 return may_trap_p_1 (XVECEXP (x, 0, 0), flags); 5905 } 5906 5907 return default_unspec_may_trap_p (x, flags); 5908 } 5909 5910 5911 /* Parse the -mfixed-range= option string. */ 5912 5913 static void 5914 fix_range (const char *const_str) 5915 { 5916 int i, first, last; 5917 char *str, *dash, *comma; 5918 5919 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and 5920 REG2 are either register names or register numbers. The effect 5921 of this option is to mark the registers in the range from REG1 to 5922 REG2 as ``fixed'' so they won't be used by the compiler. This is 5923 used, e.g., to ensure that kernel mode code doesn't use f32-f127. 
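For example, -mfixed-range=f32-f127 reserves the whole upper floating-point partition, and several ranges can be combined with commas, as in -mfixed-range=f32-f127,f12-f15 (the particular names here are only illustrative); the loop below processes one REG1-REG2 range per comma-separated field.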
*/ 5924 5925 i = strlen (const_str); 5926 str = (char *) alloca (i + 1); 5927 memcpy (str, const_str, i + 1); 5928 5929 while (1) 5930 { 5931 dash = strchr (str, '-'); 5932 if (!dash) 5933 { 5934 warning (0, "value of -mfixed-range must have form REG1-REG2"); 5935 return; 5936 } 5937 *dash = '\0'; 5938 5939 comma = strchr (dash + 1, ','); 5940 if (comma) 5941 *comma = '\0'; 5942 5943 first = decode_reg_name (str); 5944 if (first < 0) 5945 { 5946 warning (0, "unknown register name: %s", str); 5947 return; 5948 } 5949 5950 last = decode_reg_name (dash + 1); 5951 if (last < 0) 5952 { 5953 warning (0, "unknown register name: %s", dash + 1); 5954 return; 5955 } 5956 5957 *dash = '-'; 5958 5959 if (first > last) 5960 { 5961 warning (0, "%s-%s is an empty range", str, dash + 1); 5962 return; 5963 } 5964 5965 for (i = first; i <= last; ++i) 5966 fixed_regs[i] = call_used_regs[i] = 1; 5967 5968 if (!comma) 5969 break; 5970 5971 *comma = ','; 5972 str = comma + 1; 5973 } 5974 } 5975 5976 /* Implement TARGET_OPTION_OVERRIDE. */ 5977 5978 static void 5979 ia64_option_override (void) 5980 { 5981 unsigned int i; 5982 cl_deferred_option *opt; 5983 vec<cl_deferred_option> *v 5984 = (vec<cl_deferred_option> *) ia64_deferred_options; 5985 5986 if (v) 5987 FOR_EACH_VEC_ELT (*v, i, opt) 5988 { 5989 switch (opt->opt_index) 5990 { 5991 case OPT_mfixed_range_: 5992 fix_range (opt->arg); 5993 break; 5994 5995 default: 5996 gcc_unreachable (); 5997 } 5998 } 5999 6000 if (TARGET_AUTO_PIC) 6001 target_flags |= MASK_CONST_GP; 6002 6003 /* Numerous experiment shows that IRA based loop pressure 6004 calculation works better for RTL loop invariant motion on targets 6005 with enough (>= 32) registers. It is an expensive optimization. 6006 So it is on only for peak performance. */ 6007 if (optimize >= 3) 6008 flag_ira_loop_pressure = 1; 6009 6010 6011 ia64_section_threshold = (global_options_set.x_g_switch_value 6012 ? g_switch_value 6013 : IA64_DEFAULT_GVALUE); 6014 6015 init_machine_status = ia64_init_machine_status; 6016 6017 if (align_functions <= 0) 6018 align_functions = 64; 6019 if (align_loops <= 0) 6020 align_loops = 32; 6021 if (TARGET_ABI_OPEN_VMS) 6022 flag_no_common = 1; 6023 6024 ia64_override_options_after_change(); 6025 } 6026 6027 /* Implement targetm.override_options_after_change. */ 6028 6029 static void 6030 ia64_override_options_after_change (void) 6031 { 6032 if (optimize >= 3 6033 && !global_options_set.x_flag_selective_scheduling 6034 && !global_options_set.x_flag_selective_scheduling2) 6035 { 6036 flag_selective_scheduling2 = 1; 6037 flag_sel_sched_pipelining = 1; 6038 } 6039 if (mflag_sched_control_spec == 2) 6040 { 6041 /* Control speculation is on by default for the selective scheduler, 6042 but not for the Haifa scheduler. */ 6043 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0; 6044 } 6045 if (flag_sel_sched_pipelining && flag_auto_inc_dec) 6046 { 6047 /* FIXME: remove this when we'd implement breaking autoinsns as 6048 a transformation. */ 6049 flag_auto_inc_dec = 0; 6050 } 6051 } 6052 6053 /* Initialize the record of emitted frame related registers. 
*/ 6054 6055 void ia64_init_expanders (void) 6056 { 6057 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs)); 6058 } 6059 6060 static struct machine_function * 6061 ia64_init_machine_status (void) 6062 { 6063 return ggc_cleared_alloc<machine_function> (); 6064 } 6065 6066 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *); 6067 static enum attr_type ia64_safe_type (rtx_insn *); 6068 6069 static enum attr_itanium_class 6070 ia64_safe_itanium_class (rtx_insn *insn) 6071 { 6072 if (recog_memoized (insn) >= 0) 6073 return get_attr_itanium_class (insn); 6074 else if (DEBUG_INSN_P (insn)) 6075 return ITANIUM_CLASS_IGNORE; 6076 else 6077 return ITANIUM_CLASS_UNKNOWN; 6078 } 6079 6080 static enum attr_type 6081 ia64_safe_type (rtx_insn *insn) 6082 { 6083 if (recog_memoized (insn) >= 0) 6084 return get_attr_type (insn); 6085 else 6086 return TYPE_UNKNOWN; 6087 } 6088 6089 /* The following collection of routines emit instruction group stop bits as 6090 necessary to avoid dependencies. */ 6091 6092 /* Need to track some additional registers as far as serialization is 6093 concerned so we can properly handle br.call and br.ret. We could 6094 make these registers visible to gcc, but since these registers are 6095 never explicitly used in gcc generated code, it seems wasteful to 6096 do so (plus it would make the call and return patterns needlessly 6097 complex). */ 6098 #define REG_RP (BR_REG (0)) 6099 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) 6100 /* This is used for volatile asms which may require a stop bit immediately 6101 before and after them. */ 6102 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2) 6103 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3) 6104 #define NUM_REGS (AR_UNAT_BIT_0 + 64) 6105 6106 /* For each register, we keep track of how it has been written in the 6107 current instruction group. 6108 6109 If a register is written unconditionally (no qualifying predicate), 6110 WRITE_COUNT is set to 2 and FIRST_PRED is ignored. 6111 6112 If a register is written if its qualifying predicate P is true, we 6113 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register 6114 may be written again by the complement of P (P^1) and when this happens, 6115 WRITE_COUNT gets set to 2. 6116 6117 The result of this is that whenever an insn attempts to write a register 6118 whose WRITE_COUNT is two, we need to issue an insn group barrier first. 6119 6120 If a predicate register is written by a floating-point insn, we set 6121 WRITTEN_BY_FP to true. 6122 6123 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND 6124 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */ 6125 6126 #if GCC_VERSION >= 4000 6127 #define RWS_FIELD_TYPE __extension__ unsigned short 6128 #else 6129 #define RWS_FIELD_TYPE unsigned int 6130 #endif 6131 struct reg_write_state 6132 { 6133 RWS_FIELD_TYPE write_count : 2; 6134 RWS_FIELD_TYPE first_pred : 10; 6135 RWS_FIELD_TYPE written_by_fp : 1; 6136 RWS_FIELD_TYPE written_by_and : 1; 6137 RWS_FIELD_TYPE written_by_or : 1; 6138 }; 6139 6140 /* Cumulative info for the current instruction group. */ 6141 struct reg_write_state rws_sum[NUM_REGS]; 6142 #if CHECKING_P 6143 /* Bitmap whether a register has been written in the current insn. 
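With checking enabled this is a full bitmap over NUM_REGS, and rws_insn_set asserts that no register is recorded twice for a single insn; the non-checking fallback further down tracks only REG_AR_CFM and REG_VOLATILE, the two entries rtx_needs_barrier actually queries. To illustrate the WRITE_COUNT scheme above with an arbitrary example, after (p6) mov r8 = 1 the entry for r8 holds WRITE_COUNT 1 and FIRST_PRED p6, and any further write to r8 within the same instruction group (outside the and.orcm/or.andcm special cases) is then flagged by rws_access_regno as needing a stop bit.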
*/ 6144 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1) 6145 / HOST_BITS_PER_WIDEST_FAST_INT]; 6146 6147 static inline void 6148 rws_insn_set (int regno) 6149 { 6150 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno)); 6151 SET_HARD_REG_BIT (rws_insn, regno); 6152 } 6153 6154 static inline int 6155 rws_insn_test (int regno) 6156 { 6157 return TEST_HARD_REG_BIT (rws_insn, regno); 6158 } 6159 #else 6160 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */ 6161 unsigned char rws_insn[2]; 6162 6163 static inline void 6164 rws_insn_set (int regno) 6165 { 6166 if (regno == REG_AR_CFM) 6167 rws_insn[0] = 1; 6168 else if (regno == REG_VOLATILE) 6169 rws_insn[1] = 1; 6170 } 6171 6172 static inline int 6173 rws_insn_test (int regno) 6174 { 6175 if (regno == REG_AR_CFM) 6176 return rws_insn[0]; 6177 if (regno == REG_VOLATILE) 6178 return rws_insn[1]; 6179 return 0; 6180 } 6181 #endif 6182 6183 /* Indicates whether this is the first instruction after a stop bit, 6184 in which case we don't need another stop bit. Without this, 6185 ia64_variable_issue will die when scheduling an alloc. */ 6186 static int first_instruction; 6187 6188 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing 6189 RTL for one instruction. */ 6190 struct reg_flags 6191 { 6192 unsigned int is_write : 1; /* Is register being written? */ 6193 unsigned int is_fp : 1; /* Is register used as part of an fp op? */ 6194 unsigned int is_branch : 1; /* Is register used as part of a branch? */ 6195 unsigned int is_and : 1; /* Is register used as part of and.orcm? */ 6196 unsigned int is_or : 1; /* Is register used as part of or.andcm? */ 6197 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */ 6198 }; 6199 6200 static void rws_update (int, struct reg_flags, int); 6201 static int rws_access_regno (int, struct reg_flags, int); 6202 static int rws_access_reg (rtx, struct reg_flags, int); 6203 static void update_set_flags (rtx, struct reg_flags *); 6204 static int set_src_needs_barrier (rtx, struct reg_flags, int); 6205 static int rtx_needs_barrier (rtx, struct reg_flags, int); 6206 static void init_insn_group_barriers (void); 6207 static int group_barrier_needed (rtx_insn *); 6208 static int safe_group_barrier_needed (rtx_insn *); 6209 static int in_safe_group_barrier; 6210 6211 /* Update *RWS for REGNO, which is being written by the current instruction, 6212 with predicate PRED, and associated register flags in FLAGS. */ 6213 6214 static void 6215 rws_update (int regno, struct reg_flags flags, int pred) 6216 { 6217 if (pred) 6218 rws_sum[regno].write_count++; 6219 else 6220 rws_sum[regno].write_count = 2; 6221 rws_sum[regno].written_by_fp |= flags.is_fp; 6222 /* ??? Not tracking and/or across differing predicates. */ 6223 rws_sum[regno].written_by_and = flags.is_and; 6224 rws_sum[regno].written_by_or = flags.is_or; 6225 rws_sum[regno].first_pred = pred; 6226 } 6227 6228 /* Handle an access to register REGNO of type FLAGS using predicate register 6229 PRED. Update rws_sum array. Return 1 if this access creates 6230 a dependency with an earlier instruction in the same group. */ 6231 6232 static int 6233 rws_access_regno (int regno, struct reg_flags flags, int pred) 6234 { 6235 int need_barrier = 0; 6236 6237 gcc_assert (regno < NUM_REGS); 6238 6239 if (! 
PR_REGNO_P (regno)) 6240 flags.is_and = flags.is_or = 0; 6241 6242 if (flags.is_write) 6243 { 6244 int write_count; 6245 6246 rws_insn_set (regno); 6247 write_count = rws_sum[regno].write_count; 6248 6249 switch (write_count) 6250 { 6251 case 0: 6252 /* The register has not been written yet. */ 6253 if (!in_safe_group_barrier) 6254 rws_update (regno, flags, pred); 6255 break; 6256 6257 case 1: 6258 /* The register has been written via a predicate. Treat 6259 it like a unconditional write and do not try to check 6260 for complementary pred reg in earlier write. */ 6261 if (flags.is_and && rws_sum[regno].written_by_and) 6262 ; 6263 else if (flags.is_or && rws_sum[regno].written_by_or) 6264 ; 6265 else 6266 need_barrier = 1; 6267 if (!in_safe_group_barrier) 6268 rws_update (regno, flags, pred); 6269 break; 6270 6271 case 2: 6272 /* The register has been unconditionally written already. We 6273 need a barrier. */ 6274 if (flags.is_and && rws_sum[regno].written_by_and) 6275 ; 6276 else if (flags.is_or && rws_sum[regno].written_by_or) 6277 ; 6278 else 6279 need_barrier = 1; 6280 if (!in_safe_group_barrier) 6281 { 6282 rws_sum[regno].written_by_and = flags.is_and; 6283 rws_sum[regno].written_by_or = flags.is_or; 6284 } 6285 break; 6286 6287 default: 6288 gcc_unreachable (); 6289 } 6290 } 6291 else 6292 { 6293 if (flags.is_branch) 6294 { 6295 /* Branches have several RAW exceptions that allow to avoid 6296 barriers. */ 6297 6298 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM) 6299 /* RAW dependencies on branch regs are permissible as long 6300 as the writer is a non-branch instruction. Since we 6301 never generate code that uses a branch register written 6302 by a branch instruction, handling this case is 6303 easy. */ 6304 return 0; 6305 6306 if (REGNO_REG_CLASS (regno) == PR_REGS 6307 && ! rws_sum[regno].written_by_fp) 6308 /* The predicates of a branch are available within the 6309 same insn group as long as the predicate was written by 6310 something other than a floating-point instruction. */ 6311 return 0; 6312 } 6313 6314 if (flags.is_and && rws_sum[regno].written_by_and) 6315 return 0; 6316 if (flags.is_or && rws_sum[regno].written_by_or) 6317 return 0; 6318 6319 switch (rws_sum[regno].write_count) 6320 { 6321 case 0: 6322 /* The register has not been written yet. */ 6323 break; 6324 6325 case 1: 6326 /* The register has been written via a predicate, assume we 6327 need a barrier (don't check for complementary regs). */ 6328 need_barrier = 1; 6329 break; 6330 6331 case 2: 6332 /* The register has been unconditionally written already. We 6333 need a barrier. */ 6334 need_barrier = 1; 6335 break; 6336 6337 default: 6338 gcc_unreachable (); 6339 } 6340 } 6341 6342 return need_barrier; 6343 } 6344 6345 static int 6346 rws_access_reg (rtx reg, struct reg_flags flags, int pred) 6347 { 6348 int regno = REGNO (reg); 6349 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)); 6350 6351 if (n == 1) 6352 return rws_access_regno (regno, flags, pred); 6353 else 6354 { 6355 int need_barrier = 0; 6356 while (--n >= 0) 6357 need_barrier |= rws_access_regno (regno + n, flags, pred); 6358 return need_barrier; 6359 } 6360 } 6361 6362 /* Examine X, which is a SET rtx, and update the flags, the predicate, and 6363 the condition, stored in *PFLAGS, *PPRED and *PCOND. 
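(Of these, only *PFLAGS is still passed; as the signature below shows, the function no longer takes the predicate or the condition.)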
*/ 6364 6365 static void 6366 update_set_flags (rtx x, struct reg_flags *pflags) 6367 { 6368 rtx src = SET_SRC (x); 6369 6370 switch (GET_CODE (src)) 6371 { 6372 case CALL: 6373 return; 6374 6375 case IF_THEN_ELSE: 6376 /* There are four cases here: 6377 (1) The destination is (pc), in which case this is a branch, 6378 nothing here applies. 6379 (2) The destination is ar.lc, in which case this is a 6380 doloop_end_internal, 6381 (3) The destination is an fp register, in which case this is 6382 an fselect instruction. 6383 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case 6384 this is a check load. 6385 In all cases, nothing we do in this function applies. */ 6386 return; 6387 6388 default: 6389 if (COMPARISON_P (src) 6390 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0)))) 6391 /* Set pflags->is_fp to 1 so that we know we're dealing 6392 with a floating point comparison when processing the 6393 destination of the SET. */ 6394 pflags->is_fp = 1; 6395 6396 /* Discover if this is a parallel comparison. We only handle 6397 and.orcm and or.andcm at present, since we must retain a 6398 strict inverse on the predicate pair. */ 6399 else if (GET_CODE (src) == AND) 6400 pflags->is_and = 1; 6401 else if (GET_CODE (src) == IOR) 6402 pflags->is_or = 1; 6403 6404 break; 6405 } 6406 } 6407 6408 /* Subroutine of rtx_needs_barrier; this function determines whether the 6409 source of a given SET rtx found in X needs a barrier. FLAGS and PRED 6410 are as in rtx_needs_barrier. COND is an rtx that holds the condition 6411 for this insn. */ 6412 6413 static int 6414 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred) 6415 { 6416 int need_barrier = 0; 6417 rtx dst; 6418 rtx src = SET_SRC (x); 6419 6420 if (GET_CODE (src) == CALL) 6421 /* We don't need to worry about the result registers that 6422 get written by subroutine call. */ 6423 return rtx_needs_barrier (src, flags, pred); 6424 else if (SET_DEST (x) == pc_rtx) 6425 { 6426 /* X is a conditional branch. */ 6427 /* ??? This seems redundant, as the caller sets this bit for 6428 all JUMP_INSNs. */ 6429 if (!ia64_spec_check_src_p (src)) 6430 flags.is_branch = 1; 6431 return rtx_needs_barrier (src, flags, pred); 6432 } 6433 6434 if (ia64_spec_check_src_p (src)) 6435 /* Avoid checking one register twice (in condition 6436 and in 'then' section) for ldc pattern. */ 6437 { 6438 gcc_assert (REG_P (XEXP (src, 2))); 6439 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred); 6440 6441 /* We process MEM below. */ 6442 src = XEXP (src, 1); 6443 } 6444 6445 need_barrier |= rtx_needs_barrier (src, flags, pred); 6446 6447 dst = SET_DEST (x); 6448 if (GET_CODE (dst) == ZERO_EXTRACT) 6449 { 6450 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); 6451 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); 6452 } 6453 return need_barrier; 6454 } 6455 6456 /* Handle an access to rtx X of type FLAGS using predicate register 6457 PRED. Return 1 if this access creates a dependency with an earlier 6458 instruction in the same group. */ 6459 6460 static int 6461 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred) 6462 { 6463 int i, j; 6464 int is_complemented = 0; 6465 int need_barrier = 0; 6466 const char *format_ptr; 6467 struct reg_flags new_flags; 6468 rtx cond; 6469 6470 if (! 
x) 6471 return 0; 6472 6473 new_flags = flags; 6474 6475 switch (GET_CODE (x)) 6476 { 6477 case SET: 6478 update_set_flags (x, &new_flags); 6479 need_barrier = set_src_needs_barrier (x, new_flags, pred); 6480 if (GET_CODE (SET_SRC (x)) != CALL) 6481 { 6482 new_flags.is_write = 1; 6483 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred); 6484 } 6485 break; 6486 6487 case CALL: 6488 new_flags.is_write = 0; 6489 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 6490 6491 /* Avoid multiple register writes, in case this is a pattern with 6492 multiple CALL rtx. This avoids a failure in rws_access_reg. */ 6493 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM)) 6494 { 6495 new_flags.is_write = 1; 6496 need_barrier |= rws_access_regno (REG_RP, new_flags, pred); 6497 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred); 6498 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 6499 } 6500 break; 6501 6502 case COND_EXEC: 6503 /* X is a predicated instruction. */ 6504 6505 cond = COND_EXEC_TEST (x); 6506 gcc_assert (!pred); 6507 need_barrier = rtx_needs_barrier (cond, flags, 0); 6508 6509 if (GET_CODE (cond) == EQ) 6510 is_complemented = 1; 6511 cond = XEXP (cond, 0); 6512 gcc_assert (GET_CODE (cond) == REG 6513 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS); 6514 pred = REGNO (cond); 6515 if (is_complemented) 6516 ++pred; 6517 6518 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred); 6519 return need_barrier; 6520 6521 case CLOBBER: 6522 case USE: 6523 /* Clobber & use are for earlier compiler-phases only. */ 6524 break; 6525 6526 case ASM_OPERANDS: 6527 case ASM_INPUT: 6528 /* We always emit stop bits for traditional asms. We emit stop bits 6529 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ 6530 if (GET_CODE (x) != ASM_OPERANDS 6531 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) 6532 { 6533 /* Avoid writing the register multiple times if we have multiple 6534 asm outputs. This avoids a failure in rws_access_reg. */ 6535 if (! rws_insn_test (REG_VOLATILE)) 6536 { 6537 new_flags.is_write = 1; 6538 rws_access_regno (REG_VOLATILE, new_flags, pred); 6539 } 6540 return 1; 6541 } 6542 6543 /* For all ASM_OPERANDS, we must traverse the vector of input operands. 6544 We cannot just fall through here since then we would be confused 6545 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate 6546 traditional asms unlike their normal usage. 
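In other words, an ASM_INPUT met at the top level is a whole traditional asm and takes the unconditional stop-bit path above, whereas one appearing among the operands of an ASM_OPERANDS is not, which is why the loop below walks the input vector explicitly.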
*/ 6547 6548 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) 6549 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) 6550 need_barrier = 1; 6551 break; 6552 6553 case PARALLEL: 6554 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 6555 { 6556 rtx pat = XVECEXP (x, 0, i); 6557 switch (GET_CODE (pat)) 6558 { 6559 case SET: 6560 update_set_flags (pat, &new_flags); 6561 need_barrier |= set_src_needs_barrier (pat, new_flags, pred); 6562 break; 6563 6564 case USE: 6565 case CALL: 6566 case ASM_OPERANDS: 6567 case ASM_INPUT: 6568 need_barrier |= rtx_needs_barrier (pat, flags, pred); 6569 break; 6570 6571 case CLOBBER: 6572 if (REG_P (XEXP (pat, 0)) 6573 && extract_asm_operands (x) != NULL_RTX 6574 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM) 6575 { 6576 new_flags.is_write = 1; 6577 need_barrier |= rtx_needs_barrier (XEXP (pat, 0), 6578 new_flags, pred); 6579 new_flags = flags; 6580 } 6581 break; 6582 6583 case RETURN: 6584 break; 6585 6586 default: 6587 gcc_unreachable (); 6588 } 6589 } 6590 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 6591 { 6592 rtx pat = XVECEXP (x, 0, i); 6593 if (GET_CODE (pat) == SET) 6594 { 6595 if (GET_CODE (SET_SRC (pat)) != CALL) 6596 { 6597 new_flags.is_write = 1; 6598 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags, 6599 pred); 6600 } 6601 } 6602 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN) 6603 need_barrier |= rtx_needs_barrier (pat, flags, pred); 6604 } 6605 break; 6606 6607 case SUBREG: 6608 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred); 6609 break; 6610 case REG: 6611 if (REGNO (x) == AR_UNAT_REGNUM) 6612 { 6613 for (i = 0; i < 64; ++i) 6614 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred); 6615 } 6616 else 6617 need_barrier = rws_access_reg (x, flags, pred); 6618 break; 6619 6620 case MEM: 6621 /* Find the regs used in memory address computation. */ 6622 new_flags.is_write = 0; 6623 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 6624 break; 6625 6626 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR: 6627 case SYMBOL_REF: case LABEL_REF: case CONST: 6628 break; 6629 6630 /* Operators with side-effects. */ 6631 case POST_INC: case POST_DEC: 6632 gcc_assert (GET_CODE (XEXP (x, 0)) == REG); 6633 6634 new_flags.is_write = 0; 6635 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 6636 new_flags.is_write = 1; 6637 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 6638 break; 6639 6640 case POST_MODIFY: 6641 gcc_assert (GET_CODE (XEXP (x, 0)) == REG); 6642 6643 new_flags.is_write = 0; 6644 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 6645 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 6646 new_flags.is_write = 1; 6647 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 6648 break; 6649 6650 /* Handle common unary and binary ops for efficiency. 
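The binary codes in the first group below recurse into both operands; the unary codes in the second group recurse only into operand 0.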
*/ 6651 case COMPARE: case PLUS: case MINUS: case MULT: case DIV: 6652 case MOD: case UDIV: case UMOD: case AND: case IOR: 6653 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: 6654 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: 6655 case NE: case EQ: case GE: case GT: case LE: 6656 case LT: case GEU: case GTU: case LEU: case LTU: 6657 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 6658 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 6659 break; 6660 6661 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: 6662 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: 6663 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: 6664 case SQRT: case FFS: case POPCOUNT: 6665 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); 6666 break; 6667 6668 case VEC_SELECT: 6669 /* VEC_SELECT's second argument is a PARALLEL with integers that 6670 describe the elements selected. On ia64, those integers are 6671 always constants. Avoid walking the PARALLEL so that we don't 6672 get confused with "normal" parallels and then die. */ 6673 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); 6674 break; 6675 6676 case UNSPEC: 6677 switch (XINT (x, 1)) 6678 { 6679 case UNSPEC_LTOFF_DTPMOD: 6680 case UNSPEC_LTOFF_DTPREL: 6681 case UNSPEC_DTPREL: 6682 case UNSPEC_LTOFF_TPREL: 6683 case UNSPEC_TPREL: 6684 case UNSPEC_PRED_REL_MUTEX: 6685 case UNSPEC_PIC_CALL: 6686 case UNSPEC_MF: 6687 case UNSPEC_FETCHADD_ACQ: 6688 case UNSPEC_FETCHADD_REL: 6689 case UNSPEC_BSP_VALUE: 6690 case UNSPEC_FLUSHRS: 6691 case UNSPEC_BUNDLE_SELECTOR: 6692 break; 6693 6694 case UNSPEC_GR_SPILL: 6695 case UNSPEC_GR_RESTORE: 6696 { 6697 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1)); 6698 HOST_WIDE_INT bit = (offset >> 3) & 63; 6699 6700 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 6701 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL); 6702 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit, 6703 new_flags, pred); 6704 break; 6705 } 6706 6707 case UNSPEC_FR_SPILL: 6708 case UNSPEC_FR_RESTORE: 6709 case UNSPEC_GETF_EXP: 6710 case UNSPEC_SETF_EXP: 6711 case UNSPEC_ADDP4: 6712 case UNSPEC_FR_SQRT_RECIP_APPROX: 6713 case UNSPEC_FR_SQRT_RECIP_APPROX_RES: 6714 case UNSPEC_LDA: 6715 case UNSPEC_LDS: 6716 case UNSPEC_LDS_A: 6717 case UNSPEC_LDSA: 6718 case UNSPEC_CHKACLR: 6719 case UNSPEC_CHKS: 6720 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 6721 break; 6722 6723 case UNSPEC_FR_RECIP_APPROX: 6724 case UNSPEC_SHRP: 6725 case UNSPEC_COPYSIGN: 6726 case UNSPEC_FR_RECIP_APPROX_RES: 6727 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 6728 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 6729 break; 6730 6731 case UNSPEC_CMPXCHG_ACQ: 6732 case UNSPEC_CMPXCHG_REL: 6733 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 6734 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred); 6735 break; 6736 6737 default: 6738 gcc_unreachable (); 6739 } 6740 break; 6741 6742 case UNSPEC_VOLATILE: 6743 switch (XINT (x, 1)) 6744 { 6745 case UNSPECV_ALLOC: 6746 /* Alloc must always be the first instruction of a group. 6747 We force this by always returning true. */ 6748 /* ??? We might get better scheduling if we explicitly check for 6749 input/local/output register dependencies, and modify the 6750 scheduler so that alloc is always reordered to the start of 6751 the current group. We could then eliminate all of the 6752 first_instruction code. 
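As things stand, returning 1 here requests a stop bit before every alloc, and the first_instruction logic in group_barrier_needed then cancels the request when the alloc is already the first insn after a stop bit.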
*/ 6753 rws_access_regno (AR_PFS_REGNUM, flags, pred); 6754 6755 new_flags.is_write = 1; 6756 rws_access_regno (REG_AR_CFM, new_flags, pred); 6757 return 1; 6758 6759 case UNSPECV_SET_BSP: 6760 case UNSPECV_PROBE_STACK_RANGE: 6761 need_barrier = 1; 6762 break; 6763 6764 case UNSPECV_BLOCKAGE: 6765 case UNSPECV_INSN_GROUP_BARRIER: 6766 case UNSPECV_BREAK: 6767 case UNSPECV_PSAC_ALL: 6768 case UNSPECV_PSAC_NORMAL: 6769 return 0; 6770 6771 case UNSPECV_PROBE_STACK_ADDRESS: 6772 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 6773 break; 6774 6775 default: 6776 gcc_unreachable (); 6777 } 6778 break; 6779 6780 case RETURN: 6781 new_flags.is_write = 0; 6782 need_barrier = rws_access_regno (REG_RP, flags, pred); 6783 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred); 6784 6785 new_flags.is_write = 1; 6786 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 6787 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 6788 break; 6789 6790 default: 6791 format_ptr = GET_RTX_FORMAT (GET_CODE (x)); 6792 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 6793 switch (format_ptr[i]) 6794 { 6795 case '0': /* unused field */ 6796 case 'i': /* integer */ 6797 case 'n': /* note */ 6798 case 'w': /* wide integer */ 6799 case 's': /* pointer to string */ 6800 case 'S': /* optional pointer to string */ 6801 break; 6802 6803 case 'e': 6804 if (rtx_needs_barrier (XEXP (x, i), flags, pred)) 6805 need_barrier = 1; 6806 break; 6807 6808 case 'E': 6809 for (j = XVECLEN (x, i) - 1; j >= 0; --j) 6810 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) 6811 need_barrier = 1; 6812 break; 6813 6814 default: 6815 gcc_unreachable (); 6816 } 6817 break; 6818 } 6819 return need_barrier; 6820 } 6821 6822 /* Clear out the state for group_barrier_needed at the start of a 6823 sequence of insns. */ 6824 6825 static void 6826 init_insn_group_barriers (void) 6827 { 6828 memset (rws_sum, 0, sizeof (rws_sum)); 6829 first_instruction = 1; 6830 } 6831 6832 /* Given the current state, determine whether a group barrier (a stop bit) is 6833 necessary before INSN. Return nonzero if so. This modifies the state to 6834 include the effects of INSN as a side-effect. */ 6835 6836 static int 6837 group_barrier_needed (rtx_insn *insn) 6838 { 6839 rtx pat; 6840 int need_barrier = 0; 6841 struct reg_flags flags; 6842 6843 memset (&flags, 0, sizeof (flags)); 6844 switch (GET_CODE (insn)) 6845 { 6846 case NOTE: 6847 case DEBUG_INSN: 6848 break; 6849 6850 case BARRIER: 6851 /* A barrier doesn't imply an instruction group boundary. */ 6852 break; 6853 6854 case CODE_LABEL: 6855 memset (rws_insn, 0, sizeof (rws_insn)); 6856 return 1; 6857 6858 case CALL_INSN: 6859 flags.is_branch = 1; 6860 flags.is_sibcall = SIBLING_CALL_P (insn); 6861 memset (rws_insn, 0, sizeof (rws_insn)); 6862 6863 /* Don't bundle a call following another call. */ 6864 if ((pat = prev_active_insn (insn)) && CALL_P (pat)) 6865 { 6866 need_barrier = 1; 6867 break; 6868 } 6869 6870 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); 6871 break; 6872 6873 case JUMP_INSN: 6874 if (!ia64_spec_check_p (insn)) 6875 flags.is_branch = 1; 6876 6877 /* Don't bundle a jump following a call. 
*/ 6878 if ((pat = prev_active_insn (insn)) && CALL_P (pat)) 6879 { 6880 need_barrier = 1; 6881 break; 6882 } 6883 /* FALLTHRU */ 6884 6885 case INSN: 6886 if (GET_CODE (PATTERN (insn)) == USE 6887 || GET_CODE (PATTERN (insn)) == CLOBBER) 6888 /* Don't care about USE and CLOBBER "insns"---those are used to 6889 indicate to the optimizer that it shouldn't get rid of 6890 certain operations. */ 6891 break; 6892 6893 pat = PATTERN (insn); 6894 6895 /* Ug. Hack hacks hacked elsewhere. */ 6896 switch (recog_memoized (insn)) 6897 { 6898 /* We play dependency tricks with the epilogue in order 6899 to get proper schedules. Undo this for dv analysis. */ 6900 case CODE_FOR_epilogue_deallocate_stack: 6901 case CODE_FOR_prologue_allocate_stack: 6902 pat = XVECEXP (pat, 0, 0); 6903 break; 6904 6905 /* The pattern we use for br.cloop confuses the code above. 6906 The second element of the vector is representative. */ 6907 case CODE_FOR_doloop_end_internal: 6908 pat = XVECEXP (pat, 0, 1); 6909 break; 6910 6911 /* Doesn't generate code. */ 6912 case CODE_FOR_pred_rel_mutex: 6913 case CODE_FOR_prologue_use: 6914 return 0; 6915 6916 default: 6917 break; 6918 } 6919 6920 memset (rws_insn, 0, sizeof (rws_insn)); 6921 need_barrier = rtx_needs_barrier (pat, flags, 0); 6922 6923 /* Check to see if the previous instruction was a volatile 6924 asm. */ 6925 if (! need_barrier) 6926 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0); 6927 6928 break; 6929 6930 default: 6931 gcc_unreachable (); 6932 } 6933 6934 if (first_instruction && important_for_bundling_p (insn)) 6935 { 6936 need_barrier = 0; 6937 first_instruction = 0; 6938 } 6939 6940 return need_barrier; 6941 } 6942 6943 /* Like group_barrier_needed, but do not clobber the current state. */ 6944 6945 static int 6946 safe_group_barrier_needed (rtx_insn *insn) 6947 { 6948 int saved_first_instruction; 6949 int t; 6950 6951 saved_first_instruction = first_instruction; 6952 in_safe_group_barrier = 1; 6953 6954 t = group_barrier_needed (insn); 6955 6956 first_instruction = saved_first_instruction; 6957 in_safe_group_barrier = 0; 6958 6959 return t; 6960 } 6961 6962 /* Scan the current function and insert stop bits as necessary to 6963 eliminate dependencies. This function assumes that a final 6964 instruction scheduling pass has been run which has already 6965 inserted most of the necessary stop bits. This function only 6966 inserts new ones at basic block boundaries, since these are 6967 invisible to the scheduler. 
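Concretely, the walk below replays the register-write tracking over the whole function, and whenever an insn turns out to need a barrier it emits a stop bit (gen_insn_group_barrier (GEN_INT (3))) just before the last label or basic-block note that was seen, then restarts the tracking from that point.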
*/ 6968 6969 static void 6970 emit_insn_group_barriers (FILE *dump) 6971 { 6972 rtx_insn *insn; 6973 rtx_insn *last_label = 0; 6974 int insns_since_last_label = 0; 6975 6976 init_insn_group_barriers (); 6977 6978 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 6979 { 6980 if (LABEL_P (insn)) 6981 { 6982 if (insns_since_last_label) 6983 last_label = insn; 6984 insns_since_last_label = 0; 6985 } 6986 else if (NOTE_P (insn) 6987 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK) 6988 { 6989 if (insns_since_last_label) 6990 last_label = insn; 6991 insns_since_last_label = 0; 6992 } 6993 else if (NONJUMP_INSN_P (insn) 6994 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 6995 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) 6996 { 6997 init_insn_group_barriers (); 6998 last_label = 0; 6999 } 7000 else if (NONDEBUG_INSN_P (insn)) 7001 { 7002 insns_since_last_label = 1; 7003 7004 if (group_barrier_needed (insn)) 7005 { 7006 if (last_label) 7007 { 7008 if (dump) 7009 fprintf (dump, "Emitting stop before label %d\n", 7010 INSN_UID (last_label)); 7011 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label); 7012 insn = last_label; 7013 7014 init_insn_group_barriers (); 7015 last_label = 0; 7016 } 7017 } 7018 } 7019 } 7020 } 7021 7022 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run. 7023 This function has to emit all necessary group barriers. */ 7024 7025 static void 7026 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED) 7027 { 7028 rtx_insn *insn; 7029 7030 init_insn_group_barriers (); 7031 7032 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 7033 { 7034 if (BARRIER_P (insn)) 7035 { 7036 rtx_insn *last = prev_active_insn (insn); 7037 7038 if (! last) 7039 continue; 7040 if (JUMP_TABLE_DATA_P (last)) 7041 last = prev_active_insn (last); 7042 if (recog_memoized (last) != CODE_FOR_insn_group_barrier) 7043 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); 7044 7045 init_insn_group_barriers (); 7046 } 7047 else if (NONDEBUG_INSN_P (insn)) 7048 { 7049 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) 7050 init_insn_group_barriers (); 7051 else if (group_barrier_needed (insn)) 7052 { 7053 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 7054 init_insn_group_barriers (); 7055 group_barrier_needed (insn); 7056 } 7057 } 7058 } 7059 } 7060 7061 7062 7063 /* Instruction scheduling support. */ 7064 7065 #define NR_BUNDLES 10 7066 7067 /* A list of names of all available bundles. */ 7068 7069 static const char *bundle_name [NR_BUNDLES] = 7070 { 7071 ".mii", 7072 ".mmi", 7073 ".mfi", 7074 ".mmf", 7075 #if NR_BUNDLES == 10 7076 ".bbb", 7077 ".mbb", 7078 #endif 7079 ".mib", 7080 ".mmb", 7081 ".mfb", 7082 ".mlx" 7083 }; 7084 7085 /* Nonzero if we should insert stop bits into the schedule. */ 7086 7087 int ia64_final_schedule = 0; 7088 7089 /* Codes of the corresponding queried units: */ 7090 7091 static int _0mii_, _0mmi_, _0mfi_, _0mmf_; 7092 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_; 7093 7094 static int _1mii_, _1mmi_, _1mfi_, _1mmf_; 7095 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_; 7096 7097 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6; 7098 7099 /* The following variable value is an insn group barrier. */ 7100 7101 static rtx_insn *dfa_stop_insn; 7102 7103 /* The following variable value is the last issued insn. */ 7104 7105 static rtx_insn *last_scheduled_insn; 7106 7107 /* The following variable value is pointer to a DFA state used as 7108 temporary variable. 
*/ 7109 7110 static state_t temp_dfa_state = NULL; 7111 7112 /* The following variable value is DFA state after issuing the last 7113 insn. */ 7114 7115 static state_t prev_cycle_state = NULL; 7116 7117 /* The following array element values are TRUE if the corresponding 7118 insn requires to add stop bits before it. */ 7119 7120 static char *stops_p = NULL; 7121 7122 /* The following variable is used to set up the mentioned above array. */ 7123 7124 static int stop_before_p = 0; 7125 7126 /* The following variable value is length of the arrays `clocks' and 7127 `add_cycles'. */ 7128 7129 static int clocks_length; 7130 7131 /* The following variable value is number of data speculations in progress. */ 7132 static int pending_data_specs = 0; 7133 7134 /* Number of memory references on current and three future processor cycles. */ 7135 static char mem_ops_in_group[4]; 7136 7137 /* Number of current processor cycle (from scheduler's point of view). */ 7138 static int current_cycle; 7139 7140 static rtx ia64_single_set (rtx_insn *); 7141 static void ia64_emit_insn_before (rtx, rtx_insn *); 7142 7143 /* Map a bundle number to its pseudo-op. */ 7144 7145 const char * 7146 get_bundle_name (int b) 7147 { 7148 return bundle_name[b]; 7149 } 7150 7151 7152 /* Return the maximum number of instructions a cpu can issue. */ 7153 7154 static int 7155 ia64_issue_rate (void) 7156 { 7157 return 6; 7158 } 7159 7160 /* Helper function - like single_set, but look inside COND_EXEC. */ 7161 7162 static rtx 7163 ia64_single_set (rtx_insn *insn) 7164 { 7165 rtx x = PATTERN (insn), ret; 7166 if (GET_CODE (x) == COND_EXEC) 7167 x = COND_EXEC_CODE (x); 7168 if (GET_CODE (x) == SET) 7169 return x; 7170 7171 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack. 7172 Although they are not classical single set, the second set is there just 7173 to protect it from moving past FP-relative stack accesses. */ 7174 switch (recog_memoized (insn)) 7175 { 7176 case CODE_FOR_prologue_allocate_stack: 7177 case CODE_FOR_prologue_allocate_stack_pr: 7178 case CODE_FOR_epilogue_deallocate_stack: 7179 case CODE_FOR_epilogue_deallocate_stack_pr: 7180 ret = XVECEXP (x, 0, 0); 7181 break; 7182 7183 default: 7184 ret = single_set_2 (insn, x); 7185 break; 7186 } 7187 7188 return ret; 7189 } 7190 7191 /* Adjust the cost of a scheduling dependency. 7192 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN. 7193 COST is the current cost, DW is dependency weakness. */ 7194 static int 7195 ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn, 7196 int cost, dw_t dw) 7197 { 7198 enum reg_note dep_type = (enum reg_note) dep_type1; 7199 enum attr_itanium_class dep_class; 7200 enum attr_itanium_class insn_class; 7201 7202 insn_class = ia64_safe_itanium_class (insn); 7203 dep_class = ia64_safe_itanium_class (dep_insn); 7204 7205 /* Treat true memory dependencies separately. Ignore apparent true 7206 dependence between store and call (call has a MEM inside a SYMBOL_REF). */ 7207 if (dep_type == REG_DEP_TRUE 7208 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF) 7209 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL)) 7210 return 0; 7211 7212 if (dw == MIN_DEP_WEAK) 7213 /* Store and load are likely to alias, use higher cost to avoid stall. */ 7214 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST); 7215 else if (dw > MIN_DEP_WEAK) 7216 { 7217 /* Store and load are less likely to alias. 
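(DW is the scheduler's estimate of how likely the store and the load are to reference the same memory: MIN_DEP_WEAK in the branch above means they are considered likely to overlap, larger values mean they probably do not.)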
*/ 7218 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF) 7219 /* Assume there will be no cache conflict for floating-point data. 7220 For integer data, L1 conflict penalty is huge (17 cycles), so we 7221 never assume it will not cause a conflict. */ 7222 return 0; 7223 else 7224 return cost; 7225 } 7226 7227 if (dep_type != REG_DEP_OUTPUT) 7228 return cost; 7229 7230 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF 7231 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF) 7232 return 0; 7233 7234 return cost; 7235 } 7236 7237 /* Like emit_insn_before, but skip cycle_display notes. 7238 ??? When cycle display notes are implemented, update this. */ 7239 7240 static void 7241 ia64_emit_insn_before (rtx insn, rtx_insn *before) 7242 { 7243 emit_insn_before (insn, before); 7244 } 7245 7246 /* The following function marks insns who produce addresses for load 7247 and store insns. Such insns will be placed into M slots because it 7248 decrease latency time for Itanium1 (see function 7249 `ia64_produce_address_p' and the DFA descriptions). */ 7250 7251 static void 7252 ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail) 7253 { 7254 rtx_insn *insn, *next, *next_tail; 7255 7256 /* Before reload, which_alternative is not set, which means that 7257 ia64_safe_itanium_class will produce wrong results for (at least) 7258 move instructions. */ 7259 if (!reload_completed) 7260 return; 7261 7262 next_tail = NEXT_INSN (tail); 7263 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) 7264 if (INSN_P (insn)) 7265 insn->call = 0; 7266 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) 7267 if (INSN_P (insn) 7268 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU) 7269 { 7270 sd_iterator_def sd_it; 7271 dep_t dep; 7272 bool has_mem_op_consumer_p = false; 7273 7274 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) 7275 { 7276 enum attr_itanium_class c; 7277 7278 if (DEP_TYPE (dep) != REG_DEP_TRUE) 7279 continue; 7280 7281 next = DEP_CON (dep); 7282 c = ia64_safe_itanium_class (next); 7283 if ((c == ITANIUM_CLASS_ST 7284 || c == ITANIUM_CLASS_STF) 7285 && ia64_st_address_bypass_p (insn, next)) 7286 { 7287 has_mem_op_consumer_p = true; 7288 break; 7289 } 7290 else if ((c == ITANIUM_CLASS_LD 7291 || c == ITANIUM_CLASS_FLD 7292 || c == ITANIUM_CLASS_FLDP) 7293 && ia64_ld_address_bypass_p (insn, next)) 7294 { 7295 has_mem_op_consumer_p = true; 7296 break; 7297 } 7298 } 7299 7300 insn->call = has_mem_op_consumer_p; 7301 } 7302 } 7303 7304 /* We're beginning a new block. Initialize data structures as necessary. */ 7305 7306 static void 7307 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED, 7308 int sched_verbose ATTRIBUTE_UNUSED, 7309 int max_ready ATTRIBUTE_UNUSED) 7310 { 7311 if (flag_checking && !sel_sched_p () && reload_completed) 7312 { 7313 for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head); 7314 insn != current_sched_info->next_tail; 7315 insn = NEXT_INSN (insn)) 7316 gcc_assert (!SCHED_GROUP_P (insn)); 7317 } 7318 last_scheduled_insn = NULL; 7319 init_insn_group_barriers (); 7320 7321 current_cycle = 0; 7322 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group)); 7323 } 7324 7325 /* We're beginning a scheduling pass. Check assertion. */ 7326 7327 static void 7328 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED, 7329 int sched_verbose ATTRIBUTE_UNUSED, 7330 int max_ready ATTRIBUTE_UNUSED) 7331 { 7332 gcc_assert (pending_data_specs == 0); 7333 } 7334 7335 /* Scheduling pass is now finished. 
Free/reset static variable. */ 7336 static void 7337 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED, 7338 int sched_verbose ATTRIBUTE_UNUSED) 7339 { 7340 gcc_assert (pending_data_specs == 0); 7341 } 7342 7343 /* Return TRUE if INSN is a load (either normal or speculative, but not a 7344 speculation check), FALSE otherwise. */ 7345 static bool 7346 is_load_p (rtx_insn *insn) 7347 { 7348 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn); 7349 7350 return 7351 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD) 7352 && get_attr_check_load (insn) == CHECK_LOAD_NO); 7353 } 7354 7355 /* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array 7356 (taking account for 3-cycle cache reference postponing for stores: Intel 7357 Itanium 2 Reference Manual for Software Development and Optimization, 7358 6.7.3.1). */ 7359 static void 7360 record_memory_reference (rtx_insn *insn) 7361 { 7362 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn); 7363 7364 switch (insn_class) { 7365 case ITANIUM_CLASS_FLD: 7366 case ITANIUM_CLASS_LD: 7367 mem_ops_in_group[current_cycle % 4]++; 7368 break; 7369 case ITANIUM_CLASS_STF: 7370 case ITANIUM_CLASS_ST: 7371 mem_ops_in_group[(current_cycle + 3) % 4]++; 7372 break; 7373 default:; 7374 } 7375 } 7376 7377 /* We are about to being issuing insns for this clock cycle. 7378 Override the default sort algorithm to better slot instructions. */ 7379 7380 static int 7381 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready, 7382 int *pn_ready, int clock_var, 7383 int reorder_type) 7384 { 7385 int n_asms; 7386 int n_ready = *pn_ready; 7387 rtx_insn **e_ready = ready + n_ready; 7388 rtx_insn **insnp; 7389 7390 if (sched_verbose) 7391 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type); 7392 7393 if (reorder_type == 0) 7394 { 7395 /* First, move all USEs, CLOBBERs and other crud out of the way. */ 7396 n_asms = 0; 7397 for (insnp = ready; insnp < e_ready; insnp++) 7398 if (insnp < e_ready) 7399 { 7400 rtx_insn *insn = *insnp; 7401 enum attr_type t = ia64_safe_type (insn); 7402 if (t == TYPE_UNKNOWN) 7403 { 7404 if (GET_CODE (PATTERN (insn)) == ASM_INPUT 7405 || asm_noperands (PATTERN (insn)) >= 0) 7406 { 7407 rtx_insn *lowest = ready[n_asms]; 7408 ready[n_asms] = insn; 7409 *insnp = lowest; 7410 n_asms++; 7411 } 7412 else 7413 { 7414 rtx_insn *highest = ready[n_ready - 1]; 7415 ready[n_ready - 1] = insn; 7416 *insnp = highest; 7417 return 1; 7418 } 7419 } 7420 } 7421 7422 if (n_asms < n_ready) 7423 { 7424 /* Some normal insns to process. Skip the asms. */ 7425 ready += n_asms; 7426 n_ready -= n_asms; 7427 } 7428 else if (n_ready > 0) 7429 return 1; 7430 } 7431 7432 if (ia64_final_schedule) 7433 { 7434 int deleted = 0; 7435 int nr_need_stop = 0; 7436 7437 for (insnp = ready; insnp < e_ready; insnp++) 7438 if (safe_group_barrier_needed (*insnp)) 7439 nr_need_stop++; 7440 7441 if (reorder_type == 1 && n_ready == nr_need_stop) 7442 return 0; 7443 if (reorder_type == 0) 7444 return 1; 7445 insnp = e_ready; 7446 /* Move down everything that needs a stop bit, preserving 7447 relative order. */ 7448 while (insnp-- > ready + deleted) 7449 while (insnp >= ready + deleted) 7450 { 7451 rtx_insn *insn = *insnp; 7452 if (! 
safe_group_barrier_needed (insn)) 7453 break; 7454 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); 7455 *ready = insn; 7456 deleted++; 7457 } 7458 n_ready -= deleted; 7459 ready += deleted; 7460 } 7461 7462 current_cycle = clock_var; 7463 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns) 7464 { 7465 int moved = 0; 7466 7467 insnp = e_ready; 7468 /* Move down loads/stores, preserving relative order. */ 7469 while (insnp-- > ready + moved) 7470 while (insnp >= ready + moved) 7471 { 7472 rtx_insn *insn = *insnp; 7473 if (! is_load_p (insn)) 7474 break; 7475 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); 7476 *ready = insn; 7477 moved++; 7478 } 7479 n_ready -= moved; 7480 ready += moved; 7481 } 7482 7483 return 1; 7484 } 7485 7486 /* We are about to being issuing insns for this clock cycle. Override 7487 the default sort algorithm to better slot instructions. */ 7488 7489 static int 7490 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready, 7491 int *pn_ready, int clock_var) 7492 { 7493 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, 7494 pn_ready, clock_var, 0); 7495 } 7496 7497 /* Like ia64_sched_reorder, but called after issuing each insn. 7498 Override the default sort algorithm to better slot instructions. */ 7499 7500 static int 7501 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED, 7502 int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready, 7503 int *pn_ready, int clock_var) 7504 { 7505 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready, 7506 clock_var, 1); 7507 } 7508 7509 /* We are about to issue INSN. Return the number of insns left on the 7510 ready queue that can be issued this cycle. */ 7511 7512 static int 7513 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED, 7514 int sched_verbose ATTRIBUTE_UNUSED, 7515 rtx_insn *insn, 7516 int can_issue_more ATTRIBUTE_UNUSED) 7517 { 7518 if (sched_deps_info->generate_spec_deps && !sel_sched_p ()) 7519 /* Modulo scheduling does not extend h_i_d when emitting 7520 new instructions. Don't use h_i_d, if we don't have to. */ 7521 { 7522 if (DONE_SPEC (insn) & BEGIN_DATA) 7523 pending_data_specs++; 7524 if (CHECK_SPEC (insn) & BEGIN_DATA) 7525 pending_data_specs--; 7526 } 7527 7528 if (DEBUG_INSN_P (insn)) 7529 return 1; 7530 7531 last_scheduled_insn = insn; 7532 memcpy (prev_cycle_state, curr_state, dfa_state_size); 7533 if (reload_completed) 7534 { 7535 int needed = group_barrier_needed (insn); 7536 7537 gcc_assert (!needed); 7538 if (CALL_P (insn)) 7539 init_insn_group_barriers (); 7540 stops_p [INSN_UID (insn)] = stop_before_p; 7541 stop_before_p = 0; 7542 7543 record_memory_reference (insn); 7544 } 7545 return 1; 7546 } 7547 7548 /* We are choosing insn from the ready queue. Return zero if INSN 7549 can be chosen. */ 7550 7551 static int 7552 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index) 7553 { 7554 gcc_assert (insn && INSN_P (insn)); 7555 7556 /* Size of ALAT is 32. As far as we perform conservative 7557 data speculation, we keep ALAT half-empty. */ 7558 if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA)) 7559 return ready_index == 0 ? 
-1 : 1; 7560 7561 if (ready_index == 0) 7562 return 0; 7563 7564 if ((!reload_completed 7565 || !safe_group_barrier_needed (insn)) 7566 && (!mflag_sched_mem_insns_hard_limit 7567 || !is_load_p (insn) 7568 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns)) 7569 return 0; 7570 7571 return 1; 7572 } 7573 7574 /* The following variable value is pseudo-insn used by the DFA insn 7575 scheduler to change the DFA state when the simulated clock is 7576 increased. */ 7577 7578 static rtx_insn *dfa_pre_cycle_insn; 7579 7580 /* Returns 1 when a meaningful insn was scheduled between the last group 7581 barrier and LAST. */ 7582 static int 7583 scheduled_good_insn (rtx_insn *last) 7584 { 7585 if (last && recog_memoized (last) >= 0) 7586 return 1; 7587 7588 for ( ; 7589 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last) 7590 && !stops_p[INSN_UID (last)]; 7591 last = PREV_INSN (last)) 7592 /* We could hit a NOTE_INSN_DELETED here which is actually outside 7593 the ebb we're scheduling. */ 7594 if (INSN_P (last) && recog_memoized (last) >= 0) 7595 return 1; 7596 7597 return 0; 7598 } 7599 7600 /* We are about to being issuing INSN. Return nonzero if we cannot 7601 issue it on given cycle CLOCK and return zero if we should not sort 7602 the ready queue on the next clock start. */ 7603 7604 static int 7605 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock, 7606 int clock, int *sort_p) 7607 { 7608 gcc_assert (insn && INSN_P (insn)); 7609 7610 if (DEBUG_INSN_P (insn)) 7611 return 0; 7612 7613 /* When a group barrier is needed for insn, last_scheduled_insn 7614 should be set. */ 7615 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn)) 7616 || last_scheduled_insn); 7617 7618 if ((reload_completed 7619 && (safe_group_barrier_needed (insn) 7620 || (mflag_sched_stop_bits_after_every_cycle 7621 && last_clock != clock 7622 && last_scheduled_insn 7623 && scheduled_good_insn (last_scheduled_insn)))) 7624 || (last_scheduled_insn 7625 && (CALL_P (last_scheduled_insn) 7626 || unknown_for_bundling_p (last_scheduled_insn)))) 7627 { 7628 init_insn_group_barriers (); 7629 7630 if (verbose && dump) 7631 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn), 7632 last_clock == clock ? " + cycle advance" : ""); 7633 7634 stop_before_p = 1; 7635 current_cycle = clock; 7636 mem_ops_in_group[current_cycle % 4] = 0; 7637 7638 if (last_clock == clock) 7639 { 7640 state_transition (curr_state, dfa_stop_insn); 7641 if (TARGET_EARLY_STOP_BITS) 7642 *sort_p = (last_scheduled_insn == NULL_RTX 7643 || ! CALL_P (last_scheduled_insn)); 7644 else 7645 *sort_p = 0; 7646 return 1; 7647 } 7648 7649 if (last_scheduled_insn) 7650 { 7651 if (unknown_for_bundling_p (last_scheduled_insn)) 7652 state_reset (curr_state); 7653 else 7654 { 7655 memcpy (curr_state, prev_cycle_state, dfa_state_size); 7656 state_transition (curr_state, dfa_stop_insn); 7657 state_transition (curr_state, dfa_pre_cycle_insn); 7658 state_transition (curr_state, NULL); 7659 } 7660 } 7661 } 7662 return 0; 7663 } 7664 7665 /* Implement targetm.sched.h_i_d_extended hook. 7666 Extend internal data structures. */ 7667 static void 7668 ia64_h_i_d_extended (void) 7669 { 7670 if (stops_p != NULL) 7671 { 7672 int new_clocks_length = get_max_uid () * 3 / 2; 7673 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1); 7674 clocks_length = new_clocks_length; 7675 } 7676 } 7677 7678 7679 /* This structure describes the data used by the backend to guide scheduling. 
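It mirrors the file-scope scheduling state: prev_cycle_state, last_scheduled_insn, the register write state arrays, first_instruction, pending_data_specs, current_cycle and the per-cycle memory operation counts.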
7680 When the current scheduling point is switched, this data should be saved 7681 and restored later, if the scheduler returns to this point. */ 7682 struct _ia64_sched_context 7683 { 7684 state_t prev_cycle_state; 7685 rtx_insn *last_scheduled_insn; 7686 struct reg_write_state rws_sum[NUM_REGS]; 7687 struct reg_write_state rws_insn[NUM_REGS]; 7688 int first_instruction; 7689 int pending_data_specs; 7690 int current_cycle; 7691 char mem_ops_in_group[4]; 7692 }; 7693 typedef struct _ia64_sched_context *ia64_sched_context_t; 7694 7695 /* Allocates a scheduling context. */ 7696 static void * 7697 ia64_alloc_sched_context (void) 7698 { 7699 return xmalloc (sizeof (struct _ia64_sched_context)); 7700 } 7701 7702 /* Initializes the _SC context with clean data, if CLEAN_P, and from 7703 the global context otherwise. */ 7704 static void 7705 ia64_init_sched_context (void *_sc, bool clean_p) 7706 { 7707 ia64_sched_context_t sc = (ia64_sched_context_t) _sc; 7708 7709 sc->prev_cycle_state = xmalloc (dfa_state_size); 7710 if (clean_p) 7711 { 7712 state_reset (sc->prev_cycle_state); 7713 sc->last_scheduled_insn = NULL; 7714 memset (sc->rws_sum, 0, sizeof (rws_sum)); 7715 memset (sc->rws_insn, 0, sizeof (rws_insn)); 7716 sc->first_instruction = 1; 7717 sc->pending_data_specs = 0; 7718 sc->current_cycle = 0; 7719 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group)); 7720 } 7721 else 7722 { 7723 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size); 7724 sc->last_scheduled_insn = last_scheduled_insn; 7725 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum)); 7726 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn)); 7727 sc->first_instruction = first_instruction; 7728 sc->pending_data_specs = pending_data_specs; 7729 sc->current_cycle = current_cycle; 7730 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group)); 7731 } 7732 } 7733 7734 /* Sets the global scheduling context to the one pointed to by _SC. */ 7735 static void 7736 ia64_set_sched_context (void *_sc) 7737 { 7738 ia64_sched_context_t sc = (ia64_sched_context_t) _sc; 7739 7740 gcc_assert (sc != NULL); 7741 7742 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size); 7743 last_scheduled_insn = sc->last_scheduled_insn; 7744 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum)); 7745 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn)); 7746 first_instruction = sc->first_instruction; 7747 pending_data_specs = sc->pending_data_specs; 7748 current_cycle = sc->current_cycle; 7749 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group)); 7750 } 7751 7752 /* Clears the data in the _SC scheduling context. */ 7753 static void 7754 ia64_clear_sched_context (void *_sc) 7755 { 7756 ia64_sched_context_t sc = (ia64_sched_context_t) _sc; 7757 7758 free (sc->prev_cycle_state); 7759 sc->prev_cycle_state = NULL; 7760 } 7761 7762 /* Frees the _SC scheduling context. */ 7763 static void 7764 ia64_free_sched_context (void *_sc) 7765 { 7766 gcc_assert (_sc != NULL); 7767 7768 free (_sc); 7769 } 7770 7771 typedef rtx (* gen_func_t) (rtx, rtx); 7772 7773 /* Return a function that will generate a load of mode MODE_NO 7774 with speculation types TS. 
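The tables below are indexed by the mode number returned by ia64_mode_to_int (with the zero-extending variants at the end); which table is used depends on whether TS requests data speculation, control speculation, or both.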
*/ 7775 static gen_func_t 7776 get_spec_load_gen_function (ds_t ts, int mode_no) 7777 { 7778 static gen_func_t gen_ld_[] = { 7779 gen_movbi, 7780 gen_movqi_internal, 7781 gen_movhi_internal, 7782 gen_movsi_internal, 7783 gen_movdi_internal, 7784 gen_movsf_internal, 7785 gen_movdf_internal, 7786 gen_movxf_internal, 7787 gen_movti_internal, 7788 gen_zero_extendqidi2, 7789 gen_zero_extendhidi2, 7790 gen_zero_extendsidi2, 7791 }; 7792 7793 static gen_func_t gen_ld_a[] = { 7794 gen_movbi_advanced, 7795 gen_movqi_advanced, 7796 gen_movhi_advanced, 7797 gen_movsi_advanced, 7798 gen_movdi_advanced, 7799 gen_movsf_advanced, 7800 gen_movdf_advanced, 7801 gen_movxf_advanced, 7802 gen_movti_advanced, 7803 gen_zero_extendqidi2_advanced, 7804 gen_zero_extendhidi2_advanced, 7805 gen_zero_extendsidi2_advanced, 7806 }; 7807 static gen_func_t gen_ld_s[] = { 7808 gen_movbi_speculative, 7809 gen_movqi_speculative, 7810 gen_movhi_speculative, 7811 gen_movsi_speculative, 7812 gen_movdi_speculative, 7813 gen_movsf_speculative, 7814 gen_movdf_speculative, 7815 gen_movxf_speculative, 7816 gen_movti_speculative, 7817 gen_zero_extendqidi2_speculative, 7818 gen_zero_extendhidi2_speculative, 7819 gen_zero_extendsidi2_speculative, 7820 }; 7821 static gen_func_t gen_ld_sa[] = { 7822 gen_movbi_speculative_advanced, 7823 gen_movqi_speculative_advanced, 7824 gen_movhi_speculative_advanced, 7825 gen_movsi_speculative_advanced, 7826 gen_movdi_speculative_advanced, 7827 gen_movsf_speculative_advanced, 7828 gen_movdf_speculative_advanced, 7829 gen_movxf_speculative_advanced, 7830 gen_movti_speculative_advanced, 7831 gen_zero_extendqidi2_speculative_advanced, 7832 gen_zero_extendhidi2_speculative_advanced, 7833 gen_zero_extendsidi2_speculative_advanced, 7834 }; 7835 static gen_func_t gen_ld_s_a[] = { 7836 gen_movbi_speculative_a, 7837 gen_movqi_speculative_a, 7838 gen_movhi_speculative_a, 7839 gen_movsi_speculative_a, 7840 gen_movdi_speculative_a, 7841 gen_movsf_speculative_a, 7842 gen_movdf_speculative_a, 7843 gen_movxf_speculative_a, 7844 gen_movti_speculative_a, 7845 gen_zero_extendqidi2_speculative_a, 7846 gen_zero_extendhidi2_speculative_a, 7847 gen_zero_extendsidi2_speculative_a, 7848 }; 7849 7850 gen_func_t *gen_ld; 7851 7852 if (ts & BEGIN_DATA) 7853 { 7854 if (ts & BEGIN_CONTROL) 7855 gen_ld = gen_ld_sa; 7856 else 7857 gen_ld = gen_ld_a; 7858 } 7859 else if (ts & BEGIN_CONTROL) 7860 { 7861 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL) 7862 || ia64_needs_block_p (ts)) 7863 gen_ld = gen_ld_s; 7864 else 7865 gen_ld = gen_ld_s_a; 7866 } 7867 else if (ts == 0) 7868 gen_ld = gen_ld_; 7869 else 7870 gcc_unreachable (); 7871 7872 return gen_ld[mode_no]; 7873 } 7874 7875 /* Constants that help mapping 'machine_mode' to int. */ 7876 enum SPEC_MODES 7877 { 7878 SPEC_MODE_INVALID = -1, 7879 SPEC_MODE_FIRST = 0, 7880 SPEC_MODE_FOR_EXTEND_FIRST = 1, 7881 SPEC_MODE_FOR_EXTEND_LAST = 3, 7882 SPEC_MODE_LAST = 8 7883 }; 7884 7885 enum 7886 { 7887 /* Offset to reach ZERO_EXTEND patterns. */ 7888 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1 7889 }; 7890 7891 /* Return index of the MODE. 
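The result is used to index the load and check generator tables; e.g. SImode maps to 3, and a zero-extending SImode load to 3 + SPEC_GEN_EXTEND_OFFSET = 11. SPEC_MODE_INVALID is returned for modes that cannot be speculated.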
*/ 7892 static int 7893 ia64_mode_to_int (machine_mode mode) 7894 { 7895 switch (mode) 7896 { 7897 case BImode: return 0; /* SPEC_MODE_FIRST */ 7898 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */ 7899 case HImode: return 2; 7900 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */ 7901 case DImode: return 4; 7902 case SFmode: return 5; 7903 case DFmode: return 6; 7904 case XFmode: return 7; 7905 case TImode: 7906 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not 7907 mentioned in itanium[12].md. Predicate fp_register_operand also 7908 needs to be defined. Bottom line: better disable for now. */ 7909 return SPEC_MODE_INVALID; 7910 default: return SPEC_MODE_INVALID; 7911 } 7912 } 7913 7914 /* Provide information about speculation capabilities. */ 7915 static void 7916 ia64_set_sched_flags (spec_info_t spec_info) 7917 { 7918 unsigned int *flags = &(current_sched_info->flags); 7919 7920 if (*flags & SCHED_RGN 7921 || *flags & SCHED_EBB 7922 || *flags & SEL_SCHED) 7923 { 7924 int mask = 0; 7925 7926 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0) 7927 || (mflag_sched_ar_data_spec && reload_completed)) 7928 { 7929 mask |= BEGIN_DATA; 7930 7931 if (!sel_sched_p () 7932 && ((mflag_sched_br_in_data_spec && !reload_completed) 7933 || (mflag_sched_ar_in_data_spec && reload_completed))) 7934 mask |= BE_IN_DATA; 7935 } 7936 7937 if (mflag_sched_control_spec 7938 && (!sel_sched_p () 7939 || reload_completed)) 7940 { 7941 mask |= BEGIN_CONTROL; 7942 7943 if (!sel_sched_p () && mflag_sched_in_control_spec) 7944 mask |= BE_IN_CONTROL; 7945 } 7946 7947 spec_info->mask = mask; 7948 7949 if (mask) 7950 { 7951 *flags |= USE_DEPS_LIST | DO_SPECULATION; 7952 7953 if (mask & BE_IN_SPEC) 7954 *flags |= NEW_BBS; 7955 7956 spec_info->flags = 0; 7957 7958 if ((mask & CONTROL_SPEC) 7959 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec) 7960 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL; 7961 7962 if (sched_verbose >= 1) 7963 spec_info->dump = sched_dump; 7964 else 7965 spec_info->dump = 0; 7966 7967 if (mflag_sched_count_spec_in_critical_path) 7968 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH; 7969 } 7970 } 7971 else 7972 spec_info->mask = 0; 7973 } 7974 7975 /* If INSN is an appropriate load return its mode. 7976 Return -1 otherwise. */ 7977 static int 7978 get_mode_no_for_insn (rtx_insn *insn) 7979 { 7980 rtx reg, mem, mode_rtx; 7981 int mode_no; 7982 bool extend_p; 7983 7984 extract_insn_cached (insn); 7985 7986 /* We use WHICH_ALTERNATIVE only after reload. This will 7987 guarantee that reload won't touch a speculative insn. */ 7988 7989 if (recog_data.n_operands != 2) 7990 return -1; 7991 7992 reg = recog_data.operand[0]; 7993 mem = recog_data.operand[1]; 7994 7995 /* We should use MEM's mode since REG's mode in presence of 7996 ZERO_EXTEND will always be DImode. */ 7997 if (get_attr_speculable1 (insn) == SPECULABLE1_YES) 7998 /* Process non-speculative ld. */ 7999 { 8000 if (!reload_completed) 8001 { 8002 /* Do not speculate into regs like ar.lc. 
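Only general and floating-point registers carry the deferred-exception indication (NaT bit/NaTVal) that speculative loads depend on.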
*/ 8003 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg))) 8004 return -1; 8005 8006 if (!MEM_P (mem)) 8007 return -1; 8008 8009 { 8010 rtx mem_reg = XEXP (mem, 0); 8011 8012 if (!REG_P (mem_reg)) 8013 return -1; 8014 } 8015 8016 mode_rtx = mem; 8017 } 8018 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES) 8019 { 8020 gcc_assert (REG_P (reg) && MEM_P (mem)); 8021 mode_rtx = mem; 8022 } 8023 else 8024 return -1; 8025 } 8026 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES 8027 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES 8028 || get_attr_check_load (insn) == CHECK_LOAD_YES) 8029 /* Process speculative ld or ld.c. */ 8030 { 8031 gcc_assert (REG_P (reg) && MEM_P (mem)); 8032 mode_rtx = mem; 8033 } 8034 else 8035 { 8036 enum attr_itanium_class attr_class = get_attr_itanium_class (insn); 8037 8038 if (attr_class == ITANIUM_CLASS_CHK_A 8039 || attr_class == ITANIUM_CLASS_CHK_S_I 8040 || attr_class == ITANIUM_CLASS_CHK_S_F) 8041 /* Process chk. */ 8042 mode_rtx = reg; 8043 else 8044 return -1; 8045 } 8046 8047 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx)); 8048 8049 if (mode_no == SPEC_MODE_INVALID) 8050 return -1; 8051 8052 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx)); 8053 8054 if (extend_p) 8055 { 8056 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no 8057 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)) 8058 return -1; 8059 8060 mode_no += SPEC_GEN_EXTEND_OFFSET; 8061 } 8062 8063 return mode_no; 8064 } 8065 8066 /* If X is an unspec part of a speculative load, return its code. 8067 Return -1 otherwise. */ 8068 static int 8069 get_spec_unspec_code (const_rtx x) 8070 { 8071 if (GET_CODE (x) != UNSPEC) 8072 return -1; 8073 8074 { 8075 int code; 8076 8077 code = XINT (x, 1); 8078 8079 switch (code) 8080 { 8081 case UNSPEC_LDA: 8082 case UNSPEC_LDS: 8083 case UNSPEC_LDS_A: 8084 case UNSPEC_LDSA: 8085 return code; 8086 8087 default: 8088 return -1; 8089 } 8090 } 8091 } 8092 8093 /* Implement skip_rtx_p hook. */ 8094 static bool 8095 ia64_skip_rtx_p (const_rtx x) 8096 { 8097 return get_spec_unspec_code (x) != -1; 8098 } 8099 8100 /* If INSN is a speculative load, return its UNSPEC code. 8101 Return -1 otherwise. */ 8102 static int 8103 get_insn_spec_code (const_rtx insn) 8104 { 8105 rtx pat, reg, mem; 8106 8107 pat = PATTERN (insn); 8108 8109 if (GET_CODE (pat) == COND_EXEC) 8110 pat = COND_EXEC_CODE (pat); 8111 8112 if (GET_CODE (pat) != SET) 8113 return -1; 8114 8115 reg = SET_DEST (pat); 8116 if (!REG_P (reg)) 8117 return -1; 8118 8119 mem = SET_SRC (pat); 8120 if (GET_CODE (mem) == ZERO_EXTEND) 8121 mem = XEXP (mem, 0); 8122 8123 return get_spec_unspec_code (mem); 8124 } 8125 8126 /* If INSN is a speculative load, return a ds with the speculation types. 8127 Otherwise [if INSN is a normal instruction] return 0. */ 8128 static ds_t 8129 ia64_get_insn_spec_ds (rtx_insn *insn) 8130 { 8131 int code = get_insn_spec_code (insn); 8132 8133 switch (code) 8134 { 8135 case UNSPEC_LDA: 8136 return BEGIN_DATA; 8137 8138 case UNSPEC_LDS: 8139 case UNSPEC_LDS_A: 8140 return BEGIN_CONTROL; 8141 8142 case UNSPEC_LDSA: 8143 return BEGIN_DATA | BEGIN_CONTROL; 8144 8145 default: 8146 return 0; 8147 } 8148 } 8149 8150 /* If INSN is a speculative load return a ds with the speculation types that 8151 will be checked. 8152 Otherwise [if INSN is a normal instruction] return 0. 
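Note that the result may be wider than what ia64_get_insn_spec_ds reports: for an UNSPEC_LDA load the corresponding check is treated as covering both data and control speculation.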
*/ 8153 static ds_t 8154 ia64_get_insn_checked_ds (rtx_insn *insn) 8155 { 8156 int code = get_insn_spec_code (insn); 8157 8158 switch (code) 8159 { 8160 case UNSPEC_LDA: 8161 return BEGIN_DATA | BEGIN_CONTROL; 8162 8163 case UNSPEC_LDS: 8164 return BEGIN_CONTROL; 8165 8166 case UNSPEC_LDS_A: 8167 case UNSPEC_LDSA: 8168 return BEGIN_DATA | BEGIN_CONTROL; 8169 8170 default: 8171 return 0; 8172 } 8173 } 8174 8175 /* If GEN_P is true, calculate the index of needed speculation check and return 8176 speculative pattern for INSN with speculative mode TS, machine mode 8177 MODE_NO and with ZERO_EXTEND (if EXTEND_P is true). 8178 If GEN_P is false, just calculate the index of needed speculation check. */ 8179 static rtx 8180 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no) 8181 { 8182 rtx pat, new_pat; 8183 gen_func_t gen_load; 8184 8185 gen_load = get_spec_load_gen_function (ts, mode_no); 8186 8187 new_pat = gen_load (copy_rtx (recog_data.operand[0]), 8188 copy_rtx (recog_data.operand[1])); 8189 8190 pat = PATTERN (insn); 8191 if (GET_CODE (pat) == COND_EXEC) 8192 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)), 8193 new_pat); 8194 8195 return new_pat; 8196 } 8197 8198 static bool 8199 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED, 8200 ds_t ds ATTRIBUTE_UNUSED) 8201 { 8202 return false; 8203 } 8204 8205 /* Implement targetm.sched.speculate_insn hook. 8206 Check if the INSN can be TS speculative. 8207 If 'no' - return -1. 8208 If 'yes' - generate speculative pattern in the NEW_PAT and return 1. 8209 If current pattern of the INSN already provides TS speculation, 8210 return 0. */ 8211 static int 8212 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat) 8213 { 8214 int mode_no; 8215 int res; 8216 8217 gcc_assert (!(ts & ~SPECULATIVE)); 8218 8219 if (ia64_spec_check_p (insn)) 8220 return -1; 8221 8222 if ((ts & BE_IN_SPEC) 8223 && !insn_can_be_in_speculative_p (insn, ts)) 8224 return -1; 8225 8226 mode_no = get_mode_no_for_insn (insn); 8227 8228 if (mode_no != SPEC_MODE_INVALID) 8229 { 8230 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts)) 8231 res = 0; 8232 else 8233 { 8234 res = 1; 8235 *new_pat = ia64_gen_spec_load (insn, ts, mode_no); 8236 } 8237 } 8238 else 8239 res = -1; 8240 8241 return res; 8242 } 8243 8244 /* Return a function that will generate a check for speculation TS with mode 8245 MODE_NO. 8246 If simple check is needed, pass true for SIMPLE_CHECK_P. 8247 If clearing check is needed, pass true for CLEARING_CHECK_P. 
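A simple check reloads the value with ld.c; otherwise a chk.a or chk.s branching to recovery code is generated. The clearing forms additionally invalidate the matching ALAT entry.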
*/ 8248 static gen_func_t 8249 get_spec_check_gen_function (ds_t ts, int mode_no, 8250 bool simple_check_p, bool clearing_check_p) 8251 { 8252 static gen_func_t gen_ld_c_clr[] = { 8253 gen_movbi_clr, 8254 gen_movqi_clr, 8255 gen_movhi_clr, 8256 gen_movsi_clr, 8257 gen_movdi_clr, 8258 gen_movsf_clr, 8259 gen_movdf_clr, 8260 gen_movxf_clr, 8261 gen_movti_clr, 8262 gen_zero_extendqidi2_clr, 8263 gen_zero_extendhidi2_clr, 8264 gen_zero_extendsidi2_clr, 8265 }; 8266 static gen_func_t gen_ld_c_nc[] = { 8267 gen_movbi_nc, 8268 gen_movqi_nc, 8269 gen_movhi_nc, 8270 gen_movsi_nc, 8271 gen_movdi_nc, 8272 gen_movsf_nc, 8273 gen_movdf_nc, 8274 gen_movxf_nc, 8275 gen_movti_nc, 8276 gen_zero_extendqidi2_nc, 8277 gen_zero_extendhidi2_nc, 8278 gen_zero_extendsidi2_nc, 8279 }; 8280 static gen_func_t gen_chk_a_clr[] = { 8281 gen_advanced_load_check_clr_bi, 8282 gen_advanced_load_check_clr_qi, 8283 gen_advanced_load_check_clr_hi, 8284 gen_advanced_load_check_clr_si, 8285 gen_advanced_load_check_clr_di, 8286 gen_advanced_load_check_clr_sf, 8287 gen_advanced_load_check_clr_df, 8288 gen_advanced_load_check_clr_xf, 8289 gen_advanced_load_check_clr_ti, 8290 gen_advanced_load_check_clr_di, 8291 gen_advanced_load_check_clr_di, 8292 gen_advanced_load_check_clr_di, 8293 }; 8294 static gen_func_t gen_chk_a_nc[] = { 8295 gen_advanced_load_check_nc_bi, 8296 gen_advanced_load_check_nc_qi, 8297 gen_advanced_load_check_nc_hi, 8298 gen_advanced_load_check_nc_si, 8299 gen_advanced_load_check_nc_di, 8300 gen_advanced_load_check_nc_sf, 8301 gen_advanced_load_check_nc_df, 8302 gen_advanced_load_check_nc_xf, 8303 gen_advanced_load_check_nc_ti, 8304 gen_advanced_load_check_nc_di, 8305 gen_advanced_load_check_nc_di, 8306 gen_advanced_load_check_nc_di, 8307 }; 8308 static gen_func_t gen_chk_s[] = { 8309 gen_speculation_check_bi, 8310 gen_speculation_check_qi, 8311 gen_speculation_check_hi, 8312 gen_speculation_check_si, 8313 gen_speculation_check_di, 8314 gen_speculation_check_sf, 8315 gen_speculation_check_df, 8316 gen_speculation_check_xf, 8317 gen_speculation_check_ti, 8318 gen_speculation_check_di, 8319 gen_speculation_check_di, 8320 gen_speculation_check_di, 8321 }; 8322 8323 gen_func_t *gen_check; 8324 8325 if (ts & BEGIN_DATA) 8326 { 8327 /* We don't need recovery because even if this is ld.sa 8328 ALAT entry will be allocated only if NAT bit is set to zero. 8329 So it is enough to use ld.c here. */ 8330 8331 if (simple_check_p) 8332 { 8333 gcc_assert (mflag_sched_spec_ldc); 8334 8335 if (clearing_check_p) 8336 gen_check = gen_ld_c_clr; 8337 else 8338 gen_check = gen_ld_c_nc; 8339 } 8340 else 8341 { 8342 if (clearing_check_p) 8343 gen_check = gen_chk_a_clr; 8344 else 8345 gen_check = gen_chk_a_nc; 8346 } 8347 } 8348 else if (ts & BEGIN_CONTROL) 8349 { 8350 if (simple_check_p) 8351 /* We might want to use ld.sa -> ld.c instead of 8352 ld.s -> chk.s. */ 8353 { 8354 gcc_assert (!ia64_needs_block_p (ts)); 8355 8356 if (clearing_check_p) 8357 gen_check = gen_ld_c_clr; 8358 else 8359 gen_check = gen_ld_c_nc; 8360 } 8361 else 8362 { 8363 gen_check = gen_chk_s; 8364 } 8365 } 8366 else 8367 gcc_unreachable (); 8368 8369 gcc_assert (mode_no >= 0); 8370 return gen_check[mode_no]; 8371 } 8372 8373 /* Return nonzero, if INSN needs branchy recovery check. 
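That is, whether the check must be a chk.a or chk.s branching to a recovery block rather than a simple ld.c reload.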
*/ 8374 static bool 8375 ia64_needs_block_p (ds_t ts) 8376 { 8377 if (ts & BEGIN_DATA) 8378 return !mflag_sched_spec_ldc; 8379 8380 gcc_assert ((ts & BEGIN_CONTROL) != 0); 8381 8382 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc); 8383 } 8384 8385 /* Generate (or regenerate) a recovery check for INSN. */ 8386 static rtx 8387 ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds) 8388 { 8389 rtx op1, pat, check_pat; 8390 gen_func_t gen_check; 8391 int mode_no; 8392 8393 mode_no = get_mode_no_for_insn (insn); 8394 gcc_assert (mode_no >= 0); 8395 8396 if (label) 8397 op1 = label; 8398 else 8399 { 8400 gcc_assert (!ia64_needs_block_p (ds)); 8401 op1 = copy_rtx (recog_data.operand[1]); 8402 } 8403 8404 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX, 8405 true); 8406 8407 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1); 8408 8409 pat = PATTERN (insn); 8410 if (GET_CODE (pat) == COND_EXEC) 8411 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)), 8412 check_pat); 8413 8414 return check_pat; 8415 } 8416 8417 /* Return nonzero, if X is branchy recovery check. */ 8418 static int 8419 ia64_spec_check_p (rtx x) 8420 { 8421 x = PATTERN (x); 8422 if (GET_CODE (x) == COND_EXEC) 8423 x = COND_EXEC_CODE (x); 8424 if (GET_CODE (x) == SET) 8425 return ia64_spec_check_src_p (SET_SRC (x)); 8426 return 0; 8427 } 8428 8429 /* Return nonzero, if SRC belongs to recovery check. */ 8430 static int 8431 ia64_spec_check_src_p (rtx src) 8432 { 8433 if (GET_CODE (src) == IF_THEN_ELSE) 8434 { 8435 rtx t; 8436 8437 t = XEXP (src, 0); 8438 if (GET_CODE (t) == NE) 8439 { 8440 t = XEXP (t, 0); 8441 8442 if (GET_CODE (t) == UNSPEC) 8443 { 8444 int code; 8445 8446 code = XINT (t, 1); 8447 8448 if (code == UNSPEC_LDCCLR 8449 || code == UNSPEC_LDCNC 8450 || code == UNSPEC_CHKACLR 8451 || code == UNSPEC_CHKANC 8452 || code == UNSPEC_CHKS) 8453 { 8454 gcc_assert (code != 0); 8455 return code; 8456 } 8457 } 8458 } 8459 } 8460 return 0; 8461 } 8462 8463 8464 /* The following page contains abstract data `bundle states' which are 8465 used for bundling insns (inserting nops and template generation). */ 8466 8467 /* The following describes state of insn bundling. */ 8468 8469 struct bundle_state 8470 { 8471 /* Unique bundle state number to identify them in the debugging 8472 output */ 8473 int unique_num; 8474 rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */ 8475 /* number nops before and after the insn */ 8476 short before_nops_num, after_nops_num; 8477 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st 8478 insn */ 8479 int cost; /* cost of the state in cycles */ 8480 int accumulated_insns_num; /* number of all previous insns including 8481 nops. L is considered as 2 insns */ 8482 int branch_deviation; /* deviation of previous branches from 3rd slots */ 8483 int middle_bundle_stops; /* number of stop bits in the middle of bundles */ 8484 struct bundle_state *next; /* next state with the same insn_num */ 8485 struct bundle_state *originator; /* originator (previous insn state) */ 8486 /* All bundle states are in the following chain. */ 8487 struct bundle_state *allocated_states_chain; 8488 /* The DFA State after issuing the insn and the nops. */ 8489 state_t dfa_state; 8490 }; 8491 8492 /* The following is map insn number to the corresponding bundle state. */ 8493 8494 static struct bundle_state **index_to_bundle_states; 8495 8496 /* The unique number of next bundle state. 
*/ 8497 8498 static int bundle_states_num; 8499 8500 /* All allocated bundle states are in the following chain. */ 8501 8502 static struct bundle_state *allocated_bundle_states_chain; 8503 8504 /* All allocated but not used bundle states are in the following 8505 chain. */ 8506 8507 static struct bundle_state *free_bundle_state_chain; 8508 8509 8510 /* The following function returns a free bundle state. */ 8511 8512 static struct bundle_state * 8513 get_free_bundle_state (void) 8514 { 8515 struct bundle_state *result; 8516 8517 if (free_bundle_state_chain != NULL) 8518 { 8519 result = free_bundle_state_chain; 8520 free_bundle_state_chain = result->next; 8521 } 8522 else 8523 { 8524 result = XNEW (struct bundle_state); 8525 result->dfa_state = xmalloc (dfa_state_size); 8526 result->allocated_states_chain = allocated_bundle_states_chain; 8527 allocated_bundle_states_chain = result; 8528 } 8529 result->unique_num = bundle_states_num++; 8530 return result; 8531 8532 } 8533 8534 /* The following function frees given bundle state. */ 8535 8536 static void 8537 free_bundle_state (struct bundle_state *state) 8538 { 8539 state->next = free_bundle_state_chain; 8540 free_bundle_state_chain = state; 8541 } 8542 8543 /* Start work with abstract data `bundle states'. */ 8544 8545 static void 8546 initiate_bundle_states (void) 8547 { 8548 bundle_states_num = 0; 8549 free_bundle_state_chain = NULL; 8550 allocated_bundle_states_chain = NULL; 8551 } 8552 8553 /* Finish work with abstract data `bundle states'. */ 8554 8555 static void 8556 finish_bundle_states (void) 8557 { 8558 struct bundle_state *curr_state, *next_state; 8559 8560 for (curr_state = allocated_bundle_states_chain; 8561 curr_state != NULL; 8562 curr_state = next_state) 8563 { 8564 next_state = curr_state->allocated_states_chain; 8565 free (curr_state->dfa_state); 8566 free (curr_state); 8567 } 8568 } 8569 8570 /* Hashtable helpers. */ 8571 8572 struct bundle_state_hasher : nofree_ptr_hash <bundle_state> 8573 { 8574 static inline hashval_t hash (const bundle_state *); 8575 static inline bool equal (const bundle_state *, const bundle_state *); 8576 }; 8577 8578 /* The function returns hash of BUNDLE_STATE. */ 8579 8580 inline hashval_t 8581 bundle_state_hasher::hash (const bundle_state *state) 8582 { 8583 unsigned result, i; 8584 8585 for (result = i = 0; i < dfa_state_size; i++) 8586 result += (((unsigned char *) state->dfa_state) [i] 8587 << ((i % CHAR_BIT) * 3 + CHAR_BIT)); 8588 return result + state->insn_num; 8589 } 8590 8591 /* The function returns nonzero if the bundle state keys are equal. */ 8592 8593 inline bool 8594 bundle_state_hasher::equal (const bundle_state *state1, 8595 const bundle_state *state2) 8596 { 8597 return (state1->insn_num == state2->insn_num 8598 && memcmp (state1->dfa_state, state2->dfa_state, 8599 dfa_state_size) == 0); 8600 } 8601 8602 /* Hash table of the bundle states. The key is dfa_state and insn_num 8603 of the bundle states. */ 8604 8605 static hash_table<bundle_state_hasher> *bundle_state_table; 8606 8607 /* The function inserts the BUNDLE_STATE into the hash table. The 8608 function returns nonzero if the bundle has been inserted into the 8609 table. The table contains the best bundle state with given key. 
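When an equivalent state (same insn_num and DFA state) is already present, the two are compared by cost, then accumulated_insns_num, then branch_deviation, then middle_bundle_stops, and the better one is kept.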
*/ 8610 8611 static int 8612 insert_bundle_state (struct bundle_state *bundle_state) 8613 { 8614 struct bundle_state **entry_ptr; 8615 8616 entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT); 8617 if (*entry_ptr == NULL) 8618 { 8619 bundle_state->next = index_to_bundle_states [bundle_state->insn_num]; 8620 index_to_bundle_states [bundle_state->insn_num] = bundle_state; 8621 *entry_ptr = bundle_state; 8622 return TRUE; 8623 } 8624 else if (bundle_state->cost < (*entry_ptr)->cost 8625 || (bundle_state->cost == (*entry_ptr)->cost 8626 && ((*entry_ptr)->accumulated_insns_num 8627 > bundle_state->accumulated_insns_num 8628 || ((*entry_ptr)->accumulated_insns_num 8629 == bundle_state->accumulated_insns_num 8630 && ((*entry_ptr)->branch_deviation 8631 > bundle_state->branch_deviation 8632 || ((*entry_ptr)->branch_deviation 8633 == bundle_state->branch_deviation 8634 && (*entry_ptr)->middle_bundle_stops 8635 > bundle_state->middle_bundle_stops)))))) 8636 8637 { 8638 struct bundle_state temp; 8639 8640 temp = **entry_ptr; 8641 **entry_ptr = *bundle_state; 8642 (*entry_ptr)->next = temp.next; 8643 *bundle_state = temp; 8644 } 8645 return FALSE; 8646 } 8647 8648 /* Start work with the hash table. */ 8649 8650 static void 8651 initiate_bundle_state_table (void) 8652 { 8653 bundle_state_table = new hash_table<bundle_state_hasher> (50); 8654 } 8655 8656 /* Finish work with the hash table. */ 8657 8658 static void 8659 finish_bundle_state_table (void) 8660 { 8661 delete bundle_state_table; 8662 bundle_state_table = NULL; 8663 } 8664 8665 8666 8667 /* The following variable is a insn `nop' used to check bundle states 8668 with different number of inserted nops. */ 8669 8670 static rtx_insn *ia64_nop; 8671 8672 /* The following function tries to issue NOPS_NUM nops for the current 8673 state without advancing processor cycle. If it failed, the 8674 function returns FALSE and frees the current state. */ 8675 8676 static int 8677 try_issue_nops (struct bundle_state *curr_state, int nops_num) 8678 { 8679 int i; 8680 8681 for (i = 0; i < nops_num; i++) 8682 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0) 8683 { 8684 free_bundle_state (curr_state); 8685 return FALSE; 8686 } 8687 return TRUE; 8688 } 8689 8690 /* The following function tries to issue INSN for the current 8691 state without advancing processor cycle. If it failed, the 8692 function returns FALSE and frees the current state. */ 8693 8694 static int 8695 try_issue_insn (struct bundle_state *curr_state, rtx insn) 8696 { 8697 if (insn && state_transition (curr_state->dfa_state, insn) >= 0) 8698 { 8699 free_bundle_state (curr_state); 8700 return FALSE; 8701 } 8702 return TRUE; 8703 } 8704 8705 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN 8706 starting with ORIGINATOR without advancing processor cycle. If 8707 TRY_BUNDLE_END_P is TRUE, the function also/only (if 8708 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle. 8709 If it was successful, the function creates new bundle state and 8710 insert into the hash table and into `index_to_bundle_states'. 
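Nothing is recorded if the nops or the insn do not fit the DFA state, or if a better equivalent state is already present in the hash table.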
*/ 8711 8712 static void 8713 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num, 8714 rtx_insn *insn, int try_bundle_end_p, 8715 int only_bundle_end_p) 8716 { 8717 struct bundle_state *curr_state; 8718 8719 curr_state = get_free_bundle_state (); 8720 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size); 8721 curr_state->insn = insn; 8722 curr_state->insn_num = originator->insn_num + 1; 8723 curr_state->cost = originator->cost; 8724 curr_state->originator = originator; 8725 curr_state->before_nops_num = before_nops_num; 8726 curr_state->after_nops_num = 0; 8727 curr_state->accumulated_insns_num 8728 = originator->accumulated_insns_num + before_nops_num; 8729 curr_state->branch_deviation = originator->branch_deviation; 8730 curr_state->middle_bundle_stops = originator->middle_bundle_stops; 8731 gcc_assert (insn); 8732 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier) 8733 { 8734 gcc_assert (GET_MODE (insn) != TImode); 8735 if (!try_issue_nops (curr_state, before_nops_num)) 8736 return; 8737 if (!try_issue_insn (curr_state, insn)) 8738 return; 8739 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size); 8740 if (curr_state->accumulated_insns_num % 3 != 0) 8741 curr_state->middle_bundle_stops++; 8742 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0 8743 && curr_state->accumulated_insns_num % 3 != 0) 8744 { 8745 free_bundle_state (curr_state); 8746 return; 8747 } 8748 } 8749 else if (GET_MODE (insn) != TImode) 8750 { 8751 if (!try_issue_nops (curr_state, before_nops_num)) 8752 return; 8753 if (!try_issue_insn (curr_state, insn)) 8754 return; 8755 curr_state->accumulated_insns_num++; 8756 gcc_assert (!unknown_for_bundling_p (insn)); 8757 8758 if (ia64_safe_type (insn) == TYPE_L) 8759 curr_state->accumulated_insns_num++; 8760 } 8761 else 8762 { 8763 /* If this is an insn that must be first in a group, then don't allow 8764 nops to be emitted before it. Currently, alloc is the only such 8765 supported instruction. */ 8766 /* ??? The bundling automatons should handle this for us, but they do 8767 not yet have support for the first_insn attribute. */ 8768 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES) 8769 { 8770 free_bundle_state (curr_state); 8771 return; 8772 } 8773 8774 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn); 8775 state_transition (curr_state->dfa_state, NULL); 8776 curr_state->cost++; 8777 if (!try_issue_nops (curr_state, before_nops_num)) 8778 return; 8779 if (!try_issue_insn (curr_state, insn)) 8780 return; 8781 curr_state->accumulated_insns_num++; 8782 if (unknown_for_bundling_p (insn)) 8783 { 8784 /* Finish bundle containing asm insn. 
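Pad with nops up to the next bundle boundary (e.g. two nops when accumulated_insns_num % 3 == 1), since an asm insn is assumed to occupy a bundle of its own.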
*/
8785 curr_state->after_nops_num
8786 = 3 - curr_state->accumulated_insns_num % 3;
8787 curr_state->accumulated_insns_num
8788 += 3 - curr_state->accumulated_insns_num % 3;
8789 }
8790 else if (ia64_safe_type (insn) == TYPE_L)
8791 curr_state->accumulated_insns_num++;
8792 }
8793 if (ia64_safe_type (insn) == TYPE_B)
8794 curr_state->branch_deviation
8795 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8796 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8797 {
8798 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8799 {
8800 state_t dfa_state;
8801 struct bundle_state *curr_state1;
8802 struct bundle_state *allocated_states_chain;
8803
8804 curr_state1 = get_free_bundle_state ();
8805 dfa_state = curr_state1->dfa_state;
8806 allocated_states_chain = curr_state1->allocated_states_chain;
8807 *curr_state1 = *curr_state;
8808 curr_state1->dfa_state = dfa_state;
8809 curr_state1->allocated_states_chain = allocated_states_chain;
8810 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8811 dfa_state_size);
8812 curr_state = curr_state1;
8813 }
8814 if (!try_issue_nops (curr_state,
8815 3 - curr_state->accumulated_insns_num % 3))
8816 return;
8817 curr_state->after_nops_num
8818 = 3 - curr_state->accumulated_insns_num % 3;
8819 curr_state->accumulated_insns_num
8820 += 3 - curr_state->accumulated_insns_num % 3;
8821 }
8822 if (!insert_bundle_state (curr_state))
8823 free_bundle_state (curr_state);
8824 return;
8825 }
8826
8827 /* The following function returns the position in the two-bundle window
8828 for the given STATE. */
8829
8830 static int
8831 get_max_pos (state_t state)
8832 {
8833 if (cpu_unit_reservation_p (state, pos_6))
8834 return 6;
8835 else if (cpu_unit_reservation_p (state, pos_5))
8836 return 5;
8837 else if (cpu_unit_reservation_p (state, pos_4))
8838 return 4;
8839 else if (cpu_unit_reservation_p (state, pos_3))
8840 return 3;
8841 else if (cpu_unit_reservation_p (state, pos_2))
8842 return 2;
8843 else if (cpu_unit_reservation_p (state, pos_1))
8844 return 1;
8845 else
8846 return 0;
8847 }
8848
8849 /* The function returns the code of a possible template for the given
8850 position and state. It should be called only with a position value
8851 of 3 or 6. We avoid generating F NOPs by putting
8852 templates containing F insns at the end of the template search
8853 because of an undocumented anomaly in McKinley-derived cores which can
8854 cause stalls if an F-unit insn (including a NOP) is issued within a
8855 six-cycle window after reading certain application registers (such
8856 as ar.bsp). Furthermore, power considerations also argue against
8857 the use of F-unit instructions unless they're really needed.
*/ 8858 8859 static int 8860 get_template (state_t state, int pos) 8861 { 8862 switch (pos) 8863 { 8864 case 3: 8865 if (cpu_unit_reservation_p (state, _0mmi_)) 8866 return 1; 8867 else if (cpu_unit_reservation_p (state, _0mii_)) 8868 return 0; 8869 else if (cpu_unit_reservation_p (state, _0mmb_)) 8870 return 7; 8871 else if (cpu_unit_reservation_p (state, _0mib_)) 8872 return 6; 8873 else if (cpu_unit_reservation_p (state, _0mbb_)) 8874 return 5; 8875 else if (cpu_unit_reservation_p (state, _0bbb_)) 8876 return 4; 8877 else if (cpu_unit_reservation_p (state, _0mmf_)) 8878 return 3; 8879 else if (cpu_unit_reservation_p (state, _0mfi_)) 8880 return 2; 8881 else if (cpu_unit_reservation_p (state, _0mfb_)) 8882 return 8; 8883 else if (cpu_unit_reservation_p (state, _0mlx_)) 8884 return 9; 8885 else 8886 gcc_unreachable (); 8887 case 6: 8888 if (cpu_unit_reservation_p (state, _1mmi_)) 8889 return 1; 8890 else if (cpu_unit_reservation_p (state, _1mii_)) 8891 return 0; 8892 else if (cpu_unit_reservation_p (state, _1mmb_)) 8893 return 7; 8894 else if (cpu_unit_reservation_p (state, _1mib_)) 8895 return 6; 8896 else if (cpu_unit_reservation_p (state, _1mbb_)) 8897 return 5; 8898 else if (cpu_unit_reservation_p (state, _1bbb_)) 8899 return 4; 8900 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_)) 8901 return 3; 8902 else if (cpu_unit_reservation_p (state, _1mfi_)) 8903 return 2; 8904 else if (cpu_unit_reservation_p (state, _1mfb_)) 8905 return 8; 8906 else if (cpu_unit_reservation_p (state, _1mlx_)) 8907 return 9; 8908 else 8909 gcc_unreachable (); 8910 default: 8911 gcc_unreachable (); 8912 } 8913 } 8914 8915 /* True when INSN is important for bundling. */ 8916 8917 static bool 8918 important_for_bundling_p (rtx_insn *insn) 8919 { 8920 return (INSN_P (insn) 8921 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE 8922 && GET_CODE (PATTERN (insn)) != USE 8923 && GET_CODE (PATTERN (insn)) != CLOBBER); 8924 } 8925 8926 /* The following function returns an insn important for insn bundling 8927 followed by INSN and before TAIL. */ 8928 8929 static rtx_insn * 8930 get_next_important_insn (rtx_insn *insn, rtx_insn *tail) 8931 { 8932 for (; insn && insn != tail; insn = NEXT_INSN (insn)) 8933 if (important_for_bundling_p (insn)) 8934 return insn; 8935 return NULL; 8936 } 8937 8938 /* True when INSN is unknown, but important, for bundling. */ 8939 8940 static bool 8941 unknown_for_bundling_p (rtx_insn *insn) 8942 { 8943 return (INSN_P (insn) 8944 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN 8945 && GET_CODE (PATTERN (insn)) != USE 8946 && GET_CODE (PATTERN (insn)) != CLOBBER); 8947 } 8948 8949 /* Add a bundle selector TEMPLATE0 before INSN. */ 8950 8951 static void 8952 ia64_add_bundle_selector_before (int template0, rtx_insn *insn) 8953 { 8954 rtx b = gen_bundle_selector (GEN_INT (template0)); 8955 8956 ia64_emit_insn_before (b, insn); 8957 #if NR_BUNDLES == 10 8958 if ((template0 == 4 || template0 == 5) 8959 && ia64_except_unwind_info (&global_options) == UI_TARGET) 8960 { 8961 int i; 8962 rtx note = NULL_RTX; 8963 8964 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the 8965 first or second slot. If it is and has REG_EH_NOTE set, copy it 8966 to following nops, as br.call sets rp to the address of following 8967 bundle and therefore an EH region end must be on a bundle 8968 boundary. 
*/
8969 insn = PREV_INSN (insn);
8970 for (i = 0; i < 3; i++)
8971 {
8972 do
8973 insn = next_active_insn (insn);
8974 while (NONJUMP_INSN_P (insn)
8975 && get_attr_empty (insn) == EMPTY_YES);
8976 if (CALL_P (insn))
8977 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8978 else if (note)
8979 {
8980 int code;
8981
8982 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8983 || code == CODE_FOR_nop_b);
8984 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8985 note = NULL_RTX;
8986 else
8987 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8988 }
8989 }
8990 }
8991 #endif
8992 }
8993
8994 /* The following function does insn bundling. Bundling means
8995 inserting templates and nop insns to fit insn groups into permitted
8996 templates. Instruction scheduling uses an NDFA (non-deterministic
8997 finite automaton) encoding information about the templates and the
8998 inserted nops. Nondeterminism of the automaton allows following
8999 all possible insn sequences very quickly.
9000
9001 Unfortunately it is not possible to get information about inserted
9002 nop insns and used templates from the automaton states. The
9003 automaton only says that we can issue an insn, possibly inserting
9004 some nops before it and using some template. Therefore insn
9005 bundling in this function is implemented using a DFA
9006 (deterministic finite automaton). We follow all possible insn
9007 sequences by inserting 0-2 nops (which is what the NDFA describes
9008 for insn scheduling) before/after each insn being bundled. We know
9009 the start of a simulated processor cycle from insn scheduling (an
9010 insn starting a new cycle has TImode).
9011
9012 A simple implementation of insn bundling would create an enormous
9013 number of possible insn sequences satisfying the information about
9014 new cycle ticks taken from the insn scheduling. To make the
9015 algorithm practical we use dynamic programming. Each decision
9016 (about inserting nops and implicitly about previous decisions) is
9017 described by structure bundle_state (see above). If we generate
9018 the same bundle state (the key is the automaton state after issuing
9019 the insns and nops for it), we reuse the already generated one. As
9020 a consequence we reject decisions which cannot improve the solution
9021 and reduce the memory needed by the algorithm.
9022
9023 When we reach the end of the EBB (extended basic block), we choose
9024 the best sequence and then, moving back through the EBB, insert
9025 templates for the best alternative. The templates are obtained by
9026 querying the automaton state for each insn in the chosen bundle states.
9027
9028 So the algorithm makes two (forward and backward) passes through
9029 the EBB. */
9030
9031 static void
9032 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9033 {
9034 struct bundle_state *curr_state, *next_state, *best_state;
9035 rtx_insn *insn, *next_insn;
9036 int insn_num;
9037 int i, bundle_end_p, only_bundle_end_p, asm_p;
9038 int pos = 0, max_pos, template0, template1;
9039 rtx_insn *b;
9040 enum attr_type type;
9041
9042 insn_num = 0;
9043 /* Count insns in the EBB. */
9044 for (insn = NEXT_INSN (prev_head_insn);
9045 insn && insn != tail;
9046 insn = NEXT_INSN (insn))
9047 if (INSN_P (insn))
9048 insn_num++;
9049 if (insn_num == 0)
9050 return;
9051 bundling_p = 1;
9052 dfa_clean_insn_cache ();
9053 initiate_bundle_state_table ();
9054 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9055 /* First (forward) pass -- generation of bundle states.
*/ 9056 curr_state = get_free_bundle_state (); 9057 curr_state->insn = NULL; 9058 curr_state->before_nops_num = 0; 9059 curr_state->after_nops_num = 0; 9060 curr_state->insn_num = 0; 9061 curr_state->cost = 0; 9062 curr_state->accumulated_insns_num = 0; 9063 curr_state->branch_deviation = 0; 9064 curr_state->middle_bundle_stops = 0; 9065 curr_state->next = NULL; 9066 curr_state->originator = NULL; 9067 state_reset (curr_state->dfa_state); 9068 index_to_bundle_states [0] = curr_state; 9069 insn_num = 0; 9070 /* Shift cycle mark if it is put on insn which could be ignored. */ 9071 for (insn = NEXT_INSN (prev_head_insn); 9072 insn != tail; 9073 insn = NEXT_INSN (insn)) 9074 if (INSN_P (insn) 9075 && !important_for_bundling_p (insn) 9076 && GET_MODE (insn) == TImode) 9077 { 9078 PUT_MODE (insn, VOIDmode); 9079 for (next_insn = NEXT_INSN (insn); 9080 next_insn != tail; 9081 next_insn = NEXT_INSN (next_insn)) 9082 if (important_for_bundling_p (next_insn) 9083 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier) 9084 { 9085 PUT_MODE (next_insn, TImode); 9086 break; 9087 } 9088 } 9089 /* Forward pass: generation of bundle states. */ 9090 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail); 9091 insn != NULL_RTX; 9092 insn = next_insn) 9093 { 9094 gcc_assert (important_for_bundling_p (insn)); 9095 type = ia64_safe_type (insn); 9096 next_insn = get_next_important_insn (NEXT_INSN (insn), tail); 9097 insn_num++; 9098 index_to_bundle_states [insn_num] = NULL; 9099 for (curr_state = index_to_bundle_states [insn_num - 1]; 9100 curr_state != NULL; 9101 curr_state = next_state) 9102 { 9103 pos = curr_state->accumulated_insns_num % 3; 9104 next_state = curr_state->next; 9105 /* We must fill up the current bundle in order to start a 9106 subsequent asm insn in a new bundle. Asm insn is always 9107 placed in a separate bundle. */ 9108 only_bundle_end_p 9109 = (next_insn != NULL_RTX 9110 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier 9111 && unknown_for_bundling_p (next_insn)); 9112 /* We may fill up the current bundle if it is the cycle end 9113 without a group barrier. */ 9114 bundle_end_p 9115 = (only_bundle_end_p || next_insn == NULL_RTX 9116 || (GET_MODE (next_insn) == TImode 9117 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier)); 9118 if (type == TYPE_F || type == TYPE_B || type == TYPE_L 9119 || type == TYPE_S) 9120 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p, 9121 only_bundle_end_p); 9122 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p, 9123 only_bundle_end_p); 9124 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p, 9125 only_bundle_end_p); 9126 } 9127 gcc_assert (index_to_bundle_states [insn_num]); 9128 for (curr_state = index_to_bundle_states [insn_num]; 9129 curr_state != NULL; 9130 curr_state = curr_state->next) 9131 if (verbose >= 2 && dump) 9132 { 9133 /* This structure is taken from generated code of the 9134 pipeline hazard recognizer (see file insn-attrtab.c). 9135 Please don't forget to change the structure if a new 9136 automaton is added to .md file. */ 9137 struct DFA_chip 9138 { 9139 unsigned short one_automaton_state; 9140 unsigned short oneb_automaton_state; 9141 unsigned short two_automaton_state; 9142 unsigned short twob_automaton_state; 9143 }; 9144 9145 fprintf 9146 (dump, 9147 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n", 9148 curr_state->unique_num, 9149 (curr_state->originator == NULL 9150 ? 
-1 : curr_state->originator->unique_num), 9151 curr_state->cost, 9152 curr_state->before_nops_num, curr_state->after_nops_num, 9153 curr_state->accumulated_insns_num, curr_state->branch_deviation, 9154 curr_state->middle_bundle_stops, 9155 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state, 9156 INSN_UID (insn)); 9157 } 9158 } 9159 9160 /* We should find a solution because the 2nd insn scheduling has 9161 found one. */ 9162 gcc_assert (index_to_bundle_states [insn_num]); 9163 /* Find a state corresponding to the best insn sequence. */ 9164 best_state = NULL; 9165 for (curr_state = index_to_bundle_states [insn_num]; 9166 curr_state != NULL; 9167 curr_state = curr_state->next) 9168 /* We are just looking at the states with fully filled up last 9169 bundle. The first we prefer insn sequences with minimal cost 9170 then with minimal inserted nops and finally with branch insns 9171 placed in the 3rd slots. */ 9172 if (curr_state->accumulated_insns_num % 3 == 0 9173 && (best_state == NULL || best_state->cost > curr_state->cost 9174 || (best_state->cost == curr_state->cost 9175 && (curr_state->accumulated_insns_num 9176 < best_state->accumulated_insns_num 9177 || (curr_state->accumulated_insns_num 9178 == best_state->accumulated_insns_num 9179 && (curr_state->branch_deviation 9180 < best_state->branch_deviation 9181 || (curr_state->branch_deviation 9182 == best_state->branch_deviation 9183 && curr_state->middle_bundle_stops 9184 < best_state->middle_bundle_stops))))))) 9185 best_state = curr_state; 9186 /* Second (backward) pass: adding nops and templates. */ 9187 gcc_assert (best_state); 9188 insn_num = best_state->before_nops_num; 9189 template0 = template1 = -1; 9190 for (curr_state = best_state; 9191 curr_state->originator != NULL; 9192 curr_state = curr_state->originator) 9193 { 9194 insn = curr_state->insn; 9195 asm_p = unknown_for_bundling_p (insn); 9196 insn_num++; 9197 if (verbose >= 2 && dump) 9198 { 9199 struct DFA_chip 9200 { 9201 unsigned short one_automaton_state; 9202 unsigned short oneb_automaton_state; 9203 unsigned short two_automaton_state; 9204 unsigned short twob_automaton_state; 9205 }; 9206 9207 fprintf 9208 (dump, 9209 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n", 9210 curr_state->unique_num, 9211 (curr_state->originator == NULL 9212 ? -1 : curr_state->originator->unique_num), 9213 curr_state->cost, 9214 curr_state->before_nops_num, curr_state->after_nops_num, 9215 curr_state->accumulated_insns_num, curr_state->branch_deviation, 9216 curr_state->middle_bundle_stops, 9217 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state, 9218 INSN_UID (insn)); 9219 } 9220 /* Find the position in the current bundle window. The window can 9221 contain at most two bundles. Two bundle window means that 9222 the processor will make two bundle rotation. */ 9223 max_pos = get_max_pos (curr_state->dfa_state); 9224 if (max_pos == 6 9225 /* The following (negative template number) means that the 9226 processor did one bundle rotation. */ 9227 || (max_pos == 3 && template0 < 0)) 9228 { 9229 /* We are at the end of the window -- find template(s) for 9230 its bundle(s). */ 9231 pos = max_pos; 9232 if (max_pos == 3) 9233 template0 = get_template (curr_state->dfa_state, 3); 9234 else 9235 { 9236 template1 = get_template (curr_state->dfa_state, 3); 9237 template0 = get_template (curr_state->dfa_state, 6); 9238 } 9239 } 9240 if (max_pos > 3 && template1 < 0) 9241 /* It may happen when we have the stop inside a bundle. 
*/
9242 {
9243 gcc_assert (pos <= 3);
9244 template1 = get_template (curr_state->dfa_state, 3);
9245 pos += 3;
9246 }
9247 if (!asm_p)
9248 /* Emit nops after the current insn. */
9249 for (i = 0; i < curr_state->after_nops_num; i++)
9250 {
9251 rtx nop_pat = gen_nop ();
9252 rtx_insn *nop = emit_insn_after (nop_pat, insn);
9253 pos--;
9254 gcc_assert (pos >= 0);
9255 if (pos % 3 == 0)
9256 {
9257 /* We are at the start of a bundle: emit the template
9258 (it should be defined). */
9259 gcc_assert (template0 >= 0);
9260 ia64_add_bundle_selector_before (template0, nop);
9261 /* If we have a two-bundle window, we make one bundle
9262 rotation. Otherwise template0 will be undefined
9263 (negative value). */
9264 template0 = template1;
9265 template1 = -1;
9266 }
9267 }
9268 /* Move the position backward in the window. A group barrier has
9269 no slot. An asm insn takes a whole bundle. */
9270 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9271 && !unknown_for_bundling_p (insn))
9272 pos--;
9273 /* Long insn takes 2 slots. */
9274 if (ia64_safe_type (insn) == TYPE_L)
9275 pos--;
9276 gcc_assert (pos >= 0);
9277 if (pos % 3 == 0
9278 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9279 && !unknown_for_bundling_p (insn))
9280 {
9281 /* The current insn is at the bundle start: emit the
9282 template. */
9283 gcc_assert (template0 >= 0);
9284 ia64_add_bundle_selector_before (template0, insn);
9285 b = PREV_INSN (insn);
9286 insn = b;
9287 /* See comment above in analogous place for emitting nops
9288 after the insn. */
9289 template0 = template1;
9290 template1 = -1;
9291 }
9292 /* Emit nops before the current insn. */
9293 for (i = 0; i < curr_state->before_nops_num; i++)
9294 {
9295 rtx nop_pat = gen_nop ();
9296 ia64_emit_insn_before (nop_pat, insn);
9297 rtx_insn *nop = PREV_INSN (insn);
9298 insn = nop;
9299 pos--;
9300 gcc_assert (pos >= 0);
9301 if (pos % 3 == 0)
9302 {
9303 /* See comment above in analogous place for emitting nops
9304 after the insn. */
9305 gcc_assert (template0 >= 0);
9306 ia64_add_bundle_selector_before (template0, insn);
9307 b = PREV_INSN (insn);
9308 insn = b;
9309 template0 = template1;
9310 template1 = -1;
9311 }
9312 }
9313 }
9314
9315 if (flag_checking)
9316 {
9317 /* Assert right calculation of middle_bundle_stops.
*/ 9318 int num = best_state->middle_bundle_stops; 9319 bool start_bundle = true, end_bundle = false; 9320 9321 for (insn = NEXT_INSN (prev_head_insn); 9322 insn && insn != tail; 9323 insn = NEXT_INSN (insn)) 9324 { 9325 if (!INSN_P (insn)) 9326 continue; 9327 if (recog_memoized (insn) == CODE_FOR_bundle_selector) 9328 start_bundle = true; 9329 else 9330 { 9331 rtx_insn *next_insn; 9332 9333 for (next_insn = NEXT_INSN (insn); 9334 next_insn && next_insn != tail; 9335 next_insn = NEXT_INSN (next_insn)) 9336 if (INSN_P (next_insn) 9337 && (ia64_safe_itanium_class (next_insn) 9338 != ITANIUM_CLASS_IGNORE 9339 || recog_memoized (next_insn) 9340 == CODE_FOR_bundle_selector) 9341 && GET_CODE (PATTERN (next_insn)) != USE 9342 && GET_CODE (PATTERN (next_insn)) != CLOBBER) 9343 break; 9344 9345 end_bundle = next_insn == NULL_RTX 9346 || next_insn == tail 9347 || (INSN_P (next_insn) 9348 && recog_memoized (next_insn) == CODE_FOR_bundle_selector); 9349 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier 9350 && !start_bundle && !end_bundle 9351 && next_insn 9352 && !unknown_for_bundling_p (next_insn)) 9353 num--; 9354 9355 start_bundle = false; 9356 } 9357 } 9358 9359 gcc_assert (num == 0); 9360 } 9361 9362 free (index_to_bundle_states); 9363 finish_bundle_state_table (); 9364 bundling_p = 0; 9365 dfa_clean_insn_cache (); 9366 } 9367 9368 /* The following function is called at the end of scheduling BB or 9369 EBB. After reload, it inserts stop bits and does insn bundling. */ 9370 9371 static void 9372 ia64_sched_finish (FILE *dump, int sched_verbose) 9373 { 9374 if (sched_verbose) 9375 fprintf (dump, "// Finishing schedule.\n"); 9376 if (!reload_completed) 9377 return; 9378 if (reload_completed) 9379 { 9380 final_emit_insn_group_barriers (dump); 9381 bundling (dump, sched_verbose, current_sched_info->prev_head, 9382 current_sched_info->next_tail); 9383 if (sched_verbose && dump) 9384 fprintf (dump, "// finishing %d-%d\n", 9385 INSN_UID (NEXT_INSN (current_sched_info->prev_head)), 9386 INSN_UID (PREV_INSN (current_sched_info->next_tail))); 9387 9388 return; 9389 } 9390 } 9391 9392 /* The following function inserts stop bits in scheduled BB or EBB. */ 9393 9394 static void 9395 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED) 9396 { 9397 rtx_insn *insn; 9398 int need_barrier_p = 0; 9399 int seen_good_insn = 0; 9400 9401 init_insn_group_barriers (); 9402 9403 for (insn = NEXT_INSN (current_sched_info->prev_head); 9404 insn != current_sched_info->next_tail; 9405 insn = NEXT_INSN (insn)) 9406 { 9407 if (BARRIER_P (insn)) 9408 { 9409 rtx_insn *last = prev_active_insn (insn); 9410 9411 if (! 
last) 9412 continue; 9413 if (JUMP_TABLE_DATA_P (last)) 9414 last = prev_active_insn (last); 9415 if (recog_memoized (last) != CODE_FOR_insn_group_barrier) 9416 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); 9417 9418 init_insn_group_barriers (); 9419 seen_good_insn = 0; 9420 need_barrier_p = 0; 9421 } 9422 else if (NONDEBUG_INSN_P (insn)) 9423 { 9424 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) 9425 { 9426 init_insn_group_barriers (); 9427 seen_good_insn = 0; 9428 need_barrier_p = 0; 9429 } 9430 else if (need_barrier_p || group_barrier_needed (insn) 9431 || (mflag_sched_stop_bits_after_every_cycle 9432 && GET_MODE (insn) == TImode 9433 && seen_good_insn)) 9434 { 9435 if (TARGET_EARLY_STOP_BITS) 9436 { 9437 rtx_insn *last; 9438 9439 for (last = insn; 9440 last != current_sched_info->prev_head; 9441 last = PREV_INSN (last)) 9442 if (INSN_P (last) && GET_MODE (last) == TImode 9443 && stops_p [INSN_UID (last)]) 9444 break; 9445 if (last == current_sched_info->prev_head) 9446 last = insn; 9447 last = prev_active_insn (last); 9448 if (last 9449 && recog_memoized (last) != CODE_FOR_insn_group_barrier) 9450 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), 9451 last); 9452 init_insn_group_barriers (); 9453 for (last = NEXT_INSN (last); 9454 last != insn; 9455 last = NEXT_INSN (last)) 9456 if (INSN_P (last)) 9457 { 9458 group_barrier_needed (last); 9459 if (recog_memoized (last) >= 0 9460 && important_for_bundling_p (last)) 9461 seen_good_insn = 1; 9462 } 9463 } 9464 else 9465 { 9466 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), 9467 insn); 9468 init_insn_group_barriers (); 9469 seen_good_insn = 0; 9470 } 9471 group_barrier_needed (insn); 9472 if (recog_memoized (insn) >= 0 9473 && important_for_bundling_p (insn)) 9474 seen_good_insn = 1; 9475 } 9476 else if (recog_memoized (insn) >= 0 9477 && important_for_bundling_p (insn)) 9478 seen_good_insn = 1; 9479 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn)); 9480 } 9481 } 9482 } 9483 9484 9485 9486 /* If the following function returns TRUE, we will use the DFA 9487 insn scheduler. */ 9488 9489 static int 9490 ia64_first_cycle_multipass_dfa_lookahead (void) 9491 { 9492 return (reload_completed ? 6 : 4); 9493 } 9494 9495 /* The following function initiates variable `dfa_pre_cycle_insn'. */ 9496 9497 static void 9498 ia64_init_dfa_pre_cycle_insn (void) 9499 { 9500 if (temp_dfa_state == NULL) 9501 { 9502 dfa_state_size = state_size (); 9503 temp_dfa_state = xmalloc (dfa_state_size); 9504 prev_cycle_state = xmalloc (dfa_state_size); 9505 } 9506 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ()); 9507 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX; 9508 recog_memoized (dfa_pre_cycle_insn); 9509 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3))); 9510 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX; 9511 recog_memoized (dfa_stop_insn); 9512 } 9513 9514 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN 9515 used by the DFA insn scheduler. */ 9516 9517 static rtx 9518 ia64_dfa_pre_cycle_insn (void) 9519 { 9520 return dfa_pre_cycle_insn; 9521 } 9522 9523 /* The following function returns TRUE if PRODUCER (of type ilog or 9524 ld) produces address for CONSUMER (of type st or stf). 
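As an illustration (not taken from the machine description): if the producer is an add setting r14 and the consumer is st8 [r14] = r15, the producer's destination register appears in the consumer's memory address, so the bypass applies.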
*/ 9525 9526 int 9527 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer) 9528 { 9529 rtx dest, reg, mem; 9530 9531 gcc_assert (producer && consumer); 9532 dest = ia64_single_set (producer); 9533 gcc_assert (dest); 9534 reg = SET_DEST (dest); 9535 gcc_assert (reg); 9536 if (GET_CODE (reg) == SUBREG) 9537 reg = SUBREG_REG (reg); 9538 gcc_assert (GET_CODE (reg) == REG); 9539 9540 dest = ia64_single_set (consumer); 9541 gcc_assert (dest); 9542 mem = SET_DEST (dest); 9543 gcc_assert (mem && GET_CODE (mem) == MEM); 9544 return reg_mentioned_p (reg, mem); 9545 } 9546 9547 /* The following function returns TRUE if PRODUCER (of type ilog or 9548 ld) produces address for CONSUMER (of type ld or fld). */ 9549 9550 int 9551 ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer) 9552 { 9553 rtx dest, src, reg, mem; 9554 9555 gcc_assert (producer && consumer); 9556 dest = ia64_single_set (producer); 9557 gcc_assert (dest); 9558 reg = SET_DEST (dest); 9559 gcc_assert (reg); 9560 if (GET_CODE (reg) == SUBREG) 9561 reg = SUBREG_REG (reg); 9562 gcc_assert (GET_CODE (reg) == REG); 9563 9564 src = ia64_single_set (consumer); 9565 gcc_assert (src); 9566 mem = SET_SRC (src); 9567 gcc_assert (mem); 9568 9569 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0) 9570 mem = XVECEXP (mem, 0, 0); 9571 else if (GET_CODE (mem) == IF_THEN_ELSE) 9572 /* ??? Is this bypass necessary for ld.c? */ 9573 { 9574 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR); 9575 mem = XEXP (mem, 1); 9576 } 9577 9578 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND) 9579 mem = XEXP (mem, 0); 9580 9581 if (GET_CODE (mem) == UNSPEC) 9582 { 9583 int c = XINT (mem, 1); 9584 9585 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A 9586 || c == UNSPEC_LDSA); 9587 mem = XVECEXP (mem, 0, 0); 9588 } 9589 9590 /* Note that LO_SUM is used for GOT loads. */ 9591 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM); 9592 9593 return reg_mentioned_p (reg, mem); 9594 } 9595 9596 /* The following function returns TRUE if INSN produces address for a 9597 load/store insn. We will place such insns into M slot because it 9598 decreases its latency time. */ 9599 9600 int 9601 ia64_produce_address_p (rtx insn) 9602 { 9603 return insn->call; 9604 } 9605 9606 9607 /* Emit pseudo-ops for the assembler to describe predicate relations. 9608 At present this assumes that we only consider predicate pairs to 9609 be mutex, and that the assembler can deduce proper values from 9610 straight-line code. */ 9611 9612 static void 9613 emit_predicate_relation_info (void) 9614 { 9615 basic_block bb; 9616 9617 FOR_EACH_BB_REVERSE_FN (bb, cfun) 9618 { 9619 int r; 9620 rtx_insn *head = BB_HEAD (bb); 9621 9622 /* We only need such notes at code labels. */ 9623 if (! LABEL_P (head)) 9624 continue; 9625 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head))) 9626 head = NEXT_INSN (head); 9627 9628 /* Skip p0, which may be thought to be live due to (reg:DI p0) 9629 grabbing the entire block of predicate registers. */ 9630 for (r = PR_REG (2); r < PR_REG (64); r += 2) 9631 if (REGNO_REG_SET_P (df_get_live_in (bb), r)) 9632 { 9633 rtx p = gen_rtx_REG (BImode, r); 9634 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head); 9635 if (head == BB_END (bb)) 9636 BB_END (bb) = n; 9637 head = n; 9638 } 9639 } 9640 9641 /* Look for conditional calls that do not return, and protect predicate 9642 relations around them. 
Otherwise the assembler will assume the call 9643 returns, and complain about uses of call-clobbered predicates after 9644 the call. */ 9645 FOR_EACH_BB_REVERSE_FN (bb, cfun) 9646 { 9647 rtx_insn *insn = BB_HEAD (bb); 9648 9649 while (1) 9650 { 9651 if (CALL_P (insn) 9652 && GET_CODE (PATTERN (insn)) == COND_EXEC 9653 && find_reg_note (insn, REG_NORETURN, NULL_RTX)) 9654 { 9655 rtx_insn *b = 9656 emit_insn_before (gen_safe_across_calls_all (), insn); 9657 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn); 9658 if (BB_HEAD (bb) == insn) 9659 BB_HEAD (bb) = b; 9660 if (BB_END (bb) == insn) 9661 BB_END (bb) = a; 9662 } 9663 9664 if (insn == BB_END (bb)) 9665 break; 9666 insn = NEXT_INSN (insn); 9667 } 9668 } 9669 } 9670 9671 /* Perform machine dependent operations on the rtl chain INSNS. */ 9672 9673 static void 9674 ia64_reorg (void) 9675 { 9676 /* We are freeing block_for_insn in the toplev to keep compatibility 9677 with old MDEP_REORGS that are not CFG based. Recompute it now. */ 9678 compute_bb_for_insn (); 9679 9680 /* If optimizing, we'll have split before scheduling. */ 9681 if (optimize == 0) 9682 split_all_insns (); 9683 9684 if (optimize && flag_schedule_insns_after_reload 9685 && dbg_cnt (ia64_sched2)) 9686 { 9687 basic_block bb; 9688 timevar_push (TV_SCHED2); 9689 ia64_final_schedule = 1; 9690 9691 /* We can't let modulo-sched prevent us from scheduling any bbs, 9692 since we need the final schedule to produce bundle information. */ 9693 FOR_EACH_BB_FN (bb, cfun) 9694 bb->flags &= ~BB_DISABLE_SCHEDULE; 9695 9696 initiate_bundle_states (); 9697 ia64_nop = make_insn_raw (gen_nop ()); 9698 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX; 9699 recog_memoized (ia64_nop); 9700 clocks_length = get_max_uid () + 1; 9701 stops_p = XCNEWVEC (char, clocks_length); 9702 9703 if (ia64_tune == PROCESSOR_ITANIUM2) 9704 { 9705 pos_1 = get_cpu_unit_code ("2_1"); 9706 pos_2 = get_cpu_unit_code ("2_2"); 9707 pos_3 = get_cpu_unit_code ("2_3"); 9708 pos_4 = get_cpu_unit_code ("2_4"); 9709 pos_5 = get_cpu_unit_code ("2_5"); 9710 pos_6 = get_cpu_unit_code ("2_6"); 9711 _0mii_ = get_cpu_unit_code ("2b_0mii."); 9712 _0mmi_ = get_cpu_unit_code ("2b_0mmi."); 9713 _0mfi_ = get_cpu_unit_code ("2b_0mfi."); 9714 _0mmf_ = get_cpu_unit_code ("2b_0mmf."); 9715 _0bbb_ = get_cpu_unit_code ("2b_0bbb."); 9716 _0mbb_ = get_cpu_unit_code ("2b_0mbb."); 9717 _0mib_ = get_cpu_unit_code ("2b_0mib."); 9718 _0mmb_ = get_cpu_unit_code ("2b_0mmb."); 9719 _0mfb_ = get_cpu_unit_code ("2b_0mfb."); 9720 _0mlx_ = get_cpu_unit_code ("2b_0mlx."); 9721 _1mii_ = get_cpu_unit_code ("2b_1mii."); 9722 _1mmi_ = get_cpu_unit_code ("2b_1mmi."); 9723 _1mfi_ = get_cpu_unit_code ("2b_1mfi."); 9724 _1mmf_ = get_cpu_unit_code ("2b_1mmf."); 9725 _1bbb_ = get_cpu_unit_code ("2b_1bbb."); 9726 _1mbb_ = get_cpu_unit_code ("2b_1mbb."); 9727 _1mib_ = get_cpu_unit_code ("2b_1mib."); 9728 _1mmb_ = get_cpu_unit_code ("2b_1mmb."); 9729 _1mfb_ = get_cpu_unit_code ("2b_1mfb."); 9730 _1mlx_ = get_cpu_unit_code ("2b_1mlx."); 9731 } 9732 else 9733 { 9734 pos_1 = get_cpu_unit_code ("1_1"); 9735 pos_2 = get_cpu_unit_code ("1_2"); 9736 pos_3 = get_cpu_unit_code ("1_3"); 9737 pos_4 = get_cpu_unit_code ("1_4"); 9738 pos_5 = get_cpu_unit_code ("1_5"); 9739 pos_6 = get_cpu_unit_code ("1_6"); 9740 _0mii_ = get_cpu_unit_code ("1b_0mii."); 9741 _0mmi_ = get_cpu_unit_code ("1b_0mmi."); 9742 _0mfi_ = get_cpu_unit_code ("1b_0mfi."); 9743 _0mmf_ = get_cpu_unit_code ("1b_0mmf."); 9744 _0bbb_ = get_cpu_unit_code ("1b_0bbb."); 9745 _0mbb_ = 
get_cpu_unit_code ("1b_0mbb."); 9746 _0mib_ = get_cpu_unit_code ("1b_0mib."); 9747 _0mmb_ = get_cpu_unit_code ("1b_0mmb."); 9748 _0mfb_ = get_cpu_unit_code ("1b_0mfb."); 9749 _0mlx_ = get_cpu_unit_code ("1b_0mlx."); 9750 _1mii_ = get_cpu_unit_code ("1b_1mii."); 9751 _1mmi_ = get_cpu_unit_code ("1b_1mmi."); 9752 _1mfi_ = get_cpu_unit_code ("1b_1mfi."); 9753 _1mmf_ = get_cpu_unit_code ("1b_1mmf."); 9754 _1bbb_ = get_cpu_unit_code ("1b_1bbb."); 9755 _1mbb_ = get_cpu_unit_code ("1b_1mbb."); 9756 _1mib_ = get_cpu_unit_code ("1b_1mib."); 9757 _1mmb_ = get_cpu_unit_code ("1b_1mmb."); 9758 _1mfb_ = get_cpu_unit_code ("1b_1mfb."); 9759 _1mlx_ = get_cpu_unit_code ("1b_1mlx."); 9760 } 9761 9762 if (flag_selective_scheduling2 9763 && !maybe_skip_selective_scheduling ()) 9764 run_selective_scheduling (); 9765 else 9766 schedule_ebbs (); 9767 9768 /* Redo alignment computation, as it might gone wrong. */ 9769 compute_alignments (); 9770 9771 /* We cannot reuse this one because it has been corrupted by the 9772 evil glat. */ 9773 finish_bundle_states (); 9774 free (stops_p); 9775 stops_p = NULL; 9776 emit_insn_group_barriers (dump_file); 9777 9778 ia64_final_schedule = 0; 9779 timevar_pop (TV_SCHED2); 9780 } 9781 else 9782 emit_all_insn_group_barriers (dump_file); 9783 9784 df_analyze (); 9785 9786 /* A call must not be the last instruction in a function, so that the 9787 return address is still within the function, so that unwinding works 9788 properly. Note that IA-64 differs from dwarf2 on this point. */ 9789 if (ia64_except_unwind_info (&global_options) == UI_TARGET) 9790 { 9791 rtx_insn *insn; 9792 int saw_stop = 0; 9793 9794 insn = get_last_insn (); 9795 if (! INSN_P (insn)) 9796 insn = prev_active_insn (insn); 9797 if (insn) 9798 { 9799 /* Skip over insns that expand to nothing. */ 9800 while (NONJUMP_INSN_P (insn) 9801 && get_attr_empty (insn) == EMPTY_YES) 9802 { 9803 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 9804 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) 9805 saw_stop = 1; 9806 insn = prev_active_insn (insn); 9807 } 9808 if (CALL_P (insn)) 9809 { 9810 if (! saw_stop) 9811 emit_insn (gen_insn_group_barrier (GEN_INT (3))); 9812 emit_insn (gen_break_f ()); 9813 emit_insn (gen_insn_group_barrier (GEN_INT (3))); 9814 } 9815 } 9816 } 9817 9818 emit_predicate_relation_info (); 9819 9820 if (flag_var_tracking) 9821 { 9822 timevar_push (TV_VAR_TRACKING); 9823 variable_tracking_main (); 9824 timevar_pop (TV_VAR_TRACKING); 9825 } 9826 df_finish_pass (false); 9827 } 9828 9829 /* Return true if REGNO is used by the epilogue. */ 9830 9831 int 9832 ia64_epilogue_uses (int regno) 9833 { 9834 switch (regno) 9835 { 9836 case R_GR (1): 9837 /* With a call to a function in another module, we will write a new 9838 value to "gp". After returning from such a call, we need to make 9839 sure the function restores the original gp-value, even if the 9840 function itself does not use the gp anymore. */ 9841 return !(TARGET_AUTO_PIC || TARGET_NO_PIC); 9842 9843 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3): 9844 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7): 9845 /* For functions defined with the syscall_linkage attribute, all 9846 input registers are marked as live at all function exits. This 9847 prevents the register allocator from using the input registers, 9848 which in turn makes it possible to restart a system call after 9849 an interrupt without having to save/restore the input registers. 
9850 This also prevents kernel data from leaking to application code. */ 9851 return lookup_attribute ("syscall_linkage", 9852 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL; 9853 9854 case R_BR (0): 9855 /* Conditional return patterns can't represent the use of `b0' as 9856 the return address, so we force the value live this way. */ 9857 return 1; 9858 9859 case AR_PFS_REGNUM: 9860 /* Likewise for ar.pfs, which is used by br.ret. */ 9861 return 1; 9862 9863 default: 9864 return 0; 9865 } 9866 } 9867 9868 /* Return true if REGNO is used by the frame unwinder. */ 9869 9870 int 9871 ia64_eh_uses (int regno) 9872 { 9873 unsigned int r; 9874 9875 if (! reload_completed) 9876 return 0; 9877 9878 if (regno == 0) 9879 return 0; 9880 9881 for (r = reg_save_b0; r <= reg_save_ar_lc; r++) 9882 if (regno == current_frame_info.r[r] 9883 || regno == emitted_frame_related_regs[r]) 9884 return 1; 9885 9886 return 0; 9887 } 9888 9889 /* Return true if this goes in small data/bss. */ 9890 9891 /* ??? We could also support own long data here. Generating movl/add/ld8 9892 instead of addl,ld8/ld8. This makes the code bigger, but should make the 9893 code faster because there is one less load. This also includes incomplete 9894 types which can't go in sdata/sbss. */ 9895 9896 static bool 9897 ia64_in_small_data_p (const_tree exp) 9898 { 9899 if (TARGET_NO_SDATA) 9900 return false; 9901 9902 /* We want to merge strings, so we never consider them small data. */ 9903 if (TREE_CODE (exp) == STRING_CST) 9904 return false; 9905 9906 /* Functions are never small data. */ 9907 if (TREE_CODE (exp) == FUNCTION_DECL) 9908 return false; 9909 9910 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) 9911 { 9912 const char *section = DECL_SECTION_NAME (exp); 9913 9914 if (strcmp (section, ".sdata") == 0 9915 || strncmp (section, ".sdata.", 7) == 0 9916 || strncmp (section, ".gnu.linkonce.s.", 16) == 0 9917 || strcmp (section, ".sbss") == 0 9918 || strncmp (section, ".sbss.", 6) == 0 9919 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0) 9920 return true; 9921 } 9922 else 9923 { 9924 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); 9925 9926 /* If this is an incomplete type with size 0, then we can't put it 9927 in sdata because it might be too big when completed. */ 9928 if (size > 0 && size <= ia64_section_threshold) 9929 return true; 9930 } 9931 9932 return false; 9933 } 9934 9935 /* Output assembly directives for prologue regions. */ 9936 9937 /* The current basic block number. */ 9938 9939 static bool last_block; 9940 9941 /* True if we need a copy_state command at the start of the next block. */ 9942 9943 static bool need_copy_state; 9944 9945 #ifndef MAX_ARTIFICIAL_LABEL_BYTES 9946 # define MAX_ARTIFICIAL_LABEL_BYTES 30 9947 #endif 9948 9949 /* The function emits unwind directives for the start of an epilogue. */ 9950 9951 static void 9952 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED, 9953 bool unwind, bool frame ATTRIBUTE_UNUSED) 9954 { 9955 /* If this isn't the last block of the function, then we need to label the 9956 current state, and copy it back in at the start of the next block. */ 9957 9958 if (!last_block) 9959 { 9960 if (unwind) 9961 fprintf (asm_out_file, "\t.label_state %d\n", 9962 ++cfun->machine->state_num); 9963 need_copy_state = true; 9964 } 9965 9966 if (unwind) 9967 fprintf (asm_out_file, "\t.restore sp\n"); 9968 } 9969 9970 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. 
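Depending on the SET this emits a .fframe directive for a frame allocation, a .vframe directive when the hard frame pointer is set up, or, via process_epilogue, a .restore sp.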
*/ 9971 9972 static void 9973 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn, 9974 bool unwind, bool frame) 9975 { 9976 rtx dest = SET_DEST (pat); 9977 rtx src = SET_SRC (pat); 9978 9979 if (dest == stack_pointer_rtx) 9980 { 9981 if (GET_CODE (src) == PLUS) 9982 { 9983 rtx op0 = XEXP (src, 0); 9984 rtx op1 = XEXP (src, 1); 9985 9986 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT); 9987 9988 if (INTVAL (op1) < 0) 9989 { 9990 gcc_assert (!frame_pointer_needed); 9991 if (unwind) 9992 fprintf (asm_out_file, 9993 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n", 9994 -INTVAL (op1)); 9995 } 9996 else 9997 process_epilogue (asm_out_file, insn, unwind, frame); 9998 } 9999 else 10000 { 10001 gcc_assert (src == hard_frame_pointer_rtx); 10002 process_epilogue (asm_out_file, insn, unwind, frame); 10003 } 10004 } 10005 else if (dest == hard_frame_pointer_rtx) 10006 { 10007 gcc_assert (src == stack_pointer_rtx); 10008 gcc_assert (frame_pointer_needed); 10009 10010 if (unwind) 10011 fprintf (asm_out_file, "\t.vframe r%d\n", 10012 ia64_dbx_register_number (REGNO (dest))); 10013 } 10014 else 10015 gcc_unreachable (); 10016 } 10017 10018 /* This function processes a SET pattern for REG_CFA_REGISTER. */ 10019 10020 static void 10021 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind) 10022 { 10023 rtx dest = SET_DEST (pat); 10024 rtx src = SET_SRC (pat); 10025 int dest_regno = REGNO (dest); 10026 int src_regno; 10027 10028 if (src == pc_rtx) 10029 { 10030 /* Saving return address pointer. */ 10031 if (unwind) 10032 fprintf (asm_out_file, "\t.save rp, r%d\n", 10033 ia64_dbx_register_number (dest_regno)); 10034 return; 10035 } 10036 10037 src_regno = REGNO (src); 10038 10039 switch (src_regno) 10040 { 10041 case PR_REG (0): 10042 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]); 10043 if (unwind) 10044 fprintf (asm_out_file, "\t.save pr, r%d\n", 10045 ia64_dbx_register_number (dest_regno)); 10046 break; 10047 10048 case AR_UNAT_REGNUM: 10049 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]); 10050 if (unwind) 10051 fprintf (asm_out_file, "\t.save ar.unat, r%d\n", 10052 ia64_dbx_register_number (dest_regno)); 10053 break; 10054 10055 case AR_LC_REGNUM: 10056 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]); 10057 if (unwind) 10058 fprintf (asm_out_file, "\t.save ar.lc, r%d\n", 10059 ia64_dbx_register_number (dest_regno)); 10060 break; 10061 10062 default: 10063 /* Everything else should indicate being stored to memory. */ 10064 gcc_unreachable (); 10065 } 10066 } 10067 10068 /* This function processes a SET pattern for REG_CFA_OFFSET. 
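For rp, pr, ar.unat, ar.lc and ar.pfs it emits a .savesp or .savepsp directive with the stack offset; for gr4-gr7, br1-br5 and the preserved floating-point registers it emits the corresponding .save.g, .save.b, .save.f or .save.gf mask.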
*/ 10069 10070 static void 10071 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind) 10072 { 10073 rtx dest = SET_DEST (pat); 10074 rtx src = SET_SRC (pat); 10075 int src_regno = REGNO (src); 10076 const char *saveop; 10077 HOST_WIDE_INT off; 10078 rtx base; 10079 10080 gcc_assert (MEM_P (dest)); 10081 if (GET_CODE (XEXP (dest, 0)) == REG) 10082 { 10083 base = XEXP (dest, 0); 10084 off = 0; 10085 } 10086 else 10087 { 10088 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS 10089 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT); 10090 base = XEXP (XEXP (dest, 0), 0); 10091 off = INTVAL (XEXP (XEXP (dest, 0), 1)); 10092 } 10093 10094 if (base == hard_frame_pointer_rtx) 10095 { 10096 saveop = ".savepsp"; 10097 off = - off; 10098 } 10099 else 10100 { 10101 gcc_assert (base == stack_pointer_rtx); 10102 saveop = ".savesp"; 10103 } 10104 10105 src_regno = REGNO (src); 10106 switch (src_regno) 10107 { 10108 case BR_REG (0): 10109 gcc_assert (!current_frame_info.r[reg_save_b0]); 10110 if (unwind) 10111 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n", 10112 saveop, off); 10113 break; 10114 10115 case PR_REG (0): 10116 gcc_assert (!current_frame_info.r[reg_save_pr]); 10117 if (unwind) 10118 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n", 10119 saveop, off); 10120 break; 10121 10122 case AR_LC_REGNUM: 10123 gcc_assert (!current_frame_info.r[reg_save_ar_lc]); 10124 if (unwind) 10125 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n", 10126 saveop, off); 10127 break; 10128 10129 case AR_PFS_REGNUM: 10130 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]); 10131 if (unwind) 10132 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n", 10133 saveop, off); 10134 break; 10135 10136 case AR_UNAT_REGNUM: 10137 gcc_assert (!current_frame_info.r[reg_save_ar_unat]); 10138 if (unwind) 10139 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n", 10140 saveop, off); 10141 break; 10142 10143 case GR_REG (4): 10144 case GR_REG (5): 10145 case GR_REG (6): 10146 case GR_REG (7): 10147 if (unwind) 10148 fprintf (asm_out_file, "\t.save.g 0x%x\n", 10149 1 << (src_regno - GR_REG (4))); 10150 break; 10151 10152 case BR_REG (1): 10153 case BR_REG (2): 10154 case BR_REG (3): 10155 case BR_REG (4): 10156 case BR_REG (5): 10157 if (unwind) 10158 fprintf (asm_out_file, "\t.save.b 0x%x\n", 10159 1 << (src_regno - BR_REG (1))); 10160 break; 10161 10162 case FR_REG (2): 10163 case FR_REG (3): 10164 case FR_REG (4): 10165 case FR_REG (5): 10166 if (unwind) 10167 fprintf (asm_out_file, "\t.save.f 0x%x\n", 10168 1 << (src_regno - FR_REG (2))); 10169 break; 10170 10171 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19): 10172 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23): 10173 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27): 10174 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31): 10175 if (unwind) 10176 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n", 10177 1 << (src_regno - FR_REG (12))); 10178 break; 10179 10180 default: 10181 /* ??? For some reason we mark other general registers, even those 10182 we can't represent in the unwind info. Ignore them. */ 10183 break; 10184 } 10185 } 10186 10187 /* This function looks at a single insn and emits any directives 10188 required to unwind this insn. 
*/ 10189 10190 static void 10191 ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn) 10192 { 10193 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET; 10194 bool frame = dwarf2out_do_frame (); 10195 rtx note, pat; 10196 bool handled_one; 10197 10198 if (!unwind && !frame) 10199 return; 10200 10201 if (NOTE_INSN_BASIC_BLOCK_P (insn)) 10202 { 10203 last_block = NOTE_BASIC_BLOCK (insn)->next_bb 10204 == EXIT_BLOCK_PTR_FOR_FN (cfun); 10205 10206 /* Restore unwind state from immediately before the epilogue. */ 10207 if (need_copy_state) 10208 { 10209 if (unwind) 10210 { 10211 fprintf (asm_out_file, "\t.body\n"); 10212 fprintf (asm_out_file, "\t.copy_state %d\n", 10213 cfun->machine->state_num); 10214 } 10215 need_copy_state = false; 10216 } 10217 } 10218 10219 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn)) 10220 return; 10221 10222 /* Look for the ALLOC insn. */ 10223 if (INSN_CODE (insn) == CODE_FOR_alloc) 10224 { 10225 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0)); 10226 int dest_regno = REGNO (dest); 10227 10228 /* If this is the final destination for ar.pfs, then this must 10229 be the alloc in the prologue. */ 10230 if (dest_regno == current_frame_info.r[reg_save_ar_pfs]) 10231 { 10232 if (unwind) 10233 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n", 10234 ia64_dbx_register_number (dest_regno)); 10235 } 10236 else 10237 { 10238 /* This must be an alloc before a sibcall. We must drop the 10239 old frame info. The easiest way to drop the old frame 10240 info is to ensure we had a ".restore sp" directive 10241 followed by a new prologue. If the procedure doesn't 10242 have a memory-stack frame, we'll issue a dummy ".restore 10243 sp" now. */ 10244 if (current_frame_info.total_size == 0 && !frame_pointer_needed) 10245 /* if haven't done process_epilogue() yet, do it now */ 10246 process_epilogue (asm_out_file, insn, unwind, frame); 10247 if (unwind) 10248 fprintf (asm_out_file, "\t.prologue\n"); 10249 } 10250 return; 10251 } 10252 10253 handled_one = false; 10254 for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) 10255 switch (REG_NOTE_KIND (note)) 10256 { 10257 case REG_CFA_ADJUST_CFA: 10258 pat = XEXP (note, 0); 10259 if (pat == NULL) 10260 pat = PATTERN (insn); 10261 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame); 10262 handled_one = true; 10263 break; 10264 10265 case REG_CFA_OFFSET: 10266 pat = XEXP (note, 0); 10267 if (pat == NULL) 10268 pat = PATTERN (insn); 10269 process_cfa_offset (asm_out_file, pat, unwind); 10270 handled_one = true; 10271 break; 10272 10273 case REG_CFA_REGISTER: 10274 pat = XEXP (note, 0); 10275 if (pat == NULL) 10276 pat = PATTERN (insn); 10277 process_cfa_register (asm_out_file, pat, unwind); 10278 handled_one = true; 10279 break; 10280 10281 case REG_FRAME_RELATED_EXPR: 10282 case REG_CFA_DEF_CFA: 10283 case REG_CFA_EXPRESSION: 10284 case REG_CFA_RESTORE: 10285 case REG_CFA_SET_VDRAP: 10286 /* Not used in the ia64 port. */ 10287 gcc_unreachable (); 10288 10289 default: 10290 /* Not a frame-related note. */ 10291 break; 10292 } 10293 10294 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the 10295 explicit action to take. No guessing required. */ 10296 gcc_assert (handled_one); 10297 } 10298 10299 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. 
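The output is a single directive of the form .personality <symbol>; for C++ that would typically be .personality __gxx_personality_v0 (the symbol name here is only an illustration).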
*/ 10300 10301 static void 10302 ia64_asm_emit_except_personality (rtx personality) 10303 { 10304 fputs ("\t.personality\t", asm_out_file); 10305 output_addr_const (asm_out_file, personality); 10306 fputc ('\n', asm_out_file); 10307 } 10308 10309 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */ 10310 10311 static void 10312 ia64_asm_init_sections (void) 10313 { 10314 exception_section = get_unnamed_section (0, output_section_asm_op, 10315 "\t.handlerdata"); 10316 } 10317 10318 /* Implement TARGET_DEBUG_UNWIND_INFO. */ 10319 10320 static enum unwind_info_type 10321 ia64_debug_unwind_info (void) 10322 { 10323 return UI_TARGET; 10324 } 10325 10326 enum ia64_builtins 10327 { 10328 IA64_BUILTIN_BSP, 10329 IA64_BUILTIN_COPYSIGNQ, 10330 IA64_BUILTIN_FABSQ, 10331 IA64_BUILTIN_FLUSHRS, 10332 IA64_BUILTIN_INFQ, 10333 IA64_BUILTIN_HUGE_VALQ, 10334 IA64_BUILTIN_NANQ, 10335 IA64_BUILTIN_NANSQ, 10336 IA64_BUILTIN_max 10337 }; 10338 10339 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max]; 10340 10341 void 10342 ia64_init_builtins (void) 10343 { 10344 tree fpreg_type; 10345 tree float80_type; 10346 tree decl; 10347 10348 /* The __fpreg type. */ 10349 fpreg_type = make_node (REAL_TYPE); 10350 TYPE_PRECISION (fpreg_type) = 82; 10351 layout_type (fpreg_type); 10352 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg"); 10353 10354 /* The __float80 type. */ 10355 if (float64x_type_node != NULL_TREE 10356 && TYPE_MODE (float64x_type_node) == XFmode) 10357 float80_type = float64x_type_node; 10358 else 10359 { 10360 float80_type = make_node (REAL_TYPE); 10361 TYPE_PRECISION (float80_type) = 80; 10362 layout_type (float80_type); 10363 } 10364 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); 10365 10366 /* The __float128 type. */ 10367 if (!TARGET_HPUX) 10368 { 10369 tree ftype; 10370 tree const_string_type 10371 = build_pointer_type (build_qualified_type 10372 (char_type_node, TYPE_QUAL_CONST)); 10373 10374 (*lang_hooks.types.register_builtin_type) (float128_type_node, 10375 "__float128"); 10376 10377 /* TFmode support builtins. 
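These provide, e.g., __builtin_infq (), __builtin_nanq ("") and __builtin_copysignq (x, y) for the __float128 type; the calls here are shown only as an illustration of usage.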
*/ 10378 ftype = build_function_type_list (float128_type_node, NULL_TREE); 10379 decl = add_builtin_function ("__builtin_infq", ftype, 10380 IA64_BUILTIN_INFQ, BUILT_IN_MD, 10381 NULL, NULL_TREE); 10382 ia64_builtins[IA64_BUILTIN_INFQ] = decl; 10383 10384 decl = add_builtin_function ("__builtin_huge_valq", ftype, 10385 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD, 10386 NULL, NULL_TREE); 10387 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl; 10388 10389 ftype = build_function_type_list (float128_type_node, 10390 const_string_type, 10391 NULL_TREE); 10392 decl = add_builtin_function ("__builtin_nanq", ftype, 10393 IA64_BUILTIN_NANQ, BUILT_IN_MD, 10394 "nanq", NULL_TREE); 10395 TREE_READONLY (decl) = 1; 10396 ia64_builtins[IA64_BUILTIN_NANQ] = decl; 10397 10398 decl = add_builtin_function ("__builtin_nansq", ftype, 10399 IA64_BUILTIN_NANSQ, BUILT_IN_MD, 10400 "nansq", NULL_TREE); 10401 TREE_READONLY (decl) = 1; 10402 ia64_builtins[IA64_BUILTIN_NANSQ] = decl; 10403 10404 ftype = build_function_type_list (float128_type_node, 10405 float128_type_node, 10406 NULL_TREE); 10407 decl = add_builtin_function ("__builtin_fabsq", ftype, 10408 IA64_BUILTIN_FABSQ, BUILT_IN_MD, 10409 "__fabstf2", NULL_TREE); 10410 TREE_READONLY (decl) = 1; 10411 ia64_builtins[IA64_BUILTIN_FABSQ] = decl; 10412 10413 ftype = build_function_type_list (float128_type_node, 10414 float128_type_node, 10415 float128_type_node, 10416 NULL_TREE); 10417 decl = add_builtin_function ("__builtin_copysignq", ftype, 10418 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD, 10419 "__copysigntf3", NULL_TREE); 10420 TREE_READONLY (decl) = 1; 10421 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl; 10422 } 10423 else 10424 /* Under HPUX, this is a synonym for "long double". */ 10425 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 10426 "__float128"); 10427 10428 /* Fwrite on VMS is non-standard. 
*/ 10429 #if TARGET_ABI_OPEN_VMS 10430 vms_patch_builtins (); 10431 #endif 10432 10433 #define def_builtin(name, type, code) \ 10434 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \ 10435 NULL, NULL_TREE) 10436 10437 decl = def_builtin ("__builtin_ia64_bsp", 10438 build_function_type_list (ptr_type_node, NULL_TREE), 10439 IA64_BUILTIN_BSP); 10440 ia64_builtins[IA64_BUILTIN_BSP] = decl; 10441 10442 decl = def_builtin ("__builtin_ia64_flushrs", 10443 build_function_type_list (void_type_node, NULL_TREE), 10444 IA64_BUILTIN_FLUSHRS); 10445 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl; 10446 10447 #undef def_builtin 10448 10449 if (TARGET_HPUX) 10450 { 10451 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE) 10452 set_user_assembler_name (decl, "_Isfinite"); 10453 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE) 10454 set_user_assembler_name (decl, "_Isfinitef"); 10455 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE) 10456 set_user_assembler_name (decl, "_Isfinitef128"); 10457 } 10458 } 10459 10460 static tree 10461 ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, 10462 tree *args, bool ignore ATTRIBUTE_UNUSED) 10463 { 10464 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) 10465 { 10466 enum ia64_builtins fn_code = (enum ia64_builtins) 10467 DECL_FUNCTION_CODE (fndecl); 10468 switch (fn_code) 10469 { 10470 case IA64_BUILTIN_NANQ: 10471 case IA64_BUILTIN_NANSQ: 10472 { 10473 tree type = TREE_TYPE (TREE_TYPE (fndecl)); 10474 const char *str = c_getstr (*args); 10475 int quiet = fn_code == IA64_BUILTIN_NANQ; 10476 REAL_VALUE_TYPE real; 10477 10478 if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) 10479 return build_real (type, real); 10480 return NULL_TREE; 10481 } 10482 10483 default: 10484 break; 10485 } 10486 } 10487 10488 #ifdef SUBTARGET_FOLD_BUILTIN 10489 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); 10490 #endif 10491 10492 return NULL_TREE; 10493 } 10494 10495 rtx 10496 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, 10497 machine_mode mode ATTRIBUTE_UNUSED, 10498 int ignore ATTRIBUTE_UNUSED) 10499 { 10500 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 10501 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 10502 10503 switch (fcode) 10504 { 10505 case IA64_BUILTIN_BSP: 10506 if (! target || ! register_operand (target, DImode)) 10507 target = gen_reg_rtx (DImode); 10508 emit_insn (gen_bsp_value (target)); 10509 #ifdef POINTERS_EXTEND_UNSIGNED 10510 target = convert_memory_address (ptr_mode, target); 10511 #endif 10512 return target; 10513 10514 case IA64_BUILTIN_FLUSHRS: 10515 emit_insn (gen_flushrs ()); 10516 return const0_rtx; 10517 10518 case IA64_BUILTIN_INFQ: 10519 case IA64_BUILTIN_HUGE_VALQ: 10520 { 10521 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp)); 10522 REAL_VALUE_TYPE inf; 10523 rtx tmp; 10524 10525 real_inf (&inf); 10526 tmp = const_double_from_real_value (inf, target_mode); 10527 10528 tmp = validize_mem (force_const_mem (target_mode, tmp)); 10529 10530 if (target == 0) 10531 target = gen_reg_rtx (target_mode); 10532 10533 emit_move_insn (target, tmp); 10534 return target; 10535 } 10536 10537 case IA64_BUILTIN_NANQ: 10538 case IA64_BUILTIN_NANSQ: 10539 case IA64_BUILTIN_FABSQ: 10540 case IA64_BUILTIN_COPYSIGNQ: 10541 return expand_call (exp, target, ignore); 10542 10543 default: 10544 gcc_unreachable (); 10545 } 10546 10547 return NULL_RTX; 10548 } 10549 10550 /* Return the ia64 builtin for CODE. 
*/ 10551 10552 static tree 10553 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 10554 { 10555 if (code >= IA64_BUILTIN_max) 10556 return error_mark_node; 10557 10558 return ia64_builtins[code]; 10559 } 10560 10561 /* On HP-UX IA64, aggregate parameters are passed in the 10562 most significant bits of the stack slot. */ 10563 10564 enum direction 10565 ia64_hpux_function_arg_padding (machine_mode mode, const_tree type) 10566 { 10567 /* Exception to normal case for structures/unions/etc. */ 10568 10569 if (type && AGGREGATE_TYPE_P (type) 10570 && int_size_in_bytes (type) < UNITS_PER_WORD) 10571 return upward; 10572 10573 /* Fall back to the default. */ 10574 return DEFAULT_FUNCTION_ARG_PADDING (mode, type); 10575 } 10576 10577 /* Emit text to declare externally defined variables and functions, because 10578 the Intel assembler does not support undefined externals. */ 10579 10580 void 10581 ia64_asm_output_external (FILE *file, tree decl, const char *name) 10582 { 10583 /* We output the name if and only if TREE_SYMBOL_REFERENCED is 10584 set in order to avoid putting out names that are never really 10585 used. */ 10586 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))) 10587 { 10588 /* maybe_assemble_visibility will return 1 if the assembler 10589 visibility directive is output. */ 10590 int need_visibility = ((*targetm.binds_local_p) (decl) 10591 && maybe_assemble_visibility (decl)); 10592 10593 /* GNU as does not need anything here, but the HP linker does 10594 need something for external functions. */ 10595 if ((TARGET_HPUX_LD || !TARGET_GNU_AS) 10596 && TREE_CODE (decl) == FUNCTION_DECL) 10597 (*targetm.asm_out.globalize_decl_name) (file, decl); 10598 else if (need_visibility && !TARGET_GNU_AS) 10599 (*targetm.asm_out.globalize_label) (file, name); 10600 } 10601 } 10602 10603 /* Set SImode div/mod functions; init_integral_libfuncs only initializes 10604 modes of word_mode and larger. Rename the TFmode libfuncs using the 10605 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for 10606 backward compatibility.
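For example, the TFmode add, subtract and multiply entries become _U_Qfadd, _U_Qfsub and _U_Qfmpy below.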
*/ 10607 10608 static void 10609 ia64_init_libfuncs (void) 10610 { 10611 set_optab_libfunc (sdiv_optab, SImode, "__divsi3"); 10612 set_optab_libfunc (udiv_optab, SImode, "__udivsi3"); 10613 set_optab_libfunc (smod_optab, SImode, "__modsi3"); 10614 set_optab_libfunc (umod_optab, SImode, "__umodsi3"); 10615 10616 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd"); 10617 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub"); 10618 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy"); 10619 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv"); 10620 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg"); 10621 10622 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad"); 10623 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad"); 10624 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad"); 10625 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl"); 10626 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl"); 10627 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80"); 10628 10629 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl"); 10630 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl"); 10631 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad"); 10632 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl"); 10633 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl"); 10634 10635 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad"); 10636 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad"); 10637 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad"); 10638 /* HP-UX 11.23 libc does not have a function for unsigned 10639 SImode-to-TFmode conversion. */ 10640 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad"); 10641 } 10642 10643 /* Rename all the TFmode libfuncs using the HPUX conventions. */ 10644 10645 static void 10646 ia64_hpux_init_libfuncs (void) 10647 { 10648 ia64_init_libfuncs (); 10649 10650 /* The HP SI millicode division and mod functions expect DI arguments. 10651 By turning them off completely we avoid using both libgcc and the 10652 non-standard millicode routines and use the HP DI millicode routines 10653 instead. */ 10654 10655 set_optab_libfunc (sdiv_optab, SImode, 0); 10656 set_optab_libfunc (udiv_optab, SImode, 0); 10657 set_optab_libfunc (smod_optab, SImode, 0); 10658 set_optab_libfunc (umod_optab, SImode, 0); 10659 10660 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI"); 10661 set_optab_libfunc (udiv_optab, DImode, "__milli_divU"); 10662 set_optab_libfunc (smod_optab, DImode, "__milli_remI"); 10663 set_optab_libfunc (umod_optab, DImode, "__milli_remU"); 10664 10665 /* HP-UX libc has TF min/max/abs routines in it. */ 10666 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin"); 10667 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax"); 10668 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs"); 10669 10670 /* ia64_expand_compare uses this. */ 10671 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp"); 10672 10673 /* These should never be used. */ 10674 set_optab_libfunc (eq_optab, TFmode, 0); 10675 set_optab_libfunc (ne_optab, TFmode, 0); 10676 set_optab_libfunc (gt_optab, TFmode, 0); 10677 set_optab_libfunc (ge_optab, TFmode, 0); 10678 set_optab_libfunc (lt_optab, TFmode, 0); 10679 set_optab_libfunc (le_optab, TFmode, 0); 10680 } 10681 10682 /* Rename the division and modulus functions in VMS. 
*/ 10683 10684 static void 10685 ia64_vms_init_libfuncs (void) 10686 { 10687 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I"); 10688 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L"); 10689 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI"); 10690 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL"); 10691 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I"); 10692 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L"); 10693 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI"); 10694 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL"); 10695 #ifdef MEM_LIBFUNCS_INIT 10696 MEM_LIBFUNCS_INIT; 10697 #endif 10698 } 10699 10700 /* Rename the TFmode libfuncs available from soft-fp in glibc using 10701 the HPUX conventions. */ 10702 10703 static void 10704 ia64_sysv4_init_libfuncs (void) 10705 { 10706 ia64_init_libfuncs (); 10707 10708 /* These functions are not part of the HPUX TFmode interface. We 10709 use them instead of _U_Qfcmp, which doesn't work the way we 10710 expect. */ 10711 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq"); 10712 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne"); 10713 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt"); 10714 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge"); 10715 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt"); 10716 set_optab_libfunc (le_optab, TFmode, "_U_Qfle"); 10717 10718 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in 10719 glibc doesn't have them. */ 10720 } 10721 10722 /* Use soft-fp. */ 10723 10724 static void 10725 ia64_soft_fp_init_libfuncs (void) 10726 { 10727 } 10728 10729 static bool 10730 ia64_vms_valid_pointer_mode (machine_mode mode) 10731 { 10732 return (mode == SImode || mode == DImode); 10733 } 10734 10735 /* For HPUX, it is illegal to have relocations in shared segments. */ 10736 10737 static int 10738 ia64_hpux_reloc_rw_mask (void) 10739 { 10740 return 3; 10741 } 10742 10743 /* For others, relax this so that relocations to local data goes in 10744 read-only segments, but we still cannot allow global relocations 10745 in read-only segments. */ 10746 10747 static int 10748 ia64_reloc_rw_mask (void) 10749 { 10750 return flag_pic ? 3 : 2; 10751 } 10752 10753 /* Return the section to use for X. The only special thing we do here 10754 is to honor small data. */ 10755 10756 static section * 10757 ia64_select_rtx_section (machine_mode mode, rtx x, 10758 unsigned HOST_WIDE_INT align) 10759 { 10760 if (GET_MODE_SIZE (mode) > 0 10761 && GET_MODE_SIZE (mode) <= ia64_section_threshold 10762 && !TARGET_NO_SDATA) 10763 return sdata_section; 10764 else 10765 return default_elf_select_rtx_section (mode, x, align); 10766 } 10767 10768 static unsigned int 10769 ia64_section_type_flags (tree decl, const char *name, int reloc) 10770 { 10771 unsigned int flags = 0; 10772 10773 if (strcmp (name, ".sdata") == 0 10774 || strncmp (name, ".sdata.", 7) == 0 10775 || strncmp (name, ".gnu.linkonce.s.", 16) == 0 10776 || strncmp (name, ".sdata2.", 8) == 0 10777 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0 10778 || strcmp (name, ".sbss") == 0 10779 || strncmp (name, ".sbss.", 6) == 0 10780 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0) 10781 flags = SECTION_SMALL; 10782 10783 flags |= default_section_type_flags (decl, name, reloc); 10784 return flags; 10785 } 10786 10787 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a 10788 structure type and that the address of that type should be passed 10789 in out0, rather than in r8. 
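(A typical instance, mentioned only as an illustration, is a by-value return of a C++ class type with a user-declared destructor, i.e. a type whose copy or destruction is non-trivial.)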
*/ 10790 10791 static bool 10792 ia64_struct_retval_addr_is_first_parm_p (tree fntype) 10793 { 10794 tree ret_type = TREE_TYPE (fntype); 10795 10796 /* The Itanium C++ ABI requires that out0, rather than r8, be used 10797 as the structure return address parameter, if the return value 10798 type has a non-trivial copy constructor or destructor. It is not 10799 clear if this same convention should be used for other 10800 programming languages. Until G++ 3.4, we incorrectly used r8 for 10801 these return values. */ 10802 return (abi_version_at_least (2) 10803 && ret_type 10804 && TYPE_MODE (ret_type) == BLKmode 10805 && TREE_ADDRESSABLE (ret_type) 10806 && lang_GNU_CXX ()); 10807 } 10808 10809 /* Output the assembler code for a thunk function. THUNK_DECL is the 10810 declaration for the thunk function itself, FUNCTION is the decl for 10811 the target function. DELTA is an immediate constant offset to be 10812 added to THIS. If VCALL_OFFSET is nonzero, the word at 10813 *(*this + vcall_offset) should be added to THIS. */ 10814 10815 static void 10816 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, 10817 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 10818 tree function) 10819 { 10820 rtx this_rtx, funexp; 10821 rtx_insn *insn; 10822 unsigned int this_parmno; 10823 unsigned int this_regno; 10824 rtx delta_rtx; 10825 10826 reload_completed = 1; 10827 epilogue_completed = 1; 10828 10829 /* Set things up as ia64_expand_prologue might. */ 10830 last_scratch_gr_reg = 15; 10831 10832 memset (&current_frame_info, 0, sizeof (current_frame_info)); 10833 current_frame_info.spill_cfa_off = -16; 10834 current_frame_info.n_input_regs = 1; 10835 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); 10836 10837 /* Mark the end of the (empty) prologue. */ 10838 emit_note (NOTE_INSN_PROLOGUE_END); 10839 10840 /* Figure out whether "this" will be the first parameter (the 10841 typical case) or the second parameter (as happens when the 10842 virtual function returns certain class objects). */ 10843 this_parmno 10844 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk)) 10845 ? 1 : 0); 10846 this_regno = IN_REG (this_parmno); 10847 if (!TARGET_REG_NAMES) 10848 reg_names[this_regno] = ia64_reg_numbers[this_parmno]; 10849 10850 this_rtx = gen_rtx_REG (Pmode, this_regno); 10851 10852 /* Apply the constant offset, if required. */ 10853 delta_rtx = GEN_INT (delta); 10854 if (TARGET_ILP32) 10855 { 10856 rtx tmp = gen_rtx_REG (ptr_mode, this_regno); 10857 REG_POINTER (tmp) = 1; 10858 if (delta && satisfies_constraint_I (delta_rtx)) 10859 { 10860 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx)); 10861 delta = 0; 10862 } 10863 else 10864 emit_insn (gen_ptr_extend (this_rtx, tmp)); 10865 } 10866 if (delta) 10867 { 10868 if (!satisfies_constraint_I (delta_rtx)) 10869 { 10870 rtx tmp = gen_rtx_REG (Pmode, 2); 10871 emit_move_insn (tmp, delta_rtx); 10872 delta_rtx = tmp; 10873 } 10874 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx)); 10875 } 10876 10877 /* Apply the offset from the vtable, if required.
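(That is, this += *(*this + vcall_offset), as described in the header comment above.)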
*/ 10878 if (vcall_offset) 10879 { 10880 rtx vcall_offset_rtx = GEN_INT (vcall_offset); 10881 rtx tmp = gen_rtx_REG (Pmode, 2); 10882 10883 if (TARGET_ILP32) 10884 { 10885 rtx t = gen_rtx_REG (ptr_mode, 2); 10886 REG_POINTER (t) = 1; 10887 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx)); 10888 if (satisfies_constraint_I (vcall_offset_rtx)) 10889 { 10890 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx)); 10891 vcall_offset = 0; 10892 } 10893 else 10894 emit_insn (gen_ptr_extend (tmp, t)); 10895 } 10896 else 10897 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); 10898 10899 if (vcall_offset) 10900 { 10901 if (!satisfies_constraint_J (vcall_offset_rtx)) 10902 { 10903 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ()); 10904 emit_move_insn (tmp2, vcall_offset_rtx); 10905 vcall_offset_rtx = tmp2; 10906 } 10907 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx)); 10908 } 10909 10910 if (TARGET_ILP32) 10911 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp))); 10912 else 10913 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp)); 10914 10915 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); 10916 } 10917 10918 /* Generate a tail call to the target function. */ 10919 if (! TREE_USED (function)) 10920 { 10921 assemble_external (function); 10922 TREE_USED (function) = 1; 10923 } 10924 funexp = XEXP (DECL_RTL (function), 0); 10925 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); 10926 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1); 10927 insn = get_last_insn (); 10928 SIBLING_CALL_P (insn) = 1; 10929 10930 /* Code generation for calls relies on splitting. */ 10931 reload_completed = 1; 10932 epilogue_completed = 1; 10933 try_split (PATTERN (insn), insn, 0); 10934 10935 emit_barrier (); 10936 10937 /* Run just enough of rest_of_compilation to get the insns emitted. 10938 There's not really enough bulk here to make other passes such as 10939 instruction scheduling worth while. Note that use_thunk calls 10940 assemble_start_function and assemble_end_function. */ 10941 10942 emit_all_insn_group_barriers (NULL); 10943 insn = get_insns (); 10944 shorten_branches (insn); 10945 final_start_function (insn, file, 1); 10946 final (insn, file, 1); 10947 final_end_function (); 10948 10949 reload_completed = 0; 10950 epilogue_completed = 0; 10951 } 10952 10953 /* Worker function for TARGET_STRUCT_VALUE_RTX. */ 10954 10955 static rtx 10956 ia64_struct_value_rtx (tree fntype, 10957 int incoming ATTRIBUTE_UNUSED) 10958 { 10959 if (TARGET_ABI_OPEN_VMS || 10960 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))) 10961 return NULL_RTX; 10962 return gen_rtx_REG (Pmode, GR_REG (8)); 10963 } 10964 10965 static bool 10966 ia64_scalar_mode_supported_p (machine_mode mode) 10967 { 10968 switch (mode) 10969 { 10970 case QImode: 10971 case HImode: 10972 case SImode: 10973 case DImode: 10974 case TImode: 10975 return true; 10976 10977 case SFmode: 10978 case DFmode: 10979 case XFmode: 10980 case RFmode: 10981 return true; 10982 10983 case TFmode: 10984 return true; 10985 10986 default: 10987 return false; 10988 } 10989 } 10990 10991 static bool 10992 ia64_vector_mode_supported_p (machine_mode mode) 10993 { 10994 switch (mode) 10995 { 10996 case V8QImode: 10997 case V4HImode: 10998 case V2SImode: 10999 return true; 11000 11001 case V2SFmode: 11002 return true; 11003 11004 default: 11005 return false; 11006 } 11007 } 11008 11009 /* Implement the FUNCTION_PROFILER macro. 
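In the common direct-call case the sequence written below is roughly: an alloc of four output registers, the counter label (or r0) in out3, the return pointer in out2, gp in out1, and a final br.call.sptk.many b0 = _mcount; the indirect-call case instead loads the _mcount function descriptor through @ltoff(@fptr(_mcount)).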
*/ 11010 11011 void 11012 ia64_output_function_profiler (FILE *file, int labelno) 11013 { 11014 bool indirect_call; 11015 11016 /* If the function needs a static chain and the static chain 11017 register is r15, we use an indirect call so as to bypass 11018 the PLT stub in case the executable is dynamically linked, 11019 because the stub clobbers r15 as per 5.3.6 of the psABI. 11020 We don't need to do that in non canonical PIC mode. */ 11021 11022 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC) 11023 { 11024 gcc_assert (STATIC_CHAIN_REGNUM == 15); 11025 indirect_call = true; 11026 } 11027 else 11028 indirect_call = false; 11029 11030 if (TARGET_GNU_AS) 11031 fputs ("\t.prologue 4, r40\n", file); 11032 else 11033 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file); 11034 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file); 11035 11036 if (NO_PROFILE_COUNTERS) 11037 fputs ("\tmov out3 = r0\n", file); 11038 else 11039 { 11040 char buf[20]; 11041 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 11042 11043 if (TARGET_AUTO_PIC) 11044 fputs ("\tmovl out3 = @gprel(", file); 11045 else 11046 fputs ("\taddl out3 = @ltoff(", file); 11047 assemble_name (file, buf); 11048 if (TARGET_AUTO_PIC) 11049 fputs (")\n", file); 11050 else 11051 fputs ("), r1\n", file); 11052 } 11053 11054 if (indirect_call) 11055 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file); 11056 fputs ("\t;;\n", file); 11057 11058 fputs ("\t.save rp, r42\n", file); 11059 fputs ("\tmov out2 = b0\n", file); 11060 if (indirect_call) 11061 fputs ("\tld8 r14 = [r14]\n\t;;\n", file); 11062 fputs ("\t.body\n", file); 11063 fputs ("\tmov out1 = r1\n", file); 11064 if (indirect_call) 11065 { 11066 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file); 11067 fputs ("\tmov b6 = r16\n", file); 11068 fputs ("\tld8 r1 = [r14]\n", file); 11069 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file); 11070 } 11071 else 11072 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file); 11073 } 11074 11075 static GTY(()) rtx mcount_func_rtx; 11076 static rtx 11077 gen_mcount_func_rtx (void) 11078 { 11079 if (!mcount_func_rtx) 11080 mcount_func_rtx = init_one_libfunc ("_mcount"); 11081 return mcount_func_rtx; 11082 } 11083 11084 void 11085 ia64_profile_hook (int labelno) 11086 { 11087 rtx label, ip; 11088 11089 if (NO_PROFILE_COUNTERS) 11090 label = const0_rtx; 11091 else 11092 { 11093 char buf[30]; 11094 const char *label_name; 11095 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 11096 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf)); 11097 label = gen_rtx_SYMBOL_REF (Pmode, label_name); 11098 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL; 11099 } 11100 ip = gen_reg_rtx (Pmode); 11101 emit_insn (gen_ip_value (ip)); 11102 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL, 11103 VOIDmode, 3, 11104 gen_rtx_REG (Pmode, BR_REG (0)), Pmode, 11105 ip, Pmode, 11106 label, Pmode); 11107 } 11108 11109 /* Return the mangling of TYPE if it is an extended fundamental type. */ 11110 11111 static const char * 11112 ia64_mangle_type (const_tree type) 11113 { 11114 type = TYPE_MAIN_VARIANT (type); 11115 11116 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE 11117 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) 11118 return NULL; 11119 11120 /* On HP-UX, "long double" is mangled as "e" so __float128 is 11121 mangled as "e". */ 11122 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode) 11123 return "g"; 11124 /* On HP-UX, "e" is not available as a mangling of __float80 so use 11125 an extended mangling. 
Elsewhere, "e" is available since long 11126 double is 80 bits. */ 11127 if (TYPE_MODE (type) == XFmode) 11128 return TARGET_HPUX ? "u9__float80" : "e"; 11129 if (TYPE_MODE (type) == RFmode) 11130 return "u7__fpreg"; 11131 return NULL; 11132 } 11133 11134 /* Return the diagnostic message string if conversion from FROMTYPE to 11135 TOTYPE is not allowed, NULL otherwise. */ 11136 static const char * 11137 ia64_invalid_conversion (const_tree fromtype, const_tree totype) 11138 { 11139 /* Reject nontrivial conversion to or from __fpreg. */ 11140 if (TYPE_MODE (fromtype) == RFmode 11141 && TYPE_MODE (totype) != RFmode 11142 && TYPE_MODE (totype) != VOIDmode) 11143 return N_("invalid conversion from %<__fpreg%>"); 11144 if (TYPE_MODE (totype) == RFmode 11145 && TYPE_MODE (fromtype) != RFmode) 11146 return N_("invalid conversion to %<__fpreg%>"); 11147 return NULL; 11148 } 11149 11150 /* Return the diagnostic message string if the unary operation OP is 11151 not permitted on TYPE, NULL otherwise. */ 11152 static const char * 11153 ia64_invalid_unary_op (int op, const_tree type) 11154 { 11155 /* Reject operations on __fpreg other than unary + or &. */ 11156 if (TYPE_MODE (type) == RFmode 11157 && op != CONVERT_EXPR 11158 && op != ADDR_EXPR) 11159 return N_("invalid operation on %<__fpreg%>"); 11160 return NULL; 11161 } 11162 11163 /* Return the diagnostic message string if the binary operation OP is 11164 not permitted on TYPE1 and TYPE2, NULL otherwise. */ 11165 static const char * 11166 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2) 11167 { 11168 /* Reject operations on __fpreg. */ 11169 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode) 11170 return N_("invalid operation on %<__fpreg%>"); 11171 return NULL; 11172 } 11173 11174 /* HP-UX version_id attribute. 11175 For object foo, if the version_id is set to 1234 put out an alias 11176 of '.alias foo "foo{1234}" We can't use "foo{1234}" in anything 11177 other than an alias statement because it is an illegal symbol name. */ 11178 11179 static tree 11180 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED, 11181 tree name ATTRIBUTE_UNUSED, 11182 tree args, 11183 int flags ATTRIBUTE_UNUSED, 11184 bool *no_add_attrs) 11185 { 11186 tree arg = TREE_VALUE (args); 11187 11188 if (TREE_CODE (arg) != STRING_CST) 11189 { 11190 error("version attribute is not a string"); 11191 *no_add_attrs = true; 11192 return NULL_TREE; 11193 } 11194 return NULL_TREE; 11195 } 11196 11197 /* Target hook for c_mode_for_suffix. */ 11198 11199 static machine_mode 11200 ia64_c_mode_for_suffix (char suffix) 11201 { 11202 if (suffix == 'q') 11203 return TFmode; 11204 if (suffix == 'w') 11205 return XFmode; 11206 11207 return VOIDmode; 11208 } 11209 11210 static GTY(()) rtx ia64_dconst_0_5_rtx; 11211 11212 rtx 11213 ia64_dconst_0_5 (void) 11214 { 11215 if (! ia64_dconst_0_5_rtx) 11216 { 11217 REAL_VALUE_TYPE rv; 11218 real_from_string (&rv, "0.5"); 11219 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode); 11220 } 11221 return ia64_dconst_0_5_rtx; 11222 } 11223 11224 static GTY(()) rtx ia64_dconst_0_375_rtx; 11225 11226 rtx 11227 ia64_dconst_0_375 (void) 11228 { 11229 if (! 
ia64_dconst_0_375_rtx) 11230 { 11231 REAL_VALUE_TYPE rv; 11232 real_from_string (&rv, "0.375"); 11233 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode); 11234 } 11235 return ia64_dconst_0_375_rtx; 11236 } 11237 11238 static machine_mode 11239 ia64_get_reg_raw_mode (int regno) 11240 { 11241 if (FR_REGNO_P (regno)) 11242 return XFmode; 11243 return default_get_reg_raw_mode(regno); 11244 } 11245 11246 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed 11247 anymore. */ 11248 11249 bool 11250 ia64_member_type_forces_blk (const_tree, machine_mode mode) 11251 { 11252 return TARGET_HPUX && mode == TFmode; 11253 } 11254 11255 /* Always default to .text section until HP-UX linker is fixed. */ 11256 11257 ATTRIBUTE_UNUSED static section * 11258 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED, 11259 enum node_frequency freq ATTRIBUTE_UNUSED, 11260 bool startup ATTRIBUTE_UNUSED, 11261 bool exit ATTRIBUTE_UNUSED) 11262 { 11263 return NULL; 11264 } 11265 11266 /* Construct (set target (vec_select op0 (parallel perm))) and 11267 return true if that's a valid instruction in the active ISA. */ 11268 11269 static bool 11270 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt) 11271 { 11272 rtx rperm[MAX_VECT_LEN], x; 11273 unsigned i; 11274 11275 for (i = 0; i < nelt; ++i) 11276 rperm[i] = GEN_INT (perm[i]); 11277 11278 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); 11279 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); 11280 x = gen_rtx_SET (target, x); 11281 11282 rtx_insn *insn = emit_insn (x); 11283 if (recog_memoized (insn) < 0) 11284 { 11285 remove_insn (insn); 11286 return false; 11287 } 11288 return true; 11289 } 11290 11291 /* Similar, but generate a vec_concat from op0 and op1 as well. */ 11292 11293 static bool 11294 expand_vselect_vconcat (rtx target, rtx op0, rtx op1, 11295 const unsigned char *perm, unsigned nelt) 11296 { 11297 machine_mode v2mode; 11298 rtx x; 11299 11300 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0)); 11301 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); 11302 return expand_vselect (target, x, perm, nelt); 11303 } 11304 11305 /* Try to expand a no-op permutation. */ 11306 11307 static bool 11308 expand_vec_perm_identity (struct expand_vec_perm_d *d) 11309 { 11310 unsigned i, nelt = d->nelt; 11311 11312 for (i = 0; i < nelt; ++i) 11313 if (d->perm[i] != i) 11314 return false; 11315 11316 if (!d->testing_p) 11317 emit_move_insn (d->target, d->op0); 11318 11319 return true; 11320 } 11321 11322 /* Try to expand D via a shrp instruction. */ 11323 11324 static bool 11325 expand_vec_perm_shrp (struct expand_vec_perm_d *d) 11326 { 11327 unsigned i, nelt = d->nelt, shift, mask; 11328 rtx tmp, hi, lo; 11329 11330 /* ??? Don't force V2SFmode into the integer registers. */ 11331 if (d->vmode == V2SFmode) 11332 return false; 11333 11334 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1); 11335 11336 shift = d->perm[0]; 11337 if (BYTES_BIG_ENDIAN && shift > nelt) 11338 return false; 11339 11340 for (i = 1; i < nelt; ++i) 11341 if (d->perm[i] != ((shift + i) & mask)) 11342 return false; 11343 11344 if (d->testing_p) 11345 return true; 11346 11347 hi = shift < nelt ? d->op1 : d->op0; 11348 lo = shift < nelt ? d->op0 : d->op1; 11349 11350 shift %= nelt; 11351 11352 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT; 11353 11354 /* We've eliminated the shift 0 case via expand_vec_perm_identity. 
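As a little-endian illustration (not taken from the source): for a V8QImode permutation {2,3,4,5,6,7,8,9}, shift is 2 elements, i.e. 16 bits, hi is op1 and lo is op0, and the shrp emitted below extracts the 64-bit field that starts 16 bits into the lo half of the hi:lo pair.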
/* Try to expand a no-op permutation.  */

static bool
expand_vec_perm_identity (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;

  for (i = 0; i < nelt; ++i)
    if (d->perm[i] != i)
      return false;

  if (!d->testing_p)
    emit_move_insn (d->target, d->op0);

  return true;
}

/* Try to expand D via a shrp instruction.  */

static bool
expand_vec_perm_shrp (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt, shift, mask;
  rtx tmp, hi, lo;

  /* ??? Don't force V2SFmode into the integer registers.  */
  if (d->vmode == V2SFmode)
    return false;

  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);

  shift = d->perm[0];
  if (BYTES_BIG_ENDIAN && shift > nelt)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != ((shift + i) & mask))
      return false;

  if (d->testing_p)
    return true;

  hi = shift < nelt ? d->op1 : d->op0;
  lo = shift < nelt ? d->op0 : d->op1;

  shift %= nelt;

  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;

  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
  gcc_assert (IN_RANGE (shift, 1, 63));

  /* Recall that big-endian elements are numbered starting at the top of
     the register.  Ideally we'd have a shift-left-pair.  But since we
     don't, convert to a shift the other direction.  */
  if (BYTES_BIG_ENDIAN)
    shift = 64 - shift;

  tmp = gen_reg_rtx (DImode);
  hi = gen_lowpart (DImode, hi);
  lo = gen_lowpart (DImode, lo);
  emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));

  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
  return true;
}
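/* Illustrative note (not part of the original sources): the selectors
   accepted above are rotations of consecutive elements.  For a
   little-endian two-operand V8QImode permutation { 3, 4, 5, 6, 7, 8,
   9, 10 }, SHIFT is 3, so conceptually the 128-bit pair formed from
   op1 (upper half) and op0 (lower half) is shifted right by
   3 * 8 = 24 bits with a single shrp, and the low 64 bits of that
   result are the requested bytes.  */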
/* Try to instantiate D in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
	return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
	return true;
    }

  /* Try two operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
	return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}

/* Pattern match broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case V2SImode:
    case V2SFmode:
      /* Implementable by interleave.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
	elt = 7 - elt;
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
			    GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}
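/* Illustrative note (not part of the original sources): a V8QImode
   broadcast of element 5, i.e. perm = { 5, 5, 5, 5, 5, 5, 5, 5 }, is
   expanded on a little-endian target as a zero-extract of the 8 bits
   at position 40 of op0 viewed as DImode, followed by a mux1 broadcast
   that replicates the extracted byte into all eight lanes of the
   target.  */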
/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx_insn *seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0x5555) == contents)	/* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i + shift) & (2 * nelt - 1);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
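/* Illustrative note (not part of the original sources): for the
   two-operand V4HImode permutation { 1, 0, 5, 4 }, CONTENTS is 0x33
   and the "punpck even halves" case applies.  DREMAP interleaves the
   low halves into a temporary { op0[0], op1[0], op0[1], op1[1] }, and
   DFINAL then applies the one-operand selector { 2, 0, 3, 1 } to that
   temporary, which, per the comment above, always succeeds as a single
   insn for V4HImode.  */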
/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}

/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}

bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
	{
	  d.one_operand_p = false;
	  break;
	}

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}

/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

#include "gt-ia64.h"