1 /* Output routines for GCC for Renesas / SuperH SH. 2 Copyright (C) 1993-2019 Free Software Foundation, Inc. 3 Contributed by Steve Chamberlain (sac@cygnus.com). 4 Improved by Jim Wilson (wilson@cygnus.com). 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3, or (at your option) 11 any later version. 12 13 GCC is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include <sstream> 23 24 #define IN_TARGET_CODE 1 25 26 #include "config.h" 27 #define INCLUDE_VECTOR 28 #include "system.h" 29 #include "coretypes.h" 30 #include "backend.h" 31 #include "target.h" 32 #include "rtl.h" 33 #include "tree.h" 34 #include "gimple.h" 35 #include "cfghooks.h" 36 #include "df.h" 37 #include "memmodel.h" 38 #include "tm_p.h" 39 #include "stringpool.h" 40 #include "attribs.h" 41 #include "optabs.h" 42 #include "emit-rtl.h" 43 #include "recog.h" 44 #include "diagnostic-core.h" 45 #include "alias.h" 46 #include "fold-const.h" 47 #include "stor-layout.h" 48 #include "calls.h" 49 #include "varasm.h" 50 #include "flags.h" 51 #include "explow.h" 52 #include "expr.h" 53 #include "reload.h" 54 #include "output.h" 55 #include "insn-attr.h" 56 #include "dwarf2.h" 57 #include "langhooks.h" 58 #include "cfgrtl.h" 59 #include "intl.h" 60 #include "sched-int.h" 61 #include "gimplify.h" 62 #include "tm-constrs.h" 63 #include "opts.h" 64 #include "tree-pass.h" 65 #include "context.h" 66 #include "builtins.h" 67 #include "rtl-iter.h" 68 #include "regs.h" 69 #include "toplev.h" 70 71 /* This file should be included last. */ 72 #include "target-def.h" 73 74 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch; 75 76 #define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size) 77 #define GEN_MOV (*(gen_movsi)) 78 #define GEN_ADD3 (*(gen_addsi3)) 79 #define GEN_SUB3 (*(gen_subsi3)) 80 81 /* Used to simplify the logic below. Find the attributes wherever 82 they may be. */ 83 #define SH_ATTRIBUTES(decl) \ 84 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \ 85 : DECL_ATTRIBUTES (decl) \ 86 ? (DECL_ATTRIBUTES (decl)) \ 87 : TYPE_ATTRIBUTES (TREE_TYPE (decl)) 88 89 /* Set to true by expand_prologue() when the function is an 90 interrupt handler. */ 91 bool current_function_interrupt; 92 93 tree sh_deferred_function_attributes; 94 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes; 95 96 /* Global variables for machine-dependent things. */ 97 98 /* Which cpu are we scheduling for. */ 99 enum processor_type sh_cpu; 100 101 /* Definitions used in ready queue reordering for first scheduling pass. */ 102 103 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */ 104 static short *regmode_weight[2]; 105 106 /* Total SFmode and SImode weights of scheduled insns. */ 107 static int curr_regmode_pressure[2]; 108 109 /* Number of r0 life regions. */ 110 static int r0_life_regions; 111 112 /* If true, skip cycles for Q -> R movement. */ 113 static int skip_cycles = 0; 114 115 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook 116 and returned from sh_reorder2. 
*/ 117 static short cached_can_issue_more; 118 119 /* Unique number for UNSPEC_BBR pattern. */ 120 static unsigned int unspec_bbr_uid = 1; 121 122 /* Provides the class number of the smallest class containing 123 reg number. */ 124 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] = 125 { 126 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, 142 FP0_REGS,FP_REGS, FP_REGS, FP_REGS, 143 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 144 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 145 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 146 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 147 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 148 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 149 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 150 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 151 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 152 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 153 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 154 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 155 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 156 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 157 FP_REGS, FP_REGS, FP_REGS, FP_REGS, 158 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS, 159 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS, 160 DF_REGS, DF_REGS, DF_REGS, DF_REGS, 161 DF_REGS, DF_REGS, DF_REGS, DF_REGS, 162 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS, 163 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS, 164 GENERAL_REGS, GENERAL_REGS, 165 }; 166 167 char sh_register_names[FIRST_PSEUDO_REGISTER] \ 168 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER; 169 170 char sh_additional_register_names[ADDREGNAMES_SIZE] \ 171 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1] 172 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER; 173 174 int assembler_dialect; 175 176 static void split_branches (rtx_insn *); 177 static int branch_dest (rtx); 178 static void print_slot (rtx_sequence *); 179 static rtx_code_label *add_constant (rtx, machine_mode, rtx); 180 static void dump_table (rtx_insn *, rtx_insn *); 181 static bool broken_move (rtx_insn *); 182 static bool mova_p (rtx_insn *); 183 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *); 184 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *); 185 static rtx_insn *gen_block_redirect (rtx_insn *, int, int); 186 static void sh_reorg (void); 187 static void sh_option_override (void); 188 static void sh_override_options_after_change (void); 189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool); 190 static rtx_insn* emit_frame_insn (rtx); 191 static rtx push (int); 192 static void pop (int); 193 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler); 194 static int calc_live_regs (HARD_REG_SET *); 195 static HOST_WIDE_INT rounded_frame_size (int); 196 static bool sh_frame_pointer_required 
(void); 197 static void sh_emit_mode_set (int, int, int, HARD_REG_SET); 198 static int sh_mode_needed (int, rtx_insn *); 199 static int sh_mode_after (int, int, rtx_insn *); 200 static int sh_mode_entry (int); 201 static int sh_mode_exit (int); 202 static int sh_mode_priority (int entity, int n); 203 204 static rtx mark_constant_pool_use (rtx); 205 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, 206 int, bool *); 207 static tree sh_handle_resbank_handler_attribute (tree *, tree, 208 tree, int, bool *); 209 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree, 210 tree, int, bool *); 211 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *); 212 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *); 213 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *); 214 static void sh_print_operand (FILE *, rtx, int); 215 static void sh_print_operand_address (FILE *, machine_mode, rtx); 216 static bool sh_print_operand_punct_valid_p (unsigned char code); 217 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x); 218 static void sh_output_function_epilogue (FILE *); 219 static void sh_insert_attributes (tree, tree *); 220 static const char *sh_check_pch_target_flags (int); 221 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t); 222 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int); 223 static int sh_issue_rate (void); 224 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p); 225 static short find_set_regmode_weight (rtx, machine_mode); 226 static short find_insn_regmode_weight (rtx, machine_mode); 227 static void find_regmode_weight (basic_block, machine_mode); 228 static int find_r0_life_regions (basic_block); 229 static void sh_md_init_global (FILE *, int, int); 230 static void sh_md_finish_global (FILE *, int); 231 static int rank_for_reorder (const void *, const void *); 232 static void swap_reorder (rtx_insn **, int); 233 static void ready_reorder (rtx_insn **, int); 234 static bool high_pressure (machine_mode); 235 static int sh_reorder (FILE *, int, rtx_insn **, int *, int); 236 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int); 237 static void sh_md_init (FILE *, int, int); 238 static int sh_variable_issue (FILE *, int, rtx_insn *, int); 239 240 static bool sh_function_ok_for_sibcall (tree, tree); 241 242 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *); 243 static bool sh_ms_bitfield_layout_p (const_tree); 244 245 static void sh_init_builtins (void); 246 static tree sh_builtin_decl (unsigned, bool); 247 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int); 248 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, 249 HOST_WIDE_INT, tree); 250 static void sh_file_start (void); 251 static bool sh_assemble_integer (rtx, unsigned int, int); 252 static bool flow_dependent_p (rtx, rtx); 253 static void flow_dependent_p_1 (rtx, const_rtx, void *); 254 static int shiftcosts (rtx); 255 static int and_xor_ior_costs (rtx, int); 256 static int addsubcosts (rtx); 257 static int multcosts (rtx); 258 static bool unspec_caller_rtx_p (rtx); 259 static bool sh_cannot_copy_insn_p (rtx_insn *); 260 static bool sh_cannot_force_const_mem_p (machine_mode, rtx); 261 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool); 262 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool); 263 static int sh_pr_n_sets (void); 264 static rtx sh_allocate_initial_value 
(rtx); 265 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t); 266 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t, 267 machine_mode, 268 struct secondary_reload_info *); 269 static bool sh_legitimate_address_p (machine_mode, rtx, bool); 270 static rtx sh_legitimize_address (rtx, rtx, machine_mode); 271 static rtx sh_delegitimize_address (rtx); 272 static bool sh_cannot_substitute_mem_equiv_p (rtx); 273 static bool sh_legitimize_address_displacement (rtx *, rtx *, 274 poly_int64, machine_mode); 275 static int scavenge_reg (HARD_REG_SET *s); 276 277 static rtx sh_struct_value_rtx (tree, int); 278 static rtx sh_function_value (const_tree, const_tree, bool); 279 static bool sh_function_value_regno_p (const unsigned int); 280 static rtx sh_libcall_value (machine_mode, const_rtx); 281 static bool sh_return_in_memory (const_tree, const_tree); 282 static rtx sh_builtin_saveregs (void); 283 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode, 284 tree, int *, int); 285 static bool sh_strict_argument_naming (cumulative_args_t); 286 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t); 287 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *); 288 static tree sh_build_builtin_va_list (void); 289 static void sh_va_start (tree, rtx); 290 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); 291 static bool sh_promote_prototypes (const_tree); 292 static machine_mode sh_promote_function_mode (const_tree type, 293 machine_mode, 294 int *punsignedp, 295 const_tree funtype, 296 int for_return); 297 static bool sh_pass_by_reference (cumulative_args_t, machine_mode, 298 const_tree, bool); 299 static bool sh_callee_copies (cumulative_args_t, machine_mode, 300 const_tree, bool); 301 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode, 302 tree, bool); 303 static void sh_function_arg_advance (cumulative_args_t, machine_mode, 304 const_tree, bool); 305 static rtx sh_function_arg (cumulative_args_t, machine_mode, 306 const_tree, bool); 307 static int sh_dwarf_calling_convention (const_tree); 308 static void sh_encode_section_info (tree, rtx, int); 309 static bool sh2a_function_vector_p (tree); 310 static void sh_trampoline_init (rtx, tree, rtx); 311 static rtx sh_trampoline_adjust_address (rtx); 312 static void sh_conditional_register_usage (void); 313 static bool sh_legitimate_constant_p (machine_mode, rtx); 314 static int mov_insn_size (machine_mode, bool); 315 static int mov_insn_alignment_mask (machine_mode, bool); 316 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, 317 unsigned int, 318 enum by_pieces_operation, 319 bool); 320 static bool sequence_insn_p (rtx_insn *); 321 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool); 322 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&, 323 machine_mode, bool); 324 static bool sh_legitimate_combined_insn (rtx_insn* insn); 325 326 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2); 327 328 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED; 329 static unsigned int sh_hard_regno_nregs (unsigned int, machine_mode); 330 static bool sh_hard_regno_mode_ok (unsigned int, machine_mode); 331 static bool sh_modes_tieable_p (machine_mode, machine_mode); 332 static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t); 333 334 static const struct attribute_spec sh_attribute_table[] = 335 { 336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, 337 
affects_type_identity, handler, exclude } */ 338 { "interrupt_handler", 0, 0, true, false, false, false, 339 sh_handle_interrupt_handler_attribute, NULL }, 340 { "sp_switch", 1, 1, true, false, false, false, 341 sh_handle_sp_switch_attribute, NULL }, 342 { "trap_exit", 1, 1, true, false, false, false, 343 sh_handle_trap_exit_attribute, NULL }, 344 { "renesas", 0, 0, false, true, false, false, 345 sh_handle_renesas_attribute, NULL }, 346 { "trapa_handler", 0, 0, true, false, false, false, 347 sh_handle_interrupt_handler_attribute, NULL }, 348 { "nosave_low_regs", 0, 0, true, false, false, false, 349 sh_handle_interrupt_handler_attribute, NULL }, 350 { "resbank", 0, 0, true, false, false, false, 351 sh_handle_resbank_handler_attribute, NULL }, 352 { "function_vector", 1, 1, true, false, false, false, 353 sh2a_handle_function_vector_handler_attribute, NULL }, 354 { NULL, 0, 0, false, false, false, false, NULL, NULL } 355 }; 356 357 /* Initialize the GCC target structure. */ 358 #undef TARGET_ATTRIBUTE_TABLE 359 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table 360 361 /* The next two are used for debug info when compiling with -gdwarf. */ 362 #undef TARGET_ASM_UNALIGNED_HI_OP 363 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t" 364 #undef TARGET_ASM_UNALIGNED_SI_OP 365 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t" 366 367 #undef TARGET_OPTION_OVERRIDE 368 #define TARGET_OPTION_OVERRIDE sh_option_override 369 370 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE 371 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \ 372 sh_override_options_after_change 373 374 #undef TARGET_PRINT_OPERAND 375 #define TARGET_PRINT_OPERAND sh_print_operand 376 #undef TARGET_PRINT_OPERAND_ADDRESS 377 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address 378 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P 379 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p 380 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA 381 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra 382 383 #undef TARGET_ASM_FUNCTION_EPILOGUE 384 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue 385 386 #undef TARGET_ASM_OUTPUT_MI_THUNK 387 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk 388 389 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 390 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ 391 hook_bool_const_tree_hwi_hwi_const_tree_true 392 393 #undef TARGET_ASM_FILE_START 394 #define TARGET_ASM_FILE_START sh_file_start 395 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE 396 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true 397 398 #undef TARGET_ASM_INTEGER 399 #define TARGET_ASM_INTEGER sh_assemble_integer 400 401 #undef TARGET_REGISTER_MOVE_COST 402 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost 403 404 #undef TARGET_INSERT_ATTRIBUTES 405 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes 406 407 #undef TARGET_SCHED_ADJUST_COST 408 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost 409 410 #undef TARGET_SCHED_ISSUE_RATE 411 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate 412 413 /* The next 5 hooks have been implemented for reenabling sched1. With the 414 help of these macros we are limiting the movement of insns in sched1 to 415 reduce the register pressure. The overall idea is to keep count of SImode 416 and SFmode regs required by already scheduled insns. When these counts 417 cross some threshold values; give priority to insns that free registers. 
418 The insn that frees registers is most likely to be the insn with lowest 419 LUID (original insn order); but such an insn might be there in the stalled 420 queue (Q) instead of the ready queue (R). To solve this, we skip cycles 421 up to a max of 8 cycles so that such insns may move from Q -> R. 422 423 The description of the hooks are as below: 424 425 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic 426 scheduler; it is called inside the sched_init function just after 427 find_insn_reg_weights function call. It is used to calculate the SImode 428 and SFmode weights of insns of basic blocks; much similar to what 429 find_insn_reg_weights does. 430 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook. 431 432 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is 433 indicated by TARGET_SCHED_REORDER2; doing this may move insns from 434 (Q)->(R). 435 436 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is 437 high; reorder the ready queue so that the insn with lowest LUID will be 438 issued next. 439 440 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to 441 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles. 442 443 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it 444 can be returned from TARGET_SCHED_REORDER2. 445 446 TARGET_SCHED_INIT: Reset the register pressure counting variables. */ 447 448 #undef TARGET_SCHED_DFA_NEW_CYCLE 449 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle 450 451 #undef TARGET_SCHED_INIT_GLOBAL 452 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global 453 454 #undef TARGET_SCHED_FINISH_GLOBAL 455 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global 456 457 #undef TARGET_SCHED_VARIABLE_ISSUE 458 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue 459 460 #undef TARGET_SCHED_REORDER 461 #define TARGET_SCHED_REORDER sh_reorder 462 463 #undef TARGET_SCHED_REORDER2 464 #define TARGET_SCHED_REORDER2 sh_reorder2 465 466 #undef TARGET_SCHED_INIT 467 #define TARGET_SCHED_INIT sh_md_init 468 469 #undef TARGET_DELEGITIMIZE_ADDRESS 470 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address 471 472 #undef TARGET_LEGITIMIZE_ADDRESS 473 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address 474 475 #undef TARGET_CAN_FOLLOW_JUMP 476 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump 477 478 #undef TARGET_MS_BITFIELD_LAYOUT_P 479 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p 480 481 #undef TARGET_INIT_BUILTINS 482 #define TARGET_INIT_BUILTINS sh_init_builtins 483 #undef TARGET_BUILTIN_DECL 484 #define TARGET_BUILTIN_DECL sh_builtin_decl 485 #undef TARGET_EXPAND_BUILTIN 486 #define TARGET_EXPAND_BUILTIN sh_expand_builtin 487 488 #undef TARGET_FUNCTION_OK_FOR_SIBCALL 489 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall 490 491 #undef TARGET_CANNOT_COPY_INSN_P 492 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p 493 #undef TARGET_RTX_COSTS 494 #define TARGET_RTX_COSTS sh_rtx_costs 495 #undef TARGET_ADDRESS_COST 496 #define TARGET_ADDRESS_COST sh_address_cost 497 #undef TARGET_ALLOCATE_INITIAL_VALUE 498 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value 499 500 #undef TARGET_MACHINE_DEPENDENT_REORG 501 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg 502 503 #undef TARGET_DWARF_REGISTER_SPAN 504 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span 505 506 #ifdef HAVE_AS_TLS 507 #undef TARGET_HAVE_TLS 508 #define TARGET_HAVE_TLS true 509 #endif 510 511 #undef TARGET_PROMOTE_PROTOTYPES 512 #define TARGET_PROMOTE_PROTOTYPES 
sh_promote_prototypes 513 #undef TARGET_PROMOTE_FUNCTION_MODE 514 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode 515 516 #undef TARGET_FUNCTION_VALUE 517 #define TARGET_FUNCTION_VALUE sh_function_value 518 #undef TARGET_FUNCTION_VALUE_REGNO_P 519 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p 520 #undef TARGET_LIBCALL_VALUE 521 #define TARGET_LIBCALL_VALUE sh_libcall_value 522 #undef TARGET_STRUCT_VALUE_RTX 523 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx 524 #undef TARGET_RETURN_IN_MEMORY 525 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory 526 527 #undef TARGET_EXPAND_BUILTIN_SAVEREGS 528 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs 529 #undef TARGET_SETUP_INCOMING_VARARGS 530 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs 531 #undef TARGET_STRICT_ARGUMENT_NAMING 532 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming 533 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED 534 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named 535 #undef TARGET_MUST_PASS_IN_STACK 536 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size 537 #undef TARGET_PASS_BY_REFERENCE 538 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference 539 #undef TARGET_CALLEE_COPIES 540 #define TARGET_CALLEE_COPIES sh_callee_copies 541 #undef TARGET_ARG_PARTIAL_BYTES 542 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes 543 #undef TARGET_FUNCTION_ARG 544 #define TARGET_FUNCTION_ARG sh_function_arg 545 #undef TARGET_FUNCTION_ARG_ADVANCE 546 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance 547 548 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV 549 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv 550 551 #undef TARGET_BUILD_BUILTIN_VA_LIST 552 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list 553 #undef TARGET_EXPAND_BUILTIN_VA_START 554 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start 555 #undef TARGET_GIMPLIFY_VA_ARG_EXPR 556 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr 557 558 #undef TARGET_VECTOR_MODE_SUPPORTED_P 559 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p 560 561 #undef TARGET_CHECK_PCH_TARGET_FLAGS 562 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags 563 564 #undef TARGET_DWARF_CALLING_CONVENTION 565 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention 566 567 #undef TARGET_FRAME_POINTER_REQUIRED 568 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required 569 570 #undef TARGET_MODE_EMIT 571 #define TARGET_MODE_EMIT sh_emit_mode_set 572 573 #undef TARGET_MODE_NEEDED 574 #define TARGET_MODE_NEEDED sh_mode_needed 575 576 #undef TARGET_MODE_AFTER 577 #define TARGET_MODE_AFTER sh_mode_after 578 579 #undef TARGET_MODE_ENTRY 580 #define TARGET_MODE_ENTRY sh_mode_entry 581 582 #undef TARGET_MODE_EXIT 583 #define TARGET_MODE_EXIT sh_mode_exit 584 585 #undef TARGET_MODE_PRIORITY 586 #define TARGET_MODE_PRIORITY sh_mode_priority 587 588 /* Return regmode weight for insn. */ 589 #define INSN_REGMODE_WEIGHT(INSN, MODE)\ 590 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)] 591 592 /* Return current register pressure for regmode. */ 593 #define CURR_REGMODE_PRESSURE(MODE)\ 594 curr_regmode_pressure[((MODE) == SImode) ? 
0 : 1] 595 596 #undef TARGET_ENCODE_SECTION_INFO 597 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info 598 599 #undef TARGET_LRA_P 600 #define TARGET_LRA_P sh_lra_p 601 602 #undef TARGET_SECONDARY_RELOAD 603 #define TARGET_SECONDARY_RELOAD sh_secondary_reload 604 605 #undef TARGET_PREFERRED_RELOAD_CLASS 606 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class 607 608 #undef TARGET_CONDITIONAL_REGISTER_USAGE 609 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage 610 611 #undef TARGET_LEGITIMATE_ADDRESS_P 612 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p 613 614 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P 615 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p 616 617 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT 618 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \ 619 sh_legitimize_address_displacement 620 621 #undef TARGET_TRAMPOLINE_INIT 622 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init 623 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS 624 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address 625 626 #undef TARGET_LEGITIMATE_CONSTANT_P 627 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p 628 629 #undef TARGET_CANONICALIZE_COMPARISON 630 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison 631 632 #undef TARGET_LEGITIMATE_COMBINED_INSN 633 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn 634 635 #undef TARGET_FIXED_CONDITION_CODE_REGS 636 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs 637 638 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P 639 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ 640 sh_use_by_pieces_infrastructure_p 641 642 /* Machine-specific symbol_ref flags. */ 643 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0) 644 645 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value 646 is used by optabs.c atomic op expansion code as well as in sync.md. */ 647 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 648 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80 649 650 #undef TARGET_CANNOT_FORCE_CONST_MEM 651 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p 652 653 #undef TARGET_HARD_REGNO_NREGS 654 #define TARGET_HARD_REGNO_NREGS sh_hard_regno_nregs 655 #undef TARGET_HARD_REGNO_MODE_OK 656 #define TARGET_HARD_REGNO_MODE_OK sh_hard_regno_mode_ok 657 658 #undef TARGET_MODES_TIEABLE_P 659 #define TARGET_MODES_TIEABLE_P sh_modes_tieable_p 660 661 #undef TARGET_CAN_CHANGE_MODE_CLASS 662 #define TARGET_CAN_CHANGE_MODE_CLASS sh_can_change_mode_class 663 664 #undef TARGET_CONSTANT_ALIGNMENT 665 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings 666 667 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE 668 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed 669 670 struct gcc_target targetm = TARGET_INITIALIZER; 671 672 673 /* Information on the currently selected atomic model. 674 This is initialized in sh_option_override. 
*/ 675 static sh_atomic_model selected_atomic_model_; 676 677 const sh_atomic_model& 678 selected_atomic_model (void) 679 { 680 return selected_atomic_model_; 681 } 682 683 static sh_atomic_model 684 parse_validate_atomic_model_option (const char* str) 685 { 686 const char* model_names[sh_atomic_model::num_models]; 687 model_names[sh_atomic_model::none] = "none"; 688 model_names[sh_atomic_model::soft_gusa] = "soft-gusa"; 689 model_names[sh_atomic_model::hard_llcs] = "hard-llcs"; 690 model_names[sh_atomic_model::soft_tcb] = "soft-tcb"; 691 model_names[sh_atomic_model::soft_imask] = "soft-imask"; 692 693 const char* model_cdef_names[sh_atomic_model::num_models]; 694 model_cdef_names[sh_atomic_model::none] = "NONE"; 695 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA"; 696 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS"; 697 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB"; 698 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK"; 699 700 sh_atomic_model ret; 701 ret.type = sh_atomic_model::none; 702 ret.name = model_names[sh_atomic_model::none]; 703 ret.cdef_name = model_cdef_names[sh_atomic_model::none]; 704 ret.strict = false; 705 ret.tcb_gbr_offset = -1; 706 707 /* Handle empty string as 'none'. */ 708 if (str == NULL || *str == '\0') 709 return ret; 710 711 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0) 712 713 std::vector<std::string> tokens; 714 for (std::stringstream ss (str); ss.good (); ) 715 { 716 tokens.push_back (std::string ()); 717 std::getline (ss, tokens.back (), ','); 718 } 719 720 if (tokens.empty ()) 721 err_ret ("invalid atomic model option"); 722 723 /* The first token must be the atomic model name. */ 724 { 725 for (size_t i = 0; i < sh_atomic_model::num_models; ++i) 726 if (tokens.front () == model_names[i]) 727 { 728 ret.type = (sh_atomic_model::enum_type)i; 729 ret.name = model_names[i]; 730 ret.cdef_name = model_cdef_names[i]; 731 goto got_mode_name; 732 } 733 734 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ()); 735 got_mode_name:; 736 } 737 738 /* Go through the remaining tokens. */ 739 for (size_t i = 1; i < tokens.size (); ++i) 740 { 741 if (tokens[i] == "strict") 742 ret.strict = true; 743 else if (!tokens[i].compare (0, strlen ("gbr-offset="), "gbr-offset=")) 744 { 745 std::string offset_str = tokens[i].substr (strlen ("gbr-offset=")); 746 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ()); 747 if (offset_str.empty () || ret.tcb_gbr_offset == -1) 748 err_ret ("could not parse gbr-offset value \"%s\" in atomic model " 749 "option", offset_str.c_str ()); 750 } 751 else 752 err_ret ("unknown parameter \"%s\" in atomic model option", 753 tokens[i].c_str ()); 754 } 755 756 /* Check that the selection makes sense. 
*/ 757 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3) 758 err_ret ("atomic model %s is only available on SH3 and SH4 targets", 759 ret.name); 760 761 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A) 762 err_ret ("atomic model %s is only available on SH4A targets", ret.name); 763 764 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1) 765 err_ret ("atomic model %s requires gbr-offset parameter", ret.name); 766 767 if (ret.type == sh_atomic_model::soft_tcb 768 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020 769 || (ret.tcb_gbr_offset & 3) != 0)) 770 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be " 771 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset, 772 ret.name); 773 774 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE) 775 err_ret ("cannot use atomic model %s in user mode", ret.name); 776 777 return ret; 778 779 #undef err_ret 780 } 781 782 /* Register SH specific RTL passes. */ 783 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns, 784 const char* name); 785 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx, 786 const char* name); 787 static void 788 register_sh_passes (void) 789 { 790 /* Running the sh_treg_combine pass after ce1 generates better code when 791 comparisons are combined and reg-reg moves are introduced, because 792 reg-reg moves will be eliminated afterwards. However, there are quite 793 some cases where combine will be unable to fold comparison related insns, 794 thus for now don't do it. 795 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"), 796 PASS_POS_INSERT_AFTER, "ce1", 1); 797 */ 798 799 /* Run sh_treg_combine pass after combine but before register allocation. */ 800 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"), 801 PASS_POS_INSERT_AFTER, "split1", 1); 802 803 /* Run sh_treg_combine pass after register allocation and basic block 804 reordering as this sometimes creates new opportunities. */ 805 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"), 806 PASS_POS_INSERT_AFTER, "split4", 1); 807 808 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value 809 is known after a conditional branch. 810 This must be done after basic blocks and branch conditions have 811 stabilized and won't be changed by further passes. */ 812 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"), 813 PASS_POS_INSERT_BEFORE, "sched2", 1); 814 } 815 816 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override 817 various options, and do some machine dependent initialization. */ 818 static void 819 sh_option_override (void) 820 { 821 int regno; 822 823 SUBTARGET_OVERRIDE_OPTIONS; 824 825 sh_cpu = PROCESSOR_SH1; 826 assembler_dialect = 0; 827 if (TARGET_SH2) 828 sh_cpu = PROCESSOR_SH2; 829 if (TARGET_SH2E) 830 sh_cpu = PROCESSOR_SH2E; 831 if (TARGET_SH2A) 832 sh_cpu = PROCESSOR_SH2A; 833 if (TARGET_SH3) 834 sh_cpu = PROCESSOR_SH3; 835 if (TARGET_SH3E) 836 sh_cpu = PROCESSOR_SH3E; 837 if (TARGET_SH4) 838 { 839 assembler_dialect = 1; 840 sh_cpu = PROCESSOR_SH4; 841 } 842 if (TARGET_SH4A) 843 { 844 assembler_dialect = 1; 845 sh_cpu = PROCESSOR_SH4A; 846 } 847 848 /* User/priviledged mode is supported only on SH3* and SH4*. 849 Disable it for everything else. */ 850 if (!TARGET_SH3 && TARGET_USERMODE) 851 TARGET_USERMODE = false; 852 853 if (! strcmp (sh_div_str, "call-div1")) 854 sh_div_strategy = SH_DIV_CALL_DIV1; 855 else if (! 
strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY) 856 sh_div_strategy = SH_DIV_CALL_FP; 857 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT) 858 sh_div_strategy = SH_DIV_CALL_TABLE; 859 else 860 { 861 /* Pick one that makes most sense for the target in general. 862 It is not much good to use different functions depending on -Os, 863 since then we'll end up with two different functions when some of 864 the code is compiled for size, and some for speed. */ 865 866 /* SH4 tends to emphasize speed. */ 867 if (TARGET_HARD_SH4) 868 sh_div_strategy = SH_DIV_CALL_TABLE; 869 /* These have their own way of doing things. */ 870 else if (TARGET_SH2A) 871 sh_div_strategy = SH_DIV_INTRINSIC; 872 /* SH1 .. SH3 cores often go into small-footprint systems, so 873 default to the smallest implementation available. */ 874 else 875 sh_div_strategy = SH_DIV_CALL_DIV1; 876 } 877 878 if (sh_divsi3_libfunc[0]) 879 ; /* User supplied - leave it alone. */ 880 else if (TARGET_DIVIDE_CALL_FP) 881 sh_divsi3_libfunc = "__sdivsi3_i4"; 882 else if (TARGET_DIVIDE_CALL_TABLE) 883 sh_divsi3_libfunc = "__sdivsi3_i4i"; 884 else 885 sh_divsi3_libfunc = "__sdivsi3"; 886 887 if (sh_branch_cost == -1) 888 { 889 /* The SH1 does not have delay slots, hence we get a pipeline stall 890 at every branch. The SH4 is superscalar, so the single delay slot 891 is not sufficient to keep both pipelines filled. 892 In any case, set the default branch cost to '2', as it results in 893 slightly overall smaller code and also enables some if conversions 894 that are required for matching special T bit related insns. */ 895 sh_branch_cost = 2; 896 } 897 898 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */ 899 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4) 900 TARGET_ZDCBRANCH = 1; 901 902 /* FDPIC code is a special form of PIC, and the vast majority of code 903 generation constraints that apply to PIC also apply to FDPIC, so we 904 set flag_pic to avoid the need to check TARGET_FDPIC everywhere 905 flag_pic is checked. */ 906 if (TARGET_FDPIC && !flag_pic) 907 flag_pic = 2; 908 909 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 910 if (! VALID_REGISTER_P (regno)) 911 sh_register_names[regno][0] = '\0'; 912 913 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++) 914 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno))) 915 sh_additional_register_names[regno][0] = '\0'; 916 917 if (flag_pic && ! TARGET_PREFERGOT) 918 flag_no_function_cse = 1; 919 920 if (targetm.small_register_classes_for_mode_p (VOIDmode)) 921 { 922 /* Never run scheduling before reload, since that can 923 break global alloc, and generates slower code anyway due 924 to the pressure on R0. */ 925 /* Enable sched1 for SH4 if the user explicitly requests. 926 When sched1 is enabled, the ready queue will be reordered by 927 the target hooks if pressure is high. We cannot do this for 928 PIC, SH3 and lower as they give spill failures for R0. */ 929 if (!TARGET_HARD_SH4 || flag_pic) 930 flag_schedule_insns = 0; 931 /* ??? Current exception handling places basic block boundaries 932 after call_insns. It causes the high pressure on R0 and gives 933 spill failures for R0 in reload. See PR 22553 and the thread 934 on gcc-patches 935 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. 
*/ 936 else if (flag_exceptions) 937 { 938 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns) 939 warning (0, "ignoring %<-fschedule-insns%> because of exception " 940 "handling bug"); 941 flag_schedule_insns = 0; 942 } 943 else if (flag_schedule_insns 944 && !global_options_set.x_flag_schedule_insns) 945 flag_schedule_insns = 0; 946 } 947 948 /* Unwind info is not correct around the CFG unless either a frame 949 pointer is present or M_A_O_A is set. Fixing this requires rewriting 950 unwind info generation to be aware of the CFG and propagating states 951 around edges. */ 952 if ((flag_unwind_tables || flag_asynchronous_unwind_tables 953 || flag_exceptions || flag_non_call_exceptions) 954 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS) 955 { 956 warning (0, "unwind tables currently require either a frame pointer " 957 "or %<-maccumulate-outgoing-args%> for correctness"); 958 TARGET_ACCUMULATE_OUTGOING_ARGS = 1; 959 } 960 961 if (flag_unsafe_math_optimizations) 962 { 963 /* Enable fsca insn for SH4A if not otherwise specified by the user. */ 964 if (global_options_set.x_TARGET_FSCA == 0 965 && (TARGET_SH4A_FP || TARGET_FPU_SH4_300)) 966 TARGET_FSCA = 1; 967 968 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */ 969 if (global_options_set.x_TARGET_FSRRA == 0 970 && (TARGET_SH4A_FP || TARGET_FPU_SH4_300)) 971 TARGET_FSRRA = 1; 972 } 973 974 /* Allow fsrra insn only if -funsafe-math-optimizations and 975 -ffinite-math-only is enabled. */ 976 TARGET_FSRRA = TARGET_FSRRA 977 && flag_unsafe_math_optimizations 978 && flag_finite_math_only; 979 980 /* If the -mieee option was not explicitly set by the user, turn it on 981 unless -ffinite-math-only was specified. See also PR 33135. */ 982 if (! global_options_set.x_TARGET_IEEE) 983 TARGET_IEEE = ! flag_finite_math_only; 984 985 if (sh_fixed_range_str) 986 sh_fix_range (sh_fixed_range_str); 987 988 /* This target defaults to strict volatile bitfields. */ 989 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2)) 990 flag_strict_volatile_bitfields = 1; 991 992 sh_override_options_after_change (); 993 994 /* Parse atomic model option and make sure it is valid for the current 995 target CPU. */ 996 selected_atomic_model_ 997 = parse_validate_atomic_model_option (sh_atomic_model_str); 998 999 register_sh_passes (); 1000 } 1001 1002 /* Implement targetm.override_options_after_change. */ 1003 1004 static void 1005 sh_override_options_after_change (void) 1006 { 1007 /* Adjust loop, jump and function alignment values (in bytes), if those 1008 were not specified by the user using -falign-loops, -falign-jumps 1009 and -falign-functions options. 1010 32 bit alignment is better for speed, because instructions can be 1011 fetched as a pair from a longword boundary. For size use 16 bit 1012 alignment to get more compact code. 1013 Aligning all jumps increases the code size, even if it might 1014 result in slightly faster code. Thus, it is set to the smallest 1015 alignment possible if not specified by the user. */ 1016 if (flag_align_loops && !str_align_loops) 1017 str_align_loops = optimize_size ? "2" : "4"; 1018 1019 /* Parse values so that we can compare for current value. */ 1020 parse_alignment_opts (); 1021 if (flag_align_jumps && !str_align_jumps) 1022 str_align_jumps = "2"; 1023 else if (align_jumps.levels[0].get_value () < 2) 1024 str_align_jumps = "2"; 1025 1026 if (flag_align_functions && !str_align_functions) 1027 str_align_functions = optimize_size ? 
"2" : "4"; 1028 1029 /* The linker relaxation code breaks when a function contains 1030 alignments that are larger than that at the start of a 1031 compilation unit. */ 1032 if (TARGET_RELAX) 1033 { 1034 /* Parse values so that we can compare for current value. */ 1035 parse_alignment_opts (); 1036 int min_align = MAX (align_loops.levels[0].get_value (), 1037 align_jumps.levels[0].get_value ()); 1038 1039 /* Also take possible .long constants / mova tables into account. */ 1040 if (min_align < 4) 1041 min_align = 4; 1042 if (align_functions.levels[0].get_value () < min_align) 1043 { 1044 char *r = XNEWVEC (char, 16); 1045 sprintf (r, "%d", min_align); 1046 str_align_functions = r; 1047 } 1048 } 1049 } 1050 1051 /* Print the operand address in x to the stream. */ 1052 static void 1053 sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) 1054 { 1055 switch (GET_CODE (x)) 1056 { 1057 case REG: 1058 case SUBREG: 1059 fprintf (stream, "@%s", reg_names[true_regnum (x)]); 1060 break; 1061 1062 case PLUS: 1063 { 1064 rtx base = XEXP (x, 0); 1065 rtx index = XEXP (x, 1); 1066 1067 switch (GET_CODE (index)) 1068 { 1069 case CONST_INT: 1070 fprintf (stream, "@(%d,%s)", (int) INTVAL (index), 1071 reg_names[true_regnum (base)]); 1072 break; 1073 1074 case REG: 1075 case SUBREG: 1076 { 1077 int base_num = true_regnum (base); 1078 int index_num = true_regnum (index); 1079 1080 /* If base or index is R0, make sure that it comes first. 1081 Usually one of them will be R0, but the order might be wrong. 1082 If neither base nor index are R0 it's an error and we just 1083 pass it on to the assembler. This avoids silent wrong code 1084 bugs. */ 1085 if (base_num == 0 && index_num != 0) 1086 std::swap (base_num, index_num); 1087 1088 fprintf (stream, "@(%s,%s)", reg_names[index_num], 1089 reg_names[base_num]); 1090 break; 1091 } 1092 1093 default: 1094 gcc_unreachable (); 1095 } 1096 } 1097 break; 1098 1099 case PRE_DEC: 1100 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]); 1101 break; 1102 1103 case POST_INC: 1104 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]); 1105 break; 1106 1107 default: 1108 x = mark_constant_pool_use (x); 1109 output_addr_const (stream, x); 1110 break; 1111 } 1112 } 1113 1114 /* Print operand x (an rtx) in assembler syntax to file stream 1115 according to modifier code. 1116 1117 '.' print a .s if insn needs delay slot 1118 ',' print LOCAL_LABEL_PREFIX 1119 '@' print trap, rte or rts depending upon pragma interruptness 1120 '#' output a nop if there is nothing to put in the delay slot 1121 ''' print likelihood suffix (/u for unlikely). 1122 '>' print branch target if -fverbose-asm 1123 'O' print a constant without the # 1124 'R' print the LSW of a dp value - changes if in little endian 1125 'S' print the MSW of a dp value - changes if in little endian 1126 'T' print the next word of a dp value - same as 'R' in big endian mode. 1127 'M' print .b / .w / .l / .s / .d suffix if operand is a MEM. 1128 'N' print 'r63' if the operand is (const_int 0). 1129 'd' print a V2SF reg as dN instead of fpN. 1130 'm' print a pair `base,offset' or `base,index', for LD and ST. 1131 'U' Likewise for {LD,ST}{HI,LO}. 1132 'V' print the position of a single bit set. 1133 'W' print the position of a single bit cleared. 1134 't' print a memory address which is a register. 1135 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value. 1136 'o' output an operator. 
*/ 1137 static void 1138 sh_print_operand (FILE *stream, rtx x, int code) 1139 { 1140 int regno; 1141 machine_mode mode; 1142 1143 switch (code) 1144 { 1145 tree trapa_attr; 1146 1147 case '.': 1148 if (final_sequence 1149 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0)) 1150 && get_attr_length (final_sequence->insn (1))) 1151 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s"); 1152 break; 1153 case ',': 1154 fprintf (stream, "%s", LOCAL_LABEL_PREFIX); 1155 break; 1156 case '@': 1157 trapa_attr = lookup_attribute ("trap_exit", 1158 DECL_ATTRIBUTES (current_function_decl)); 1159 if (trapa_attr) 1160 fprintf (stream, "trapa #%ld", 1161 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr)))); 1162 else if (sh_cfun_interrupt_handler_p ()) 1163 { 1164 if (sh_cfun_resbank_handler_p ()) 1165 fprintf (stream, "resbank\n"); 1166 fprintf (stream, "rte"); 1167 } 1168 else 1169 fprintf (stream, "rts"); 1170 break; 1171 case '#': 1172 /* Output a nop if there's nothing in the delay slot. */ 1173 if (dbr_sequence_length () == 0) 1174 fprintf (stream, "\n\tnop"); 1175 break; 1176 case '\'': 1177 { 1178 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0); 1179 1180 if (note 1181 && profile_probability::from_reg_br_prob_note (XINT (note, 0)) 1182 < profile_probability::even ()) 1183 fputs ("/u", stream); 1184 break; 1185 } 1186 case '>': 1187 if (flag_verbose_asm && JUMP_LABEL (current_output_insn)) 1188 { 1189 fputs ("\t! target: ", stream); 1190 output_addr_const (stream, JUMP_LABEL (current_output_insn)); 1191 } 1192 break; 1193 case 'O': 1194 x = mark_constant_pool_use (x); 1195 output_addr_const (stream, x); 1196 break; 1197 /* N.B.: %R / %S / %T adjust memory addresses by four. 1198 While they can be used to access 64 bit parts of a larger value 1199 held in general purpose registers, that won't work with memory - 1200 neither for fp registers, since the frxx names are used. */ 1201 case 'R': 1202 if (REG_P (x) || GET_CODE (x) == SUBREG) 1203 { 1204 regno = true_regnum (x); 1205 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET; 1206 fputs (reg_names[regno], (stream)); 1207 } 1208 else if (MEM_P (x)) 1209 { 1210 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET); 1211 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0)); 1212 } 1213 else 1214 { 1215 rtx sub = NULL_RTX; 1216 1217 mode = GET_MODE (x); 1218 if (mode == VOIDmode) 1219 mode = DImode; 1220 if (GET_MODE_SIZE (mode) >= 8) 1221 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET); 1222 if (sub) 1223 sh_print_operand (stream, sub, 0); 1224 else 1225 output_operand_lossage ("invalid operand to %%R"); 1226 } 1227 break; 1228 case 'S': 1229 if (REG_P (x) || GET_CODE (x) == SUBREG) 1230 { 1231 regno = true_regnum (x); 1232 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET; 1233 fputs (reg_names[regno], (stream)); 1234 } 1235 else if (MEM_P (x)) 1236 { 1237 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET); 1238 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0)); 1239 } 1240 else 1241 { 1242 rtx sub = NULL_RTX; 1243 1244 mode = GET_MODE (x); 1245 if (mode == VOIDmode) 1246 mode = DImode; 1247 if (GET_MODE_SIZE (mode) >= 8) 1248 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET); 1249 if (sub) 1250 sh_print_operand (stream, sub, 0); 1251 else 1252 output_operand_lossage ("invalid operand to %%S"); 1253 } 1254 break; 1255 case 'T': 1256 /* Next word of a double. 
*/ 1257 switch (GET_CODE (x)) 1258 { 1259 case REG: 1260 fputs (reg_names[REGNO (x) + 1], (stream)); 1261 break; 1262 case MEM: 1263 { 1264 machine_mode mode = GET_MODE (x); 1265 if (GET_CODE (XEXP (x, 0)) != PRE_DEC 1266 && GET_CODE (XEXP (x, 0)) != POST_INC) 1267 x = adjust_address (x, SImode, 4); 1268 sh_print_operand_address (stream, mode, XEXP (x, 0)); 1269 } 1270 break; 1271 default: 1272 break; 1273 } 1274 break; 1275 1276 case 't': 1277 gcc_assert (MEM_P (x)); 1278 x = XEXP (x, 0); 1279 switch (GET_CODE (x)) 1280 { 1281 case REG: 1282 case SUBREG: 1283 sh_print_operand (stream, x, 0); 1284 break; 1285 default: 1286 break; 1287 } 1288 break; 1289 1290 case 'o': 1291 switch (GET_CODE (x)) 1292 { 1293 case PLUS: fputs ("add", stream); break; 1294 case MINUS: fputs ("sub", stream); break; 1295 case MULT: fputs ("mul", stream); break; 1296 case DIV: fputs ("div", stream); break; 1297 case EQ: fputs ("eq", stream); break; 1298 case NE: fputs ("ne", stream); break; 1299 case GT: case LT: fputs ("gt", stream); break; 1300 case GE: case LE: fputs ("ge", stream); break; 1301 case GTU: case LTU: fputs ("gtu", stream); break; 1302 case GEU: case LEU: fputs ("geu", stream); break; 1303 default: 1304 break; 1305 } 1306 break; 1307 case 'M': 1308 if (MEM_P (x)) 1309 { 1310 switch (GET_MODE (x)) 1311 { 1312 case E_QImode: fputs (".b", stream); break; 1313 case E_HImode: fputs (".w", stream); break; 1314 case E_SImode: fputs (".l", stream); break; 1315 case E_SFmode: fputs (".s", stream); break; 1316 case E_DFmode: fputs (".d", stream); break; 1317 default: gcc_unreachable (); 1318 } 1319 } 1320 break; 1321 1322 case 'm': 1323 gcc_assert (MEM_P (x)); 1324 x = XEXP (x, 0); 1325 /* Fall through. */ 1326 case 'U': 1327 switch (GET_CODE (x)) 1328 { 1329 case REG: 1330 case SUBREG: 1331 sh_print_operand (stream, x, 0); 1332 fputs (", 0", stream); 1333 break; 1334 1335 case PLUS: 1336 sh_print_operand (stream, XEXP (x, 0), 0); 1337 fputs (", ", stream); 1338 sh_print_operand (stream, XEXP (x, 1), 0); 1339 break; 1340 1341 default: 1342 gcc_unreachable (); 1343 } 1344 break; 1345 1346 case 'V': 1347 { 1348 int num = exact_log2 (INTVAL (x)); 1349 gcc_assert (num >= 0); 1350 fprintf (stream, "#%d", num); 1351 } 1352 break; 1353 1354 case 'W': 1355 { 1356 int num = exact_log2 (~INTVAL (x)); 1357 gcc_assert (num >= 0); 1358 fprintf (stream, "#%d", num); 1359 } 1360 break; 1361 1362 case 'd': 1363 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode); 1364 1365 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1); 1366 break; 1367 1368 case 'N': 1369 if (x == CONST0_RTX (GET_MODE (x))) 1370 { 1371 fprintf ((stream), "r63"); 1372 break; 1373 } 1374 goto default_output; 1375 case 'u': 1376 if (CONST_INT_P (x)) 1377 { 1378 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1)); 1379 break; 1380 } 1381 /* Fall through. */ 1382 1383 default_output: 1384 default: 1385 regno = 0; 1386 mode = GET_MODE (x); 1387 1388 switch (GET_CODE (x)) 1389 { 1390 case TRUNCATE: 1391 { 1392 rtx inner = XEXP (x, 0); 1393 int offset = 0; 1394 machine_mode inner_mode; 1395 1396 /* We might see SUBREGs with vector mode registers inside. 
*/ 1397 if (GET_CODE (inner) == SUBREG 1398 && (GET_MODE_SIZE (GET_MODE (inner)) 1399 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner)))) 1400 && subreg_lowpart_p (inner)) 1401 inner = SUBREG_REG (inner); 1402 if (CONST_INT_P (inner)) 1403 { 1404 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x))); 1405 goto default_output; 1406 } 1407 inner_mode = GET_MODE (inner); 1408 if (GET_CODE (inner) == SUBREG 1409 && (GET_MODE_SIZE (GET_MODE (inner)) 1410 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner)))) 1411 && REG_P (SUBREG_REG (inner))) 1412 { 1413 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)), 1414 GET_MODE (SUBREG_REG (inner)), 1415 SUBREG_BYTE (inner), 1416 GET_MODE (inner)); 1417 inner = SUBREG_REG (inner); 1418 } 1419 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8) 1420 abort (); 1421 /* Floating point register pairs are always big endian; 1422 general purpose registers are 64 bit wide. */ 1423 regno = REGNO (inner); 1424 regno = (hard_regno_nregs (regno, inner_mode) 1425 - hard_regno_nregs (regno, mode)) 1426 + offset; 1427 x = inner; 1428 goto reg; 1429 } 1430 case SIGN_EXTEND: 1431 x = XEXP (x, 0); 1432 goto reg; 1433 case SUBREG: 1434 gcc_assert (SUBREG_BYTE (x) == 0 1435 && REG_P (SUBREG_REG (x))); 1436 1437 x = SUBREG_REG (x); 1438 /* Fall through. */ 1439 1440 reg: 1441 case REG: 1442 regno += REGNO (x); 1443 if (FP_REGISTER_P (regno) 1444 && mode == V16SFmode) 1445 fprintf ((stream), "mtrx%s", reg_names[regno] + 2); 1446 else if (FP_REGISTER_P (REGNO (x)) 1447 && mode == V4SFmode) 1448 fprintf ((stream), "fv%s", reg_names[regno] + 2); 1449 else if (REG_P (x) 1450 && mode == V2SFmode) 1451 fprintf ((stream), "fp%s", reg_names[regno] + 2); 1452 else if (FP_REGISTER_P (REGNO (x)) 1453 && GET_MODE_SIZE (mode) > 4) 1454 fprintf ((stream), "d%s", reg_names[regno] + 1); 1455 else 1456 fputs (reg_names[regno], (stream)); 1457 break; 1458 1459 case MEM: 1460 output_address (GET_MODE (x), XEXP (x, 0)); 1461 break; 1462 1463 default: 1464 fputc ('#', stream); 1465 output_addr_const (stream, x); 1466 break; 1467 } 1468 break; 1469 } 1470 } 1471 1472 static bool 1473 sh_print_operand_punct_valid_p (unsigned char code) 1474 { 1475 return (code == '.' || code == '#' || code == '@' || code == ',' 1476 || code == '$' || code == '\'' || code == '>'); 1477 } 1478 1479 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ 1480 static bool 1481 sh_asm_output_addr_const_extra (FILE *file, rtx x) 1482 { 1483 if (GET_CODE (x) == UNSPEC) 1484 { 1485 switch (XINT (x, 1)) 1486 { 1487 case UNSPEC_PIC: 1488 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. 
*/ 1489 output_addr_const (file, XVECEXP (x, 0, 0)); 1490 break; 1491 case UNSPEC_GOT: 1492 output_addr_const (file, XVECEXP (x, 0, 0)); 1493 fputs ("@GOT", file); 1494 break; 1495 case UNSPEC_GOTOFF: 1496 output_addr_const (file, XVECEXP (x, 0, 0)); 1497 fputs ("@GOTOFF", file); 1498 break; 1499 case UNSPEC_PLT: 1500 output_addr_const (file, XVECEXP (x, 0, 0)); 1501 fputs ("@PLT", file); 1502 break; 1503 case UNSPEC_GOTPLT: 1504 output_addr_const (file, XVECEXP (x, 0, 0)); 1505 fputs ("@GOTPLT", file); 1506 break; 1507 case UNSPEC_PCREL: 1508 output_addr_const (file, XVECEXP (x, 0, 0)); 1509 fputs ("@PCREL", file); 1510 break; 1511 case UNSPEC_DTPOFF: 1512 output_addr_const (file, XVECEXP (x, 0, 0)); 1513 fputs ("@DTPOFF", file); 1514 break; 1515 case UNSPEC_GOTTPOFF: 1516 output_addr_const (file, XVECEXP (x, 0, 0)); 1517 fputs ("@GOTTPOFF", file); 1518 break; 1519 case UNSPEC_TPOFF: 1520 output_addr_const (file, XVECEXP (x, 0, 0)); 1521 fputs ("@TPOFF", file); 1522 break; 1523 case UNSPEC_CALLER: 1524 { 1525 char name[32]; 1526 /* LPCS stands for Label for PIC Call Site. */ 1527 targetm.asm_out.generate_internal_label (name, "LPCS", 1528 INTVAL (XVECEXP (x, 0, 0))); 1529 assemble_name (file, name); 1530 } 1531 break; 1532 case UNSPEC_SYMOFF: 1533 output_addr_const (file, XVECEXP (x, 0, 0)); 1534 fputc ('-', file); 1535 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST) 1536 { 1537 fputc ('(', file); 1538 output_addr_const (file, XVECEXP (x, 0, 1)); 1539 fputc (')', file); 1540 } 1541 else 1542 output_addr_const (file, XVECEXP (x, 0, 1)); 1543 break; 1544 case UNSPEC_PCREL_SYMOFF: 1545 output_addr_const (file, XVECEXP (x, 0, 0)); 1546 fputs ("-(", file); 1547 output_addr_const (file, XVECEXP (x, 0, 1)); 1548 fputs ("-.)", file); 1549 break; 1550 case UNSPEC_GOTFUNCDESC: 1551 output_addr_const (file, XVECEXP (x, 0, 0)); 1552 fputs ("@GOTFUNCDESC", file); 1553 break; 1554 case UNSPEC_GOTOFFFUNCDESC: 1555 output_addr_const (file, XVECEXP (x, 0, 0)); 1556 fputs ("@GOTOFFFUNCDESC", file); 1557 break; 1558 default: 1559 return false; 1560 } 1561 return true; 1562 } 1563 else 1564 return false; 1565 } 1566 1567 /* Encode symbol attributes of a SYMBOL_REF into its 1568 SYMBOL_REF_FLAGS. */ 1569 static void 1570 sh_encode_section_info (tree decl, rtx rtl, int first) 1571 { 1572 default_encode_section_info (decl, rtl, first); 1573 1574 if (TREE_CODE (decl) == FUNCTION_DECL 1575 && sh2a_function_vector_p (decl) && TARGET_SH2A) 1576 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION; 1577 } 1578 1579 /* Prepare operands for a move define_expand; specifically, one of the 1580 operands must be in a register. */ 1581 void 1582 prepare_move_operands (rtx operands[], machine_mode mode) 1583 { 1584 if ((mode == SImode || mode == DImode) 1585 && flag_pic 1586 && ! ((mode == Pmode || mode == ptr_mode) 1587 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE)) 1588 { 1589 rtx temp; 1590 if (SYMBOLIC_CONST_P (operands[1])) 1591 { 1592 if (MEM_P (operands[0])) 1593 operands[1] = force_reg (Pmode, operands[1]); 1594 else 1595 { 1596 temp = (!can_create_pseudo_p () 1597 ? operands[0] 1598 : gen_reg_rtx (Pmode)); 1599 operands[1] = legitimize_pic_address (operands[1], mode, temp); 1600 } 1601 } 1602 else if (GET_CODE (operands[1]) == CONST 1603 && GET_CODE (XEXP (operands[1], 0)) == PLUS 1604 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0))) 1605 { 1606 temp = !can_create_pseudo_p () ? 
operands[0] : gen_reg_rtx (Pmode); 1607 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0), 1608 mode, temp); 1609 operands[1] = expand_binop (mode, add_optab, temp, 1610 XEXP (XEXP (operands[1], 0), 1), 1611 (!can_create_pseudo_p () 1612 ? temp 1613 : gen_reg_rtx (Pmode)), 1614 0, OPTAB_LIB_WIDEN); 1615 } 1616 } 1617 1618 if (! reload_in_progress && ! reload_completed) 1619 { 1620 /* Copy the source to a register if both operands aren't registers. */ 1621 if (! register_operand (operands[0], mode) 1622 && ! register_operand (operands[1], mode)) 1623 operands[1] = copy_to_mode_reg (mode, operands[1]); 1624 1625 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode)) 1626 { 1627 /* This is like change_address_1 (operands[0], mode, 0, 1) , 1628 except that we can't use that function because it is static. */ 1629 rtx new_rtx = change_address (operands[0], mode, 0); 1630 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]); 1631 operands[0] = new_rtx; 1632 } 1633 1634 /* This case can happen while generating code to move the result 1635 of a library call to the target. Reject `st r0,@(rX,rY)' because 1636 reload will fail to find a spill register for rX, since r0 is already 1637 being used for the source. */ 1638 else if (refers_to_regno_p (R0_REG, operands[1]) 1639 && MEM_P (operands[0]) 1640 && GET_CODE (XEXP (operands[0], 0)) == PLUS 1641 && REG_P (XEXP (XEXP (operands[0], 0), 1))) 1642 operands[1] = copy_to_mode_reg (mode, operands[1]); 1643 1644 /* When the displacement addressing is used, RA will assign r0 to 1645 the pseudo register operand for the QI/HImode load/store. 1646 This tends to make a long live range for R0 and might cause 1647 anomalous register spills in some case with LRA. See PR 1648 target/55212. 1649 We split possible load/store to two move insns via r0 so as to 1650 shorten R0 live range. It will make some codes worse but will 1651 win on average for LRA. 1652 Also when base+index addressing is used and the index term is 1653 a subreg, LRA assumes that more hard registers can be available 1654 in some situation. It isn't the case for SH in the problematic 1655 case. We can pre-allocate R0 for that index term to avoid 1656 the issue. See PR target/66591. */ 1657 else if (sh_lra_p () 1658 && ! TARGET_SH2A 1659 && ((REG_P (operands[0]) && MEM_P (operands[1])) 1660 || (REG_P (operands[1]) && MEM_P (operands[0])))) 1661 { 1662 bool load_p = REG_P (operands[0]); 1663 rtx reg = operands[load_p ? 0 : 1]; 1664 rtx adr = XEXP (operands[load_p ? 
1 : 0], 0); 1665 1666 if ((mode == QImode || mode == HImode) 1667 && REGNO (reg) >= FIRST_PSEUDO_REGISTER 1668 && GET_CODE (adr) == PLUS 1669 && REG_P (XEXP (adr, 0)) 1670 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER) 1671 && CONST_INT_P (XEXP (adr, 1)) 1672 && INTVAL (XEXP (adr, 1)) != 0 1673 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true)) 1674 { 1675 rtx r0_rtx = gen_rtx_REG (mode, R0_REG); 1676 emit_move_insn (r0_rtx, operands[1]); 1677 operands[1] = r0_rtx; 1678 } 1679 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER 1680 && GET_CODE (adr) == PLUS 1681 && REG_P (XEXP (adr, 0)) 1682 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER) 1683 && SUBREG_P (XEXP (adr, 1)) 1684 && REG_P (SUBREG_REG (XEXP (adr, 1)))) 1685 { 1686 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG); 1687 emit_move_insn (r0_rtx, XEXP (adr, 1)); 1688 XEXP (adr, 1) = r0_rtx; 1689 } 1690 } 1691 } 1692 1693 if (mode == Pmode || mode == ptr_mode) 1694 { 1695 rtx op0 = operands[0]; 1696 rtx op1 = operands[1]; 1697 rtx opc; 1698 if (GET_CODE (op1) == CONST 1699 && GET_CODE (XEXP (op1, 0)) == PLUS 1700 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode) 1701 != TLS_MODEL_NONE)) 1702 { 1703 opc = XEXP (XEXP (op1, 0), 1); 1704 op1 = XEXP (XEXP (op1, 0), 0); 1705 } 1706 else 1707 opc = NULL_RTX; 1708 1709 enum tls_model tls_kind; 1710 1711 if (! reload_in_progress && ! reload_completed 1712 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE) 1713 { 1714 rtx tga_op1, tga_ret, tmp, tmp2; 1715 1716 if (! flag_pic 1717 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC 1718 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC 1719 || tls_kind == TLS_MODEL_INITIAL_EXEC)) 1720 { 1721 static int got_labelno; 1722 /* Don't schedule insns for getting GOT address when 1723 the first scheduling is enabled, to avoid spill 1724 failures for R0. */ 1725 if (flag_schedule_insns) 1726 emit_insn (gen_blockage ()); 1727 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno))); 1728 emit_use (gen_rtx_REG (SImode, PIC_REG)); 1729 if (flag_schedule_insns) 1730 emit_insn (gen_blockage ()); 1731 } 1732 1733 switch (tls_kind) 1734 { 1735 case TLS_MODEL_GLOBAL_DYNAMIC: 1736 tga_ret = gen_rtx_REG (Pmode, R0_REG); 1737 if (TARGET_FDPIC) 1738 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG), 1739 sh_get_fdpic_reg_initial_val ()); 1740 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1)); 1741 tmp = gen_reg_rtx (Pmode); 1742 emit_move_insn (tmp, tga_ret); 1743 op1 = tmp; 1744 break; 1745 1746 case TLS_MODEL_LOCAL_DYNAMIC: 1747 tga_ret = gen_rtx_REG (Pmode, R0_REG); 1748 if (TARGET_FDPIC) 1749 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG), 1750 sh_get_fdpic_reg_initial_val ()); 1751 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1)); 1752 1753 tmp = gen_reg_rtx (Pmode); 1754 emit_move_insn (tmp, tga_ret); 1755 1756 if (register_operand (op0, Pmode)) 1757 tmp2 = op0; 1758 else 1759 tmp2 = gen_reg_rtx (Pmode); 1760 1761 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp)); 1762 op1 = tmp2; 1763 break; 1764 1765 case TLS_MODEL_INITIAL_EXEC: 1766 tga_op1 = !can_create_pseudo_p () ? 
op0 : gen_reg_rtx (Pmode); 1767 tmp = gen_sym2GOTTPOFF (op1); 1768 if (TARGET_FDPIC) 1769 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG), 1770 sh_get_fdpic_reg_initial_val ()); 1771 emit_insn (gen_tls_initial_exec (tga_op1, tmp)); 1772 op1 = tga_op1; 1773 break; 1774 1775 case TLS_MODEL_LOCAL_EXEC: 1776 tmp2 = gen_reg_rtx (Pmode); 1777 emit_insn (gen_store_gbr (tmp2)); 1778 tmp = gen_reg_rtx (Pmode); 1779 emit_insn (gen_symTPOFF2reg (tmp, op1)); 1780 1781 if (register_operand (op0, Pmode)) 1782 op1 = op0; 1783 else 1784 op1 = gen_reg_rtx (Pmode); 1785 1786 emit_insn (gen_addsi3 (op1, tmp, tmp2)); 1787 break; 1788 1789 default: 1790 gcc_unreachable (); 1791 } 1792 if (opc) 1793 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc))); 1794 operands[1] = op1; 1795 } 1796 } 1797 1798 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) 1799 { 1800 rtx base, offset; 1801 split_const (operands[1], &base, &offset); 1802 1803 if (GET_CODE (base) == SYMBOL_REF 1804 && !offset_within_block_p (base, INTVAL (offset))) 1805 { 1806 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0]; 1807 emit_move_insn (tmp, base); 1808 if (!arith_operand (offset, mode)) 1809 offset = force_reg (mode, offset); 1810 emit_insn (gen_add3_insn (operands[0], tmp, offset)); 1811 } 1812 } 1813 } 1814 1815 /* Implement the canonicalize_comparison target hook for the combine 1816 pass. For the target hook this function is invoked via 1817 sh_canonicalize_comparison. This function is also re-used to 1818 canonicalize comparisons in cbranch pattern expanders. */ 1819 static void 1820 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1, 1821 machine_mode mode, 1822 bool op0_preserve_value) 1823 { 1824 /* When invoked from within the combine pass the mode is not specified, 1825 so try to get it from one of the operands. */ 1826 if (mode == VOIDmode) 1827 mode = GET_MODE (op0); 1828 if (mode == VOIDmode) 1829 mode = GET_MODE (op1); 1830 1831 // We need to have a mode to do something useful here. 1832 if (mode == VOIDmode) 1833 return; 1834 1835 // Currently, we don't deal with floats here. 1836 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 1837 return; 1838 1839 // Make sure that the constant operand is the second operand. 1840 if (CONST_INT_P (op0) && !CONST_INT_P (op1)) 1841 { 1842 if (op0_preserve_value) 1843 return; 1844 1845 std::swap (op0, op1); 1846 cmp = swap_condition (cmp); 1847 } 1848 1849 if (CONST_INT_P (op1)) 1850 { 1851 /* Try to adjust the constant operand in such a way that available 1852 comparison insns can be utilized better and the constant can be 1853 loaded with a 'mov #imm,Rm' insn. This avoids a load from the 1854 constant pool. */ 1855 const HOST_WIDE_INT val = INTVAL (op1); 1856 1857 /* x > -1 --> x >= 0 1858 x > 0xFFFFFF7F --> x >= 0xFFFFFF80 1859 x <= -1 --> x < 0 1860 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */ 1861 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE)) 1862 { 1863 cmp = cmp == GT ? GE : LT; 1864 op1 = gen_int_mode (val + 1, mode); 1865 } 1866 1867 /* x >= 1 --> x > 0 1868 x >= 0x80 --> x > 0x7F 1869 x < 1 --> x <= 0 1870 x < 0x80 --> x <= 0x7F */ 1871 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT)) 1872 { 1873 cmp = cmp == GE ? GT : LE; 1874 op1 = gen_int_mode (val - 1, mode); 1875 } 1876 1877 /* unsigned x >= 1 --> x != 0 1878 unsigned x < 1 --> x == 0 */ 1879 else if (val == 1 && (cmp == GEU || cmp == LTU)) 1880 { 1881 cmp = cmp == GEU ? 
NE : EQ; 1882 op1 = CONST0_RTX (mode); 1883 } 1884 1885 /* unsigned x >= 0x80 --> unsigned x > 0x7F 1886 unsigned x < 0x80 --> unsigned x < 0x7F */ 1887 else if (val == 0x80 && (cmp == GEU || cmp == LTU)) 1888 { 1889 cmp = cmp == GEU ? GTU : LEU; 1890 op1 = gen_int_mode (val - 1, mode); 1891 } 1892 1893 /* unsigned x > 0 --> x != 0 1894 unsigned x <= 0 --> x == 0 */ 1895 else if (val == 0 && (cmp == GTU || cmp == LEU)) 1896 cmp = cmp == GTU ? NE : EQ; 1897 1898 /* unsigned x > 0x7FFFFFFF --> signed x < 0 1899 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */ 1900 else if (mode == SImode && (cmp == GTU || cmp == LEU) 1901 && val == 0x7FFFFFFF) 1902 { 1903 cmp = cmp == GTU ? LT : GE; 1904 op1 = const0_rtx; 1905 } 1906 1907 /* unsigned x >= 0x80000000 --> signed x < 0 1908 unsigned x < 0x80000000 --> signed x >= 0 */ 1909 else if (mode == SImode && (cmp == GEU || cmp == LTU) 1910 && (unsigned HOST_WIDE_INT)val 1911 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1)) 1912 { 1913 cmp = cmp == GEU ? LT : GE; 1914 op1 = const0_rtx; 1915 } 1916 } 1917 } 1918 1919 /* This function implements the canonicalize_comparison target hook. 1920 This wrapper around the internally used sh_canonicalize_comparison 1921 function is needed to do the enum rtx_code <-> int conversion. 1922 Target hooks cannot use enum rtx_code in its definition. */ 1923 static void 1924 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1, 1925 bool op0_preserve_value) 1926 { 1927 enum rtx_code tmp_code = (enum rtx_code)*code; 1928 sh_canonicalize_comparison (tmp_code, *op0, *op1, 1929 VOIDmode, op0_preserve_value); 1930 *code = (int)tmp_code; 1931 } 1932 1933 /* This function implements the legitimate_combined_insn target hook, 1934 which the combine pass uses to early reject combined insns, before 1935 it tries to recog the insn and determine its cost. */ 1936 static bool 1937 sh_legitimate_combined_insn (rtx_insn* insn) 1938 { 1939 /* Reject combinations of memory loads and zero extensions, as these 1940 interfere with other combine patterns such as zero extracts and bit 1941 tests. The SH2A movu.{b|w} insns are formed later in the 1942 'sh_optimize_extu_exts' pass after combine/split1. */ 1943 rtx p = PATTERN (insn); 1944 if (GET_CODE (p) == SET 1945 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode 1946 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND 1947 && MEM_P (XEXP (XEXP (p, 1), 0))) 1948 return false; 1949 1950 return true; 1951 } 1952 1953 bool 1954 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2) 1955 { 1956 *p1 = T_REG; 1957 *p2 = INVALID_REGNUM; 1958 return true; 1959 } 1960 1961 /* Try to calculate the branch distance of a conditional branch in bytes. 1962 1963 FIXME: Because of PR 59189 we can't use the CFG here. Instead just 1964 walk from this insn into the next (fall-through) basic block and see if 1965 we hit the label. 
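   If the label is not found within MAX_DIST bytes, or if some other code
   label is encountered first, the distance is unknown and
   unknown_cbranch_distance is returned.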
*/ 1966 unsigned int 1967 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist) 1968 { 1969 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn); 1970 1971 if (dump_file) 1972 { 1973 fprintf (dump_file, "sh_cbranch_distance insn = \n"); 1974 print_rtl_single (dump_file, cbranch_insn); 1975 } 1976 1977 unsigned int dist = 0; 1978 1979 for (rtx_insn* i = next_nonnote_insn (cbranch_insn); 1980 i != NULL && dist < max_dist; i = next_nonnote_insn (i)) 1981 { 1982 const unsigned int i_len = get_attr_length (i); 1983 dist += i_len; 1984 1985 if (dump_file) 1986 fprintf (dump_file, " insn %d length = %u dist = %u\n", 1987 INSN_UID (i), i_len, dist); 1988 1989 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i)) 1990 { 1991 if (l == cbranch_insn->jump_target ()) 1992 { 1993 if (dump_file) 1994 fprintf (dump_file, " cbranch dist = %u\n", dist); 1995 return dist; 1996 } 1997 break; 1998 } 1999 } 2000 2001 if (dump_file) 2002 fprintf (dump_file, " cbranch dist = unknown\n"); 2003 2004 return unknown_cbranch_distance; 2005 } 2006 2007 enum rtx_code 2008 prepare_cbranch_operands (rtx *operands, machine_mode mode, 2009 enum rtx_code comparison) 2010 { 2011 gcc_assert (can_create_pseudo_p ()); 2012 2013 if (comparison == LAST_AND_UNUSED_RTX_CODE) 2014 comparison = GET_CODE (operands[0]); 2015 2016 sh_canonicalize_comparison (comparison, operands[1], operands[2], 2017 mode, false); 2018 2019 rtx op1 = operands[1]; 2020 operands[1] = force_reg (mode, op1); 2021 2022 /* When we are handling DImode comparisons, we want to keep constants so 2023 that we can optimize the component comparisons; however, memory loads 2024 are better issued as a whole so that they can be scheduled well. 2025 SImode equality comparisons allow I08 constants, but only when they 2026 compare r0. Hence, if operands[1] has to be loaded from somewhere else 2027 into a register, that register might as well be r0, and we allow the 2028 constant. If it is already in a register, this is likely to be 2029 allocated to a different hard register, thus we load the constant into 2030 a register unless it is zero. */ 2031 if (!REG_P (operands[2]) 2032 && (!CONST_INT_P (operands[2]) 2033 || (mode == SImode && operands[2] != CONST0_RTX (SImode) 2034 && ((comparison != EQ && comparison != NE) 2035 || (REG_P (op1) && REGNO (op1) != R0_REG) 2036 || !satisfies_constraint_I08 (operands[2]))))) 2037 operands[2] = force_reg (mode, operands[2]); 2038 2039 return comparison; 2040 } 2041 2042 static void 2043 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, 2044 profile_probability probability) 2045 { 2046 rtx (*branch_expander) (rtx) = gen_branch_true; 2047 comparison = prepare_cbranch_operands (operands, SImode, comparison); 2048 switch (comparison) 2049 { 2050 case NE: case LT: case LE: case LTU: case LEU: 2051 comparison = reverse_condition (comparison); 2052 branch_expander = gen_branch_false; 2053 default: ; 2054 } 2055 emit_insn (gen_rtx_SET (get_t_reg_rtx (), 2056 gen_rtx_fmt_ee (comparison, SImode, 2057 operands[1], operands[2]))); 2058 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3])); 2059 if (probability.initialized_p ()) 2060 add_reg_br_prob_note (jump, probability); 2061 } 2062 2063 void 2064 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison) 2065 { 2066 expand_cbranchsi4 (operands, comparison, 2067 profile_probability::uninitialized ()); 2068 } 2069 2070 /* ??? How should we distribute probabilities when more than one branch 2071 is generated. 
   So far we only have some ad-hoc observations:
2072    - If the operands are random, they are likely to differ in both parts.
2073    - If comparing items in a hash chain, the operands are random or equal;
2074      operation should be EQ or NE.
2075    - If items are searched in an ordered tree from the root, we can expect
2076      the highpart to be unequal about half of the time; operation should be
2077      an inequality comparison, operands non-constant, and overall probability
2078      about 50%. Likewise for quicksort.
2079    - Range checks will often be made against constants. Even if we assume for
2080      simplicity an even distribution of the non-constant operand over a
2081      sub-range here, the same probability could be generated with differently
2082      wide sub-ranges - as long as the ratio of the part of the subrange that
2083      is before the threshold to the part that comes after the threshold stays
2084      the same. Thus, we can't really tell anything here;
2085      assuming random distribution is at least simple.
2086  */
2087 bool
2088 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2089 {
2090   enum rtx_code msw_taken, msw_skip, lsw_taken;
2091   rtx_code_label *skip_label = NULL;
2092   rtx op1h, op1l, op2h, op2l;
2093   int num_branches;
2094   profile_probability prob, rev_prob;
2095   profile_probability msw_taken_prob = profile_probability::uninitialized (),
2096                       msw_skip_prob = profile_probability::uninitialized (),
2097                       lsw_taken_prob = profile_probability::uninitialized ();
2098
2099   comparison = prepare_cbranch_operands (operands, DImode, comparison);
2100   op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2101   op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2102   op1l = gen_lowpart (SImode, operands[1]);
2103   op2l = gen_lowpart (SImode, operands[2]);
2104   msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2105   prob = split_branch_probability;
2106   rev_prob = prob.invert ();
2107   switch (comparison)
2108     {
2109     case EQ:
2110       msw_skip = NE;
2111       lsw_taken = EQ;
2112       if (prob.initialized_p ())
2113         {
2114           /* FIXME: This is not optimal. We do not really know the probability
2115              that values differ in the MSW only, but we should probably distribute
2116              probabilities more evenly. */
2117           msw_skip_prob = rev_prob;
2118           lsw_taken_prob = prob > profile_probability::never ()
2119                            ? profile_probability::guessed_always ()
2120                            : profile_probability::guessed_never ();
2121         }
2122       break;
2123     case NE:
2124       msw_taken = NE;
2125       msw_taken_prob = prob;
2126       lsw_taken = NE;
2127       lsw_taken_prob = profile_probability::guessed_never ();
2128       break;
2129     case GTU: case GT:
2130       msw_taken = comparison;
2131       if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2132         break;
2133       if (comparison != GTU || op2h != CONST0_RTX (SImode))
2134         msw_skip = swap_condition (msw_taken);
2135       lsw_taken = GTU;
2136       break;
2137     case GEU: case GE:
2138       if (op2l == CONST0_RTX (SImode))
2139         msw_taken = comparison;
2140       else
2141         {
2142           msw_taken = comparison == GE ?
GT : GTU; 2143 msw_skip = swap_condition (msw_taken); 2144 lsw_taken = GEU; 2145 } 2146 break; 2147 case LTU: case LT: 2148 msw_taken = comparison; 2149 if (op2l == CONST0_RTX (SImode)) 2150 break; 2151 msw_skip = swap_condition (msw_taken); 2152 lsw_taken = LTU; 2153 break; 2154 case LEU: case LE: 2155 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1) 2156 msw_taken = comparison; 2157 else 2158 { 2159 lsw_taken = LEU; 2160 if (comparison == LE) 2161 msw_taken = LT; 2162 else if (op2h != CONST0_RTX (SImode)) 2163 msw_taken = LTU; 2164 else 2165 { 2166 msw_skip = swap_condition (LTU); 2167 break; 2168 } 2169 msw_skip = swap_condition (msw_taken); 2170 } 2171 break; 2172 default: return false; 2173 } 2174 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE) 2175 + (msw_skip != LAST_AND_UNUSED_RTX_CODE) 2176 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE)); 2177 if (comparison != EQ && comparison != NE && num_branches > 1) 2178 { 2179 if (!CONSTANT_P (operands[2]) 2180 && prob.initialized_p () 2181 && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U) 2182 && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U)) 2183 { 2184 msw_taken_prob = prob.apply_scale (1, 2); 2185 msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE, 2186 rev_prob.to_reg_br_prob_base () 2187 + REG_BR_PROB_BASE); 2188 lsw_taken_prob = prob; 2189 } 2190 else 2191 { 2192 msw_taken_prob = prob; 2193 msw_skip_prob = profile_probability::guessed_always (); 2194 /* ??? If we have a constant op2h, should we use that when 2195 calculating lsw_taken_prob? */ 2196 lsw_taken_prob = prob; 2197 } 2198 } 2199 operands[1] = op1h; 2200 operands[2] = op2h; 2201 2202 if (msw_taken != LAST_AND_UNUSED_RTX_CODE) 2203 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob); 2204 if (msw_skip != LAST_AND_UNUSED_RTX_CODE) 2205 { 2206 rtx taken_label = operands[3]; 2207 2208 /* Operands were possibly modified, but msw_skip doesn't expect this. 2209 Always use the original ones. */ 2210 if (msw_taken != LAST_AND_UNUSED_RTX_CODE) 2211 { 2212 operands[1] = op1h; 2213 operands[2] = op2h; 2214 } 2215 2216 operands[3] = skip_label = gen_label_rtx (); 2217 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob); 2218 operands[3] = taken_label; 2219 } 2220 operands[1] = op1l; 2221 operands[2] = op2l; 2222 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE) 2223 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob); 2224 if (msw_skip != LAST_AND_UNUSED_RTX_CODE) 2225 emit_label (skip_label); 2226 return true; 2227 } 2228 2229 /* Given an operand, return 1 if the evaluated operand plugged into an 2230 if_then_else will result in a branch_true, 0 if branch_false, or 2231 -1 if neither nor applies. The truth table goes like this: 2232 2233 op | cmpval | code | result 2234 ---------+--------+---------+-------------------- 2235 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1) 2236 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1) 2237 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0) 2238 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0) 2239 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1) 2240 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1) 2241 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0) 2242 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */ 2243 int 2244 sh_eval_treg_value (rtx op) 2245 { 2246 if (t_reg_operand (op, GET_MODE (op))) 2247 return 1; 2248 if (negt_reg_operand (op, GET_MODE (op))) 2249 return 0; 2250 2251 rtx_code code = GET_CODE (op); 2252 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1))) 2253 return -1; 2254 2255 int cmpop = code == EQ ? 
1 : 0; 2256 int cmpval = INTVAL (XEXP (op, 1)); 2257 if (cmpval != 0 && cmpval != 1) 2258 return -1; 2259 2260 int t; 2261 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))) 2262 t = 0; 2263 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))) 2264 t = 1; 2265 else 2266 return -1; 2267 2268 return t ^ (cmpval == cmpop); 2269 } 2270 2271 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case 2272 of floating-point comparisons. */ 2273 static void 2274 sh_emit_set_t_insn (rtx insn, machine_mode mode) 2275 { 2276 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT 2277 && GET_CODE (insn) != PARALLEL) 2278 { 2279 insn = gen_rtx_PARALLEL (VOIDmode, 2280 gen_rtvec (3, insn, 2281 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)), 2282 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG)))); 2283 } 2284 emit_insn (insn); 2285 } 2286 2287 /* Prepare the operands for an scc instruction; make sure that the 2288 compare has been done and the result is in T_REG. */ 2289 void 2290 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1) 2291 { 2292 rtx t_reg = get_t_reg_rtx (); 2293 enum rtx_code oldcode = code; 2294 2295 /* First need a compare insn. */ 2296 switch (code) 2297 { 2298 case NE: 2299 /* It isn't possible to handle this case. */ 2300 gcc_unreachable (); 2301 case LT: 2302 code = GT; 2303 break; 2304 case LE: 2305 code = GE; 2306 break; 2307 case LTU: 2308 code = GTU; 2309 break; 2310 case LEU: 2311 code = GEU; 2312 break; 2313 default: 2314 break; 2315 } 2316 if (code != oldcode) 2317 std::swap (op0, op1); 2318 2319 machine_mode mode = GET_MODE (op0); 2320 if (mode == VOIDmode) 2321 mode = GET_MODE (op1); 2322 2323 op0 = force_reg (mode, op0); 2324 if ((code != EQ && code != NE 2325 && (op1 != const0_rtx 2326 || code == GTU || code == GEU || code == LTU || code == LEU)) 2327 || (mode == DImode && op1 != const0_rtx) 2328 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) 2329 op1 = force_reg (mode, op1); 2330 2331 sh_emit_set_t_insn (gen_rtx_SET (t_reg, 2332 gen_rtx_fmt_ee (code, SImode, op0, op1)), 2333 mode); 2334 } 2335 2336 /* Called from the md file, set up the operands of a compare instruction. */ 2337 void 2338 sh_emit_compare_and_branch (rtx *operands, machine_mode mode) 2339 { 2340 enum rtx_code code = GET_CODE (operands[0]); 2341 enum rtx_code branch_code; 2342 rtx op0 = operands[1]; 2343 rtx op1 = operands[2]; 2344 rtx insn; 2345 bool need_ccmpeq = false; 2346 2347 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT) 2348 { 2349 op0 = force_reg (mode, op0); 2350 op1 = force_reg (mode, op1); 2351 } 2352 else 2353 { 2354 if (code != EQ || mode == DImode) 2355 { 2356 /* Force args into regs, since we can't use constants here. */ 2357 op0 = force_reg (mode, op0); 2358 if (op1 != const0_rtx || code == GTU || code == GEU) 2359 op1 = force_reg (mode, op1); 2360 } 2361 } 2362 2363 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 2364 { 2365 if (code == LT 2366 || (code == LE && TARGET_IEEE && TARGET_SH2E) 2367 || (code == GE && !(TARGET_IEEE && TARGET_SH2E))) 2368 { 2369 std::swap (op0, op1); 2370 code = swap_condition (code); 2371 } 2372 2373 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */ 2374 if (code == GE) 2375 { 2376 gcc_assert (TARGET_IEEE && TARGET_SH2E); 2377 need_ccmpeq = true; 2378 code = GT; 2379 } 2380 2381 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed 2382 to EQ/GT respectively. 
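   The reversal is done by computing the T bit for the opposite condition
   (see branch_code below) and emitting a branch_false instead of a
   branch_true.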
*/ 2383 gcc_assert (code == EQ || code == GT || code == NE || code == LE); 2384 } 2385 2386 switch (code) 2387 { 2388 case EQ: 2389 case GT: 2390 case GE: 2391 case GTU: 2392 case GEU: 2393 branch_code = code; 2394 break; 2395 case NE: 2396 case LT: 2397 case LE: 2398 case LTU: 2399 case LEU: 2400 branch_code = reverse_condition (code); 2401 break; 2402 default: 2403 gcc_unreachable (); 2404 } 2405 2406 insn = gen_rtx_SET (get_t_reg_rtx (), 2407 gen_rtx_fmt_ee (branch_code, SImode, op0, op1)); 2408 2409 sh_emit_set_t_insn (insn, mode); 2410 if (need_ccmpeq) 2411 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode); 2412 2413 if (branch_code == code) 2414 emit_jump_insn (gen_branch_true (operands[3])); 2415 else 2416 emit_jump_insn (gen_branch_false (operands[3])); 2417 } 2418 2419 void 2420 sh_emit_compare_and_set (rtx *operands, machine_mode mode) 2421 { 2422 enum rtx_code code = GET_CODE (operands[1]); 2423 rtx op0 = operands[2]; 2424 rtx op1 = operands[3]; 2425 rtx_code_label *lab = NULL; 2426 bool invert = false; 2427 2428 op0 = force_reg (mode, op0); 2429 if ((code != EQ && code != NE 2430 && (op1 != const0_rtx 2431 || code == GTU || code == GEU || code == LTU || code == LEU)) 2432 || (mode == DImode && op1 != const0_rtx) 2433 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) 2434 op1 = force_reg (mode, op1); 2435 2436 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 2437 { 2438 if (code == LT || code == LE) 2439 { 2440 std::swap (op0, op1); 2441 code = swap_condition (code); 2442 } 2443 if (code == GE) 2444 { 2445 if (TARGET_IEEE) 2446 { 2447 lab = gen_label_rtx (); 2448 sh_emit_scc_to_t (EQ, op0, op1); 2449 emit_jump_insn (gen_branch_true (lab)); 2450 code = GT; 2451 } 2452 else 2453 { 2454 code = LT; 2455 invert = true; 2456 } 2457 } 2458 } 2459 2460 if (code == NE) 2461 { 2462 code = EQ; 2463 invert = true; 2464 } 2465 2466 sh_emit_scc_to_t (code, op0, op1); 2467 if (lab) 2468 emit_label (lab); 2469 if (invert) 2470 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ())); 2471 else 2472 emit_move_insn (operands[0], get_t_reg_rtx ()); 2473 } 2474 2475 /* Functions to output assembly code. */ 2476 2477 /* Return a sequence of instructions to perform DI or DF move. 2478 2479 Since the SH cannot move a DI or DF in one instruction, we have 2480 to take care when we see overlapping source and dest registers. */ 2481 const char * 2482 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[], 2483 machine_mode mode) 2484 { 2485 rtx dst = operands[0]; 2486 rtx src = operands[1]; 2487 2488 if (MEM_P (dst) 2489 && GET_CODE (XEXP (dst, 0)) == PRE_DEC) 2490 return "mov.l %T1,%0" "\n" 2491 " mov.l %1,%0"; 2492 2493 if (register_operand (dst, mode) 2494 && register_operand (src, mode)) 2495 { 2496 if (REGNO (src) == MACH_REG) 2497 return "sts mach,%S0" "\n" 2498 " sts macl,%R0"; 2499 2500 /* When mov.d r1,r2 do r2->r3 then r1->r2; 2501 when mov.d r1,r0 do r1->r0 then r2->r1. 
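   I.e. write the destination half that does not overlap the source first,
   so that the overlapping source half is still intact when the second move
   needs it.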
*/ 2502 if (REGNO (src) + 1 == REGNO (dst)) 2503 return "mov %T1,%T0" "\n" 2504 " mov %1,%0"; 2505 else 2506 return "mov %1,%0" "\n" 2507 " mov %T1,%T0"; 2508 } 2509 else if (CONST_INT_P (src)) 2510 { 2511 if (INTVAL (src) < 0) 2512 output_asm_insn ("mov #-1,%S0", operands); 2513 else 2514 output_asm_insn ("mov #0,%S0", operands); 2515 2516 return "mov %1,%R0"; 2517 } 2518 else if (MEM_P (src)) 2519 { 2520 int ptrreg = -1; 2521 int dreg = REGNO (dst); 2522 rtx inside = XEXP (src, 0); 2523 2524 switch (GET_CODE (inside)) 2525 { 2526 case REG: 2527 ptrreg = REGNO (inside); 2528 break; 2529 2530 case SUBREG: 2531 ptrreg = subreg_regno (inside); 2532 break; 2533 2534 case PLUS: 2535 ptrreg = REGNO (XEXP (inside, 0)); 2536 /* ??? A r0+REG address shouldn't be possible here, because it isn't 2537 an offsettable address. Unfortunately, offsettable addresses use 2538 QImode to check the offset, and a QImode offsettable address 2539 requires r0 for the other operand, which is not currently 2540 supported, so we can't use the 'o' constraint. 2541 Thus we must check for and handle r0+REG addresses here. 2542 We punt for now, since this is likely very rare. */ 2543 gcc_assert (!REG_P (XEXP (inside, 1))); 2544 break; 2545 2546 case LABEL_REF: 2547 return "mov.l %1,%0" "\n" 2548 " mov.l %1+4,%T0"; 2549 case POST_INC: 2550 return "mov.l %1,%0" "\n" 2551 " mov.l %1,%T0"; 2552 default: 2553 gcc_unreachable (); 2554 } 2555 2556 /* Work out the safe way to copy. Copy into the second half first. */ 2557 if (dreg == ptrreg) 2558 return "mov.l %T1,%T0" "\n" 2559 " mov.l %1,%0"; 2560 } 2561 2562 return "mov.l %1,%0" "\n" 2563 " mov.l %T1,%T0"; 2564 } 2565 2566 /* Print an instruction which would have gone into a delay slot after 2567 another instruction, but couldn't because the other instruction expanded 2568 into a sequence where putting the slot insn at the end wouldn't work. */ 2569 static void 2570 print_slot (rtx_sequence *seq) 2571 { 2572 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL); 2573 2574 seq->insn (1)->set_deleted (); 2575 } 2576 2577 const char * 2578 output_far_jump (rtx_insn *insn, rtx op) 2579 { 2580 struct { rtx lab, reg, op; } this_jmp; 2581 rtx_code_label *braf_base_lab = NULL; 2582 const char *jump; 2583 int far; 2584 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); 2585 rtx_insn *prev; 2586 2587 this_jmp.lab = gen_label_rtx (); 2588 2589 if (TARGET_SH2 2590 && offset >= -32764 2591 && offset - get_attr_length (insn) <= 32766 2592 && ! CROSSING_JUMP_P (insn)) 2593 { 2594 far = 0; 2595 jump = "mov.w %O0,%1" "\n" 2596 " braf %1"; 2597 } 2598 else 2599 { 2600 far = 1; 2601 if (flag_pic) 2602 { 2603 if (TARGET_SH2) 2604 jump = "mov.l %O0,%1" "\n" 2605 " braf %1"; 2606 else 2607 jump = "mov.l r0,@-r15" "\n" 2608 " mova %O0,r0" "\n" 2609 " mov.l @r0,%1" "\n" 2610 " add r0,%1" "\n" 2611 " mov.l @r15+,r0" "\n" 2612 " jmp @%1"; 2613 } 2614 else 2615 jump = "mov.l %O0,%1" "\n" 2616 " jmp @%1"; 2617 } 2618 /* If we have a scratch register available, use it. */ 2619 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn))) 2620 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) 2621 { 2622 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0)); 2623 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! 
TARGET_SH2) 2624 jump = "mov.l r1,@-r15" "\n" 2625 " mova %O0,r0" "\n" 2626 " mov.l @r0,r1" "\n" 2627 " add r1,r0" "\n" 2628 " mov.l @r15+,r1" "\n" 2629 " jmp @%1"; 2630 output_asm_insn (jump, &this_jmp.lab); 2631 if (dbr_sequence_length ()) 2632 print_slot (final_sequence); 2633 else 2634 output_asm_insn ("nop", 0); 2635 } 2636 else 2637 { 2638 /* Output the delay slot insn first if any. */ 2639 if (dbr_sequence_length ()) 2640 print_slot (final_sequence); 2641 2642 this_jmp.reg = gen_rtx_REG (SImode, 13); 2643 output_asm_insn ("mov.l r13,@-r15", 0); 2644 output_asm_insn (jump, &this_jmp.lab); 2645 output_asm_insn ("mov.l @r15+,r13", 0); 2646 } 2647 if (far && flag_pic && TARGET_SH2) 2648 { 2649 braf_base_lab = gen_label_rtx (); 2650 (*targetm.asm_out.internal_label) (asm_out_file, "L", 2651 CODE_LABEL_NUMBER (braf_base_lab)); 2652 } 2653 if (far) 2654 output_asm_insn (".align 2", 0); 2655 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab)); 2656 this_jmp.op = op; 2657 if (far && flag_pic) 2658 { 2659 if (TARGET_SH2) 2660 this_jmp.lab = braf_base_lab; 2661 output_asm_insn (".long %O2-%O0", &this_jmp.lab); 2662 } 2663 else 2664 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab); 2665 return ""; 2666 } 2667 2668 /* Local label counter, used for constants in the pool and inside 2669 pattern branches. */ 2670 static int lf = 100; 2671 2672 /* Output code for ordinary branches. */ 2673 const char * 2674 output_branch (int logic, rtx_insn *insn, rtx *operands) 2675 { 2676 switch (get_attr_length (insn)) 2677 { 2678 case 6: 2679 /* This can happen if filling the delay slot has caused a forward 2680 branch to exceed its range (we could reverse it, but only 2681 when we know we won't overextend other branches; this should 2682 best be handled by relaxation). 2683 It can also happen when other condbranches hoist delay slot insn 2684 from their destination, thus leading to code size increase. 2685 But the branch will still be in the range -4092..+4098 bytes. */ 2686 if (! TARGET_RELAX) 2687 { 2688 int label = lf++; 2689 /* The call to print_slot will clobber the operands. */ 2690 rtx op0 = operands[0]; 2691 2692 /* If the instruction in the delay slot is annulled (true), then 2693 there is no delay slot where we can put it now. The only safe 2694 place for it is after the label. final will do that by default. */ 2695 2696 if (final_sequence 2697 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0)) 2698 && get_attr_length (final_sequence->insn (1))) 2699 { 2700 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t", 2701 ASSEMBLER_DIALECT ? "/" : ".", label); 2702 print_slot (final_sequence); 2703 } 2704 else 2705 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label); 2706 2707 output_asm_insn ("bra\t%l0", &op0); 2708 fprintf (asm_out_file, "\tnop\n"); 2709 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label); 2710 2711 return ""; 2712 } 2713 /* FALLTHRU */ 2714 /* When relaxing, handle this like a short branch. The linker 2715 will fix it up if it still doesn't fit after relaxation. */ 2716 case 2: 2717 return logic ? "bt%.\t%l0" : "bf%.\t%l0"; 2718 2719 /* These are for SH2e, in which we have to account for the 2720 extra nop because of the hardware bug in annulled branches. */ 2721 case 8: 2722 if (! 
TARGET_RELAX) 2723 { 2724 int label = lf++; 2725 2726 gcc_assert (!final_sequence 2727 || !(INSN_ANNULLED_BRANCH_P 2728 (XVECEXP (final_sequence, 0, 0)))); 2729 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n", 2730 logic ? "f" : "t", 2731 ASSEMBLER_DIALECT ? "/" : ".", label); 2732 fprintf (asm_out_file, "\tnop\n"); 2733 output_asm_insn ("bra\t%l0", operands); 2734 fprintf (asm_out_file, "\tnop\n"); 2735 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label); 2736 2737 return ""; 2738 } 2739 /* FALLTHRU */ 2740 case 4: 2741 { 2742 char buffer[10]; 2743 2744 sprintf (buffer, "b%s%ss\t%%l0", 2745 logic ? "t" : "f", 2746 ASSEMBLER_DIALECT ? "/" : "."); 2747 output_asm_insn (buffer, &operands[0]); 2748 return "nop"; 2749 } 2750 2751 default: 2752 /* There should be no longer branches now - that would 2753 indicate that something has destroyed the branches set 2754 up in machine_dependent_reorg. */ 2755 gcc_unreachable (); 2756 } 2757 } 2758 2759 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before, 2760 fill in operands 9 as a label to the successor insn. 2761 We try to use jump threading where possible. 2762 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump, 2763 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means 2764 follow jmp and bt, if the address is in range. */ 2765 const char * 2766 output_branchy_insn (enum rtx_code code, const char *templ, 2767 rtx_insn *insn, rtx *operands) 2768 { 2769 rtx_insn *next_insn = NEXT_INSN (insn); 2770 2771 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn)) 2772 { 2773 rtx src = SET_SRC (PATTERN (next_insn)); 2774 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code) 2775 { 2776 /* Following branch not taken */ 2777 rtx_code_label *lab = gen_label_rtx (); 2778 emit_label_after (lab, next_insn); 2779 INSN_ADDRESSES_NEW (lab, 2780 INSN_ADDRESSES (INSN_UID (next_insn)) 2781 + get_attr_length (next_insn)); 2782 operands[9] = lab; 2783 return templ; 2784 } 2785 else 2786 { 2787 int offset = (branch_dest (next_insn) 2788 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4); 2789 if (offset >= -252 && offset <= 258) 2790 { 2791 if (GET_CODE (src) == IF_THEN_ELSE) 2792 /* branch_true */ 2793 src = XEXP (src, 1); 2794 operands[9] = src; 2795 return templ; 2796 } 2797 } 2798 } 2799 rtx_code_label *lab = gen_label_rtx (); 2800 emit_label_after (lab, insn); 2801 INSN_ADDRESSES_NEW (lab, 2802 INSN_ADDRESSES (INSN_UID (insn)) 2803 + get_attr_length (insn)); 2804 operands[9] = lab; 2805 return templ; 2806 } 2807 2808 const char * 2809 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands) 2810 { 2811 return output_branchy_insn (NE, "bt %l9" "\n" 2812 " fcmp/eq %1,%0", 2813 insn, operands); 2814 } 2815 2816 /* Output the start of the assembler file. */ 2817 static void 2818 sh_file_start (void) 2819 { 2820 default_file_start (); 2821 2822 if (TARGET_ELF) 2823 /* We need to show the text section with the proper 2824 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out 2825 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS 2826 will complain. We can teach GAS specifically about the 2827 default attributes for our choice of text section, but 2828 then we would have to change GAS again if/when we change 2829 the text section name. */ 2830 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP); 2831 else 2832 /* Switch to the data section so that the coffsem symbol 2833 isn't in the text section. 
*/ 2834 switch_to_section (data_section); 2835 2836 if (TARGET_LITTLE_ENDIAN) 2837 fputs ("\t.little\n", asm_out_file); 2838 } 2839 2840 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions 2841 need to be output as pointers to function descriptors for 2842 FDPIC. */ 2843 2844 static bool 2845 sh_assemble_integer (rtx value, unsigned int size, int aligned_p) 2846 { 2847 if (TARGET_FDPIC && size == UNITS_PER_WORD 2848 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value)) 2849 { 2850 fputs ("\t.long\t", asm_out_file); 2851 output_addr_const (asm_out_file, value); 2852 fputs ("@FUNCDESC\n", asm_out_file); 2853 return true; 2854 } 2855 return default_assemble_integer (value, size, aligned_p); 2856 } 2857 2858 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */ 2859 static bool 2860 unspec_caller_rtx_p (rtx pat) 2861 { 2862 rtx base, offset; 2863 split_const (pat, &base, &offset); 2864 2865 if (GET_CODE (base) == UNSPEC) 2866 { 2867 if (XINT (base, 1) == UNSPEC_CALLER) 2868 return true; 2869 for (int i = 0; i < XVECLEN (base, 0); i++) 2870 if (unspec_caller_rtx_p (XVECEXP (base, 0, i))) 2871 return true; 2872 } 2873 return false; 2874 } 2875 2876 /* Indicate that INSN cannot be duplicated. This is true for insn 2877 that generates a unique label. */ 2878 static bool 2879 sh_cannot_copy_insn_p (rtx_insn *insn) 2880 { 2881 if (!reload_completed || !flag_pic) 2882 return false; 2883 2884 if (!NONJUMP_INSN_P (insn)) 2885 return false; 2886 if (asm_noperands (insn) >= 0) 2887 return false; 2888 2889 rtx pat = PATTERN (insn); 2890 2891 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE) 2892 return false; 2893 2894 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL) 2895 { 2896 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1); 2897 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0))) 2898 return true; 2899 } 2900 2901 if (GET_CODE (pat) != SET) 2902 return false; 2903 pat = SET_SRC (pat); 2904 2905 if (unspec_caller_rtx_p (pat)) 2906 return true; 2907 2908 return false; 2909 } 2910 2911 /* Number of instructions used to make an arithmetic right shift by N. */ 2912 static const char ashiftrt_insns[] = 2913 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2}; 2914 2915 /* Description of a logical left or right shift, when expanded to a sequence 2916 of 1/2/8/16 shifts. 2917 Notice that one bit right shifts clobber the T bit. One bit left shifts 2918 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. 
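   For example, a shift by 6 uses the entry { 3, { 2, 2, 2 }, 0 } - three
   2-bit shifts and no T bit clobber - while a logical right shift by 7 uses
   { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T } because of the single-bit step.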
*/ 2919 enum 2920 { 2921 ASHL_CLOBBERS_T = 1 << 0, 2922 LSHR_CLOBBERS_T = 1 << 1 2923 }; 2924 2925 struct ashl_lshr_sequence 2926 { 2927 char insn_count; 2928 signed char amount[6]; 2929 char clobbers_t; 2930 }; 2931 2932 static const struct ashl_lshr_sequence ashl_lshr_seq[32] = 2933 { 2934 { 0, { 0 }, 0 }, // 0 2935 { 1, { 1 }, LSHR_CLOBBERS_T }, 2936 { 1, { 2 }, 0 }, 2937 { 2, { 2, 1 }, LSHR_CLOBBERS_T }, 2938 { 2, { 2, 2 }, 0 }, // 4 2939 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }, 2940 { 3, { 2, 2, 2 }, 0 }, 2941 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T }, 2942 { 1, { 8 }, 0 }, // 8 2943 { 2, { 8, 1 }, LSHR_CLOBBERS_T }, 2944 { 2, { 8, 2 }, 0 }, 2945 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T }, 2946 { 3, { 8, 2, 2 }, 0 }, // 12 2947 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T }, 2948 { 3, { 8, -2, 8 }, 0 }, 2949 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T }, 2950 { 1, { 16 }, 0 }, // 16 2951 { 2, { 16, 1 }, LSHR_CLOBBERS_T }, 2952 { 2, { 16, 2 }, 0 }, 2953 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T }, 2954 { 3, { 16, 2, 2 }, 0 }, // 20 2955 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T }, 2956 { 3, { 16, -2, 8 }, 0 }, 2957 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T }, 2958 { 2, { 16, 8 }, 0 }, // 24 2959 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T }, 2960 { 3, { 16, 8, 2 }, 0 }, 2961 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T }, 2962 { 4, { 16, 8, 2, 2 }, 0 }, // 28 2963 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T }, 2964 { 3, { 16, -2, 16 }, 0 }, 2965 2966 /* For a right shift by 31 a 2 insn shll-movt sequence can be used. 2967 For a left shift by 31 a 2 insn and-rotl sequences can be used. 2968 However, the shift-and combiner code needs this entry here to be in 2969 terms of real shift insns. */ 2970 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T } 2971 }; 2972 2973 /* Individual shift amounts for shift amounts < 16, up to three highmost 2974 bits might be clobbered. This is typically used when combined with some 2975 kind of sign or zero extension. */ 2976 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] = 2977 { 2978 { 0, { 0 }, 0 }, // 0 2979 { 1, { 1 }, LSHR_CLOBBERS_T }, 2980 { 1, { 2 }, 0 }, 2981 { 2, { 2, 1 }, LSHR_CLOBBERS_T }, 2982 { 2, { 2, 2 }, 0 }, // 4 2983 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }, 2984 { 2, { 8, -2 }, 0 }, 2985 { 2, { 8, -1 }, ASHL_CLOBBERS_T }, 2986 { 1, { 8 }, 0 }, // 8 2987 { 2, { 8, 1 }, LSHR_CLOBBERS_T }, 2988 { 2, { 8, 2 }, 0 }, 2989 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T }, 2990 { 3, { 8, 2, 2 }, 0 }, // 12 2991 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T }, 2992 { 2, { 16, -2 }, 0 }, 2993 { 2, { 16, -1 }, ASHL_CLOBBERS_T }, 2994 { 1, { 16 }, 0 }, // 16 2995 { 2, { 16, 1 }, LSHR_CLOBBERS_T }, 2996 { 2, { 16, 2 }, 0 }, 2997 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T }, 2998 { 3, { 16, 2, 2 }, 0 }, // 20 2999 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T }, 3000 { 3, { 16, -2, 8 }, 0 }, 3001 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T }, 3002 { 2, { 16, 8 }, 0 }, // 24 3003 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T }, 3004 { 3, { 16, 8, 2 }, 0 }, 3005 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T }, 3006 { 4, { 16, 8, 2, 2 }, 0 }, // 28 3007 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T }, 3008 { 3, { 16, -2, 16 }, 0 }, 3009 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T } 3010 }; 3011 3012 /* Return true if a shift left consisting of 1/2/8/16 shift instructions 3013 will clobber the T bit. */ 3014 bool 3015 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount) 3016 { 3017 gcc_assert (CONST_INT_P (shift_amount)); 3018 3019 const int shift_amount_i = INTVAL (shift_amount) & 31; 3020 3021 /* Special case for shift count of 31: use and-rotl sequence. 
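   That sequence clobbers the T bit as well, so return true here without
   consulting the table.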
*/ 3022 if (shift_amount_i == 31) 3023 return true; 3024 3025 return (ashl_lshr_seq[shift_amount_i].clobbers_t 3026 & ASHL_CLOBBERS_T) != 0; 3027 } 3028 3029 /* Return true if a logical right shift consisting of 1/2/8/16 shift 3030 instructions will clobber the T bit. */ 3031 bool 3032 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount) 3033 { 3034 gcc_assert (CONST_INT_P (shift_amount)); 3035 3036 /* For right shifts the constant might be negative. */ 3037 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31; 3038 3039 /* Special case for shift count of 31: use shll-movt sequence. */ 3040 if (shift_amount_i == 31) 3041 return true; 3042 3043 return (ashl_lshr_seq[shift_amount_i].clobbers_t 3044 & LSHR_CLOBBERS_T) != 0; 3045 } 3046 3047 /* Return true if it is potentially beneficial to use a dynamic shift 3048 instruction (shad / shar) instead of a combination of 1/2/8/16 3049 shift instructions for the specified shift count. 3050 If dynamic shifts are not available, always return false. */ 3051 bool 3052 sh_dynamicalize_shift_p (rtx count) 3053 { 3054 gcc_assert (CONST_INT_P (count)); 3055 3056 /* For right shifts the constant might be negative. */ 3057 const int shift_amount_i = std::abs (INTVAL (count)) & 31; 3058 int insn_count; 3059 3060 /* For left and right shifts, there are shorter 2 insn sequences for 3061 shift amounts of 31. */ 3062 if (shift_amount_i == 31) 3063 insn_count = 2; 3064 else 3065 insn_count = ashl_lshr_seq[shift_amount_i].insn_count; 3066 3067 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST); 3068 } 3069 3070 /* Assuming we have a value that has been sign-extended by at least one bit, 3071 can we use the ext_shift_amounts with the last shift turned to an 3072 arithmetic shift to shift it by N without data loss, and quicker than by 3073 other means? */ 3074 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15) 3075 3076 /* Return the cost of a shift. */ 3077 static inline int 3078 shiftcosts (rtx x) 3079 { 3080 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) 3081 { 3082 if (GET_MODE (x) == DImode 3083 && CONST_INT_P (XEXP (x, 1)) 3084 && INTVAL (XEXP (x, 1)) == 1) 3085 return 2; 3086 3087 /* Everything else is invalid, because there is no pattern for it. */ 3088 return -1; 3089 } 3090 /* If shift by a non constant, then this will be expensive. */ 3091 if (!CONST_INT_P (XEXP (x, 1))) 3092 return SH_DYNAMIC_SHIFT_COST; 3093 3094 /* Otherwise, return the true cost in instructions. Cope with out of range 3095 shift counts more or less arbitrarily. */ 3096 int value = INTVAL (XEXP (x, 1)) & 31; 3097 3098 if (GET_CODE (x) == ASHIFTRT) 3099 { 3100 int cost = ashiftrt_insns[value]; 3101 /* If dynamic shifts are available and profitable in this case, then we 3102 put the constant in a reg and use shad. */ 3103 if (cost > 1 + SH_DYNAMIC_SHIFT_COST) 3104 cost = 1 + SH_DYNAMIC_SHIFT_COST; 3105 return cost; 3106 } 3107 else 3108 return ashl_lshr_seq[value].insn_count; 3109 } 3110 3111 /* Return the cost of an AND/XOR/IOR operation. */ 3112 static inline int 3113 and_xor_ior_costs (rtx x, int code) 3114 { 3115 /* On SH1-4 we have only max. SImode operations. 3116 Double the cost for modes > SImode. */ 3117 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1; 3118 3119 /* A logical operation with two registers is a single cycle 3120 instruction. */ 3121 if (!CONST_INT_P (XEXP (x, 1))) 3122 return 1 * cost_scale; 3123 3124 int i = INTVAL (XEXP (x, 1)); 3125 3126 /* These constants are single cycle extu.[bw] instructions. 
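   An AND with 0xff or 0xffff is emitted as a plain zero extension and needs
   neither an immediate operand nor R0, hence the minimal cost below.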
*/ 3127 if ((i == 0xff || i == 0xffff) && code == AND) 3128 return 1 * cost_scale; 3129 /* Constants that can be used in an instruction as an immediate are 3130 a single cycle, but this requires r0, so make it a little more 3131 expensive. */ 3132 if (CONST_OK_FOR_K08 (i)) 3133 return 2 * cost_scale; 3134 /* Constants that can be loaded with a mov immediate need one more cycle. 3135 This case is probably unnecessary. */ 3136 if (CONST_OK_FOR_I08 (i)) 3137 return 2 * cost_scale; 3138 /* Any other constant requires an additional 2 cycle pc-relative load. 3139 This case is probably unnecessary. */ 3140 return 3 * cost_scale; 3141 } 3142 3143 /* Return the cost of an addition or a subtraction. */ 3144 static inline int 3145 addsubcosts (rtx x) 3146 { 3147 if (GET_MODE (x) == SImode) 3148 { 3149 /* The addc or subc patterns will eventually become one or two 3150 instructions. Below are some costs for some of the patterns 3151 which combine would reject because the costs of the individual 3152 insns in the patterns are lower. 3153 3154 FIXME: It would be much easier if we had something like insn cost 3155 attributes and the cost calculation machinery used those attributes 3156 in the first place. This would eliminate redundant recog-like C 3157 code to calculate costs of complex patterns. */ 3158 rtx op0 = XEXP (x, 0); 3159 rtx op1 = XEXP (x, 1); 3160 3161 if (GET_CODE (x) == PLUS) 3162 { 3163 if (GET_CODE (op0) == AND 3164 && XEXP (op0, 1) == const1_rtx 3165 && (GET_CODE (op1) == PLUS 3166 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx))) 3167 return 1; 3168 3169 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx 3170 && GET_CODE (op1) == LSHIFTRT 3171 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31) 3172 return 1; 3173 } 3174 /* Let's assume that adding the result of an insns that stores into 3175 the T bit is cheap. */ 3176 if (treg_set_expr (op1, SImode)) 3177 return 1; 3178 if (treg_set_expr (op0, SImode)) 3179 return 1; 3180 } 3181 3182 /* On SH1-4 we have only max. SImode operations. 3183 Double the cost for modes > SImode. */ 3184 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1; 3185 3186 /* Adding a register is a single cycle insn. */ 3187 if (REG_P (XEXP (x, 1)) 3188 || GET_CODE (XEXP (x, 1)) == SUBREG) 3189 return 1 * cost_scale; 3190 3191 /* Likewise for small constants. */ 3192 if (CONST_INT_P (XEXP (x, 1)) 3193 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1)))) 3194 return 1 * cost_scale; 3195 3196 /* Any other constant requires a 2 cycle pc-relative load plus an 3197 addition. */ 3198 return 3 * cost_scale; 3199 } 3200 3201 /* Return the cost of a multiply. */ 3202 static inline int 3203 multcosts (rtx x ATTRIBUTE_UNUSED) 3204 { 3205 if (sh_multcost >= 0) 3206 return sh_multcost; 3207 3208 if (TARGET_SH2) 3209 { 3210 /* We have a mul insn, so we can never take more than the mul and the 3211 read of the mac reg, but count more because of the latency and extra 3212 reg usage. */ 3213 if (optimize_size) 3214 return 2; 3215 return 3; 3216 } 3217 3218 /* If we're aiming at small code, then just count the number of 3219 insns in a multiply call sequence. */ 3220 if (optimize_size) 3221 return 5; 3222 3223 /* Otherwise count all the insns in the routine we'd be calling too. */ 3224 return 20; 3225 } 3226 3227 /* Compute a (partial) cost for rtx X. Return true if the complete 3228 cost has been computed, and false if subexpressions should be 3229 scanned. In either case, *TOTAL contains the cost result. 
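   Note that some of the cases below set *TOTAL to a raw value instead of
   using COSTS_N_INSNS.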
*/ 3230 static bool 3231 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code, 3232 int opno ATTRIBUTE_UNUSED, 3233 int *total, bool speed ATTRIBUTE_UNUSED) 3234 { 3235 int code = GET_CODE (x); 3236 3237 switch (code) 3238 { 3239 /* The lower-subreg pass decides whether to split multi-word regs 3240 into individual regs by looking at the cost for a SET of certain 3241 modes with the following patterns: 3242 (set (reg) (reg)) 3243 (set (reg) (const_int 0)) 3244 On machines that support vector-move operations a multi-word move 3245 is the same cost as individual reg move. On SH there is no 3246 vector-move, so we have to provide the correct cost in the number 3247 of move insns to load/store the reg of the mode in question. */ 3248 case SET: 3249 if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL) 3250 { 3251 *total = COSTS_N_INSNS (1); 3252 return true; 3253 } 3254 3255 if (register_operand (SET_DEST (x), VOIDmode) 3256 && (register_operand (SET_SRC (x), VOIDmode) 3257 || satisfies_constraint_Z (SET_SRC (x)))) 3258 { 3259 const machine_mode mode = GET_MODE (SET_DEST (x)); 3260 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) 3261 / mov_insn_size (mode, TARGET_SH2A)); 3262 return true; 3263 } 3264 return false; 3265 3266 /* The cost of a mem access is mainly the cost of the address mode. */ 3267 case MEM: 3268 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x), 3269 true); 3270 return true; 3271 3272 case IF_THEN_ELSE: 3273 /* This case is required for the if_then_else negc pattern. */ 3274 if (treg_set_expr (XEXP (x, 0), SImode)) 3275 { 3276 *total = COSTS_N_INSNS (1); 3277 return true; 3278 } 3279 else 3280 return false; 3281 3282 /* Zero extracts of single bits are usually combine patterns for the 3283 tst insns. */ 3284 case ZERO_EXTRACT: 3285 if (GET_CODE (XEXP (x, 0)) == XOR 3286 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode) 3287 && XEXP (x, 1) == const1_rtx 3288 && CONST_INT_P (XEXP (x, 2)) 3289 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 3290 /* Check that the xor constaint overlaps with the extracted bit. */ 3291 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2))))) 3292 { 3293 *total = 1; //COSTS_N_INSNS (1); 3294 return true; 3295 } 3296 3297 /* div0s variant. */ 3298 if (GET_CODE (XEXP (x, 0)) == XOR 3299 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR 3300 && CONST_INT_P (XEXP (XEXP (x, 0), 1))) 3301 { 3302 *total = 1; 3303 return true; 3304 } 3305 return false; 3306 3307 /* The cost of a sign or zero extend depends on whether the source is a 3308 reg or a mem. In case of a mem take the address into account. */ 3309 case SIGN_EXTEND: 3310 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) 3311 { 3312 *total = COSTS_N_INSNS (1); 3313 return true; 3314 } 3315 if (MEM_P (XEXP (x, 0))) 3316 { 3317 *total = sh_address_cost (XEXP (XEXP (x, 0), 0), 3318 GET_MODE (XEXP (x, 0)), 3319 MEM_ADDR_SPACE (XEXP (x, 0)), true); 3320 return true; 3321 } 3322 return false; 3323 3324 case ZERO_EXTEND: 3325 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) 3326 { 3327 *total = COSTS_N_INSNS (1); 3328 return true; 3329 } 3330 else if (TARGET_SH2A && MEM_P (XEXP (x, 0)) 3331 && (GET_MODE (XEXP (x, 0)) == QImode 3332 || GET_MODE (XEXP (x, 0)) == HImode)) 3333 { 3334 /* Handle SH2A's movu.b and movu.w insn. 
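   Their cost is dominated by the address mode of the memory operand, just
   like an ordinary mem access.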
*/ 3335 *total = sh_address_cost (XEXP (XEXP (x, 0), 0), 3336 GET_MODE (XEXP (x, 0)), 3337 MEM_ADDR_SPACE (XEXP (x, 0)), true); 3338 return true; 3339 } 3340 return false; 3341 3342 /* mems for SFmode and DFmode can be inside a parallel due to 3343 the way the fpscr is handled. */ 3344 case PARALLEL: 3345 for (int i = 0; i < XVECLEN (x, 0); i++) 3346 { 3347 rtx xx = XVECEXP (x, 0, i); 3348 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0))) 3349 { 3350 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0), 3351 GET_MODE (XEXP (xx, 0)), 3352 MEM_ADDR_SPACE (XEXP (xx, 0)), true); 3353 return true; 3354 } 3355 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1))) 3356 { 3357 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0), 3358 GET_MODE (XEXP (xx, 1)), 3359 MEM_ADDR_SPACE (XEXP (xx, 1)), true); 3360 return true; 3361 } 3362 } 3363 3364 if (sh_1el_vec (x, VOIDmode)) 3365 *total = outer_code != SET; 3366 else if (sh_rep_vec (x, VOIDmode)) 3367 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4 3368 + (outer_code != SET)); 3369 else 3370 *total = COSTS_N_INSNS (3) + (outer_code != SET); 3371 return true; 3372 3373 case CONST_INT: 3374 if (CONST_OK_FOR_I08 (INTVAL (x))) 3375 *total = 0; 3376 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR) 3377 && CONST_OK_FOR_K08 (INTVAL (x))) 3378 *total = 1; 3379 /* prepare_cmp_insn will force costly constants int registers before 3380 the cbranch[sd]i4 patterns can see them, so preserve potentially 3381 interesting ones not covered by I08 above. */ 3382 else if (outer_code == COMPARE 3383 && ((unsigned HOST_WIDE_INT) INTVAL (x) 3384 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1 3385 || INTVAL (x) == 0x7fffffff 3386 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81)) 3387 *total = 1; 3388 else 3389 *total = 8; 3390 return true; 3391 3392 case EQ: 3393 /* An and with a constant compared against zero is 3394 most likely going to be a TST #imm, R0 instruction. */ 3395 if (XEXP (x, 1) == const0_rtx 3396 && ((GET_CODE (XEXP (x, 0)) == AND 3397 || (SUBREG_P (XEXP (x, 0)) 3398 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND)) 3399 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT)) 3400 { 3401 *total = 1; 3402 return true; 3403 } 3404 3405 else if (XEXP (x, 1) == const0_rtx 3406 && GET_CODE (XEXP (x, 0)) == AND 3407 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 3408 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT 3409 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode) 3410 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))) 3411 { 3412 *total = 1; 3413 return true; 3414 } 3415 else 3416 return false; 3417 3418 case SMIN: 3419 case SMAX: 3420 /* This is most likely a clips.b or clips.w insn that is being made up 3421 by combine. */ 3422 if (TARGET_SH2A 3423 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN) 3424 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 3425 && REG_P (XEXP (XEXP (x, 0), 0)) 3426 && CONST_INT_P (XEXP (x, 1))) 3427 { 3428 *total = COSTS_N_INSNS (1); 3429 return true; 3430 } 3431 else 3432 return false; 3433 3434 case CONST: 3435 case LABEL_REF: 3436 case SYMBOL_REF: 3437 *total = 5; 3438 return true; 3439 3440 case CONST_DOUBLE: 3441 /* prepare_cmp_insn will force costly constants int registers before 3442 the cbranchdi4 pattern can see them, so preserve potentially 3443 interesting ones. */ 3444 if (outer_code == COMPARE && GET_MODE (x) == DImode) 3445 *total = 1; 3446 else 3447 *total = 10; 3448 return true; 3449 3450 case CONST_VECTOR: 3451 /* FIXME: This looks broken. Only the last statement has any effect. 
3452 Probably this could be folded with the PARALLEL case? */ 3453 if (x == CONST0_RTX (GET_MODE (x))) 3454 *total = 0; 3455 else if (sh_1el_vec (x, VOIDmode)) 3456 *total = outer_code != SET; 3457 if (sh_rep_vec (x, VOIDmode)) 3458 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4 3459 + (outer_code != SET)); 3460 *total = COSTS_N_INSNS (3) + (outer_code != SET); 3461 return true; 3462 3463 case PLUS: 3464 case MINUS: 3465 *total = COSTS_N_INSNS (addsubcosts (x)); 3466 return true; 3467 3468 case AND: 3469 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */ 3470 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx) 3471 { 3472 *total = COSTS_N_INSNS (1); 3473 return true; 3474 } 3475 /* Fall through. */ 3476 3477 case XOR: 3478 case IOR: 3479 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code)); 3480 return true; 3481 3482 case MULT: 3483 *total = COSTS_N_INSNS (multcosts (x)); 3484 return true; 3485 3486 case LT: 3487 case GE: 3488 /* div0s sign comparison. */ 3489 if (GET_CODE (XEXP (x, 0)) == XOR 3490 && REG_P ((XEXP (XEXP (x, 0), 0))) 3491 && REG_P ((XEXP (XEXP (x, 0), 1))) 3492 && satisfies_constraint_Z (XEXP (x, 1))) 3493 { 3494 *total = COSTS_N_INSNS (1); 3495 return true; 3496 } 3497 else 3498 return false; 3499 3500 case LSHIFTRT: 3501 /* div0s sign comparison. */ 3502 if (GET_CODE (XEXP (x, 0)) == XOR 3503 && REG_P ((XEXP (XEXP (x, 0), 0))) 3504 && REG_P ((XEXP (XEXP (x, 0), 1))) 3505 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31) 3506 { 3507 *total = COSTS_N_INSNS (1); 3508 return true; 3509 } 3510 /* FALLTHRU */ 3511 case ASHIFT: 3512 case ASHIFTRT: 3513 { 3514 int cost = shiftcosts (x); 3515 if (cost < 0) 3516 return false; 3517 *total = COSTS_N_INSNS (cost); 3518 return true; 3519 } 3520 3521 case DIV: 3522 case UDIV: 3523 case MOD: 3524 case UMOD: 3525 *total = COSTS_N_INSNS (20); 3526 return true; 3527 3528 case FLOAT: 3529 case FIX: 3530 *total = 100; 3531 return true; 3532 3533 default: 3534 return false; 3535 } 3536 } 3537 3538 /* Determine the size of the fundamental move insn that will be used 3539 for the specified mode. */ 3540 static inline int 3541 mov_insn_size (machine_mode mode, bool consider_sh2a) 3542 { 3543 const int mode_sz = GET_MODE_SIZE (mode); 3544 3545 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode) 3546 || (TARGET_FMOVD && mode == DFmode)) 3547 return mode_sz; 3548 else 3549 { 3550 /* The max. available mode for actual move insns is SImode. 3551 Larger accesses will be split into multiple loads/stores. */ 3552 const int max_mov_sz = GET_MODE_SIZE (SImode); 3553 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz; 3554 } 3555 } 3556 3557 /* Determine the maximum possible displacement for a move insn for the 3558 specified mode. */ 3559 int 3560 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a) 3561 { 3562 /* The 4 byte displacement move insns are the same as the 2 byte 3563 versions but take a 12 bit displacement. All we need to do is to 3564 scale the max. displacement value accordingly. */ 3565 const int disp_scale = consider_sh2a ? (4095 / 15) : 1; 3566 3567 /* SH2A supports FPU move insns with 12 bit displacements. 3568 Other variants to do not support any kind of displacements for 3569 FPU move insns. */ 3570 if (! 
consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT) 3571 return 0; 3572 else 3573 { 3574 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a); 3575 const int mode_sz = GET_MODE_SIZE (mode); 3576 int r = 15 * mov_insn_sz * disp_scale; 3577 3578 /* If the mov insn will be split into multiple loads/stores, the 3579 maximum possible displacement is a bit smaller. */ 3580 if (mode_sz > mov_insn_sz) 3581 r -= mode_sz - mov_insn_sz; 3582 return r; 3583 } 3584 } 3585 3586 /* Determine the alignment mask for a move insn of the 3587 specified mode. */ 3588 static inline int 3589 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a) 3590 { 3591 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a); 3592 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0; 3593 } 3594 3595 /* Return the displacement value of a displacement address. */ 3596 HOST_WIDE_INT 3597 sh_disp_addr_displacement (rtx x) 3598 { 3599 gcc_assert (satisfies_constraint_Sdd (x)); 3600 return INTVAL (XEXP (XEXP (x, 0), 1)); 3601 } 3602 3603 /* Compute the cost of an address. */ 3604 static int 3605 sh_address_cost (rtx x, machine_mode mode, 3606 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) 3607 { 3608 /* 'GBR + 0'. Account one more because of R0 restriction. */ 3609 if (REG_P (x) && REGNO (x) == GBR_REG) 3610 return 2; 3611 3612 /* Simple reg, post-inc, pre-dec addressing. */ 3613 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC) 3614 return 1; 3615 3616 /* 'reg + disp' addressing. */ 3617 if (GET_CODE (x) == PLUS 3618 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))) 3619 { 3620 /* 'GBR + disp'. Account one more because of R0 restriction. */ 3621 if (REGNO (XEXP (x, 0)) == GBR_REG 3622 && gbr_displacement (XEXP (x, 1), mode)) 3623 return 2; 3624 3625 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1)); 3626 3627 if (offset == 0) 3628 return 1; 3629 3630 /* The displacement would fit into a 2 byte move insn. 3631 HImode and QImode loads/stores with displacement put pressure on 3632 R0 which will most likely require another reg copy. Thus account 3633 a higher cost for that. */ 3634 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false)) 3635 return (mode == HImode || mode == QImode) ? 2 : 1; 3636 3637 /* The displacement would fit into a 4 byte move insn (SH2A). */ 3638 if (TARGET_SH2A 3639 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true)) 3640 return 2; 3641 3642 /* The displacement is probably out of range and will require extra 3643 calculations. */ 3644 return 3; 3645 } 3646 3647 /* 'reg + reg' addressing. Account a slightly higher cost because of 3648 increased pressure on R0. */ 3649 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))) 3650 return 3; 3651 3652 /* Not sure what it is - probably expensive. */ 3653 return 10; 3654 } 3655 3656 /* Code to expand a shift. */ 3657 static void 3658 gen_ashift (int type, int n, rtx reg) 3659 { 3660 rtx n_rtx; 3661 3662 /* Negative values here come from the shift_amounts array. 
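     A negative amount here simply means "shift the other way": as an
     illustrative example, gen_ashift (ASHIFT, -2, reg) below flips the
     code to LSHIFTRT and emits a logical right shift by 2 (roughly a
     shlr2 on SH), which is how the shift sequence tables encode a
     correcting shift in the opposite direction.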
*/ 3663 if (n < 0) 3664 { 3665 if (type == ASHIFT) 3666 type = LSHIFTRT; 3667 else 3668 type = ASHIFT; 3669 n = -n; 3670 } 3671 3672 n_rtx = GEN_INT (n); 3673 gcc_assert (satisfies_constraint_P27 (n_rtx)); 3674 3675 switch (type) 3676 { 3677 case ASHIFTRT: 3678 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx)); 3679 break; 3680 case LSHIFTRT: 3681 if (n == 1) 3682 emit_insn (gen_shlr (reg, reg)); 3683 else 3684 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx)); 3685 break; 3686 case ASHIFT: 3687 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx)); 3688 break; 3689 default: 3690 gcc_unreachable (); 3691 } 3692 } 3693 3694 /* Code to expand a HImode shift. */ 3695 static void 3696 gen_ashift_hi (int type, int n, rtx reg) 3697 { 3698 /* Negative values here come from the shift_amounts array. */ 3699 if (n < 0) 3700 { 3701 if (type == ASHIFT) 3702 type = LSHIFTRT; 3703 else 3704 type = ASHIFT; 3705 n = -n; 3706 } 3707 3708 switch (type) 3709 { 3710 case ASHIFTRT: 3711 case LSHIFTRT: 3712 /* We don't have HImode right shift operations because using the 3713 ordinary 32 bit shift instructions for that doesn't generate proper 3714 zero/sign extension. 3715 gen_ashift_hi is only called in contexts where we know that the 3716 sign extension works out correctly. */ 3717 { 3718 int offset = 0; 3719 if (GET_CODE (reg) == SUBREG) 3720 { 3721 offset = SUBREG_BYTE (reg); 3722 reg = SUBREG_REG (reg); 3723 } 3724 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset)); 3725 break; 3726 } 3727 case ASHIFT: 3728 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n))); 3729 break; 3730 } 3731 } 3732 3733 /* Output RTL to split a constant shift into its component SH constant 3734 shift instructions. */ 3735 void 3736 gen_shifty_op (int code, rtx *operands) 3737 { 3738 int value = INTVAL (operands[2]); 3739 int max, i; 3740 3741 /* Truncate the shift count in case it is out of bounds. */ 3742 value = value & 31; 3743 3744 if (value == 31) 3745 { 3746 if (code == LSHIFTRT) 3747 { 3748 emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); 3749 emit_insn (gen_movt (operands[0], get_t_reg_rtx ())); 3750 return; 3751 } 3752 else if (code == ASHIFT) 3753 { 3754 /* There is a two instruction sequence for 31 bit left shifts, 3755 but it requires r0. */ 3756 if (REG_P (operands[0]) && REGNO (operands[0]) == 0) 3757 { 3758 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); 3759 emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); 3760 return; 3761 } 3762 } 3763 } 3764 else if (value == 0) 3765 { 3766 /* This can happen even when optimizing, if there were subregs before 3767 reload. Don't output a nop here, as this is never optimized away; 3768 use a no-op move instead. */ 3769 emit_insn (gen_rtx_SET (operands[0], operands[0])); 3770 return; 3771 } 3772 3773 max = ashl_lshr_seq[value].insn_count; 3774 for (i = 0; i < max; i++) 3775 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]); 3776 } 3777 3778 /* Same as gen_shifty_op, but optimized for values where the topmost bits 3779 don't matter. */ 3780 void 3781 gen_shifty_hi_op (int code, rtx *operands) 3782 { 3783 int value = INTVAL (operands[2]); 3784 int max, i; 3785 void (*gen_fun) (int, int, rtx); 3786 3787 /* This operation is used by and_shl for SImode values with a few 3788 high bits known to be cleared. */ 3789 value &= 31; 3790 if (value == 0) 3791 { 3792 emit_insn (gen_nop ()); 3793 return; 3794 } 3795 3796 gen_fun = GET_MODE (operands[0]) == HImode ? 
gen_ashift_hi : gen_ashift; 3797 if (code == ASHIFT) 3798 { 3799 max = ext_ashl_lshr_seq[value].insn_count; 3800 for (i = 0; i < max; i++) 3801 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]); 3802 } 3803 else 3804 /* When shifting right, emit the shifts in reverse order, so that 3805 solitary negative values come first. */ 3806 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--) 3807 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]); 3808 } 3809 3810 /* Output RTL for an arithmetic right shift. 3811 ??? Rewrite to use super-optimizer sequences. */ 3812 bool 3813 expand_ashiftrt (rtx *operands) 3814 { 3815 rtx wrk; 3816 char func[18]; 3817 int value; 3818 3819 if (TARGET_DYNSHIFT) 3820 { 3821 if (!CONST_INT_P (operands[2])) 3822 { 3823 rtx count = copy_to_mode_reg (SImode, operands[2]); 3824 emit_insn (gen_negsi2 (count, count)); 3825 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); 3826 return true; 3827 } 3828 else if (ashiftrt_insns[INTVAL (operands[2]) & 31] 3829 > 1 + SH_DYNAMIC_SHIFT_COST) 3830 { 3831 rtx count 3832 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31))); 3833 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); 3834 return true; 3835 } 3836 } 3837 if (!CONST_INT_P (operands[2])) 3838 return false; 3839 3840 value = INTVAL (operands[2]) & 31; 3841 3842 if (value == 31) 3843 { 3844 /* If we are called from abs expansion, arrange things so that we 3845 we can use a single MT instruction that doesn't clobber the source, 3846 if LICM can hoist out the load of the constant zero. */ 3847 if (currently_expanding_to_rtl) 3848 { 3849 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)), 3850 operands[1])); 3851 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ())); 3852 return true; 3853 } 3854 emit_insn (gen_ashrsi2_31 (operands[0], operands[1])); 3855 return true; 3856 } 3857 else if (value >= 16 && value <= 19) 3858 { 3859 wrk = gen_reg_rtx (SImode); 3860 emit_insn (gen_ashrsi2_16 (wrk, operands[1])); 3861 value -= 16; 3862 while (value--) 3863 gen_ashift (ASHIFTRT, 1, wrk); 3864 emit_move_insn (operands[0], wrk); 3865 return true; 3866 } 3867 /* Expand a short sequence inline, longer call a magic routine. */ 3868 else if (value <= 5) 3869 { 3870 wrk = gen_reg_rtx (SImode); 3871 emit_move_insn (wrk, operands[1]); 3872 while (value--) 3873 gen_ashift (ASHIFTRT, 1, wrk); 3874 emit_move_insn (operands[0], wrk); 3875 return true; 3876 } 3877 3878 wrk = gen_reg_rtx (Pmode); 3879 3880 /* Load the value into an arg reg and call a helper. */ 3881 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); 3882 sprintf (func, "__ashiftrt_r4_%d", value); 3883 rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab; 3884 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab)); 3885 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4)); 3886 return true; 3887 } 3888 3889 /* Try to find a good way to implement the combiner pattern 3890 [(set (match_operand:SI 0 "register_operand" "r") 3891 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") 3892 (match_operand:SI 2 "const_int_operand" "n")) 3893 (match_operand:SI 3 "const_int_operand" "n"))) . 3894 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3. 3895 return 0 for simple right / left or left/right shift combination. 3896 return 1 for a combination of shifts with zero_extend. 3897 return 2 for a combination of shifts with an AND that needs r0. 
3898 return 3 for a combination of shifts with an AND that needs an extra 3899 scratch register, when the three highmost bits of the AND mask are clear. 3900 return 4 for a combination of shifts with an AND that needs an extra 3901 scratch register, when any of the three highmost bits of the AND mask 3902 is set. 3903 If ATTRP is set, store an initial right shift width in ATTRP[0], 3904 and the instruction length in ATTRP[1] . These values are not valid 3905 when returning 0. 3906 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into 3907 shift_amounts for the last shift value that is to be used before the 3908 sign extend. */ 3909 int 3910 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp) 3911 { 3912 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2; 3913 int left = INTVAL (left_rtx), right; 3914 int best = 0; 3915 int cost, best_cost = 10000; 3916 int best_right = 0, best_len = 0; 3917 int i; 3918 int can_ext; 3919 3920 if (left < 0 || left > 31) 3921 return 0; 3922 if (CONST_INT_P (mask_rtx)) 3923 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left; 3924 else 3925 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left; 3926 /* Can this be expressed as a right shift / left shift pair? */ 3927 lsb = ((mask ^ (mask - 1)) >> 1) + 1; 3928 right = exact_log2 (lsb); 3929 mask2 = ~(mask + lsb - 1); 3930 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1; 3931 /* mask has no zeroes but trailing zeroes <==> ! mask2 */ 3932 if (! mask2) 3933 best_cost = ashl_lshr_seq[right].insn_count 3934 + ashl_lshr_seq[right + left].insn_count; 3935 /* mask has no trailing zeroes <==> ! right */ 3936 else if (! right && mask2 == ~(lsb2 - 1)) 3937 { 3938 int late_right = exact_log2 (lsb2); 3939 best_cost = ashl_lshr_seq[left + late_right].insn_count 3940 + ashl_lshr_seq[late_right].insn_count; 3941 } 3942 /* Try to use zero extend. */ 3943 if (mask2 == ~(lsb2 - 1)) 3944 { 3945 int width, first; 3946 3947 for (width = 8; width <= 16; width += 8) 3948 { 3949 /* Can we zero-extend right away? */ 3950 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width) 3951 { 3952 cost = 1 + ext_ashl_lshr_seq[right].insn_count 3953 + ext_ashl_lshr_seq[left + right].insn_count; 3954 if (cost < best_cost) 3955 { 3956 best = 1; 3957 best_cost = cost; 3958 best_right = right; 3959 best_len = cost; 3960 if (attrp) 3961 attrp[2] = -1; 3962 } 3963 continue; 3964 } 3965 /* ??? Could try to put zero extend into initial right shift, 3966 or even shift a bit left before the right shift. */ 3967 /* Determine value of first part of left shift, to get to the 3968 zero extend cut-off point. */ 3969 first = width - exact_log2 (lsb2) + right; 3970 if (first >= 0 && right + left - first >= 0) 3971 { 3972 cost = ext_ashl_lshr_seq[right].insn_count 3973 + ext_ashl_lshr_seq[first].insn_count + 1 3974 + ext_ashl_lshr_seq[right + left - first].insn_count; 3975 3976 if (cost < best_cost) 3977 { 3978 best = 1; 3979 best_cost = cost; 3980 best_right = right; 3981 best_len = cost; 3982 if (attrp) 3983 attrp[2] = first; 3984 } 3985 } 3986 } 3987 } 3988 /* Try to use r0 AND pattern */ 3989 for (i = 0; i <= 2; i++) 3990 { 3991 if (i > right) 3992 break; 3993 if (! CONST_OK_FOR_K08 (mask >> i)) 3994 continue; 3995 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count; 3996 if (cost < best_cost) 3997 { 3998 best = 2; 3999 best_cost = cost; 4000 best_right = i; 4001 best_len = cost - 1; 4002 } 4003 } 4004 /* Try to use a scratch register to hold the AND operand. 
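     Cost sketch, read off the loop below: one insn for an initial right
     shift when i != 0, roughly two or three insns' worth of cost to get
     the shifted mask into a scratch register and AND with it (two if it
     satisfies I08, three if it needs a constant-pool load), plus the
     left shifts that realize the remaining shift of left + i bits.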
*/ 4005 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0; 4006 for (i = 0; i <= 2; i++) 4007 { 4008 if (i > right) 4009 break; 4010 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3) 4011 + (can_ext 4012 ? ext_ashl_lshr_seq 4013 : ashl_lshr_seq)[left + i].insn_count; 4014 if (cost < best_cost) 4015 { 4016 best = 4 - can_ext; 4017 best_cost = cost; 4018 best_right = i; 4019 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i); 4020 } 4021 } 4022 4023 if (attrp) 4024 { 4025 attrp[0] = best_right; 4026 attrp[1] = best_len; 4027 } 4028 return best; 4029 } 4030 4031 /* This is used in length attributes of the unnamed instructions 4032 corresponding to shl_and_kind return values of 1 and 2. */ 4033 int 4034 shl_and_length (rtx insn) 4035 { 4036 rtx set_src, left_rtx, mask_rtx; 4037 int attributes[3]; 4038 4039 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); 4040 left_rtx = XEXP (XEXP (set_src, 0), 1); 4041 mask_rtx = XEXP (set_src, 1); 4042 shl_and_kind (left_rtx, mask_rtx, attributes); 4043 return attributes[1]; 4044 } 4045 4046 /* This is used in length attribute of the and_shl_scratch instruction. */ 4047 int 4048 shl_and_scr_length (rtx insn) 4049 { 4050 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); 4051 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count; 4052 rtx op = XEXP (set_src, 0); 4053 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1; 4054 op = XEXP (XEXP (op, 0), 0); 4055 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count; 4056 } 4057 4058 /* Generate rtl for instructions for which shl_and_kind advised a particular 4059 method of generating them, i.e. returned zero. */ 4060 bool 4061 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source) 4062 { 4063 int attributes[3]; 4064 unsigned HOST_WIDE_INT mask; 4065 int kind = shl_and_kind (left_rtx, mask_rtx, attributes); 4066 int right, total_shift; 4067 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op; 4068 4069 right = attributes[0]; 4070 total_shift = INTVAL (left_rtx) + right; 4071 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift; 4072 switch (kind) 4073 { 4074 default: 4075 return true; 4076 case 1: 4077 { 4078 int first = attributes[2]; 4079 rtx operands[3]; 4080 4081 if (first < 0) 4082 { 4083 emit_insn ((mask << right) <= 0xff 4084 ? gen_zero_extendqisi2 (dest, 4085 gen_lowpart (QImode, source)) 4086 : gen_zero_extendhisi2 (dest, 4087 gen_lowpart (HImode, source))); 4088 source = dest; 4089 } 4090 if (source != dest) 4091 emit_insn (gen_movsi (dest, source)); 4092 operands[0] = dest; 4093 if (right) 4094 { 4095 operands[2] = GEN_INT (right); 4096 gen_shifty_hi_op (LSHIFTRT, operands); 4097 } 4098 if (first > 0) 4099 { 4100 operands[2] = GEN_INT (first); 4101 gen_shifty_hi_op (ASHIFT, operands); 4102 total_shift -= first; 4103 mask <<= first; 4104 } 4105 if (first >= 0) 4106 emit_insn (mask <= 0xff 4107 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest)) 4108 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest))); 4109 if (total_shift > 0) 4110 { 4111 operands[2] = GEN_INT (total_shift); 4112 gen_shifty_hi_op (ASHIFT, operands); 4113 } 4114 break; 4115 } 4116 case 4: 4117 shift_gen_fun = gen_shifty_op; 4118 /* FALLTHRU */ 4119 case 3: 4120 /* If the topmost bit that matters is set, set the topmost bits 4121 that don't matter. This way, we might be able to get a shorter 4122 signed constant. 
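     As a rough example (values made up for illustration): with
     total_shift == 24, a mask of 0xff widens to all ones, i.e. -1, which
     a single mov #imm can load, whereas 0x000000ff would need a longer
     sequence or a constant-pool load; the extra bits are shifted back
     out by the final left shift, so the result is unchanged.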
*/ 4123 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift))) 4124 mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift)); 4125 /* FALLTHRU */ 4126 case 2: 4127 /* Don't expand fine-grained when combining, because that will 4128 make the pattern fail. */ 4129 if (currently_expanding_to_rtl 4130 || reload_in_progress || reload_completed) 4131 { 4132 rtx operands[3]; 4133 4134 /* Cases 3 and 4 should be handled by this split 4135 only while combining */ 4136 gcc_assert (kind <= 2); 4137 if (right) 4138 { 4139 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right))); 4140 source = dest; 4141 } 4142 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask))); 4143 if (total_shift) 4144 { 4145 operands[0] = dest; 4146 operands[1] = dest; 4147 operands[2] = GEN_INT (total_shift); 4148 shift_gen_fun (ASHIFT, operands); 4149 } 4150 break; 4151 } 4152 else 4153 { 4154 int neg = 0; 4155 if (kind != 4 && total_shift < 16) 4156 { 4157 neg = -ext_ashl_lshr_seq[total_shift].amount[1]; 4158 if (neg > 0) 4159 neg -= ext_ashl_lshr_seq[total_shift].amount[2]; 4160 else 4161 neg = 0; 4162 } 4163 emit_insn (gen_and_shl_scratch (dest, source, 4164 GEN_INT (right), 4165 GEN_INT (mask), 4166 GEN_INT (total_shift + neg), 4167 GEN_INT (neg))); 4168 emit_insn (gen_movsi (dest, dest)); 4169 break; 4170 } 4171 } 4172 return false; 4173 } 4174 4175 /* Try to find a good way to implement the combiner pattern 4176 [(set (match_operand:SI 0 "register_operand" "=r") 4177 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") 4178 (match_operand:SI 2 "const_int_operand" "n") 4179 (match_operand:SI 3 "const_int_operand" "n") 4180 (const_int 0))) 4181 (clobber (reg:SI T_REG))] 4182 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3. 4183 return 0 for simple left / right shift combination. 4184 return 1 for left shift / 8 bit sign extend / left shift. 4185 return 2 for left shift / 16 bit sign extend / left shift. 4186 return 3 for left shift / 8 bit sign extend / shift / sign extend. 4187 return 4 for left shift / 16 bit sign extend / shift / sign extend. 4188 return 5 for left shift / 16 bit sign extend / right shift 4189 return 6 for < 8 bit sign extend / left shift. 4190 return 7 for < 8 bit sign extend / left shift / single right shift. 4191 If COSTP is nonzero, assign the calculated cost to *COSTP. */ 4192 int 4193 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp) 4194 { 4195 int left, size, insize, ext; 4196 int cost = 0, best_cost; 4197 int kind; 4198 4199 left = INTVAL (left_rtx); 4200 size = INTVAL (size_rtx); 4201 insize = size - left; 4202 gcc_assert (insize > 0); 4203 /* Default to left / right shift. */ 4204 kind = 0; 4205 best_cost = ashl_lshr_seq[32 - insize].insn_count 4206 + ashl_lshr_seq[32 - size].insn_count; 4207 if (size <= 16) 4208 { 4209 /* 16 bit shift / sign extend / 16 bit shift */ 4210 cost = ashl_lshr_seq[16 - insize].insn_count + 1 4211 + ashl_lshr_seq[16 - size].insn_count; 4212 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden 4213 below, by alternative 3 or something even better. */ 4214 if (cost < best_cost) 4215 { 4216 kind = 5; 4217 best_cost = cost; 4218 } 4219 } 4220 /* Try a plain sign extend between two shifts. 
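     For instance (numbers picked only to illustrate the loop below):
     with insize == 7 and size == 20, the ext == 16 step costs a left
     shift by 16 - 7 = 9 to move the field's sign bit up to bit 15, one
     16-bit sign extension (exts.w), and a further left shift by
     20 - 16 = 4.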
*/ 4221 for (ext = 16; ext >= insize; ext -= 8) 4222 { 4223 if (ext <= size) 4224 { 4225 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1 4226 + ashl_lshr_seq[size - ext].insn_count; 4227 if (cost < best_cost) 4228 { 4229 kind = ext / (unsigned) 8; 4230 best_cost = cost; 4231 } 4232 } 4233 /* Check if we can do a sloppy shift with a final signed shift 4234 restoring the sign. */ 4235 if (EXT_SHIFT_SIGNED (size - ext)) 4236 cost = ext_ashl_lshr_seq[ext - insize].insn_count 4237 + ext_ashl_lshr_seq[size - ext].insn_count + 1; 4238 /* If not, maybe it's still cheaper to do the second shift sloppy, 4239 and do a final sign extend? */ 4240 else if (size <= 16) 4241 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1 4242 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count 4243 + 1; 4244 else 4245 continue; 4246 if (cost < best_cost) 4247 { 4248 kind = ext / (unsigned) 8 + 2; 4249 best_cost = cost; 4250 } 4251 } 4252 /* Check if we can sign extend in r0 */ 4253 if (insize < 8) 4254 { 4255 cost = 3 + ashl_lshr_seq[left].insn_count; 4256 if (cost < best_cost) 4257 { 4258 kind = 6; 4259 best_cost = cost; 4260 } 4261 /* Try the same with a final signed shift. */ 4262 if (left < 31) 4263 { 4264 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1; 4265 if (cost < best_cost) 4266 { 4267 kind = 7; 4268 best_cost = cost; 4269 } 4270 } 4271 } 4272 if (TARGET_DYNSHIFT) 4273 { 4274 /* Try to use a dynamic shift. */ 4275 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST; 4276 if (cost < best_cost) 4277 { 4278 kind = 0; 4279 best_cost = cost; 4280 } 4281 } 4282 if (costp) 4283 *costp = cost; 4284 return kind; 4285 } 4286 4287 /* Function to be used in the length attribute of the instructions 4288 implementing this pattern. */ 4289 int 4290 shl_sext_length (rtx insn) 4291 { 4292 rtx set_src, left_rtx, size_rtx; 4293 int cost; 4294 4295 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); 4296 left_rtx = XEXP (XEXP (set_src, 0), 1); 4297 size_rtx = XEXP (set_src, 1); 4298 shl_sext_kind (left_rtx, size_rtx, &cost); 4299 return cost; 4300 } 4301 4302 /* Generate rtl for this pattern */ 4303 bool 4304 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source) 4305 { 4306 int kind; 4307 int left, size, insize, cost; 4308 rtx operands[3]; 4309 4310 kind = shl_sext_kind (left_rtx, size_rtx, &cost); 4311 left = INTVAL (left_rtx); 4312 size = INTVAL (size_rtx); 4313 insize = size - left; 4314 switch (kind) 4315 { 4316 case 1: 4317 case 2: 4318 case 3: 4319 case 4: 4320 { 4321 int ext = kind & 1 ? 8 : 16; 4322 int shift2 = size - ext; 4323 4324 /* Don't expand fine-grained when combining, because that will 4325 make the pattern fail. */ 4326 if (! currently_expanding_to_rtl 4327 && ! reload_in_progress && ! reload_completed) 4328 { 4329 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); 4330 emit_insn (gen_movsi (dest, source)); 4331 break; 4332 } 4333 if (dest != source) 4334 emit_insn (gen_movsi (dest, source)); 4335 operands[0] = dest; 4336 if (ext - insize) 4337 { 4338 operands[2] = GEN_INT (ext - insize); 4339 gen_shifty_hi_op (ASHIFT, operands); 4340 } 4341 emit_insn (kind & 1 4342 ? 
gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) 4343 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); 4344 if (kind <= 2) 4345 { 4346 if (shift2) 4347 { 4348 operands[2] = GEN_INT (shift2); 4349 gen_shifty_op (ASHIFT, operands); 4350 } 4351 } 4352 else 4353 { 4354 if (shift2 > 0) 4355 { 4356 if (EXT_SHIFT_SIGNED (shift2)) 4357 { 4358 operands[2] = GEN_INT (shift2 + 1); 4359 gen_shifty_op (ASHIFT, operands); 4360 operands[2] = const1_rtx; 4361 gen_shifty_op (ASHIFTRT, operands); 4362 break; 4363 } 4364 operands[2] = GEN_INT (shift2); 4365 gen_shifty_hi_op (ASHIFT, operands); 4366 } 4367 else if (shift2) 4368 { 4369 operands[2] = GEN_INT (-shift2); 4370 gen_shifty_hi_op (LSHIFTRT, operands); 4371 } 4372 emit_insn (size <= 8 4373 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) 4374 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); 4375 } 4376 break; 4377 } 4378 case 5: 4379 { 4380 int i = 16 - size; 4381 if (! currently_expanding_to_rtl 4382 && ! reload_in_progress && ! reload_completed) 4383 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); 4384 else 4385 { 4386 operands[0] = dest; 4387 operands[2] = GEN_INT (16 - insize); 4388 gen_shifty_hi_op (ASHIFT, operands); 4389 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); 4390 } 4391 /* Don't use gen_ashrsi3 because it generates new pseudos. */ 4392 while (--i >= 0) 4393 gen_ashift (ASHIFTRT, 1, dest); 4394 break; 4395 } 4396 case 6: 4397 case 7: 4398 /* Don't expand fine-grained when combining, because that will 4399 make the pattern fail. */ 4400 if (! currently_expanding_to_rtl 4401 && ! reload_in_progress && ! reload_completed) 4402 { 4403 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); 4404 emit_insn (gen_movsi (dest, source)); 4405 break; 4406 } 4407 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1))); 4408 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1)))); 4409 emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1)))); 4410 operands[0] = dest; 4411 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx; 4412 gen_shifty_op (ASHIFT, operands); 4413 if (kind == 7) 4414 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx)); 4415 break; 4416 default: 4417 return true; 4418 } 4419 return false; 4420 } 4421 4422 typedef struct label_ref_list_d 4423 { 4424 rtx_code_label *label; 4425 struct label_ref_list_d *next; 4426 } *label_ref_list_t; 4427 4428 static object_allocator<label_ref_list_d> label_ref_list_d_pool 4429 ("label references list"); 4430 4431 /* The SH cannot load a large constant into a register, constants have to 4432 come from a pc relative load. The reference of a pc relative load 4433 instruction must be less than 1k in front of the instruction. This 4434 means that we often have to dump a constant inside a function, and 4435 generate code to branch around it. 4436 4437 It is important to minimize this, since the branches will slow things 4438 down and make things bigger. 4439 4440 Worst case code looks like: 4441 4442 mov.l L1,rn 4443 bra L2 4444 nop 4445 align 4446 L1: .long value 4447 L2: 4448 .. 4449 4450 mov.l L3,rn 4451 bra L4 4452 nop 4453 align 4454 L3: .long value 4455 L4: 4456 .. 4457 4458 We fix this by performing a scan before scheduling, which notices which 4459 instructions need to have their operands fetched from the constant table 4460 and builds the table. 4461 4462 The algorithm is: 4463 4464 scan, find an instruction which needs a pcrel move. 
Look forward, find the 4465 last barrier which is within MAX_COUNT bytes of the requirement. 4466 If there isn't one, make one. Process all the instructions between 4467 the find and the barrier. 4468 4469 In the above example, we can tell that L3 is within 1k of L1, so 4470 the first move can be shrunk from the 3 insn+constant sequence into 4471 just 1 insn, and the constant moved to L3 to make: 4472 4473 mov.l L1,rn 4474 .. 4475 mov.l L3,rn 4476 bra L4 4477 nop 4478 align 4479 L3:.long value 4480 L4:.long value 4481 4482 Then the second move becomes the target for the shortening process. */ 4483 4484 typedef struct 4485 { 4486 rtx value; /* Value in table. */ 4487 rtx_code_label *label; /* Label of value. */ 4488 label_ref_list_t wend; /* End of window. */ 4489 machine_mode mode; /* Mode of value. */ 4490 4491 /* True if this constant is accessed as part of a post-increment 4492 sequence. Note that HImode constants are never accessed in this way. */ 4493 bool part_of_sequence_p; 4494 } pool_node; 4495 4496 /* The maximum number of constants that can fit into one pool, since 4497 constants in the range 0..510 are at least 2 bytes long, and in the 4498 range from there to 1018 at least 4 bytes. */ 4499 4500 #define MAX_POOL_SIZE 372 4501 static pool_node pool_vector[MAX_POOL_SIZE]; 4502 static int pool_size; 4503 static rtx_code_label *pool_window_label; 4504 static int pool_window_last; 4505 4506 static int max_labelno_before_reorg; 4507 4508 /* ??? If we need a constant in HImode which is the truncated value of a 4509 constant we need in SImode, we could combine the two entries thus saving 4510 two bytes. Is this common enough to be worth the effort of implementing 4511 it? */ 4512 4513 /* ??? This stuff should be done at the same time that we shorten branches. 4514 As it is now, we must assume that all branches are the maximum size, and 4515 this causes us to almost always output constant pools sooner than 4516 necessary. */ 4517 4518 /* Add a constant to the pool and return its label. */ 4519 static rtx_code_label * 4520 add_constant (rtx x, machine_mode mode, rtx last_value) 4521 { 4522 rtx_code_label *lab, *new_rtx; 4523 label_ref_list_t ref, newref; 4524 4525 /* First see if we've already got it. */ 4526 for (int i = 0; i < pool_size; i++) 4527 { 4528 if (x->code == pool_vector[i].value->code 4529 && mode == pool_vector[i].mode) 4530 { 4531 if (x->code == CODE_LABEL) 4532 { 4533 if (XINT (x, 3) != XINT (pool_vector[i].value, 3)) 4534 continue; 4535 } 4536 if (rtx_equal_p (x, pool_vector[i].value)) 4537 { 4538 lab = new_rtx = 0; 4539 if (! last_value 4540 || ! i 4541 || ! rtx_equal_p (last_value, pool_vector[i-1].value)) 4542 { 4543 new_rtx = gen_label_rtx (); 4544 LABEL_REFS (new_rtx) = pool_vector[i].label; 4545 pool_vector[i].label = lab = new_rtx; 4546 } 4547 if (lab && pool_window_label) 4548 { 4549 newref = label_ref_list_d_pool.allocate (); 4550 newref->label = pool_window_label; 4551 ref = pool_vector[pool_window_last].wend; 4552 newref->next = ref; 4553 pool_vector[pool_window_last].wend = newref; 4554 } 4555 if (new_rtx) 4556 pool_window_label = new_rtx; 4557 pool_window_last = i; 4558 return lab; 4559 } 4560 } 4561 } 4562 4563 /* Need a new one. 
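     (Roughly: if the caller's LAST_VALUE equals the value of the entry
     added just before this one, the new entry gets no label of its own
     and both entries are flagged part_of_sequence_p, so the constant is
     reached by post-incrementing past its predecessor; see the code
     right below.)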
*/ 4564 pool_vector[pool_size].value = x; 4565 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value)) 4566 { 4567 lab = 0; 4568 pool_vector[pool_size - 1].part_of_sequence_p = true; 4569 } 4570 else 4571 lab = gen_label_rtx (); 4572 pool_vector[pool_size].mode = mode; 4573 pool_vector[pool_size].label = lab; 4574 pool_vector[pool_size].wend = NULL; 4575 pool_vector[pool_size].part_of_sequence_p = (lab == 0); 4576 if (lab && pool_window_label) 4577 { 4578 newref = label_ref_list_d_pool.allocate (); 4579 newref->label = pool_window_label; 4580 ref = pool_vector[pool_window_last].wend; 4581 newref->next = ref; 4582 pool_vector[pool_window_last].wend = newref; 4583 } 4584 if (lab) 4585 pool_window_label = lab; 4586 pool_window_last = pool_size; 4587 pool_size++; 4588 return lab; 4589 } 4590 4591 /* Output the literal table. START, if nonzero, is the first instruction 4592 this table is needed for, and also indicates that there is at least one 4593 casesi_worker_2 instruction; We have to emit the operand3 labels from 4594 these insns at a 4-byte aligned position. BARRIER is the barrier 4595 after which we are to place the table. */ 4596 static void 4597 dump_table (rtx_insn *start, rtx_insn *barrier) 4598 { 4599 rtx_insn *scan = barrier; 4600 bool need_align = true; 4601 rtx_code_label *lab; 4602 label_ref_list_t ref; 4603 bool have_df = false; 4604 4605 /* Do two passes, first time dump out the HI sized constants. */ 4606 4607 for (int i = 0; i < pool_size; i++) 4608 { 4609 pool_node *p = &pool_vector[i]; 4610 4611 if (p->mode == HImode) 4612 { 4613 if (need_align) 4614 { 4615 scan = emit_insn_after (gen_align_2 (), scan); 4616 need_align = false; 4617 } 4618 for (lab = p->label; lab; 4619 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) 4620 scan = emit_label_after (lab, scan); 4621 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx), 4622 scan); 4623 for (ref = p->wend; ref; ref = ref->next) 4624 { 4625 lab = ref->label; 4626 scan = emit_insn_after (gen_consttable_window_end (lab), scan); 4627 } 4628 } 4629 else if (p->mode == DFmode) 4630 have_df = true; 4631 } 4632 4633 need_align = true; 4634 4635 if (start) 4636 { 4637 scan = emit_insn_after (gen_align_4 (), scan); 4638 need_align = false; 4639 for (; start != barrier; start = NEXT_INSN (start)) 4640 if (NONJUMP_INSN_P (start) 4641 && recog_memoized (start) == CODE_FOR_casesi_worker_2) 4642 { 4643 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0)); 4644 rtx lab = XEXP (XVECEXP (src, 0, 3), 0); 4645 4646 scan = emit_label_after (as_a <rtx_insn *> (lab), scan); 4647 } 4648 } 4649 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df) 4650 { 4651 rtx_insn *align_insn = NULL; 4652 4653 scan = emit_label_after (gen_label_rtx (), scan); 4654 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan); 4655 need_align = false; 4656 4657 for (int i = 0; i < pool_size; i++) 4658 { 4659 pool_node *p = &pool_vector[i]; 4660 4661 switch (p->mode) 4662 { 4663 case E_HImode: 4664 break; 4665 case E_SImode: 4666 case E_SFmode: 4667 if (align_insn && !p->part_of_sequence_p) 4668 { 4669 for (lab = p->label; lab; 4670 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) 4671 emit_label_before (lab, align_insn); 4672 emit_insn_before (gen_consttable_4 (p->value, const0_rtx), 4673 align_insn); 4674 for (ref = p->wend; ref; ref = ref->next) 4675 { 4676 lab = ref->label; 4677 emit_insn_before (gen_consttable_window_end (lab), 4678 align_insn); 4679 } 4680 delete_insn (align_insn); 4681 align_insn = NULL; 4682 continue; 
4683 } 4684 else 4685 { 4686 for (lab = p->label; lab; 4687 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) 4688 scan = emit_label_after (lab, scan); 4689 scan = emit_insn_after (gen_consttable_4 (p->value, 4690 const0_rtx), scan); 4691 need_align = ! need_align; 4692 } 4693 break; 4694 case E_DFmode: 4695 if (need_align) 4696 { 4697 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan); 4698 align_insn = scan; 4699 need_align = false; 4700 } 4701 /* FALLTHRU */ 4702 case E_DImode: 4703 for (lab = p->label; lab; 4704 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) 4705 scan = emit_label_after (lab, scan); 4706 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx), 4707 scan); 4708 break; 4709 default: 4710 gcc_unreachable (); 4711 } 4712 4713 if (p->mode != HImode) 4714 { 4715 for (ref = p->wend; ref; ref = ref->next) 4716 { 4717 lab = ref->label; 4718 scan = emit_insn_after (gen_consttable_window_end (lab), 4719 scan); 4720 } 4721 } 4722 } 4723 4724 pool_size = 0; 4725 } 4726 4727 for (int i = 0; i < pool_size; i++) 4728 { 4729 pool_node *p = &pool_vector[i]; 4730 4731 switch (p->mode) 4732 { 4733 case E_HImode: 4734 break; 4735 case E_SImode: 4736 case E_SFmode: 4737 if (need_align) 4738 { 4739 need_align = false; 4740 scan = emit_label_after (gen_label_rtx (), scan); 4741 scan = emit_insn_after (gen_align_4 (), scan); 4742 } 4743 for (lab = p->label; lab; 4744 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) 4745 scan = emit_label_after (lab, scan); 4746 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx), 4747 scan); 4748 break; 4749 case E_DFmode: 4750 case E_DImode: 4751 if (need_align) 4752 { 4753 need_align = false; 4754 scan = emit_label_after (gen_label_rtx (), scan); 4755 scan = emit_insn_after (gen_align_4 (), scan); 4756 } 4757 for (lab = p->label; lab; 4758 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) 4759 scan = emit_label_after (lab, scan); 4760 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx), 4761 scan); 4762 break; 4763 default: 4764 gcc_unreachable (); 4765 } 4766 4767 if (p->mode != HImode) 4768 { 4769 for (ref = p->wend; ref; ref = ref->next) 4770 { 4771 lab = ref->label; 4772 scan = emit_insn_after (gen_consttable_window_end (lab), scan); 4773 } 4774 } 4775 } 4776 4777 scan = emit_insn_after (gen_consttable_end (), scan); 4778 scan = emit_barrier_after (scan); 4779 pool_size = 0; 4780 pool_window_label = NULL; 4781 pool_window_last = 0; 4782 } 4783 4784 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0) 4785 4786 /* Nonzero if the insn is a move instruction which needs to be fixed. */ 4787 4788 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the 4789 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't 4790 need to fix it if the input value is CONST_OK_FOR_I08. */ 4791 static bool 4792 broken_move (rtx_insn *insn) 4793 { 4794 if (NONJUMP_INSN_P (insn)) 4795 { 4796 rtx pat = PATTERN (insn); 4797 if (GET_CODE (pat) == PARALLEL) 4798 pat = XVECEXP (pat, 0, 0); 4799 if (GET_CODE (pat) == SET 4800 /* We can load any 8-bit value if we don't care what the high 4801 order bits end up as. */ 4802 && GET_MODE (SET_DEST (pat)) != QImode 4803 && (CONSTANT_P (SET_SRC (pat)) 4804 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE 4805 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B) 4806 /* Match mova_const. 
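     That is, an UNSPEC_MOVA wrapping a CONST rather than a plain
     LABEL_REF; mova_p below deliberately rejects that form, so such an
     insn is treated here like any other move that needs its constant
     fixed up.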
*/ 4807 || (GET_CODE (SET_SRC (pat)) == UNSPEC 4808 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA 4809 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST)) 4810 && ! (TARGET_SH2E 4811 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE 4812 && (fp_zero_operand (SET_SRC (pat)) 4813 || fp_one_operand (SET_SRC (pat))) 4814 /* In general we don't know the current setting of fpscr, so 4815 disable fldi. 4816 There is an exception if this was a register-register move 4817 before reload - and hence it was ascertained that we have 4818 single precision setting - and in a post-reload optimization 4819 we changed this to do a constant load. In that case 4820 we don't have an r0 clobber, hence we must use fldi. */ 4821 && (TARGET_FMOVD 4822 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0)) 4823 == SCRATCH)) 4824 && REG_P (SET_DEST (pat)) 4825 && FP_REGISTER_P (REGNO (SET_DEST (pat)))) 4826 && ! (TARGET_SH2A 4827 && GET_MODE (SET_DEST (pat)) == SImode 4828 && (satisfies_constraint_I20 (SET_SRC (pat)) 4829 || satisfies_constraint_I28 (SET_SRC (pat)))) 4830 && ! satisfies_constraint_I08 (SET_SRC (pat))) 4831 return true; 4832 } 4833 4834 return false; 4835 } 4836 4837 /* Return true if the specified insn is a mova insn. */ 4838 static bool 4839 mova_p (rtx_insn *insn) 4840 { 4841 return (NONJUMP_INSN_P (insn) 4842 && GET_CODE (PATTERN (insn)) == SET 4843 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC 4844 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA 4845 /* Don't match mova_const. */ 4846 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF); 4847 } 4848 4849 /* Fix up a mova from a switch that went out of range. */ 4850 static void 4851 fixup_mova (rtx_insn *mova) 4852 { 4853 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode); 4854 if (! flag_pic) 4855 { 4856 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova); 4857 INSN_CODE (mova) = -1; 4858 } 4859 else 4860 { 4861 rtx_insn *worker = mova; 4862 rtx_code_label *lab = gen_label_rtx (); 4863 rtx wpat, wpat0, wpat1, wsrc, target, base, diff; 4864 4865 do 4866 { 4867 worker = NEXT_INSN (worker); 4868 gcc_assert (worker 4869 && !LABEL_P (worker) 4870 && !JUMP_P (worker)); 4871 } while (NOTE_P (worker) 4872 || recog_memoized (worker) != CODE_FOR_casesi_worker_1); 4873 wpat = PATTERN (worker); 4874 wpat0 = XVECEXP (wpat, 0, 0); 4875 wpat1 = XVECEXP (wpat, 0, 1); 4876 wsrc = SET_SRC (wpat0); 4877 PATTERN (worker) = (gen_casesi_worker_2 4878 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1), 4879 XEXP (XVECEXP (wsrc, 0, 2), 0), lab, 4880 XEXP (wpat1, 0))); 4881 INSN_CODE (worker) = -1; 4882 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); 4883 base = gen_rtx_LABEL_REF (Pmode, lab); 4884 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF); 4885 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff); 4886 INSN_CODE (mova) = -1; 4887 } 4888 } 4889 4890 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update 4891 *num_mova, and check if the new mova is not nested within the first one. 4892 return 0 if *first_mova was replaced, 1 if new_mova was replaced, 4893 2 if new_mova has been assigned to *first_mova, -1 otherwise.. */ 4894 static int 4895 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova) 4896 { 4897 int n_addr = 0; /* Initialization to shut up spurious warning. */ 4898 int f_target, n_target = 0; /* Likewise. */ 4899 4900 if (optimize) 4901 { 4902 /* If NEW_MOVA has no address yet, it will be handled later. 
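     (Insns created after insn addresses were last computed have UIDs
     beyond the recorded INSN_ADDRESSES range, which is what the size
     check below detects.)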
*/ 4903 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova)) 4904 return -1; 4905 4906 n_addr = INSN_ADDRESSES (INSN_UID (new_mova)); 4907 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0))); 4908 if (n_addr > n_target || n_addr + 1022 < n_target) 4909 { 4910 /* Change the mova into a load. 4911 broken_move will then return true for it. */ 4912 fixup_mova (new_mova); 4913 return 1; 4914 } 4915 } 4916 if (!(*num_mova)++) 4917 { 4918 *first_mova = new_mova; 4919 return 2; 4920 } 4921 if (!optimize 4922 || ((f_target 4923 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0)))) 4924 >= n_target)) 4925 return -1; 4926 4927 (*num_mova)--; 4928 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova)) 4929 > n_target - n_addr) 4930 { 4931 fixup_mova (*first_mova); 4932 return 0; 4933 } 4934 else 4935 { 4936 fixup_mova (new_mova); 4937 return 1; 4938 } 4939 } 4940 4941 /* Find the last barrier from insn FROM which is close enough to hold the 4942 constant pool. If we can't find one, then create one near the end of 4943 the range. */ 4944 static rtx_insn * 4945 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from) 4946 { 4947 int count_si = 0; 4948 int count_hi = 0; 4949 int found_hi = 0; 4950 int found_si = 0; 4951 int hi_align = 2; 4952 int si_align = 2; 4953 int leading_mova = num_mova; 4954 rtx_insn *barrier_before_mova = NULL; 4955 rtx_insn *found_barrier = NULL; 4956 rtx_insn *good_barrier = NULL; 4957 int si_limit; 4958 int hi_limit; 4959 rtx_insn *orig = from; 4960 rtx_insn *last_got = NULL; 4961 rtx_insn *last_symoff = NULL; 4962 4963 /* For HImode: range is 510, add 4 because pc counts from address of 4964 second instruction after this one, subtract 2 for the jump instruction 4965 that we may need to emit before the table, subtract 2 for the instruction 4966 that fills the jump delay slot (in very rare cases, reorg will take an 4967 instruction from after the constant pool or will leave the delay slot 4968 empty). This gives 510. 4969 For SImode: range is 1020, add 4 because pc counts from address of 4970 second instruction after this one, subtract 2 in case pc is 2 byte 4971 aligned, subtract 2 for the jump instruction that we may need to emit 4972 before the table, subtract 2 for the instruction that fills the jump 4973 delay slot. This gives 1018. */ 4974 4975 /* The branch will always be shortened now that the reference address for 4976 forward branches is the successor address, thus we need no longer make 4977 adjustments to the [sh]i_limit for -O0. */ 4978 4979 si_limit = 1018; 4980 hi_limit = 510; 4981 4982 while (from && count_si < si_limit && count_hi < hi_limit) 4983 { 4984 int inc = get_attr_length (from); 4985 int new_align = 1; 4986 4987 /* If this is a label that existed at the time of the compute_alignments 4988 call, determine the alignment. N.B. When find_barrier recurses for 4989 an out-of-reach mova, we might see labels at the start of previously 4990 inserted constant tables. */ 4991 if (LABEL_P (from) 4992 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg) 4993 { 4994 if (optimize) 4995 new_align = 1 << label_to_alignment (from).levels[0].log; 4996 else if (BARRIER_P (prev_nonnote_insn (from))) 4997 new_align = 1 << barrier_align (from); 4998 else 4999 new_align = 1; 5000 inc = 0; 5001 } 5002 /* In case we are scanning a constant table because of recursion, check 5003 for explicit alignments. 
If the table is long, we might be forced
5004 to emit the new table in front of it; the length of the alignment
5005 might be the last straw. */
5006 else if (NONJUMP_INSN_P (from)
5007 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5008 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5009 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5010 /* When we find the end of a constant table, paste the new constant
5011 at the end. That is better than putting it in front because
5012 this way, we don't need extra alignment for adding a 4-byte-aligned
5013 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5014 else if (NONJUMP_INSN_P (from)
5015 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5016 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5017 return from;
5018
5019 if (BARRIER_P (from))
5020 {
5021 rtx_insn *next;
5022
5023 found_barrier = from;
5024
5025 /* If we are at the end of the function, or in front of an alignment
5026 instruction, we need not insert an extra alignment. We prefer
5027 this kind of barrier. */
5028 if (barrier_align (from) > 2)
5029 good_barrier = from;
5030
5031 /* If we are at the end of a hot/cold block, dump the constants
5032 here. */
5033 next = NEXT_INSN (from);
5034 if (next
5035 && NOTE_P (next)
5036 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5037 break;
5038 }
5039
5040 if (broken_move (from))
5041 {
5042 rtx pat, src, dst;
5043 machine_mode mode;
5044
5045 pat = PATTERN (from);
5046 if (GET_CODE (pat) == PARALLEL)
5047 pat = XVECEXP (pat, 0, 0);
5048 src = SET_SRC (pat);
5049 dst = SET_DEST (pat);
5050 mode = GET_MODE (dst);
5051
5052 /* A GOT pc-relative setting comes in a pair of
5053 mova .L8,r0
5054 mov.l .L8,r12
5055 instructions (plus an add r0,r12).
5056 Remember if we see one without the other. */
5057 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5058 last_got = last_got ? NULL : from;
5059 else if (PIC_ADDR_P (src))
5060 last_got = last_got ? NULL : from;
5061
5062 /* We must explicitly check the mode, because sometimes the
5063 front end will generate code to load unsigned constants into
5064 HImode targets without properly sign extending them. */
5065 if (mode == HImode
5066 || (mode == SImode && satisfies_constraint_I16 (src)
5067 && REGNO (dst) != FPUL_REG))
5068 {
5069 found_hi += 2;
5070 /* We put the short constants before the long constants, so
5071 we must count the length of short constants in the range
5072 for the long constants. */
5073 /* ??? This isn't optimal, but is easy to do. */
5074 si_limit -= 2;
5075 }
5076 else
5077 {
5078 /* We dump DF/DI constants before SF/SI ones, because
5079 the limit is the same, but the alignment requirements
5080 are higher. We may waste up to 4 additional bytes
5081 for alignment, and the DF/DI constant may have
5082 another SF/SI constant placed before it.
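     As a rough account of the bookkeeping below: a DFmode entry adds 8
     bytes to found_si, and if the pool happens to be only 4-byte
     aligned at that point, up to 4 bytes of padding may be needed to
     reach 8-byte alignment - hence the "up to 4 additional bytes"
     above.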
*/ 5083 while (si_align > 2 && found_si + si_align - 2 > count_si) 5084 si_align >>= 1; 5085 if (found_si > count_si) 5086 count_si = found_si; 5087 found_si += GET_MODE_SIZE (mode); 5088 if (num_mova) 5089 si_limit -= GET_MODE_SIZE (mode); 5090 } 5091 } 5092 5093 if (mova_p (from)) 5094 { 5095 switch (untangle_mova (&num_mova, &mova, from)) 5096 { 5097 case 1: 5098 if (flag_pic) 5099 { 5100 rtx src = SET_SRC (PATTERN (from)); 5101 if (GET_CODE (src) == CONST 5102 && GET_CODE (XEXP (src, 0)) == UNSPEC 5103 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF) 5104 last_symoff = from; 5105 } 5106 break; 5107 case 0: return find_barrier (0, 0, mova); 5108 case 2: 5109 { 5110 leading_mova = 0; 5111 barrier_before_mova 5112 = good_barrier ? good_barrier : found_barrier; 5113 } 5114 default: break; 5115 } 5116 if (found_si > count_si) 5117 count_si = found_si; 5118 } 5119 else if (JUMP_TABLE_DATA_P (from) 5120 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC) 5121 { 5122 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode) 5123 || (num_mova 5124 && (prev_nonnote_insn (from) 5125 == XEXP (MOVA_LABELREF (mova), 0)))) 5126 num_mova--; 5127 if (barrier_align (next_real_insn (from)) == align_jumps.levels[0].log) 5128 { 5129 /* We have just passed the barrier in front of the 5130 ADDR_DIFF_VEC, which is stored in found_barrier. Since 5131 the ADDR_DIFF_VEC is accessed as data, just like our pool 5132 constants, this is a good opportunity to accommodate what 5133 we have gathered so far. 5134 If we waited any longer, we could end up at a barrier in 5135 front of code, which gives worse cache usage for separated 5136 instruction / data caches. */ 5137 good_barrier = found_barrier; 5138 break; 5139 } 5140 else 5141 { 5142 rtx body = PATTERN (from); 5143 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body)); 5144 } 5145 } 5146 /* For the SH1, we generate alignments even after jumps-around-jumps. */ 5147 else if (JUMP_P (from) 5148 && ! TARGET_SH2 5149 && ! optimize_size) 5150 new_align = 4; 5151 5152 /* There is a possibility that a bf is transformed into a bf/s by the 5153 delay slot scheduler. */ 5154 if (JUMP_P (from) 5155 && get_attr_type (from) == TYPE_CBRANCH 5156 && ! sequence_insn_p (from)) 5157 inc += 2; 5158 5159 if (found_si) 5160 { 5161 count_si += inc; 5162 if (new_align > si_align) 5163 { 5164 si_limit -= (count_si - 1) & (new_align - si_align); 5165 si_align = new_align; 5166 } 5167 count_si = (count_si + new_align - 1) & -new_align; 5168 } 5169 if (found_hi) 5170 { 5171 count_hi += inc; 5172 if (new_align > hi_align) 5173 { 5174 hi_limit -= (count_hi - 1) & (new_align - hi_align); 5175 hi_align = new_align; 5176 } 5177 count_hi = (count_hi + new_align - 1) & -new_align; 5178 } 5179 from = NEXT_INSN (from); 5180 } 5181 5182 if (num_mova) 5183 { 5184 if (leading_mova) 5185 { 5186 /* Try as we might, the leading mova is out of range. Change 5187 it into a load (which will become a pcload) and retry. */ 5188 fixup_mova (mova); 5189 return find_barrier (0, 0, mova); 5190 } 5191 else 5192 { 5193 /* Insert the constant pool table before the mova instruction, 5194 to prevent the mova label reference from going out of range. */ 5195 from = mova; 5196 good_barrier = found_barrier = barrier_before_mova; 5197 } 5198 } 5199 5200 if (found_barrier) 5201 { 5202 if (good_barrier && next_real_insn (found_barrier)) 5203 found_barrier = good_barrier; 5204 } 5205 else 5206 { 5207 /* We didn't find a barrier in time to dump our stuff, 5208 so we'll make one. 
*/ 5209 rtx_code_label *label = gen_label_rtx (); 5210 5211 /* Don't emit a constant table in the middle of insns for 5212 casesi_worker_2. This is a bit overkill but is enough 5213 because casesi_worker_2 wouldn't appear so frequently. */ 5214 if (last_symoff) 5215 from = last_symoff; 5216 5217 /* If we exceeded the range, then we must back up over the last 5218 instruction we looked at. Otherwise, we just need to undo the 5219 NEXT_INSN at the end of the loop. */ 5220 if (PREV_INSN (from) != orig 5221 && (count_hi > hi_limit || count_si > si_limit)) 5222 from = PREV_INSN (PREV_INSN (from)); 5223 else 5224 from = PREV_INSN (from); 5225 5226 /* Don't emit a constant table int the middle of global pointer setting, 5227 since that that would move the addressing base GOT into another table. 5228 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_ 5229 in the pool anyway, so just move up the whole constant pool. 5230 5231 However, avoid doing so when the last single GOT mov is the starting 5232 insn itself. Going past above the start insn would create a negative 5233 offset, causing errors. */ 5234 if (last_got && last_got != orig) 5235 from = PREV_INSN (last_got); 5236 5237 /* Don't insert the constant pool table at the position which 5238 may be the landing pad. */ 5239 if (flag_exceptions 5240 && CALL_P (from) 5241 && find_reg_note (from, REG_EH_REGION, NULL_RTX)) 5242 from = PREV_INSN (from); 5243 5244 /* Walk back to be just before any jump or label. 5245 Putting it before a label reduces the number of times the branch 5246 around the constant pool table will be hit. Putting it before 5247 a jump makes it more likely that the bra delay slot will be 5248 filled. */ 5249 while (NOTE_P (from) || JUMP_P (from) || LABEL_P (from)) 5250 from = PREV_INSN (from); 5251 5252 if (CALL_P (from)) 5253 { 5254 bool sibcall_p = SIBLING_CALL_P (from); 5255 5256 /* If FROM was a sibling call, then we know that control 5257 will not return. In fact, we were guaranteed to hit 5258 a barrier before another real insn. 5259 5260 The jump around the constant pool is unnecessary. It 5261 costs space, but more importantly it confuses dwarf2cfi 5262 generation. */ 5263 if (sibcall_p) 5264 return emit_barrier_after (from); 5265 } 5266 5267 from = emit_jump_insn_after (gen_jump (label), from); 5268 JUMP_LABEL (from) = label; 5269 LABEL_NUSES (label) = 1; 5270 found_barrier = emit_barrier_after (from); 5271 emit_label_after (label, found_barrier); 5272 } 5273 5274 return found_barrier; 5275 } 5276 5277 /* If the instruction INSN is implemented by a special function, and we can 5278 positively find the register that is used to call the sfunc, and this 5279 register is not used anywhere else in this instruction - except as the 5280 destination of a set, return this register; else, return 0. */ 5281 rtx 5282 sfunc_uses_reg (rtx_insn *insn) 5283 { 5284 int i; 5285 rtx pattern, part, reg_part, reg; 5286 5287 if (!NONJUMP_INSN_P (insn)) 5288 return NULL_RTX; 5289 pattern = PATTERN (insn); 5290 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC) 5291 return NULL_RTX; 5292 5293 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--) 5294 { 5295 part = XVECEXP (pattern, 0, i); 5296 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode) 5297 reg_part = part; 5298 } 5299 if (! 
reg_part) 5300 return NULL_RTX; 5301 reg = XEXP (reg_part, 0); 5302 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--) 5303 { 5304 part = XVECEXP (pattern, 0, i); 5305 if (part == reg_part || GET_CODE (part) == CLOBBER) 5306 continue; 5307 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET 5308 && REG_P (SET_DEST (part))) 5309 ? SET_SRC (part) : part))) 5310 return NULL_RTX; 5311 } 5312 return reg; 5313 } 5314 5315 /* See if the only way in which INSN uses REG is by calling it, or by 5316 setting it while calling it. Set *SET to a SET rtx if the register 5317 is set by INSN. */ 5318 static bool 5319 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set) 5320 { 5321 *set = NULL_RTX; 5322 5323 rtx reg2 = sfunc_uses_reg (insn); 5324 if (reg2 && REGNO (reg2) == REGNO (reg)) 5325 { 5326 rtx pattern = single_set (insn); 5327 if (pattern 5328 && REG_P (SET_DEST (pattern)) 5329 && REGNO (reg) == REGNO (SET_DEST (pattern))) 5330 *set = pattern; 5331 return false; 5332 } 5333 if (!CALL_P (insn)) 5334 { 5335 /* We don't use rtx_equal_p because we don't care if the mode is 5336 different. */ 5337 rtx pattern = single_set (insn); 5338 if (pattern 5339 && REG_P (SET_DEST (pattern)) 5340 && REGNO (reg) == REGNO (SET_DEST (pattern))) 5341 { 5342 rtx par, part; 5343 int i; 5344 5345 *set = pattern; 5346 par = PATTERN (insn); 5347 if (GET_CODE (par) == PARALLEL) 5348 for (i = XVECLEN (par, 0) - 1; i >= 0; i--) 5349 { 5350 part = XVECEXP (par, 0, i); 5351 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part)) 5352 return true; 5353 } 5354 return reg_mentioned_p (reg, SET_SRC (pattern)); 5355 } 5356 5357 return true; 5358 } 5359 5360 rtx pattern = PATTERN (insn); 5361 5362 if (GET_CODE (pattern) == PARALLEL) 5363 { 5364 for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--) 5365 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i))) 5366 return true; 5367 pattern = XVECEXP (pattern, 0, 0); 5368 } 5369 5370 if (GET_CODE (pattern) == SET) 5371 { 5372 if (reg_mentioned_p (reg, SET_DEST (pattern))) 5373 { 5374 /* We don't use rtx_equal_p, because we don't care if the 5375 mode is different. */ 5376 if (!REG_P (SET_DEST (pattern)) 5377 || REGNO (reg) != REGNO (SET_DEST (pattern))) 5378 return true; 5379 5380 *set = pattern; 5381 } 5382 5383 pattern = SET_SRC (pattern); 5384 } 5385 5386 if (GET_CODE (pattern) != CALL 5387 || !MEM_P (XEXP (pattern, 0)) 5388 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0))) 5389 return true; 5390 5391 return false; 5392 } 5393 5394 /* Given a X, a pattern of an insn or a part of it, return a mask of used 5395 general registers. Bits 0..15 mean that the respective registers 5396 are used as inputs in the instruction. Bits 16..31 mean that the 5397 registers 0..15, respectively, are used as outputs, or are clobbered. 5398 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */ 5399 int 5400 regs_used (rtx x, int is_dest) 5401 { 5402 enum rtx_code code; 5403 const char *fmt; 5404 int used = 0; 5405 5406 if (! 
x) 5407 return used; 5408 code = GET_CODE (x); 5409 switch (code) 5410 { 5411 case REG: 5412 if (REGNO (x) < 16) 5413 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1) 5414 << (REGNO (x) + is_dest)); 5415 return 0; 5416 case SUBREG: 5417 { 5418 rtx y = SUBREG_REG (x); 5419 5420 if (!REG_P (y)) 5421 break; 5422 if (REGNO (y) < 16) 5423 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1) 5424 << (REGNO (y) + 5425 subreg_regno_offset (REGNO (y), 5426 GET_MODE (y), 5427 SUBREG_BYTE (x), 5428 GET_MODE (x)) + is_dest)); 5429 return 0; 5430 } 5431 case SET: 5432 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16); 5433 case RETURN: 5434 /* If there was a return value, it must have been indicated with USE. */ 5435 return 0x00ffff00; 5436 case CLOBBER: 5437 is_dest = 1; 5438 break; 5439 case MEM: 5440 is_dest = 0; 5441 break; 5442 case CALL: 5443 used |= 0x00ff00f0; 5444 break; 5445 default: 5446 break; 5447 } 5448 5449 fmt = GET_RTX_FORMAT (code); 5450 5451 for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 5452 { 5453 if (fmt[i] == 'E') 5454 { 5455 for (int j = XVECLEN (x, i) - 1; j >= 0; j--) 5456 used |= regs_used (XVECEXP (x, i, j), is_dest); 5457 } 5458 else if (fmt[i] == 'e') 5459 used |= regs_used (XEXP (x, i), is_dest); 5460 } 5461 return used; 5462 } 5463 5464 /* Create an instruction that prevents redirection of a conditional branch 5465 to the destination of the JUMP with address ADDR. 5466 If the branch needs to be implemented as an indirect jump, try to find 5467 a scratch register for it. 5468 If NEED_BLOCK is 0, don't do anything unless we need a scratch register. 5469 If any preceding insn that doesn't fit into a delay slot is good enough, 5470 pass 1. Pass 2 if a definite blocking insn is needed. 5471 -1 is used internally to avoid deep recursion. 5472 If a blocking instruction is made or recognized, return it. */ 5473 static rtx_insn * 5474 gen_block_redirect (rtx_insn *jump, int addr, int need_block) 5475 { 5476 int dead = 0; 5477 rtx_insn *prev = prev_nonnote_insn (jump); 5478 5479 /* First, check if we already have an instruction that satisfies our need. */ 5480 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ()) 5481 { 5482 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) 5483 return prev; 5484 if (GET_CODE (PATTERN (prev)) == USE 5485 || GET_CODE (PATTERN (prev)) == CLOBBER 5486 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) 5487 prev = jump; 5488 else if ((need_block &= ~1) < 0) 5489 return prev; 5490 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect) 5491 need_block = 0; 5492 } 5493 if (GET_CODE (PATTERN (jump)) == RETURN) 5494 { 5495 if (! need_block) 5496 return prev; 5497 /* Reorg even does nasty things with return insns that cause branches 5498 to go out of range - see find_end_label and callers. */ 5499 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump); 5500 } 5501 /* We can't use JUMP_LABEL here because it might be undefined 5502 when not optimizing. */ 5503 rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0); 5504 /* If the branch is out of range, try to find a scratch register for it. */ 5505 if (optimize 5506 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092 5507 > 4092 + 4098)) 5508 { 5509 rtx_insn *scan; 5510 /* Don't look for the stack pointer as a scratch register, 5511 it would cause trouble if an interrupt occurred. 
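     That is why the candidate mask below starts out as 0x7fff: bits
     0..14 stand for r0..r14, leaving out bit 15, the stack pointer.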
*/ 5512 unsigned attempt = 0x7fff, used; 5513 int jump_left = flag_expensive_optimizations + 1; 5514 5515 /* It is likely that the most recent eligible instruction is wanted for 5516 the delay slot. Therefore, find out which registers it uses, and 5517 try to avoid using them. */ 5518 5519 for (scan = jump; (scan = PREV_INSN (scan)); ) 5520 { 5521 if (scan->deleted ()) 5522 continue; 5523 rtx_code code = GET_CODE (scan); 5524 if (code == CODE_LABEL || code == JUMP_INSN) 5525 break; 5526 if (code == INSN 5527 && GET_CODE (PATTERN (scan)) != USE 5528 && GET_CODE (PATTERN (scan)) != CLOBBER 5529 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES) 5530 { 5531 attempt &= ~regs_used (PATTERN (scan), 0); 5532 break; 5533 } 5534 } 5535 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump); 5536 (scan = NEXT_INSN (scan)); ) 5537 { 5538 if (scan->deleted ()) 5539 continue; 5540 rtx_code code = GET_CODE (scan); 5541 if (INSN_P (scan)) 5542 { 5543 used |= regs_used (PATTERN (scan), 0); 5544 if (code == CALL_INSN) 5545 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0); 5546 dead |= (used >> 16) & ~used; 5547 if (dead & attempt) 5548 { 5549 dead &= attempt; 5550 break; 5551 } 5552 if (code == JUMP_INSN) 5553 { 5554 if (jump_left-- && simplejump_p (scan)) 5555 scan = JUMP_LABEL_AS_INSN (scan); 5556 else 5557 break; 5558 } 5559 } 5560 } 5561 /* Mask out the stack pointer again, in case it was 5562 the only 'free' register we have found. */ 5563 dead &= 0x7fff; 5564 } 5565 /* If the immediate destination is still in range, check for possible 5566 threading with a jump beyond the delay slot insn. 5567 Don't check if we are called recursively; the jump has been or will be 5568 checked in a different invocation then. */ 5569 5570 else if (optimize && need_block >= 0) 5571 { 5572 rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest)); 5573 next = next_active_insn (next); 5574 if (next && JUMP_P (next) 5575 && GET_CODE (PATTERN (next)) == SET 5576 && recog_memoized (next) == CODE_FOR_jump_compact) 5577 { 5578 dest = JUMP_LABEL (next); 5579 if (dest 5580 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092 5581 > 4092 + 4098)) 5582 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1); 5583 } 5584 } 5585 5586 if (dead) 5587 { 5588 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead)); 5589 5590 /* It would be nice if we could convert the jump into an indirect 5591 jump / far branch right now, and thus exposing all constituent 5592 instructions to further optimization. However, reorg uses 5593 simplejump_p to determine if there is an unconditional jump where 5594 it should try to schedule instructions from the target of the 5595 branch; simplejump_p fails for indirect jumps even if they have 5596 a JUMP_LABEL. */ 5597 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch 5598 (reg, GEN_INT (unspec_bbr_uid++)), 5599 jump); 5600 /* ??? We would like this to have the scope of the jump, but that 5601 scope will change when a delay slot insn of an inner scope is added. 5602 Hence, after delay slot scheduling, we'll have to expect 5603 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and 5604 the jump. */ 5605 5606 INSN_LOCATION (insn) = INSN_LOCATION (jump); 5607 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch; 5608 return insn; 5609 } 5610 else if (need_block) 5611 /* We can't use JUMP_LABEL here because it might be undefined 5612 when not optimizing. 
*/ 5613 return emit_insn_before (gen_block_branch_redirect 5614 (GEN_INT (unspec_bbr_uid++)), 5615 jump); 5616 return prev; 5617 } 5618 5619 #define CONDJUMP_MIN -252 5620 #define CONDJUMP_MAX 262 5621 struct far_branch 5622 { 5623 /* A label (to be placed) in front of the jump 5624 that jumps to our ultimate destination. */ 5625 rtx_insn *near_label; 5626 /* Where we are going to insert it if we cannot move the jump any farther, 5627 or the jump itself if we have picked up an existing jump. */ 5628 rtx_insn *insert_place; 5629 /* The ultimate destination. */ 5630 rtx_insn *far_label; 5631 struct far_branch *prev; 5632 /* If the branch has already been created, its address; 5633 else the address of its first prospective user. */ 5634 int address; 5635 }; 5636 5637 enum mdep_reorg_phase_e mdep_reorg_phase; 5638 5639 static void 5640 gen_far_branch (struct far_branch *bp) 5641 { 5642 rtx_insn *insn = bp->insert_place; 5643 rtx_jump_insn *jump; 5644 rtx_code_label *label = gen_label_rtx (); 5645 5646 emit_label_after (label, insn); 5647 if (bp->far_label) 5648 { 5649 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn); 5650 LABEL_NUSES (bp->far_label)++; 5651 } 5652 else 5653 jump = emit_jump_insn_after (gen_return (), insn); 5654 5655 /* Emit a barrier so that reorg knows that any following instructions 5656 are not reachable via a fall-through path. 5657 But don't do this when not optimizing, since we wouldn't suppress the 5658 alignment for the barrier then, and could end up with out-of-range 5659 pc-relative loads. */ 5660 if (optimize) 5661 emit_barrier_after (jump); 5662 emit_label_after (bp->near_label, insn); 5663 5664 if (bp->far_label) 5665 JUMP_LABEL (jump) = bp->far_label; 5666 else 5667 { 5668 rtx pat = PATTERN (jump); 5669 gcc_assert (ANY_RETURN_P (pat)); 5670 JUMP_LABEL (jump) = pat; 5671 } 5672 5673 bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1); 5674 gcc_assert (ok); 5675 5676 /* If we are branching around a jump (rather than a return), prevent 5677 reorg from using an insn from the jump target as the delay slot insn - 5678 when reorg did this, it pessimized code (we rather hide the delay slot) 5679 and it could cause branches to go out of range. */ 5680 if (bp->far_label) 5681 (emit_insn_after 5682 (gen_stuff_delay_slot 5683 (GEN_INT (unspec_bbr_uid++), 5684 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)), 5685 insn)); 5686 /* Prevent reorg from undoing our splits. */ 5687 gen_block_redirect (jump, bp->address += 2, 2); 5688 } 5689 5690 /* Fix up ADDR_DIFF_VECs. */ 5691 void 5692 fixup_addr_diff_vecs (rtx_insn *first) 5693 { 5694 rtx_insn *insn; 5695 5696 for (insn = first; insn; insn = NEXT_INSN (insn)) 5697 { 5698 rtx vec_lab, pat, prevpat, x, braf_label; 5699 rtx_insn *prev; 5700 5701 if (! JUMP_TABLE_DATA_P (insn) 5702 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) 5703 continue; 5704 pat = PATTERN (insn); 5705 vec_lab = XEXP (XEXP (pat, 0), 0); 5706 5707 /* Search the matching casesi_jump_2. */ 5708 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev)) 5709 { 5710 if (!JUMP_P (prev)) 5711 continue; 5712 prevpat = PATTERN (prev); 5713 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2) 5714 continue; 5715 x = XVECEXP (prevpat, 0, 1); 5716 if (GET_CODE (x) != USE) 5717 continue; 5718 x = XEXP (x, 0); 5719 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab) 5720 break; 5721 } 5722 /* FIXME: This is a bug in the optimizer, but it seems harmless 5723 to just avoid panicing. 
*/ 5724 if (!prev) 5725 continue; 5726 5727 /* Emit the reference label of the braf where it belongs, right after 5728 the casesi_jump_2 (i.e. braf). */ 5729 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0); 5730 emit_label_after (as_a <rtx_insn *> (braf_label), prev); 5731 5732 /* Fix up the ADDR_DIF_VEC to be relative 5733 to the reference address of the braf. */ 5734 XEXP (XEXP (pat, 0), 0) = braf_label; 5735 } 5736 } 5737 5738 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following 5739 a barrier. Return the base 2 logarithm of the desired alignment. */ 5740 int 5741 barrier_align (rtx_insn *barrier_or_label) 5742 { 5743 if (! barrier_or_label) 5744 return 0; 5745 5746 if (LABEL_P (barrier_or_label) 5747 && NEXT_INSN (barrier_or_label) 5748 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label))) 5749 return 2; 5750 5751 if (BARRIER_P (barrier_or_label) 5752 && PREV_INSN (barrier_or_label) 5753 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label))) 5754 { 5755 rtx pat = PATTERN (PREV_INSN (barrier_or_label)); 5756 /* If this is a very small table, we want to keep the alignment after 5757 the table to the minimum for proper code alignment. */ 5758 return ((optimize_size 5759 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat)) 5760 <= (unsigned) 1 << (CACHE_LOG - 2))) 5761 ? 1 : align_jumps.levels[0].log); 5762 } 5763 5764 rtx_insn *next = next_active_insn (barrier_or_label); 5765 5766 if (! next) 5767 return 0; 5768 5769 rtx pat = PATTERN (next); 5770 5771 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN) 5772 /* This is a barrier in front of a constant table. */ 5773 return 0; 5774 5775 if (optimize_size) 5776 return 0; 5777 5778 if (! TARGET_SH2 || ! optimize) 5779 return align_jumps.levels[0].log; 5780 5781 /* When fixing up pcloads, a constant table might be inserted just before 5782 the basic block that ends with the barrier. Thus, we can't trust the 5783 instruction lengths before that. */ 5784 if (mdep_reorg_phase > SH_FIXUP_PCLOAD) 5785 { 5786 /* Check if there is an immediately preceding branch to the insn beyond 5787 the barrier. We must weight the cost of discarding useful information 5788 from the current cache line when executing this branch and there is 5789 an alignment, against that of fetching unneeded insn in front of the 5790 branch target when there is no alignment. */ 5791 5792 /* There are two delay_slot cases to consider. One is the simple case 5793 where the preceding branch is to the insn beyond the barrier (simple 5794 delay slot filling), and the other is where the preceding branch has 5795 a delay slot that is a duplicate of the insn after the barrier 5796 (fill_eager_delay_slots) and the branch is to the insn after the insn 5797 after the barrier. */ 5798 5799 int slot, credit; 5800 bool jump_to_next = false; 5801 5802 /* Skip to the insn before the JUMP_INSN before the barrier under 5803 investigation. */ 5804 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label)); 5805 5806 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2; 5807 credit >= 0 && prev && NONJUMP_INSN_P (prev); 5808 prev = prev_real_insn (prev)) 5809 { 5810 jump_to_next = false; 5811 if (GET_CODE (PATTERN (prev)) == USE 5812 || GET_CODE (PATTERN (prev)) == CLOBBER) 5813 continue; 5814 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev))) 5815 { 5816 prev = prev_seq->insn (1); 5817 if (INSN_UID (prev) == INSN_UID (next)) 5818 { 5819 /* Delay slot was filled with insn at jump target. 
*/ 5820 jump_to_next = true; 5821 continue; 5822 } 5823 } 5824 5825 if (slot && 5826 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) 5827 slot = 0; 5828 credit -= get_attr_length (prev); 5829 } 5830 if (prev && jump_to_label_p (prev)) 5831 { 5832 rtx_insn *x; 5833 if (jump_to_next 5834 || next_real_insn (JUMP_LABEL_AS_INSN (prev)) == next 5835 /* If relax_delay_slots() decides NEXT was redundant 5836 with some previous instruction, it will have 5837 redirected PREV's jump to the following insn. */ 5838 || JUMP_LABEL (prev) == next_nonnote_insn (next) 5839 /* There is no upper bound on redundant instructions 5840 that might have been skipped, but we must not put an 5841 alignment where none had been before. */ 5842 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))), 5843 (INSN_P (x) 5844 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect 5845 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch 5846 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot)))) 5847 { 5848 rtx pat = PATTERN (prev); 5849 if (GET_CODE (pat) == PARALLEL) 5850 pat = XVECEXP (pat, 0, 0); 5851 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0)) 5852 return 0; 5853 } 5854 } 5855 } 5856 5857 return align_jumps.levels[0].log; 5858 } 5859 5860 /* If we are inside a phony loop, almost any kind of label can turn up as the 5861 first one in the loop. Aligning a braf label causes incorrect switch 5862 destination addresses; we can detect braf labels because they are 5863 followed by a BARRIER. 5864 Applying loop alignment to small constant or switch tables is a waste 5865 of space, so we suppress this too. */ 5866 int 5867 sh_loop_align (rtx_insn *label) 5868 { 5869 rtx_insn *next = label; 5870 5871 if (! optimize || optimize_size) 5872 return 0; 5873 5874 do 5875 next = next_nonnote_insn (next); 5876 while (next && LABEL_P (next)); 5877 5878 if (! next 5879 || ! INSN_P (next) 5880 || recog_memoized (next) == CODE_FOR_consttable_2) 5881 return 0; 5882 5883 return align_loops.levels[0].log; 5884 } 5885 5886 /* Do a final pass over the function, just before delayed branch 5887 scheduling. */ 5888 static void 5889 sh_reorg (void) 5890 { 5891 rtx_insn *first, *insn, *mova = NULL; 5892 int num_mova; 5893 rtx r0_rtx = gen_rtx_REG (Pmode, 0); 5894 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx); 5895 5896 first = get_insns (); 5897 max_labelno_before_reorg = max_label_num (); 5898 5899 /* We must split call insns before introducing `mova's. If we're 5900 optimizing, they'll have already been split. Otherwise, make 5901 sure we don't split them too late. */ 5902 if (! optimize) 5903 split_all_insns_noflow (); 5904 5905 /* If relaxing, generate pseudo-ops to associate function calls with 5906 the symbols they call. It does no harm to not generate these 5907 pseudo-ops. However, when we can generate them, it enables the 5908 linker to potentially relax the jsr to a bsr, and eliminate the 5909 register load and, possibly, the constant pool entry. */ 5910 5911 mdep_reorg_phase = SH_INSERT_USES_LABELS; 5912 if (TARGET_RELAX) 5913 { 5914 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our 5915 own purposes. This works because none of the remaining passes 5916 need to look at them. 5917 5918 ??? But it may break in the future. We should use a machine 5919 dependent REG_NOTE, or some other approach entirely. 
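     As reused below: the insn that loads the call target and the call
     insn itself each get a REG_LABEL_OPERAND note pointing at a fresh
     label; final_prescan_insn later prints that label at the load and a
     ".uses" pseudo-op at the call, which gives the linker the pairing it
     needs to relax a jsr into a bsr.  Roughly, and purely as an
     illustration (the label and pool entry names here are made up):

	LK0:	mov.l	.L5,r1		! load of the call target
		...
		.uses	LK0		! emitted with the call
		jsr	@r1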
*/ 5920 for (insn = first; insn; insn = NEXT_INSN (insn)) 5921 { 5922 if (INSN_P (insn)) 5923 { 5924 rtx note; 5925 5926 while ((note = find_reg_note (insn, REG_LABEL_OPERAND, 5927 NULL_RTX)) != 0) 5928 remove_note (insn, note); 5929 } 5930 } 5931 5932 for (insn = first; insn; insn = NEXT_INSN (insn)) 5933 { 5934 rtx pattern, reg, set, dies; 5935 rtx_code_label *label; 5936 rtx_insn *link, *scan; 5937 int rescan = 0, foundinsn = 0; 5938 5939 if (CALL_P (insn)) 5940 { 5941 pattern = PATTERN (insn); 5942 5943 if (GET_CODE (pattern) == PARALLEL) 5944 pattern = XVECEXP (pattern, 0, 0); 5945 if (GET_CODE (pattern) == SET) 5946 pattern = SET_SRC (pattern); 5947 5948 if (GET_CODE (pattern) != CALL 5949 || !MEM_P (XEXP (pattern, 0))) 5950 continue; 5951 5952 reg = XEXP (XEXP (pattern, 0), 0); 5953 } 5954 else 5955 { 5956 reg = sfunc_uses_reg (insn); 5957 if (! reg) 5958 continue; 5959 } 5960 5961 if (!REG_P (reg)) 5962 continue; 5963 5964 /* Try scanning backward to find where the register is set. */ 5965 link = NULL; 5966 for (scan = PREV_INSN (insn); 5967 scan && !LABEL_P (scan); 5968 scan = PREV_INSN (scan)) 5969 { 5970 if (! INSN_P (scan)) 5971 continue; 5972 5973 if (! reg_mentioned_p (reg, scan)) 5974 continue; 5975 5976 if (noncall_uses_reg (reg, scan, &set)) 5977 break; 5978 5979 if (set) 5980 { 5981 link = scan; 5982 break; 5983 } 5984 } 5985 5986 if (! link) 5987 continue; 5988 5989 /* The register is set at LINK. */ 5990 5991 /* We can only optimize the function call if the register is 5992 being set to a symbol. In theory, we could sometimes 5993 optimize calls to a constant location, but the assembler 5994 and linker do not support that at present. */ 5995 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF 5996 && GET_CODE (SET_SRC (set)) != LABEL_REF) 5997 continue; 5998 5999 /* Scan forward from LINK to the place where REG dies, and 6000 make sure that the only insns which use REG are 6001 themselves function calls. */ 6002 6003 /* ??? This doesn't work for call targets that were allocated 6004 by reload, since there may not be a REG_DEAD note for the 6005 register. */ 6006 6007 dies = NULL_RTX; 6008 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan)) 6009 { 6010 rtx scanset; 6011 6012 /* Don't try to trace forward past a CODE_LABEL if we haven't 6013 seen INSN yet. Ordinarily, we will only find the setting insn 6014 if it is in the same basic block. However, 6015 cross-jumping can insert code labels in between the load and 6016 the call, and can result in situations where a single call 6017 insn may have two targets depending on where we came from. */ 6018 6019 if (LABEL_P (scan) && ! foundinsn) 6020 break; 6021 6022 if (! INSN_P (scan)) 6023 continue; 6024 6025 /* Don't try to trace forward past a JUMP. To optimize 6026 safely, we would have to check that all the 6027 instructions at the jump destination did not use REG. */ 6028 6029 if (JUMP_P (scan)) 6030 break; 6031 6032 if (! reg_mentioned_p (reg, scan)) 6033 continue; 6034 6035 if (noncall_uses_reg (reg, scan, &scanset)) 6036 break; 6037 6038 if (scan == insn) 6039 foundinsn = 1; 6040 6041 if (scan != insn 6042 && (CALL_P (scan) || sfunc_uses_reg (scan))) 6043 { 6044 /* There is a function call to this register other 6045 than the one we are checking. If we optimize 6046 this call, we need to rescan again below. */ 6047 rescan = 1; 6048 } 6049 6050 /* ??? We shouldn't have to worry about SCANSET here. 6051 We should just be able to check for a REG_DEAD note 6052 on a function call. 
However, the REG_DEAD notes are 6053 apparently not dependable around libcalls; c-torture 6054 execute/920501-2 is a test case. If SCANSET is set, 6055 then this insn sets the register, so it must have 6056 died earlier. Unfortunately, this will only handle 6057 the cases in which the register is, in fact, set in a 6058 later insn. */ 6059 6060 /* ??? We shouldn't have to use FOUNDINSN here. 6061 This dates back to when we used LOG_LINKS to find 6062 the most recent insn which sets the register. */ 6063 6064 if (foundinsn 6065 && (scanset 6066 || find_reg_note (scan, REG_DEAD, reg))) 6067 { 6068 dies = scan; 6069 break; 6070 } 6071 } 6072 6073 if (! dies) 6074 { 6075 /* Either there was a branch, or some insn used REG 6076 other than as a function call address. */ 6077 continue; 6078 } 6079 6080 /* Create a code label, and put it in a REG_LABEL_OPERAND note 6081 on the insn which sets the register, and on each call insn 6082 which uses the register. In final_prescan_insn we look for 6083 the REG_LABEL_OPERAND notes, and output the appropriate label 6084 or pseudo-op. */ 6085 6086 label = gen_label_rtx (); 6087 add_reg_note (link, REG_LABEL_OPERAND, label); 6088 add_reg_note (insn, REG_LABEL_OPERAND, label); 6089 if (rescan) 6090 { 6091 scan = link; 6092 do 6093 { 6094 rtx reg2; 6095 6096 scan = NEXT_INSN (scan); 6097 if (scan != insn 6098 && ((CALL_P (scan) 6099 && reg_mentioned_p (reg, scan)) 6100 || ((reg2 = sfunc_uses_reg (scan)) 6101 && REGNO (reg2) == REGNO (reg)))) 6102 add_reg_note (scan, REG_LABEL_OPERAND, label); 6103 } 6104 while (scan != dies); 6105 } 6106 } 6107 } 6108 6109 if (TARGET_SH2) 6110 fixup_addr_diff_vecs (first); 6111 6112 if (optimize) 6113 { 6114 mdep_reorg_phase = SH_SHORTEN_BRANCHES0; 6115 shorten_branches (first); 6116 } 6117 6118 /* Scan the function looking for move instructions which have to be 6119 changed to pc-relative loads and insert the literal tables. */ 6120 mdep_reorg_phase = SH_FIXUP_PCLOAD; 6121 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn)) 6122 { 6123 if (mova_p (insn)) 6124 { 6125 /* ??? basic block reordering can move a switch table dispatch 6126 below the switch table. Check if that has happened. 6127 We only have the addresses available when optimizing; but then, 6128 this check shouldn't be needed when not optimizing. */ 6129 if (!untangle_mova (&num_mova, &mova, insn)) 6130 { 6131 insn = mova; 6132 num_mova = 0; 6133 } 6134 } 6135 else if (JUMP_TABLE_DATA_P (insn) 6136 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC 6137 && num_mova 6138 /* ??? loop invariant motion can also move a mova out of a 6139 loop. Since loop does this code motion anyway, maybe we 6140 should wrap UNSPEC_MOVA into a CONST, so that reload can 6141 move it back. */ 6142 && ((num_mova > 1 6143 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode) 6144 || (prev_nonnote_insn (insn) 6145 == XEXP (MOVA_LABELREF (mova), 0)))) 6146 { 6147 rtx_insn *scan; 6148 int total; 6149 6150 num_mova--; 6151 6152 /* Some code might have been inserted between the mova and 6153 its ADDR_DIFF_VEC. Check if the mova is still in range. */ 6154 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan)) 6155 total += get_attr_length (scan); 6156 6157 /* range of mova is 1020, add 4 because pc counts from address of 6158 second instruction after this one, subtract 2 in case pc is 2 6159 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC 6160 cancels out with alignment effects of the mova itself. 
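     Hence the limit tested below: 1020 + 4 - 2 = 1022 bytes.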
*/ 6161 if (total > 1022) 6162 { 6163 /* Change the mova into a load, and restart scanning 6164 there. broken_move will then return true for mova. */ 6165 fixup_mova (mova); 6166 insn = mova; 6167 } 6168 } 6169 if (broken_move (insn) 6170 || (NONJUMP_INSN_P (insn) 6171 && recog_memoized (insn) == CODE_FOR_casesi_worker_2)) 6172 { 6173 rtx_insn *scan; 6174 /* Scan ahead looking for a barrier to stick the constant table 6175 behind. */ 6176 rtx_insn *barrier = find_barrier (num_mova, mova, insn); 6177 rtx_insn *last_float_move = NULL; 6178 rtx last_float = 0, *last_float_addr = NULL; 6179 int need_aligned_label = 0; 6180 6181 if (num_mova && ! mova_p (mova)) 6182 { 6183 /* find_barrier had to change the first mova into a 6184 pcload; thus, we have to start with this new pcload. */ 6185 insn = mova; 6186 num_mova = 0; 6187 } 6188 /* Now find all the moves between the points and modify them. */ 6189 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan)) 6190 { 6191 if (LABEL_P (scan)) 6192 last_float = 0; 6193 if (NONJUMP_INSN_P (scan) 6194 && recog_memoized (scan) == CODE_FOR_casesi_worker_2) 6195 need_aligned_label = 1; 6196 if (broken_move (scan)) 6197 { 6198 rtx *patp = &PATTERN (scan), pat = *patp; 6199 rtx src, dst; 6200 rtx lab; 6201 rtx newsrc; 6202 machine_mode mode; 6203 6204 if (GET_CODE (pat) == PARALLEL) 6205 patp = &XVECEXP (pat, 0, 0), pat = *patp; 6206 src = SET_SRC (pat); 6207 dst = SET_DEST (pat); 6208 mode = GET_MODE (dst); 6209 6210 if (mode == SImode && satisfies_constraint_I16 (src) 6211 && REGNO (dst) != FPUL_REG) 6212 { 6213 int offset = 0; 6214 6215 mode = HImode; 6216 while (GET_CODE (dst) == SUBREG) 6217 { 6218 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)), 6219 GET_MODE (SUBREG_REG (dst)), 6220 SUBREG_BYTE (dst), 6221 GET_MODE (dst)); 6222 dst = SUBREG_REG (dst); 6223 } 6224 dst = gen_rtx_REG (HImode, REGNO (dst) + offset); 6225 } 6226 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst))) 6227 { 6228 /* This must be an insn that clobbers r0. */ 6229 rtx *clobberp = &XVECEXP (PATTERN (scan), 0, 6230 XVECLEN (PATTERN (scan), 0) 6231 - 1); 6232 rtx clobber = *clobberp; 6233 6234 gcc_assert (GET_CODE (clobber) == CLOBBER 6235 && rtx_equal_p (XEXP (clobber, 0), r0_rtx)); 6236 6237 if (last_float 6238 && reg_set_between_p (r0_rtx, last_float_move, scan)) 6239 last_float = 0; 6240 lab = add_constant (src, mode, last_float); 6241 if (lab) 6242 emit_insn_before (gen_mova (lab), scan); 6243 else 6244 { 6245 /* There will be a REG_UNUSED note for r0 on 6246 LAST_FLOAT_MOVE; we have to change it to REG_INC, 6247 lest reorg:mark_target_live_regs will not 6248 consider r0 to be used, and we end up with delay 6249 slot insn in front of SCAN that clobbers r0. */ 6250 rtx note 6251 = find_regno_note (last_float_move, REG_UNUSED, 0); 6252 6253 /* If we are not optimizing, then there may not be 6254 a note. */ 6255 if (note) 6256 PUT_REG_NOTE_KIND (note, REG_INC); 6257 6258 *last_float_addr = r0_inc_rtx; 6259 } 6260 last_float_move = scan; 6261 last_float = src; 6262 newsrc = gen_const_mem (mode, 6263 (((TARGET_SH4 && ! TARGET_FMOVD) 6264 || REGNO (dst) == FPUL_REG) 6265 ? r0_inc_rtx 6266 : r0_rtx)); 6267 last_float_addr = &XEXP (newsrc, 0); 6268 6269 /* Remove the clobber of r0. */ 6270 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber), 6271 gen_rtx_SCRATCH (Pmode)); 6272 } 6273 /* This is a mova needing a label. Create it. 
*/ 6274 else if (GET_CODE (src) == UNSPEC 6275 && XINT (src, 1) == UNSPEC_MOVA 6276 && GET_CODE (XVECEXP (src, 0, 0)) == CONST) 6277 { 6278 lab = add_constant (XVECEXP (src, 0, 0), mode, 0); 6279 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); 6280 newsrc = gen_rtx_UNSPEC (SImode, 6281 gen_rtvec (1, newsrc), 6282 UNSPEC_MOVA); 6283 } 6284 else if (GET_CODE (src) == UNSPEC_VOLATILE 6285 && XINT (src, 1) == UNSPECV_SP_SWITCH_B) 6286 { 6287 newsrc = XVECEXP (src, 0, 0); 6288 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc); 6289 INSN_CODE (scan) = -1; 6290 continue; 6291 } 6292 else 6293 { 6294 lab = add_constant (src, mode, 0); 6295 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); 6296 newsrc = gen_const_mem (mode, newsrc); 6297 } 6298 *patp = gen_rtx_SET (dst, newsrc); 6299 INSN_CODE (scan) = -1; 6300 } 6301 } 6302 dump_table (need_aligned_label ? insn : 0, barrier); 6303 insn = barrier; 6304 } 6305 } 6306 label_ref_list_d_pool.release (); 6307 for (insn = first; insn; insn = NEXT_INSN (insn)) 6308 PUT_MODE (insn, VOIDmode); 6309 6310 mdep_reorg_phase = SH_SHORTEN_BRANCHES1; 6311 INSN_ADDRESSES_FREE (); 6312 split_branches (first); 6313 6314 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it 6315 also has an effect on the register that holds the address of the sfunc. 6316 Insert an extra dummy insn in front of each sfunc that pretends to 6317 use this register. */ 6318 if (flag_delayed_branch) 6319 { 6320 for (insn = first; insn; insn = NEXT_INSN (insn)) 6321 { 6322 rtx reg = sfunc_uses_reg (insn); 6323 6324 if (! reg) 6325 continue; 6326 emit_insn_before (gen_use_sfunc_addr (reg), insn); 6327 } 6328 } 6329 mdep_reorg_phase = SH_AFTER_MDEP_REORG; 6330 } 6331 6332 /* Return the UID of the insn that follows the specified label. */ 6333 int 6334 get_dest_uid (rtx_insn *label, int max_uid) 6335 { 6336 rtx_insn *dest = next_real_insn (label); 6337 6338 if (! dest) 6339 /* This can happen for an undefined label. */ 6340 return 0; 6341 int dest_uid = INSN_UID (dest); 6342 /* If this is a newly created branch redirection blocking instruction, 6343 we cannot index the branch_uid or insn_addresses arrays with its 6344 uid. But then, we won't need to, because the actual destination is 6345 the following branch. */ 6346 while (dest_uid >= max_uid) 6347 { 6348 dest = NEXT_INSN (dest); 6349 dest_uid = INSN_UID (dest); 6350 } 6351 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN) 6352 return 0; 6353 return dest_uid; 6354 } 6355 6356 /* Split condbranches that are out of range. Also add clobbers for 6357 scratch registers that are needed in far jumps. 6358 We do this before delay slot scheduling, so that it can take our 6359 newly created instructions into account. It also allows us to 6360 find branches with common targets more easily. */ 6361 static void 6362 split_branches (rtx_insn *first) 6363 { 6364 rtx_insn *insn; 6365 struct far_branch **uid_branch, *far_branch_list = 0; 6366 int max_uid = get_max_uid (); 6367 int ok; 6368 6369 /* Find out which branches are out of range. */ 6370 shorten_branches (first); 6371 6372 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch); 6373 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch); 6374 6375 for (insn = first; insn; insn = NEXT_INSN (insn)) 6376 if (! INSN_P (insn)) 6377 continue; 6378 else if (insn->deleted ()) 6379 { 6380 /* Shorten_branches would split this instruction again, 6381 so transform it into a note. 
*/ 6382 SET_INSN_DELETED (insn); 6383 } 6384 else if (JUMP_P (insn)) 6385 { 6386 enum attr_type type = get_attr_type (insn); 6387 if (type == TYPE_CBRANCH) 6388 { 6389 rtx_insn *next, *beyond; 6390 6391 if (get_attr_length (insn) > 4) 6392 { 6393 rtx src = SET_SRC (PATTERN (insn)); 6394 rtx_insn *olabel = safe_as_a <rtx_insn *> (XEXP (XEXP (src, 1), 0)); 6395 int addr = INSN_ADDRESSES (INSN_UID (insn)); 6396 rtx_insn *label = 0; 6397 int dest_uid = get_dest_uid (olabel, max_uid); 6398 struct far_branch *bp = uid_branch[dest_uid]; 6399 6400 /* redirect_jump needs a valid JUMP_LABEL, and it might delete 6401 the label if the LABEL_NUSES count drops to zero. There is 6402 always a jump_optimize pass that sets these values, but it 6403 proceeds to delete unreferenced code, and then if not 6404 optimizing, to un-delete the deleted instructions, thus 6405 leaving labels with too low uses counts. */ 6406 if (! optimize) 6407 { 6408 JUMP_LABEL (insn) = olabel; 6409 LABEL_NUSES (olabel)++; 6410 } 6411 if (! bp) 6412 { 6413 bp = (struct far_branch *) alloca (sizeof *bp); 6414 uid_branch[dest_uid] = bp; 6415 bp->prev = far_branch_list; 6416 far_branch_list = bp; 6417 bp->far_label = as_a <rtx_insn *> ( 6418 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 6419 0)); 6420 LABEL_NUSES (bp->far_label)++; 6421 } 6422 else 6423 { 6424 label = bp->near_label; 6425 if (! label && bp->address - addr >= CONDJUMP_MIN) 6426 { 6427 rtx_insn *block = bp->insert_place; 6428 6429 if (GET_CODE (PATTERN (block)) == RETURN) 6430 block = PREV_INSN (block); 6431 else 6432 block = gen_block_redirect (block, 6433 bp->address, 2); 6434 label = emit_label_after (gen_label_rtx (), 6435 PREV_INSN (block)); 6436 bp->near_label = label; 6437 } 6438 else if (label && ! NEXT_INSN (label)) 6439 { 6440 if (addr + 2 - bp->address <= CONDJUMP_MAX) 6441 bp->insert_place = insn; 6442 else 6443 gen_far_branch (bp); 6444 } 6445 } 6446 if (! label 6447 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN)) 6448 { 6449 bp->near_label = label = gen_label_rtx (); 6450 bp->insert_place = insn; 6451 bp->address = addr; 6452 } 6453 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0); 6454 gcc_assert (ok); 6455 } 6456 else 6457 { 6458 /* get_attr_length (insn) == 2 */ 6459 /* Check if we have a pattern where reorg wants to redirect 6460 the branch to a label from an unconditional branch that 6461 is too far away. */ 6462 /* We can't use JUMP_LABEL here because it might be undefined 6463 when not optimizing. */ 6464 /* A syntax error might cause beyond to be NULL_RTX. 
*/ 6465 rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0); 6466 beyond = next_active_insn (as_a<rtx_insn *> (temp)); 6467 6468 if (beyond 6469 && (JUMP_P (beyond) 6470 || ((beyond = next_active_insn (beyond)) 6471 && JUMP_P (beyond))) 6472 && GET_CODE (PATTERN (beyond)) == SET 6473 && recog_memoized (beyond) == CODE_FOR_jump_compact 6474 && ((INSN_ADDRESSES 6475 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))) 6476 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252) 6477 > 252 + 258 + 2)) 6478 gen_block_redirect (beyond, 6479 INSN_ADDRESSES (INSN_UID (beyond)), 1); 6480 } 6481 6482 next = next_active_insn (insn); 6483 6484 if (next 6485 && (JUMP_P (next) 6486 || ((next = next_active_insn (next)) 6487 && JUMP_P (next))) 6488 && GET_CODE (PATTERN (next)) == SET 6489 && recog_memoized (next) == CODE_FOR_jump_compact 6490 && ((INSN_ADDRESSES 6491 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))) 6492 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252) 6493 > 252 + 258 + 2)) 6494 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1); 6495 } 6496 else if (type == TYPE_JUMP || type == TYPE_RETURN) 6497 { 6498 int addr = INSN_ADDRESSES (INSN_UID (insn)); 6499 rtx_insn *far_label = 0; 6500 int dest_uid = 0; 6501 struct far_branch *bp; 6502 6503 if (type == TYPE_JUMP) 6504 { 6505 if (CROSSING_JUMP_P (insn)) 6506 { 6507 emit_insn_before (gen_block_branch_redirect (const0_rtx), 6508 insn); 6509 continue; 6510 } 6511 6512 far_label = as_a <rtx_insn *> ( 6513 XEXP (SET_SRC (PATTERN (insn)), 0)); 6514 dest_uid = get_dest_uid (far_label, max_uid); 6515 if (! dest_uid) 6516 { 6517 /* Parse errors can lead to labels outside 6518 the insn stream. */ 6519 if (! NEXT_INSN (far_label)) 6520 continue; 6521 6522 if (! optimize) 6523 { 6524 JUMP_LABEL (insn) = far_label; 6525 LABEL_NUSES (far_label)++; 6526 } 6527 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1); 6528 far_label = 0; 6529 } 6530 } 6531 bp = uid_branch[dest_uid]; 6532 if (! bp) 6533 { 6534 bp = (struct far_branch *) alloca (sizeof *bp); 6535 uid_branch[dest_uid] = bp; 6536 bp->prev = far_branch_list; 6537 far_branch_list = bp; 6538 bp->near_label = 0; 6539 bp->far_label = far_label; 6540 if (far_label) 6541 LABEL_NUSES (far_label)++; 6542 } 6543 else if (bp->near_label && ! NEXT_INSN (bp->near_label)) 6544 if (addr - bp->address <= CONDJUMP_MAX) 6545 emit_label_after (bp->near_label, PREV_INSN (insn)); 6546 else 6547 { 6548 gen_far_branch (bp); 6549 bp->near_label = 0; 6550 } 6551 else 6552 bp->near_label = 0; 6553 bp->address = addr; 6554 bp->insert_place = insn; 6555 if (! far_label) 6556 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn); 6557 else 6558 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0); 6559 } 6560 } 6561 /* Generate all pending far branches, 6562 and free our references to the far labels. */ 6563 while (far_branch_list) 6564 { 6565 if (far_branch_list->near_label 6566 && ! NEXT_INSN (far_branch_list->near_label)) 6567 gen_far_branch (far_branch_list); 6568 if (optimize 6569 && far_branch_list->far_label 6570 && ! --LABEL_NUSES (far_branch_list->far_label)) 6571 delete_insn (far_branch_list->far_label); 6572 far_branch_list = far_branch_list->prev; 6573 } 6574 6575 /* Instruction length information is no longer valid due to the new 6576 instructions that have been generated. */ 6577 init_insn_lengths (); 6578 } 6579 6580 /* Dump out instruction addresses, which is useful for debugging the 6581 constant pool table stuff. 
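   (With TARGET_DUMPISIZE, each insn is annotated in the assembly output
   with a comment of the form "! at 0123" giving its address as recorded
   in INSN_ADDRESSES.)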
6582 6583 If relaxing, output the label and pseudo-ops used to link together 6584 calls and the instruction which set the registers. 6585 6586 ??? The addresses printed by this routine for insns are nonsense for 6587 insns which are inside of a sequence where none of the inner insns have 6588 variable length. This is because the second pass of shorten_branches 6589 does not bother to update them. */ 6590 void 6591 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED, 6592 int noperands ATTRIBUTE_UNUSED) 6593 { 6594 if (TARGET_DUMPISIZE) 6595 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn))); 6596 6597 if (TARGET_RELAX) 6598 { 6599 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX)) 6600 { 6601 rtx pattern = PATTERN (insn); 6602 if (GET_CODE (pattern) == PARALLEL) 6603 pattern = XVECEXP (pattern, 0, 0); 6604 switch (GET_CODE (pattern)) 6605 { 6606 case SET: 6607 if (GET_CODE (SET_SRC (pattern)) != CALL 6608 && get_attr_type (insn) != TYPE_SFUNC) 6609 { 6610 targetm.asm_out.internal_label 6611 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0))); 6612 break; 6613 } 6614 /* FALLTHROUGH */ 6615 case CALL: 6616 asm_fprintf (asm_out_file, "\t.uses %LL%d\n", 6617 CODE_LABEL_NUMBER (XEXP (note, 0))); 6618 break; 6619 6620 default: 6621 gcc_unreachable (); 6622 } 6623 } 6624 } 6625 } 6626 6627 /* Dump out any constants accumulated in the final pass. These will 6628 only be labels. */ 6629 const char * 6630 output_jump_label_table (void) 6631 { 6632 if (pool_size) 6633 { 6634 fprintf (asm_out_file, "\t.align 2\n"); 6635 for (int i = 0; i < pool_size; i++) 6636 { 6637 pool_node *p = &pool_vector[i]; 6638 6639 (*targetm.asm_out.internal_label) (asm_out_file, "L", 6640 CODE_LABEL_NUMBER (p->label)); 6641 output_asm_insn (".long %O0", &p->value); 6642 } 6643 pool_size = 0; 6644 } 6645 6646 return ""; 6647 } 6648 6649 /* A full frame looks like: 6650 6651 arg-5 6652 arg-4 6653 [ if current_function_anonymous_args 6654 arg-3 6655 arg-2 6656 arg-1 6657 arg-0 ] 6658 saved-fp 6659 saved-r10 6660 saved-r11 6661 saved-r12 6662 saved-pr 6663 local-n 6664 .. 6665 local-1 6666 local-0 <- fp points here. 6667 6668 Number of bytes pushed for anonymous args, used to pass information 6669 between expand_prologue and expand_epilogue. 6670 6671 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be 6672 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's 6673 for an epilogue and a negative value means that it's for a sibcall 6674 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of 6675 all the registers that are about to be restored, and hence dead. */ 6676 static void 6677 output_stack_adjust (int size, rtx reg, int epilogue_p, 6678 HARD_REG_SET *live_regs_mask, bool frame_p) 6679 { 6680 rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn; 6681 if (size) 6682 { 6683 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT; 6684 6685 /* This test is bogus, as output_stack_adjust is used to re-align the 6686 stack. */ 6687 #if 0 6688 gcc_assert (!(size % align)); 6689 #endif 6690 6691 if (CONST_OK_FOR_ADD (size)) 6692 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size))); 6693 /* Try to do it with two partial adjustments; however, we must make 6694 sure that the stack is properly aligned at all times, in case 6695 an interrupt occurs between the two partial adjustments. 
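      For example, with a 4-byte aligned stack an adjustment of -192 bytes
      is emitted as -96 followed by -96; both halves fit the signed 8-bit
      immediate range that CONST_OK_FOR_ADD accepts, and the intermediate
      stack pointer value stays 4-byte aligned.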
*/ 6696 else if (CONST_OK_FOR_ADD (size / 2 & -align) 6697 && CONST_OK_FOR_ADD (size - (size / 2 & -align))) 6698 { 6699 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align))); 6700 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align)))); 6701 } 6702 else 6703 { 6704 rtx const_reg; 6705 rtx insn; 6706 int temp = epilogue_p ? 7 : 1; 6707 int i; 6708 6709 /* If TEMP is invalid, we could temporarily save a general 6710 register to MACL. However, there is currently no need 6711 to handle this case, so just die when we see it. */ 6712 if (epilogue_p < 0 6713 || current_function_interrupt 6714 || ! call_really_used_regs[temp] || fixed_regs[temp]) 6715 temp = -1; 6716 if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0) 6717 { 6718 HARD_REG_SET temps; 6719 COPY_HARD_REG_SET (temps, call_used_reg_set); 6720 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set); 6721 if (epilogue_p > 0) 6722 { 6723 int nreg = 0; 6724 if (crtl->return_rtx) 6725 { 6726 machine_mode mode; 6727 mode = GET_MODE (crtl->return_rtx); 6728 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG) 6729 nreg = hard_regno_nregs (FIRST_RET_REG, mode); 6730 } 6731 for (i = 0; i < nreg; i++) 6732 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i); 6733 if (crtl->calls_eh_return) 6734 { 6735 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO); 6736 for (i = 0; i <= 3; i++) 6737 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i)); 6738 } 6739 } 6740 if (epilogue_p <= 0) 6741 { 6742 for (i = FIRST_PARM_REG; 6743 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++) 6744 CLEAR_HARD_REG_BIT (temps, i); 6745 if (cfun->static_chain_decl != NULL) 6746 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM); 6747 } 6748 temp = scavenge_reg (&temps); 6749 } 6750 if (temp < 0 && live_regs_mask) 6751 { 6752 HARD_REG_SET temps; 6753 6754 COPY_HARD_REG_SET (temps, *live_regs_mask); 6755 CLEAR_HARD_REG_BIT (temps, REGNO (reg)); 6756 temp = scavenge_reg (&temps); 6757 } 6758 if (temp < 0) 6759 { 6760 rtx adj_reg, tmp_reg, mem; 6761 6762 /* If we reached here, the most likely case is the (sibcall) 6763 epilogue. Put a special push/pop sequence for such case as 6764 the last resort. This looks lengthy but would not be problem 6765 because it seems to be very rare. */ 6766 gcc_assert (epilogue_p); 6767 6768 /* ??? There is still the slight possibility that r4 or 6769 r5 have been reserved as fixed registers or assigned 6770 as global registers, and they change during an 6771 interrupt. There are possible ways to handle this: 6772 6773 - If we are adjusting the frame pointer (r14), we can do 6774 with a single temp register and an ordinary push / pop 6775 on the stack. 6776 - Grab any call-used or call-saved registers (i.e. not 6777 fixed or globals) for the temps we need. We might 6778 also grab r14 if we are adjusting the stack pointer. 6779 If we can't find enough available registers, issue 6780 a diagnostic and die - the user must have reserved 6781 way too many registers. 6782 But since all this is rather unlikely to happen and 6783 would require extra testing, we just die if r4 / r5 6784 are not available. 
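      In outline, the sequence emitted below is: save r4 into the word
      addressed by REG, compute REG + SIZE into r4, push r5 and the saved
      copy of r4 just below that new value with pre-decrement stores, move
      the resulting address into REG, then pop r4 and r5 back with
      post-increment loads, which leaves REG adjusted by exactly SIZE.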
*/ 6785 gcc_assert (!fixed_regs[4] && !fixed_regs[5] 6786 && !global_regs[4] && !global_regs[5]); 6787 6788 adj_reg = gen_rtx_REG (GET_MODE (reg), 4); 6789 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5); 6790 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg); 6791 emit_insn (GEN_MOV (adj_reg, GEN_INT (size))); 6792 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg)); 6793 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg)); 6794 emit_move_insn (mem, tmp_reg); 6795 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg)); 6796 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg)); 6797 emit_move_insn (mem, tmp_reg); 6798 emit_move_insn (reg, adj_reg); 6799 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg)); 6800 emit_move_insn (adj_reg, mem); 6801 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg)); 6802 emit_move_insn (tmp_reg, mem); 6803 /* Tell flow the insns that pop r4/r5 aren't dead. */ 6804 emit_use (tmp_reg); 6805 emit_use (adj_reg); 6806 return; 6807 } 6808 const_reg = gen_rtx_REG (GET_MODE (reg), temp); 6809 6810 /* If SIZE is negative, subtract the positive value. 6811 This sometimes allows a constant pool entry to be shared 6812 between prologue and epilogue code. */ 6813 if (size < 0) 6814 { 6815 emit_insn (GEN_MOV (const_reg, GEN_INT (-size))); 6816 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg)); 6817 } 6818 else 6819 { 6820 emit_insn (GEN_MOV (const_reg, GEN_INT (size))); 6821 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg)); 6822 } 6823 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 6824 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg, 6825 GEN_INT (size)))); 6826 } 6827 } 6828 } 6829 6830 /* Emit the specified insn and mark it as frame related. */ 6831 static rtx_insn * 6832 emit_frame_insn (rtx x) 6833 { 6834 rtx_insn *insn = emit_insn (x); 6835 RTX_FRAME_RELATED_P (insn) = 1; 6836 return insn; 6837 } 6838 6839 /* Output RTL to push register RN onto the stack. */ 6840 static rtx 6841 push (int rn) 6842 { 6843 rtx x; 6844 if (rn == FPUL_REG) 6845 x = gen_push_fpul (); 6846 else if (rn == FPSCR_REG) 6847 x = gen_push_fpscr (); 6848 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD 6849 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn)) 6850 { 6851 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1) 6852 return NULL_RTX; 6853 x = gen_push_4 (gen_rtx_REG (DFmode, rn)); 6854 } 6855 else if (TARGET_SH2E && FP_REGISTER_P (rn)) 6856 x = gen_push_e (gen_rtx_REG (SFmode, rn)); 6857 else 6858 x = gen_push (gen_rtx_REG (SImode, rn)); 6859 6860 x = emit_frame_insn (x); 6861 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM)); 6862 return x; 6863 } 6864 6865 /* Output RTL to pop register RN from the stack. */ 6866 static void 6867 pop (int rn) 6868 { 6869 rtx x, sp_reg, reg; 6870 if (rn == FPUL_REG) 6871 x = gen_pop_fpul (); 6872 else if (rn == FPSCR_REG) 6873 x = gen_pop_fpscr (); 6874 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD 6875 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn)) 6876 { 6877 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1) 6878 return; 6879 x = gen_pop_4 (gen_rtx_REG (DFmode, rn)); 6880 } 6881 else if (TARGET_SH2E && FP_REGISTER_P (rn)) 6882 x = gen_pop_e (gen_rtx_REG (SFmode, rn)); 6883 else 6884 x = gen_pop (gen_rtx_REG (SImode, rn)); 6885 6886 x = emit_insn (x); 6887 6888 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); 6889 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL 6890 ? 
SET_DEST (XVECEXP (PATTERN (x), 0, 0)) 6891 : SET_DEST (PATTERN (x))); 6892 add_reg_note (x, REG_CFA_RESTORE, reg); 6893 add_reg_note (x, REG_CFA_ADJUST_CFA, 6894 gen_rtx_SET (sp_reg, 6895 plus_constant (SImode, sp_reg, 6896 GET_MODE_SIZE (GET_MODE (reg))))); 6897 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM)); 6898 RTX_FRAME_RELATED_P (x) = 1; 6899 } 6900 6901 /* Generate code to push the regs specified in the mask. */ 6902 static void 6903 push_regs (HARD_REG_SET *mask, bool interrupt_handler) 6904 { 6905 bool skip_fpscr = false; 6906 6907 /* Push PR last; this gives better latencies after the prologue, and 6908 candidates for the return delay slot when there are no general 6909 registers pushed. */ 6910 for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0; 6911 i < FIRST_PSEUDO_REGISTER; i++) 6912 { 6913 /* If this is an interrupt handler, and the SZ bit varies, 6914 and we have to push any floating point register, we need 6915 to switch to the correct precision first. */ 6916 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD 6917 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS])) 6918 { 6919 HARD_REG_SET unsaved; 6920 6921 push (FPSCR_REG); 6922 COMPL_HARD_REG_SET (unsaved, *mask); 6923 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved); 6924 skip_fpscr = true; 6925 } 6926 if (i != PR_REG 6927 && (i != FPSCR_REG || ! skip_fpscr) 6928 && TEST_HARD_REG_BIT (*mask, i)) 6929 { 6930 /* If the ISR has RESBANK attribute assigned, don't push any of 6931 the following registers - R0-R14, MACH, MACL and GBR. */ 6932 if (! (sh_cfun_resbank_handler_p () 6933 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG) 6934 || i == MACH_REG 6935 || i == MACL_REG 6936 || i == GBR_REG))) 6937 push (i); 6938 } 6939 } 6940 6941 /* Push banked registers last to improve delay slot opportunities. */ 6942 if (interrupt_handler) 6943 { 6944 bool use_movml = false; 6945 6946 if (TARGET_SH2A) 6947 { 6948 unsigned int count = 0; 6949 6950 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) 6951 if (TEST_HARD_REG_BIT (*mask, i)) 6952 count++; 6953 else 6954 break; 6955 6956 /* Use movml when all banked registers are pushed. */ 6957 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1) 6958 use_movml = true; 6959 } 6960 6961 if (sh_cfun_resbank_handler_p ()) 6962 ; /* Do nothing. */ 6963 else if (use_movml) 6964 { 6965 rtx x, mem, reg, set; 6966 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); 6967 6968 /* We must avoid scheduling multiple store insn with another 6969 insns. */ 6970 emit_insn (gen_blockage ()); 6971 x = gen_movml_push_banked (sp_reg); 6972 x = emit_frame_insn (x); 6973 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) 6974 { 6975 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4)); 6976 reg = gen_rtx_REG (SImode, i); 6977 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); 6978 } 6979 6980 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32)); 6981 add_reg_note (x, REG_CFA_ADJUST_CFA, set); 6982 emit_insn (gen_blockage ()); 6983 } 6984 else 6985 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) 6986 if (TEST_HARD_REG_BIT (*mask, i)) 6987 push (i); 6988 } 6989 6990 /* Don't push PR register for an ISR with RESBANK attribute assigned. */ 6991 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ()) 6992 push (PR_REG); 6993 } 6994 6995 /* Work out the registers which need to be saved, both as a mask and a 6996 count of saved words. Return the count. 
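   (In practice the count is accumulated in bytes, via GET_MODE_SIZE of
   each saved register's natural mode, and the callers add it directly to
   byte-sized frame amounts.)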
6997 6998 If doing a pragma interrupt function, then push all regs used by the 6999 function, and if we call another function (we can tell by looking at PR), 7000 make sure that all the regs it clobbers are safe too. */ 7001 static int 7002 calc_live_regs (HARD_REG_SET *live_regs_mask) 7003 { 7004 unsigned int reg; 7005 tree attrs; 7006 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler; 7007 bool nosave_low_regs; 7008 7009 attrs = DECL_ATTRIBUTES (current_function_decl); 7010 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p (); 7011 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE; 7012 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler; 7013 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE; 7014 7015 CLEAR_HARD_REG_SET (*live_regs_mask); 7016 if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler 7017 && df_regs_ever_live_p (FPSCR_REG)) 7018 target_flags &= ~MASK_FPU_SINGLE; 7019 /* If we can save a lot of saves by switching to double mode, do that. */ 7020 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE) 7021 for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2) 7022 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1) 7023 && (! call_really_used_regs[reg] 7024 || interrupt_handler) 7025 && ++count > 2) 7026 { 7027 target_flags &= ~MASK_FPU_SINGLE; 7028 break; 7029 } 7030 7031 7032 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG); 7033 bool pr_live = (pr_initial 7034 ? (!REG_P (pr_initial) 7035 || REGNO (pr_initial) != (PR_REG)) 7036 : df_regs_ever_live_p (PR_REG)); 7037 /* For Shcompact, if not optimizing, we end up with a memory reference 7038 using the return address pointer for __builtin_return_address even 7039 though there is no actual need to put the PR register on the stack. */ 7040 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM); 7041 7042 /* Force PR to be live if the prologue has to call the SHmedia 7043 argument decoder or register saver. */ 7044 bool has_call = pr_live; 7045 7046 int count; 7047 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; ) 7048 { 7049 if (reg == PR_REG 7050 ? pr_live 7051 : interrupt_handler 7052 ? (/* Need to save all the regs ever live. */ 7053 (df_regs_ever_live_p (reg) 7054 || (call_really_used_regs[reg] 7055 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG 7056 || reg == PIC_OFFSET_TABLE_REGNUM) 7057 && has_call)) 7058 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM 7059 && reg != RETURN_ADDRESS_POINTER_REGNUM 7060 && reg != T_REG && reg != GBR_REG 7061 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG 7062 /* Push fpscr only on targets which have FPU */ 7063 && (reg != FPSCR_REG || TARGET_FPU_ANY)) 7064 : (/* Only push those regs which are used and need to be saved. 
*/ 7065 (false) 7066 || (df_regs_ever_live_p (reg) 7067 && ((!call_really_used_regs[reg] 7068 && !(reg != PIC_OFFSET_TABLE_REGNUM 7069 && fixed_regs[reg] && call_used_regs[reg])) 7070 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY))) 7071 || (crtl->calls_eh_return 7072 && (reg == EH_RETURN_DATA_REGNO (0) 7073 || reg == EH_RETURN_DATA_REGNO (1) 7074 || reg == EH_RETURN_DATA_REGNO (2) 7075 || reg == EH_RETURN_DATA_REGNO (3))) 7076 || ((reg == MACL_REG || reg == MACH_REG) 7077 && df_regs_ever_live_p (reg) 7078 && sh_cfun_attr_renesas_p ()) 7079 )) 7080 { 7081 SET_HARD_REG_BIT (*live_regs_mask, reg); 7082 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg)); 7083 7084 if (TARGET_FPU_DOUBLE && TARGET_FMOVD 7085 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT) 7086 { 7087 if (FP_REGISTER_P (reg)) 7088 { 7089 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1)) 7090 { 7091 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1)); 7092 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1)); 7093 } 7094 } 7095 else if (XD_REGISTER_P (reg)) 7096 { 7097 /* Must switch to double mode to access these registers. */ 7098 target_flags &= ~MASK_FPU_SINGLE; 7099 } 7100 } 7101 } 7102 if (nosave_low_regs && reg == R8_REG) 7103 break; 7104 } 7105 7106 return count; 7107 } 7108 7109 /* Code to generate prologue and epilogue sequences */ 7110 7111 /* PUSHED is the number of bytes that are being pushed on the 7112 stack for register saves. Return the frame size, padded 7113 appropriately so that the stack stays properly aligned. */ 7114 static HOST_WIDE_INT 7115 rounded_frame_size (int pushed) 7116 { 7117 HOST_WIDE_INT size = get_frame_size (); 7118 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT; 7119 7120 if (ACCUMULATE_OUTGOING_ARGS) 7121 size += crtl->outgoing_args_size; 7122 7123 return ((size + pushed + align - 1) & -align) - pushed; 7124 } 7125 7126 /* Expand code for the function prologue. */ 7127 void 7128 sh_expand_prologue (void) 7129 { 7130 int save_flags = target_flags; 7131 tree sp_switch_attr 7132 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)); 7133 7134 current_function_interrupt = sh_cfun_interrupt_handler_p (); 7135 7136 /* We have pretend args if we had an object sent partially in registers 7137 and partially on the stack, e.g. a large structure. */ 7138 int pretend_args = crtl->args.pretend_args_size; 7139 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl) 7140 && (NPARM_REGS(SImode) 7141 > crtl->args.info.arg_count[(int) SH_ARG_INT])) 7142 pretend_args = 0; 7143 7144 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true); 7145 int stack_usage = pretend_args; 7146 7147 /* Emit the code for SETUP_VARARGS. */ 7148 if (cfun->stdarg) 7149 { 7150 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)) 7151 { 7152 /* Push arg regs as if they'd been provided by caller in stack. */ 7153 for (int i = 0; i < NPARM_REGS(SImode); i++) 7154 { 7155 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1; 7156 7157 if (i >= (NPARM_REGS(SImode) 7158 - crtl->args.info.arg_count[(int) SH_ARG_INT] 7159 )) 7160 break; 7161 push (rn); 7162 stack_usage += GET_MODE_SIZE (SImode); 7163 } 7164 } 7165 } 7166 7167 /* If we're supposed to switch stacks at function entry, do so now. */ 7168 if (sp_switch_attr) 7169 { 7170 rtx lab, newsrc; 7171 /* The argument specifies a variable holding the address of the 7172 stack the interrupt function should switch to/from at entry/exit. 
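      A typical use looks like the following; the names are illustrative
      only:

	  void *alt_stack;
	  void __attribute__ ((interrupt_handler, sp_switch ("alt_stack")))
	  isr (void);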
*/ 7173 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr)); 7174 const char* s = ggc_strdup (TREE_STRING_POINTER (arg)); 7175 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s); 7176 7177 lab = add_constant (sp_switch, SImode, 0); 7178 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); 7179 7180 emit_insn (gen_sp_switch_1 (newsrc)); 7181 } 7182 7183 HARD_REG_SET live_regs_mask; 7184 int d = calc_live_regs (&live_regs_mask); 7185 /* ??? Maybe we could save some switching if we can move a mode switch 7186 that already happens to be at the function start into the prologue. */ 7187 if (target_flags != save_flags && ! current_function_interrupt) 7188 emit_insn (gen_toggle_sz ()); 7189 7190 push_regs (&live_regs_mask, current_function_interrupt); 7191 stack_usage += d; 7192 7193 if (flag_pic && !TARGET_FDPIC 7194 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) 7195 emit_insn (gen_GOTaddr2picreg (const0_rtx)); 7196 7197 if (target_flags != save_flags && ! current_function_interrupt) 7198 emit_insn (gen_toggle_sz ()); 7199 7200 target_flags = save_flags; 7201 7202 output_stack_adjust (-rounded_frame_size (d), 7203 stack_pointer_rtx, 0, NULL, true); 7204 stack_usage += rounded_frame_size (d); 7205 7206 if (frame_pointer_needed) 7207 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx)); 7208 7209 /* If we are profiling, make sure no instructions are scheduled before 7210 the call to mcount. Similarly if some call instructions are swapped 7211 before frame related insns, it'll confuse the unwinder because 7212 currently SH has no unwind info for function epilogues. */ 7213 if (crtl->profile || flag_exceptions || flag_unwind_tables) 7214 emit_insn (gen_blockage ()); 7215 7216 if (flag_stack_usage_info) 7217 current_function_static_stack_size = stack_usage; 7218 } 7219 7220 /* Expand code for the function epilogue. */ 7221 void 7222 sh_expand_epilogue (bool sibcall_p) 7223 { 7224 int save_flags = target_flags; 7225 bool fpscr_deferred = false; 7226 int e = sibcall_p ? -1 : 1; 7227 7228 HARD_REG_SET live_regs_mask; 7229 int d = calc_live_regs (&live_regs_mask); 7230 7231 int save_size = d; 7232 int frame_size = rounded_frame_size (d); 7233 7234 if (frame_pointer_needed) 7235 { 7236 /* We must avoid scheduling the epilogue with previous basic blocks. 7237 See PR/18032 and PR/40313. */ 7238 emit_insn (gen_blockage ()); 7239 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e, 7240 &live_regs_mask, true); 7241 7242 /* We must avoid moving the stack pointer adjustment past code 7243 which reads from the local frame, else an interrupt could 7244 occur after the SP adjustment and clobber data in the local 7245 frame. */ 7246 emit_insn (gen_blockage ()); 7247 emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx)); 7248 } 7249 else if (frame_size) 7250 { 7251 /* We must avoid moving the stack pointer adjustment past code 7252 which reads from the local frame, else an interrupt could 7253 occur after the SP adjustment and clobber data in the local 7254 frame. */ 7255 emit_insn (gen_blockage ()); 7256 output_stack_adjust (frame_size, stack_pointer_rtx, e, 7257 &live_regs_mask, true); 7258 } 7259 7260 /* Pop all the registers. */ 7261 7262 if (target_flags != save_flags && ! current_function_interrupt) 7263 emit_insn (gen_toggle_sz ()); 7264 7265 { 7266 int last_reg; 7267 7268 save_size = 0; 7269 /* For an ISR with RESBANK attribute assigned, don't pop PR 7270 register. 
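      (RESBANK refers to the SH2A "resbank" function attribute, e.g.
      void __attribute__ ((interrupt_handler, resbank)) isr (void);
      which lets the hardware register banks take care of saving and
      restoring these registers.  The declaration above is illustrative
      only.)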
*/ 7271 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG) 7272 && !sh_cfun_resbank_handler_p ()) 7273 { 7274 if (!frame_pointer_needed) 7275 emit_insn (gen_blockage ()); 7276 pop (PR_REG); 7277 } 7278 7279 /* Banked registers are popped first to avoid being scheduled in the 7280 delay slot. RTE switches banks before the ds instruction. */ 7281 if (current_function_interrupt) 7282 { 7283 bool use_movml = false; 7284 7285 if (TARGET_SH2A) 7286 { 7287 unsigned int count = 0; 7288 7289 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) 7290 if (TEST_HARD_REG_BIT (live_regs_mask, i)) 7291 count++; 7292 else 7293 break; 7294 7295 /* Use movml when all banked registers are popped. */ 7296 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1) 7297 use_movml = true; 7298 } 7299 7300 if (sh_cfun_resbank_handler_p ()) 7301 ; /* Do nothing. */ 7302 else if (use_movml) 7303 { 7304 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); 7305 7306 /* We must avoid scheduling the multiple-register load insn with 7307 other insns. */ 7308 emit_insn (gen_blockage ()); 7309 emit_insn (gen_movml_pop_banked (sp_reg)); 7310 emit_insn (gen_blockage ()); 7311 } 7312 else 7313 for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--) 7314 if (TEST_HARD_REG_BIT (live_regs_mask, i)) 7315 pop (i); 7316 7317 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1; 7318 } 7319 else 7320 last_reg = FIRST_PSEUDO_REGISTER; 7321 7322 for (int i = 0; i < last_reg; i++) 7323 { 7324 int j = (FIRST_PSEUDO_REGISTER - 1) - i; 7325 7326 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD 7327 && hard_reg_set_intersect_p (live_regs_mask, 7328 reg_class_contents[DF_REGS])) 7329 fpscr_deferred = true; 7330 /* For an ISR with RESBANK attribute assigned, don't pop 7331 following registers, R0-R14, MACH, MACL and GBR. */ 7332 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j) 7333 && ! (sh_cfun_resbank_handler_p () 7334 && ((j >= FIRST_GENERAL_REG 7335 && j < LAST_GENERAL_REG) 7336 || j == MACH_REG 7337 || j == MACL_REG 7338 || j == GBR_REG))) 7339 pop (j); 7340 7341 if (j == FIRST_FP_REG && fpscr_deferred) 7342 pop (FPSCR_REG); 7343 } 7344 } 7345 if (target_flags != save_flags && ! current_function_interrupt) 7346 emit_insn (gen_toggle_sz ()); 7347 target_flags = save_flags; 7348 7349 output_stack_adjust (crtl->args.pretend_args_size + save_size, 7350 stack_pointer_rtx, e, NULL, true); 7351 7352 if (crtl->calls_eh_return) 7353 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx, 7354 EH_RETURN_STACKADJ_RTX)); 7355 7356 /* Switch back to the normal stack if necessary. */ 7357 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl))) 7358 emit_insn (gen_sp_switch_2 ()); 7359 7360 /* Tell flow the insn that pops PR isn't dead. */ 7361 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)) 7362 emit_use (gen_rtx_REG (SImode, PR_REG)); 7363 } 7364 7365 /* Emit code to change the current function's return address to RA. 7366 TEMP is available as a scratch register, if needed. */ 7367 void 7368 sh_set_return_address (rtx ra, rtx tmp) 7369 { 7370 HARD_REG_SET live_regs_mask; 7371 int d = calc_live_regs (&live_regs_mask); 7372 7373 /* If pr_reg isn't live, we can set it directly. */ 7374 if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG)) 7375 { 7376 rtx rr = gen_rtx_REG (SImode, PR_REG); 7377 emit_insn (GEN_MOV (rr, ra)); 7378 /* Tell flow the register for return isn't dead.
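   The USE emitted below is what keeps later passes from deleting the
   preceding move as dead code.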
*/ 7379 emit_use (rr); 7380 return; 7381 } 7382 7383 int pr_offset = rounded_frame_size (d); 7384 7385 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset))); 7386 7387 if (frame_pointer_needed) 7388 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx)); 7389 else 7390 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx)); 7391 7392 tmp = gen_frame_mem (Pmode, tmp); 7393 emit_insn (GEN_MOV (tmp, ra)); 7394 /* Tell this store isn't dead. */ 7395 emit_use (tmp); 7396 } 7397 7398 /* Clear variables at function end. */ 7399 static void 7400 sh_output_function_epilogue (FILE *) 7401 { 7402 } 7403 7404 static rtx 7405 sh_builtin_saveregs (void) 7406 { 7407 /* First unnamed integer register. */ 7408 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT]; 7409 /* Number of integer registers we need to save. */ 7410 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg); 7411 /* First unnamed SFmode float reg */ 7412 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT]; 7413 /* Number of SFmode float regs to save. */ 7414 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg); 7415 rtx regbuf, fpregs; 7416 int bufsize, regno; 7417 alias_set_type alias_set; 7418 7419 if (!TARGET_FPU_ANY) 7420 { 7421 error ("%<__builtin_saveregs%> not supported by this subtarget"); 7422 return const0_rtx; 7423 } 7424 7425 /* Allocate block of memory for the regs. */ 7426 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte? 7427 Or can assign_stack_local accept a 0 SIZE argument? */ 7428 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD); 7429 7430 if (n_floatregs & 1) 7431 { 7432 rtx addr; 7433 7434 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0); 7435 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0)); 7436 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD))); 7437 regbuf = change_address (regbuf, BLKmode, addr); 7438 } 7439 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs) 7440 { 7441 rtx addr, mask; 7442 7443 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0); 7444 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode, 7445 XEXP (regbuf, 0), 4)); 7446 mask = copy_to_mode_reg (Pmode, GEN_INT (-8)); 7447 emit_insn (gen_andsi3 (addr, addr, mask)); 7448 regbuf = change_address (regbuf, BLKmode, addr); 7449 } 7450 else 7451 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0); 7452 alias_set = get_varargs_alias_set (); 7453 set_mem_alias_set (regbuf, alias_set); 7454 7455 /* Save int args. 7456 This is optimized to only save the regs that are necessary. Explicitly 7457 named args need not be saved. */ 7458 if (n_intregs > 0) 7459 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg, 7460 adjust_address (regbuf, BLKmode, 7461 n_floatregs * UNITS_PER_WORD), 7462 n_intregs); 7463 7464 /* Save float args. 7465 This is optimized to only save the regs that are necessary. Explicitly 7466 named args need not be saved. 7467 We explicitly build a pointer to the buffer because it halves the insn 7468 count when not optimizing (otherwise the pointer is built for each reg 7469 saved). 7470 We emit the moves in reverse order so that we can use predecrement. 
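   As an illustration (assumed example values, not mandated by the ABI):
   with UNITS_PER_WORD == 4, n_floatregs == 4 and n_intregs == 2, the
   float save area occupies bytes 0..15 of regbuf and the integer save
   area starts at byte 16, i.e. at n_floatregs * UNITS_PER_WORD; fpregs
   below starts at that same boundary and is walked downwards.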
*/ 7471 7472 fpregs = copy_to_mode_reg (Pmode, 7473 plus_constant (Pmode, XEXP (regbuf, 0), 7474 n_floatregs * UNITS_PER_WORD)); 7475 if (TARGET_FPU_DOUBLE) 7476 { 7477 rtx mem; 7478 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2) 7479 { 7480 emit_insn (gen_addsi3 (fpregs, fpregs, 7481 GEN_INT (-2 * UNITS_PER_WORD))); 7482 mem = change_address (regbuf, DFmode, fpregs); 7483 emit_move_insn (mem, 7484 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno)); 7485 } 7486 regno = first_floatreg; 7487 if (regno & 1) 7488 { 7489 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD))); 7490 mem = change_address (regbuf, SFmode, fpregs); 7491 emit_move_insn (mem, 7492 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) 7493 + regno - SH_REG_MSW_OFFSET)); 7494 } 7495 } 7496 else 7497 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--) 7498 { 7499 rtx mem; 7500 7501 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD))); 7502 mem = change_address (regbuf, SFmode, fpregs); 7503 emit_move_insn (mem, 7504 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno)); 7505 } 7506 7507 /* Return the address of the regbuf. */ 7508 return XEXP (regbuf, 0); 7509 } 7510 7511 /* Define the `__builtin_va_list' type for the ABI. */ 7512 static tree 7513 sh_build_builtin_va_list (void) 7514 { 7515 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; 7516 tree record, type_decl; 7517 7518 if ((! TARGET_SH2E && ! TARGET_SH4) 7519 || TARGET_HITACHI || sh_cfun_attr_renesas_p ()) 7520 return ptr_type_node; 7521 7522 record = (*lang_hooks.types.make_type) (RECORD_TYPE); 7523 type_decl = build_decl (BUILTINS_LOCATION, 7524 TYPE_DECL, get_identifier ("__va_list_tag"), record); 7525 7526 f_next_o = build_decl (BUILTINS_LOCATION, 7527 FIELD_DECL, get_identifier ("__va_next_o"), 7528 ptr_type_node); 7529 f_next_o_limit = build_decl (BUILTINS_LOCATION, 7530 FIELD_DECL, 7531 get_identifier ("__va_next_o_limit"), 7532 ptr_type_node); 7533 f_next_fp = build_decl (BUILTINS_LOCATION, 7534 FIELD_DECL, get_identifier ("__va_next_fp"), 7535 ptr_type_node); 7536 f_next_fp_limit = build_decl (BUILTINS_LOCATION, 7537 FIELD_DECL, 7538 get_identifier ("__va_next_fp_limit"), 7539 ptr_type_node); 7540 f_next_stack = build_decl (BUILTINS_LOCATION, 7541 FIELD_DECL, get_identifier ("__va_next_stack"), 7542 ptr_type_node); 7543 7544 DECL_FIELD_CONTEXT (f_next_o) = record; 7545 DECL_FIELD_CONTEXT (f_next_o_limit) = record; 7546 DECL_FIELD_CONTEXT (f_next_fp) = record; 7547 DECL_FIELD_CONTEXT (f_next_fp_limit) = record; 7548 DECL_FIELD_CONTEXT (f_next_stack) = record; 7549 7550 TYPE_STUB_DECL (record) = type_decl; 7551 TYPE_NAME (record) = type_decl; 7552 TYPE_FIELDS (record) = f_next_o; 7553 DECL_CHAIN (f_next_o) = f_next_o_limit; 7554 DECL_CHAIN (f_next_o_limit) = f_next_fp; 7555 DECL_CHAIN (f_next_fp) = f_next_fp_limit; 7556 DECL_CHAIN (f_next_fp_limit) = f_next_stack; 7557 7558 layout_type (record); 7559 7560 return record; 7561 } 7562 7563 /* Implement `va_start' for varargs and stdarg. */ 7564 static void 7565 sh_va_start (tree valist, rtx nextarg) 7566 { 7567 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; 7568 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack; 7569 tree t, u; 7570 int nfp, nint; 7571 7572 if ((! TARGET_SH2E && ! 
TARGET_SH4) 7573 || TARGET_HITACHI || sh_cfun_attr_renesas_p ()) 7574 { 7575 std_expand_builtin_va_start (valist, nextarg); 7576 return; 7577 } 7578 7579 f_next_o = TYPE_FIELDS (va_list_type_node); 7580 f_next_o_limit = DECL_CHAIN (f_next_o); 7581 f_next_fp = DECL_CHAIN (f_next_o_limit); 7582 f_next_fp_limit = DECL_CHAIN (f_next_fp); 7583 f_next_stack = DECL_CHAIN (f_next_fp_limit); 7584 7585 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o, 7586 NULL_TREE); 7587 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit), 7588 valist, f_next_o_limit, NULL_TREE); 7589 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp, 7590 NULL_TREE); 7591 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit), 7592 valist, f_next_fp_limit, NULL_TREE); 7593 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack), 7594 valist, f_next_stack, NULL_TREE); 7595 7596 /* Call __builtin_saveregs. */ 7597 u = make_tree (sizetype, expand_builtin_saveregs ()); 7598 u = fold_convert (ptr_type_node, u); 7599 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u); 7600 TREE_SIDE_EFFECTS (t) = 1; 7601 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7602 7603 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT]; 7604 if (nfp < 8) 7605 nfp = 8 - nfp; 7606 else 7607 nfp = 0; 7608 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp); 7609 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u); 7610 TREE_SIDE_EFFECTS (t) = 1; 7611 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7612 7613 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u); 7614 TREE_SIDE_EFFECTS (t) = 1; 7615 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7616 7617 nint = crtl->args.info.arg_count[SH_ARG_INT]; 7618 if (nint < 4) 7619 nint = 4 - nint; 7620 else 7621 nint = 0; 7622 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint); 7623 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u); 7624 TREE_SIDE_EFFECTS (t) = 1; 7625 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7626 7627 u = make_tree (ptr_type_node, nextarg); 7628 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u); 7629 TREE_SIDE_EFFECTS (t) = 1; 7630 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7631 } 7632 7633 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized 7634 member, return it. */ 7635 static tree 7636 find_sole_member (tree type) 7637 { 7638 tree field, member = NULL_TREE; 7639 7640 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 7641 { 7642 if (TREE_CODE (field) != FIELD_DECL) 7643 continue; 7644 if (!DECL_SIZE (field)) 7645 return NULL_TREE; 7646 if (integer_zerop (DECL_SIZE (field))) 7647 continue; 7648 if (member) 7649 return NULL_TREE; 7650 member = field; 7651 } 7652 return member; 7653 } 7654 7655 /* Implement `va_arg'. */ 7656 static tree 7657 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, 7658 gimple_seq *post_p ATTRIBUTE_UNUSED) 7659 { 7660 tree tmp; 7661 tree addr, lab_over = NULL, result = NULL; 7662 tree eff_type; 7663 7664 const bool pass_by_ref = 7665 !VOID_TYPE_P (type) 7666 && targetm.calls.must_pass_in_stack (TYPE_MODE (type), type); 7667 7668 if (pass_by_ref) 7669 type = build_pointer_type (type); 7670 7671 HOST_WIDE_INT size = int_size_in_bytes (type); 7672 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; 7673 tree pptr_type_node = build_pointer_type (ptr_type_node); 7674 7675 if ((TARGET_SH2E || TARGET_SH4) 7676 && ! 
(TARGET_HITACHI || sh_cfun_attr_renesas_p ())) 7677 { 7678 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; 7679 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack; 7680 tree lab_false; 7681 tree member; 7682 7683 f_next_o = TYPE_FIELDS (va_list_type_node); 7684 f_next_o_limit = DECL_CHAIN (f_next_o); 7685 f_next_fp = DECL_CHAIN (f_next_o_limit); 7686 f_next_fp_limit = DECL_CHAIN (f_next_fp); 7687 f_next_stack = DECL_CHAIN (f_next_fp_limit); 7688 7689 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o, 7690 NULL_TREE); 7691 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit), 7692 valist, f_next_o_limit, NULL_TREE); 7693 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), 7694 valist, f_next_fp, NULL_TREE); 7695 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit), 7696 valist, f_next_fp_limit, NULL_TREE); 7697 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack), 7698 valist, f_next_stack, NULL_TREE); 7699 7700 /* Structures with a single member with a distinct mode are passed 7701 like their member. This is relevant if the latter has a REAL_TYPE 7702 or COMPLEX_TYPE type. */ 7703 eff_type = type; 7704 while (TREE_CODE (eff_type) == RECORD_TYPE 7705 && (member = find_sole_member (eff_type)) 7706 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE 7707 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE 7708 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE)) 7709 { 7710 tree field_type = TREE_TYPE (member); 7711 7712 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type)) 7713 eff_type = field_type; 7714 else 7715 { 7716 gcc_assert ((TYPE_ALIGN (eff_type) 7717 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type))) 7718 || (TYPE_ALIGN (eff_type) 7719 > GET_MODE_BITSIZE (TYPE_MODE (field_type)))); 7720 break; 7721 } 7722 } 7723 7724 bool pass_as_float; 7725 if (TARGET_FPU_DOUBLE) 7726 { 7727 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8) 7728 || (TREE_CODE (eff_type) == COMPLEX_TYPE 7729 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE 7730 && size <= 16)); 7731 } 7732 else 7733 { 7734 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4); 7735 } 7736 7737 addr = create_tmp_var (pptr_type_node); 7738 lab_false = create_artificial_label (UNKNOWN_LOCATION); 7739 lab_over = create_artificial_label (UNKNOWN_LOCATION); 7740 7741 valist = build_simple_mem_ref (addr); 7742 7743 if (pass_as_float) 7744 { 7745 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp)); 7746 tree cmp; 7747 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE; 7748 7749 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp)); 7750 gimplify_assign (unshare_expr (addr), tmp, pre_p); 7751 7752 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p); 7753 tmp = next_fp_limit; 7754 if (size > 4 && !is_double) 7755 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size); 7756 tmp = build2 (GE_EXPR, boolean_type_node, 7757 unshare_expr (next_fp_tmp), unshare_expr (tmp)); 7758 cmp = build3 (COND_EXPR, void_type_node, tmp, 7759 build1 (GOTO_EXPR, void_type_node, 7760 unshare_expr (lab_false)), NULL_TREE); 7761 if (!is_double) 7762 gimplify_and_add (cmp, pre_p); 7763 7764 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD 7765 || (is_double || size == 16)) 7766 { 7767 tmp = fold_convert (sizetype, next_fp_tmp); 7768 tmp = build2 (BIT_AND_EXPR, sizetype, tmp, 7769 size_int (UNITS_PER_WORD)); 7770 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp); 7771 gimplify_assign (unshare_expr 
(next_fp_tmp), tmp, pre_p); 7772 } 7773 if (is_double) 7774 gimplify_and_add (cmp, pre_p); 7775 7776 #ifdef FUNCTION_ARG_SCmode_WART 7777 if (TYPE_MODE (eff_type) == SCmode 7778 && TARGET_SH4 && TARGET_LITTLE_ENDIAN) 7779 { 7780 tree subtype = TREE_TYPE (eff_type); 7781 tree real, imag; 7782 7783 imag 7784 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL); 7785 imag = get_initialized_tmp_var (imag, pre_p, NULL); 7786 7787 real 7788 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL); 7789 real = get_initialized_tmp_var (real, pre_p, NULL); 7790 7791 result = build2 (COMPLEX_EXPR, eff_type, real, imag); 7792 if (type != eff_type) 7793 result = build1 (VIEW_CONVERT_EXPR, type, result); 7794 result = get_initialized_tmp_var (result, pre_p, NULL); 7795 } 7796 #endif /* FUNCTION_ARG_SCmode_WART */ 7797 7798 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over)); 7799 gimplify_and_add (tmp, pre_p); 7800 7801 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false)); 7802 gimplify_and_add (tmp, pre_p); 7803 7804 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack)); 7805 gimplify_assign (unshare_expr (addr), tmp, pre_p); 7806 gimplify_assign (unshare_expr (next_fp_tmp), 7807 unshare_expr (valist), pre_p); 7808 7809 gimplify_assign (unshare_expr (valist), 7810 unshare_expr (next_fp_tmp), post_p); 7811 valist = next_fp_tmp; 7812 } 7813 else 7814 { 7815 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize); 7816 tmp = build2 (GT_EXPR, boolean_type_node, tmp, 7817 unshare_expr (next_o_limit)); 7818 tmp = build3 (COND_EXPR, void_type_node, tmp, 7819 build1 (GOTO_EXPR, void_type_node, 7820 unshare_expr (lab_false)), 7821 NULL_TREE); 7822 gimplify_and_add (tmp, pre_p); 7823 7824 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o)); 7825 gimplify_assign (unshare_expr (addr), tmp, pre_p); 7826 7827 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over)); 7828 gimplify_and_add (tmp, pre_p); 7829 7830 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false)); 7831 gimplify_and_add (tmp, pre_p); 7832 7833 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A)) 7834 gimplify_assign (unshare_expr (next_o), 7835 unshare_expr (next_o_limit), pre_p); 7836 7837 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack)); 7838 gimplify_assign (unshare_expr (addr), tmp, pre_p); 7839 } 7840 7841 if (!result) 7842 { 7843 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over)); 7844 gimplify_and_add (tmp, pre_p); 7845 } 7846 } 7847 7848 /* ??? In va-sh.h, there had been code to make values larger than 7849 size 8 indirect. This does not match the FUNCTION_ARG macros. */ 7850 7851 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL); 7852 if (result) 7853 { 7854 gimplify_assign (result, tmp, pre_p); 7855 result = build1 (NOP_EXPR, TREE_TYPE (result), result); 7856 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over)); 7857 gimplify_and_add (tmp, pre_p); 7858 } 7859 else 7860 result = tmp; 7861 7862 if (pass_by_ref) 7863 result = build_va_arg_indirect_ref (result); 7864 7865 return result; 7866 } 7867 7868 /* 64 bit floating points memory transfers are paired single precision loads 7869 or store. So DWARF information needs fixing in little endian (unless 7870 PR=SZ=1 in FPSCR). 
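   For example (illustrative), for a DFmode value living in the pair
   starting at FR4, sh_dwarf_register_span below returns a PARALLEL of
   the two SFmode registers, FR5 first and FR4 second, instead of a
   single DFmode register.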
*/ 7871 rtx 7872 sh_dwarf_register_span (rtx reg) 7873 { 7874 unsigned regno = REGNO (reg); 7875 7876 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode) 7877 return NULL_RTX; 7878 7879 return 7880 gen_rtx_PARALLEL (VOIDmode, 7881 gen_rtvec (2, 7882 gen_rtx_REG (SFmode, regno + 1), 7883 gen_rtx_REG (SFmode, regno))); 7884 } 7885 7886 static machine_mode 7887 sh_promote_function_mode (const_tree type, machine_mode mode, 7888 int *punsignedp, const_tree funtype, 7889 int for_return) 7890 { 7891 if (sh_promote_prototypes (funtype)) 7892 return promote_mode (type, mode, punsignedp); 7893 else 7894 return default_promote_function_mode (type, mode, punsignedp, funtype, 7895 for_return); 7896 } 7897 7898 static bool 7899 sh_promote_prototypes (const_tree type) 7900 { 7901 if (TARGET_HITACHI) 7902 return false; 7903 if (! type) 7904 return true; 7905 return ! sh_attr_renesas_p (type); 7906 } 7907 7908 static bool 7909 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode, 7910 const_tree type, bool named ATTRIBUTE_UNUSED) 7911 { 7912 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 7913 7914 if (targetm.calls.must_pass_in_stack (mode, type)) 7915 return true; 7916 7917 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function 7918 wants to know about pass-by-reference semantics for incoming 7919 arguments. */ 7920 if (! cum) 7921 return false; 7922 7923 return false; 7924 } 7925 7926 static bool 7927 sh_callee_copies (cumulative_args_t cum, machine_mode mode, 7928 const_tree type, bool named ATTRIBUTE_UNUSED) 7929 { 7930 /* ??? How can it possibly be correct to return true only on the 7931 caller side of the equation? Is there someplace else in the 7932 sh backend that's magically producing the copies? */ 7933 return (get_cumulative_args (cum)->outgoing 7934 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) 7935 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0)); 7936 } 7937 7938 static sh_arg_class 7939 get_sh_arg_class (machine_mode mode) 7940 { 7941 if (TARGET_FPU_ANY && mode == SFmode) 7942 return SH_ARG_FLOAT; 7943 7944 if (TARGET_FPU_DOUBLE 7945 && (GET_MODE_CLASS (mode) == MODE_FLOAT 7946 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)) 7947 return SH_ARG_FLOAT; 7948 7949 return SH_ARG_INT; 7950 } 7951 7952 /* Round a register number up to a proper boundary for an arg of mode 7953 MODE. 7954 The SH doesn't care about double alignment, so we only 7955 round doubles to even regs when asked to explicitly. */ 7956 static int 7957 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode) 7958 { 7959 /* FIXME: This used to be a macro and has been copy pasted into this 7960 function as is. Make this more readable. */ 7961 return 7962 (((TARGET_ALIGN_DOUBLE 7963 || (TARGET_FPU_DOUBLE 7964 && (mode == DFmode || mode == DCmode) 7965 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode))) 7966 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD) 7967 ? (cum.arg_count[(int) get_sh_arg_class (mode)] 7968 + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1)) 7969 : cum.arg_count[(int) get_sh_arg_class (mode)]); 7970 } 7971 7972 /* Return true if arg of the specified mode should be passed in a register 7973 or false otherwise. */ 7974 static bool 7975 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode, 7976 const_tree type) 7977 { 7978 /* FIXME: This used to be a macro and has been copy pasted into this 7979 function as is. Make this more readable. */ 7980 return 7981 ((type == 0 7982 || (! TREE_ADDRESSABLE (type) 7983 && (! 
(TARGET_HITACHI || cum.renesas_abi) 7984 || ! (AGGREGATE_TYPE_P (type) 7985 || (!TARGET_FPU_ANY 7986 && (GET_MODE_CLASS (mode) == MODE_FLOAT 7987 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode))))))) 7988 && ! cum.force_mem 7989 && (TARGET_SH2E 7990 ? ((mode) == BLKmode 7991 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD 7992 + int_size_in_bytes (type)) 7993 <= NPARM_REGS (SImode) * UNITS_PER_WORD) 7994 : ((sh_round_reg (cum, mode) 7995 + sh_hard_regno_nregs (BASE_ARG_REG (mode), mode)) 7996 <= NPARM_REGS (mode))) 7997 : sh_round_reg (cum, mode) < NPARM_REGS (mode))); 7998 } 7999 8000 static int 8001 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode, 8002 tree type, bool named ATTRIBUTE_UNUSED) 8003 { 8004 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 8005 int words = 0; 8006 8007 if (sh_pass_in_reg_p (*cum, mode, type) 8008 && !TARGET_FPU_DOUBLE 8009 && (sh_round_reg (*cum, mode) 8010 + (mode != BLKmode 8011 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD) 8012 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD)) 8013 > NPARM_REGS (mode))) 8014 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode); 8015 8016 return words * UNITS_PER_WORD; 8017 } 8018 8019 8020 /* Define where to put the arguments to a function. 8021 Value is zero to push the argument on the stack, 8022 or a hard register in which to store the argument. 8023 8024 MODE is the argument's machine mode. 8025 TYPE is the data type of the argument (as a tree). 8026 This is null for libcalls where that information may 8027 not be available. 8028 CUM is a variable of type CUMULATIVE_ARGS which gives info about 8029 the preceding args and about the function being called. 8030 NAMED is nonzero if this argument is a named parameter 8031 (otherwise it is an extra parameter matching an ellipsis). 8032 8033 On SH the first args are normally in registers 8034 and the rest are pushed. Any arg that starts within the first 8035 NPARM_REGS words is at least partially passed in a register unless 8036 its data type forbids. */ 8037 static rtx 8038 sh_function_arg (cumulative_args_t ca_v, machine_mode mode, 8039 const_tree type, bool named) 8040 { 8041 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); 8042 8043 if (mode == VOIDmode) 8044 return ca->renesas_abi ? const1_rtx : const0_rtx; 8045 8046 if (sh_pass_in_reg_p (*ca, mode, type) 8047 && (named || ! (TARGET_HITACHI || ca->renesas_abi))) 8048 { 8049 int regno; 8050 8051 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN 8052 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1))) 8053 { 8054 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode, 8055 gen_rtx_REG (SFmode, 8056 BASE_ARG_REG (mode) 8057 + (sh_round_reg (*ca, mode) ^ 1)), 8058 const0_rtx); 8059 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode, 8060 gen_rtx_REG (SFmode, 8061 BASE_ARG_REG (mode) 8062 + ((sh_round_reg (*ca, mode) + 1) ^ 1)), 8063 GEN_INT (4)); 8064 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2)); 8065 } 8066 8067 /* If the alignment of a DF value causes an SF register to be 8068 skipped, we will use that skipped register for the next SF 8069 value. */ 8070 if ((TARGET_HITACHI || ca->renesas_abi) 8071 && ca->free_single_fp_reg 8072 && mode == SFmode) 8073 return gen_rtx_REG (mode, ca->free_single_fp_reg); 8074 8075 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode)) 8076 ^ (mode == SFmode && TARGET_SH4 8077 && TARGET_LITTLE_ENDIAN 8078 && ! TARGET_HITACHI && ! 
ca->renesas_abi); 8079 return gen_rtx_REG (mode, regno); 8080 8081 } 8082 8083 return NULL_RTX; 8084 } 8085 8086 /* Update the data in CUM to advance over an argument 8087 of mode MODE and data type TYPE. 8088 (TYPE is null for libcalls where that information may not be 8089 available.) */ 8090 static void 8091 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode, 8092 const_tree type, bool named ATTRIBUTE_UNUSED) 8093 { 8094 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); 8095 8096 if (ca->force_mem) 8097 ca->force_mem = false; 8098 8099 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE) 8100 { 8101 /* Note that we've used the skipped register. */ 8102 if (mode == SFmode && ca->free_single_fp_reg) 8103 { 8104 ca->free_single_fp_reg = 0; 8105 return; 8106 } 8107 /* When we have a DF after an SF, there's an SF register that get 8108 skipped in order to align the DF value. We note this skipped 8109 register, because the next SF value will use it, and not the 8110 SF that follows the DF. */ 8111 if (mode == DFmode 8112 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode)) 8113 { 8114 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode) 8115 + BASE_ARG_REG (mode)); 8116 } 8117 } 8118 8119 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi) 8120 || sh_pass_in_reg_p (*ca, mode, type)) 8121 (ca->arg_count[(int) get_sh_arg_class (mode)] 8122 = (sh_round_reg (*ca, mode) 8123 + (mode == BLKmode 8124 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD) 8125 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)))); 8126 } 8127 8128 /* The Renesas calling convention doesn't quite fit into this scheme since 8129 the address is passed like an invisible argument, but one that is always 8130 passed in memory. */ 8131 static rtx 8132 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED) 8133 { 8134 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl)) 8135 return NULL_RTX; 8136 return gen_rtx_REG (Pmode, 2); 8137 } 8138 8139 /* Worker function for TARGET_FUNCTION_VALUE. 8140 8141 For the SH, this is like LIBCALL_VALUE, except that we must change the 8142 mode like PROMOTE_MODE does. 8143 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types 8144 tested here has to be kept in sync with the one in 8145 explow.c:promote_mode. */ 8146 static rtx 8147 sh_function_value (const_tree valtype, 8148 const_tree fn_decl_or_type, 8149 bool outgoing ATTRIBUTE_UNUSED) 8150 { 8151 if (fn_decl_or_type 8152 && !DECL_P (fn_decl_or_type)) 8153 fn_decl_or_type = NULL; 8154 8155 return gen_rtx_REG ( 8156 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT 8157 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4 8158 && (TREE_CODE (valtype) == INTEGER_TYPE 8159 || TREE_CODE (valtype) == ENUMERAL_TYPE 8160 || TREE_CODE (valtype) == BOOLEAN_TYPE 8161 || TREE_CODE (valtype) == REAL_TYPE 8162 || TREE_CODE (valtype) == OFFSET_TYPE)) 8163 && sh_promote_prototypes (fn_decl_or_type) 8164 ? SImode : TYPE_MODE (valtype)), 8165 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype))); 8166 } 8167 8168 /* Worker function for TARGET_LIBCALL_VALUE. */ 8169 static rtx 8170 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) 8171 { 8172 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode)); 8173 } 8174 8175 /* Return true if N is a possible register number of function value. */ 8176 static bool 8177 sh_function_value_regno_p (const unsigned int regno) 8178 { 8179 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG); 8180 } 8181 8182 /* Worker function for TARGET_RETURN_IN_MEMORY. 
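   For example (illustrative): a struct with three int fields has BLKmode
   and is returned in memory, while a struct wrapping a single int
   typically has SImode and is returned in a register, except that under
   the Renesas ABI every RECORD_TYPE is returned in memory.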
*/ 8183 static bool 8184 sh_return_in_memory (const_tree type, const_tree fndecl) 8185 { 8186 return TYPE_MODE (type) == BLKmode 8187 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl)) 8188 && TREE_CODE (type) == RECORD_TYPE); 8189 } 8190 8191 /* We actually emit the code in sh_expand_prologue. We used to use 8192 a static variable to flag that we need to emit this code, but that 8193 doesn't when inlining, when functions are deferred and then emitted 8194 later. Fortunately, we already have two flags that are part of struct 8195 function that tell if a function uses varargs or stdarg. */ 8196 static void 8197 sh_setup_incoming_varargs (cumulative_args_t ca, 8198 machine_mode mode, 8199 tree type, 8200 int *pretend_arg_size, 8201 int second_time ATTRIBUTE_UNUSED) 8202 { 8203 gcc_assert (cfun->stdarg); 8204 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)) 8205 { 8206 int named_parm_regs, anon_parm_regs; 8207 8208 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode) 8209 + (mode == BLKmode 8210 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD) 8211 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))); 8212 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs; 8213 if (anon_parm_regs > 0) 8214 *pretend_arg_size = anon_parm_regs * 4; 8215 } 8216 } 8217 8218 static bool 8219 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED) 8220 { 8221 return false; 8222 } 8223 8224 static bool 8225 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v) 8226 { 8227 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); 8228 8229 return ! (TARGET_HITACHI || ca->renesas_abi); 8230 } 8231 8232 8233 /* Define the offset between two registers, one to be eliminated, and 8234 the other its replacement, at the start of a routine. */ 8235 int 8236 initial_elimination_offset (int from, int to) 8237 { 8238 const int regs_saved_rounding = 0; 8239 int save_flags = target_flags; 8240 HARD_REG_SET live_regs_mask; 8241 8242 int regs_saved = calc_live_regs (&live_regs_mask); 8243 8244 int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding; 8245 target_flags = save_flags; 8246 8247 int total_saved_regs_space = regs_saved + regs_saved_rounding; 8248 8249 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 8250 return total_saved_regs_space + total_auto_space; 8251 8252 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 8253 return total_saved_regs_space + total_auto_space; 8254 8255 /* Initial gap between fp and sp is 0. */ 8256 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 8257 return 0; 8258 8259 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 8260 return rounded_frame_size (0); 8261 8262 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 8263 return rounded_frame_size (0); 8264 8265 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM 8266 && (to == HARD_FRAME_POINTER_REGNUM 8267 || to == STACK_POINTER_REGNUM)); 8268 return total_auto_space; 8269 } 8270 8271 /* Parse the -mfixed-range= option string. */ 8272 void 8273 sh_fix_range (const char *const_str) 8274 { 8275 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and 8276 REG2 are either register names or register numbers. The effect 8277 of this option is to mark the registers in the range from REG1 to 8278 REG2 as ``fixed'' so they won't be used by the compiler. 
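   For example (illustrative), -mfixed-range=r4-r6 marks r4, r5 and r6 as
   fixed, and several ranges may be given separated by commas, as in
   -mfixed-range=r4-r6,r10-r11.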
*/ 8279 8280 char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str); 8281 8282 while (1) 8283 { 8284 char* dash = strchr (str, '-'); 8285 if (!dash) 8286 { 8287 warning (0, "value of %<-mfixed-range%> must have form REG1-REG2"); 8288 return; 8289 } 8290 *dash = '\0'; 8291 char* comma = strchr (dash + 1, ','); 8292 if (comma) 8293 *comma = '\0'; 8294 8295 int first = decode_reg_name (str); 8296 if (first < 0) 8297 { 8298 warning (0, "unknown register name: %s", str); 8299 return; 8300 } 8301 8302 int last = decode_reg_name (dash + 1); 8303 if (last < 0) 8304 { 8305 warning (0, "unknown register name: %s", dash + 1); 8306 return; 8307 } 8308 8309 *dash = '-'; 8310 8311 if (first > last) 8312 { 8313 warning (0, "%s-%s is an empty range", str, dash + 1); 8314 return; 8315 } 8316 8317 for (int i = first; i <= last; ++i) 8318 fixed_regs[i] = call_used_regs[i] = 1; 8319 8320 if (!comma) 8321 break; 8322 8323 *comma = ','; 8324 str = comma + 1; 8325 } 8326 } 8327 8328 /* Insert any deferred function attributes from earlier pragmas. */ 8329 static void 8330 sh_insert_attributes (tree node, tree *attributes) 8331 { 8332 if (TREE_CODE (node) != FUNCTION_DECL) 8333 return; 8334 8335 /* We are only interested in fields. */ 8336 if (!DECL_P (node)) 8337 return; 8338 8339 /* Append the attributes to the deferred attributes. */ 8340 *sh_deferred_function_attributes_tail = *attributes; 8341 tree attrs = sh_deferred_function_attributes; 8342 if (!attrs) 8343 return; 8344 8345 /* Some attributes imply or require the interrupt attribute. */ 8346 if (!lookup_attribute ("interrupt_handler", attrs) 8347 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node))) 8348 { 8349 /* If we have a trapa_handler, but no interrupt_handler attribute, 8350 insert an interrupt_handler attribute. */ 8351 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE) 8352 /* We can't use sh_pr_interrupt here because that's not in the 8353 java frontend. */ 8354 attrs 8355 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs); 8356 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank, 8357 if the interrupt attribute is missing, we ignore the attribute 8358 and warn. */ 8359 else if (lookup_attribute ("sp_switch", attrs) 8360 || lookup_attribute ("trap_exit", attrs) 8361 || lookup_attribute ("nosave_low_regs", attrs) 8362 || lookup_attribute ("resbank", attrs)) 8363 { 8364 tree *tail; 8365 8366 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs)) 8367 { 8368 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs)) 8369 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs)) 8370 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)) 8371 || is_attribute_p ("resbank", TREE_PURPOSE (attrs))) 8372 warning (OPT_Wattributes, 8373 "%qE attribute only applies to interrupt functions", 8374 TREE_PURPOSE (attrs)); 8375 else 8376 { 8377 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE, 8378 NULL_TREE); 8379 tail = &TREE_CHAIN (*tail); 8380 } 8381 } 8382 attrs = *attributes; 8383 } 8384 } 8385 8386 /* Install the processed list. */ 8387 *attributes = attrs; 8388 8389 /* Clear deferred attributes. */ 8390 sh_deferred_function_attributes = NULL_TREE; 8391 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes; 8392 8393 return; 8394 } 8395 8396 /*------------------------------------------------------------------------------ 8397 Target specific attributes 8398 Supported attributes are: 8399 8400 * interrupt_handler 8401 Specifies this function is an interrupt handler. 
8402 8403 * trapa_handler 8404 Like interrupt_handler, but don't save all registers. 8405 8406 * sp_switch 8407 Specifies an alternate stack for an interrupt handler to run on. 8408 8409 * trap_exit 8410 Use a trapa to exit an interrupt function instead of rte. 8411 8412 * nosave_low_regs 8413 Don't save r0..r7 in an interrupt handler function. 8414 This is useful on SH3* and SH4*, which have a separate set of low 8415 regs for user and privileged modes. 8416 This is mainly to be used for non-reentrant interrupt handlers (i.e. 8417 those that run with interrupts disabled and thus can't be 8418 interrupted themselves). 8419 8420 * renesas 8421 Use Renesas calling/layout conventions (functions and structures). 8422 8423 * resbank 8424 In case of an interrupt handler function, use a register bank to 8425 save registers R0-R14, MACH, MACL, GBR and PR. 8426 This is available only on SH2A targets. 8427 8428 * function_vector 8429 Declares a function to be called using the TBR relative addressing 8430 mode. Takes an argument that specifies the slot number in the table 8431 where this function can be looked up by the JSR/N @@(disp8,TBR) insn. 8432 */ 8433 8434 /* Handle a 'resbank' attribute. */ 8435 static tree 8436 sh_handle_resbank_handler_attribute (tree * node, tree name, 8437 tree args ATTRIBUTE_UNUSED, 8438 int flags ATTRIBUTE_UNUSED, 8439 bool * no_add_attrs) 8440 { 8441 if (!TARGET_SH2A) 8442 { 8443 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A", 8444 name); 8445 *no_add_attrs = true; 8446 } 8447 if (TREE_CODE (*node) != FUNCTION_DECL) 8448 { 8449 warning (OPT_Wattributes, "%qE attribute only applies to functions", 8450 name); 8451 *no_add_attrs = true; 8452 } 8453 8454 return NULL_TREE; 8455 } 8456 8457 /* Handle an "interrupt_handler" attribute; arguments as in 8458 struct attribute_spec.handler. */ 8459 static tree 8460 sh_handle_interrupt_handler_attribute (tree *node, tree name, 8461 tree args ATTRIBUTE_UNUSED, 8462 int flags ATTRIBUTE_UNUSED, 8463 bool *no_add_attrs) 8464 { 8465 if (TREE_CODE (*node) != FUNCTION_DECL) 8466 { 8467 warning (OPT_Wattributes, "%qE attribute only applies to functions", 8468 name); 8469 *no_add_attrs = true; 8470 } 8471 8472 return NULL_TREE; 8473 } 8474 8475 /* Handle a 'function_vector' attribute; arguments as in 8476 struct attribute_spec.handler. */ 8477 static tree 8478 sh2a_handle_function_vector_handler_attribute (tree * node, tree name, 8479 tree args ATTRIBUTE_UNUSED, 8480 int flags ATTRIBUTE_UNUSED, 8481 bool * no_add_attrs) 8482 { 8483 if (!TARGET_SH2A) 8484 { 8485 warning (OPT_Wattributes, "%qE attribute only applies to SH2A", 8486 name); 8487 *no_add_attrs = true; 8488 } 8489 else if (TREE_CODE (*node) != FUNCTION_DECL) 8490 { 8491 warning (OPT_Wattributes, "%qE attribute only applies to functions", 8492 name); 8493 *no_add_attrs = true; 8494 } 8495 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) 8496 { 8497 /* The argument must be a constant integer. */ 8498 warning (OPT_Wattributes, 8499 "%qE attribute argument not an integer constant", 8500 name); 8501 *no_add_attrs = true; 8502 } 8503 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255) 8504 { 8505 /* The argument value must be between 0 and 255. */ 8506 warning (OPT_Wattributes, 8507 "%qE attribute argument should be between 0 and 255", 8508 name); 8509 *no_add_attrs = true; 8510 } 8511 return NULL_TREE; 8512 } 8513 8514 /* Returns true if the function referred to by symbol X has been assigned 8515 the attribute 'function_vector'.
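   For example (illustrative), a call to a function declared as

     void __attribute__ ((function_vector (18))) handler (void);

   references such a symbol on SH2A, and the call can then be dispatched
   through slot 18 of the TBR table.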
*/ 8516 bool 8517 sh2a_is_function_vector_call (rtx x) 8518 { 8519 if (GET_CODE (x) == SYMBOL_REF 8520 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION)) 8521 { 8522 tree tr = SYMBOL_REF_DECL (x); 8523 8524 if (sh2a_function_vector_p (tr)) 8525 return true; 8526 } 8527 8528 return false; 8529 } 8530 8531 /* Returns the function vector number, if the attribute 8532 'function_vector' is assigned, otherwise returns zero. */ 8533 int 8534 sh2a_get_function_vector_number (rtx x) 8535 { 8536 if ((GET_CODE (x) == SYMBOL_REF) 8537 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION)) 8538 { 8539 tree t = SYMBOL_REF_DECL (x); 8540 8541 if (TREE_CODE (t) != FUNCTION_DECL) 8542 return 0; 8543 8544 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list)) 8545 if (is_attribute_p ("function_vector", TREE_PURPOSE (list))) 8546 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list))); 8547 8548 return 0; 8549 } 8550 else 8551 return 0; 8552 } 8553 8554 /* Handle an "sp_switch" attribute; arguments as in 8555 struct attribute_spec.handler. */ 8556 static tree 8557 sh_handle_sp_switch_attribute (tree *node, tree name, tree args, 8558 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 8559 { 8560 if (TREE_CODE (*node) != FUNCTION_DECL) 8561 { 8562 warning (OPT_Wattributes, "%qE attribute only applies to functions", 8563 name); 8564 *no_add_attrs = true; 8565 } 8566 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST) 8567 { 8568 /* The argument must be a constant string. */ 8569 warning (OPT_Wattributes, "%qE attribute argument not a string constant", 8570 name); 8571 *no_add_attrs = true; 8572 } 8573 8574 return NULL_TREE; 8575 } 8576 8577 /* Handle an "trap_exit" attribute; arguments as in 8578 struct attribute_spec.handler. */ 8579 static tree 8580 sh_handle_trap_exit_attribute (tree *node, tree name, tree args, 8581 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 8582 { 8583 if (TREE_CODE (*node) != FUNCTION_DECL) 8584 { 8585 warning (OPT_Wattributes, "%qE attribute only applies to functions", 8586 name); 8587 *no_add_attrs = true; 8588 } 8589 /* The argument specifies a trap number to be used in a trapa instruction 8590 at function exit (instead of an rte instruction). */ 8591 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) 8592 { 8593 /* The argument must be a constant integer. */ 8594 warning (OPT_Wattributes, "%qE attribute argument not an " 8595 "integer constant", name); 8596 *no_add_attrs = true; 8597 } 8598 8599 return NULL_TREE; 8600 } 8601 8602 static tree 8603 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED, 8604 tree name ATTRIBUTE_UNUSED, 8605 tree args ATTRIBUTE_UNUSED, 8606 int flags ATTRIBUTE_UNUSED, 8607 bool *no_add_attrs ATTRIBUTE_UNUSED) 8608 { 8609 return NULL_TREE; 8610 } 8611 8612 /* True if __attribute__((renesas)) or -mrenesas. */ 8613 bool 8614 sh_attr_renesas_p (const_tree td) 8615 { 8616 if (TARGET_HITACHI) 8617 return true; 8618 if (td == NULL_TREE) 8619 return false; 8620 if (DECL_P (td)) 8621 td = TREE_TYPE (td); 8622 if (td == error_mark_node) 8623 return false; 8624 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE; 8625 } 8626 8627 /* True if __attribute__((renesas)) or -mrenesas, for the current 8628 function. */ 8629 bool 8630 sh_cfun_attr_renesas_p (void) 8631 { 8632 return sh_attr_renesas_p (current_function_decl); 8633 } 8634 8635 /* Returns true if the current function has the "interrupt_handler" 8636 attribute set. 
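   For example (illustrative), this holds while compiling the body of

     void __attribute__ ((interrupt_handler)) my_isr (void) { }

   and is what selects the interrupt-specific prologue/epilogue handling
   above.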
*/ 8637 bool 8638 sh_cfun_interrupt_handler_p (void) 8639 { 8640 return (lookup_attribute ("interrupt_handler", 8641 DECL_ATTRIBUTES (current_function_decl)) 8642 != NULL_TREE); 8643 } 8644 8645 /* Returns true if FUNC has been assigned the attribute 8646 "function_vector". */ 8647 bool 8648 sh2a_function_vector_p (tree func) 8649 { 8650 if (TREE_CODE (func) != FUNCTION_DECL) 8651 return false; 8652 8653 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list)) 8654 if (is_attribute_p ("function_vector", TREE_PURPOSE (list))) 8655 return true; 8656 8657 return false; 8658 } 8659 8660 /* Returns true if given tree has the "resbank" attribute set. */ 8661 bool 8662 sh_cfun_resbank_handler_p (void) 8663 { 8664 return ((lookup_attribute ("resbank", 8665 DECL_ATTRIBUTES (current_function_decl)) 8666 != NULL_TREE) 8667 && (lookup_attribute ("interrupt_handler", 8668 DECL_ATTRIBUTES (current_function_decl)) 8669 != NULL_TREE) && TARGET_SH2A); 8670 } 8671 8672 /* Returns true if the current function has a "trap_exit" attribute set. */ 8673 bool 8674 sh_cfun_trap_exit_p (void) 8675 { 8676 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl)) 8677 != NULL_TREE; 8678 } 8679 8680 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */ 8681 static const char * 8682 sh_check_pch_target_flags (int old_flags) 8683 { 8684 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3 8685 | MASK_SH_E | MASK_HARD_SH4 8686 | MASK_FPU_SINGLE | MASK_SH4)) 8687 return _("created and used with different architectures / ABIs"); 8688 if ((old_flags ^ target_flags) & MASK_HITACHI) 8689 return _("created and used with different ABIs"); 8690 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN) 8691 return _("created and used with different endianness"); 8692 return NULL; 8693 } 8694 8695 /* Predicates used by the templates. */ 8696 8697 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx. 8698 Used only in general_movsrc_operand. */ 8699 bool 8700 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED) 8701 { 8702 switch (REGNO (op)) 8703 { 8704 case PR_REG: 8705 case MACL_REG: 8706 case MACH_REG: 8707 return true; 8708 } 8709 return false; 8710 } 8711 8712 /* Returns true if OP is a floating point value with value 0.0. */ 8713 bool 8714 fp_zero_operand (rtx op) 8715 { 8716 if (GET_MODE (op) != SFmode) 8717 return false; 8718 8719 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op); 8720 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r); 8721 } 8722 8723 /* Returns true if OP is a floating point value with value 1.0. */ 8724 bool 8725 fp_one_operand (rtx op) 8726 { 8727 if (GET_MODE (op) != SFmode) 8728 return false; 8729 8730 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1); 8731 } 8732 8733 /* Return the TLS type for TLS symbols. */ 8734 enum tls_model 8735 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED) 8736 { 8737 if (GET_CODE (op) != SYMBOL_REF) 8738 return TLS_MODEL_NONE; 8739 return SYMBOL_REF_TLS_MODEL (op); 8740 } 8741 8742 /* Return the destination address of a branch. */ 8743 static int 8744 branch_dest (rtx branch) 8745 { 8746 rtx dest = SET_SRC (PATTERN (branch)); 8747 8748 if (GET_CODE (dest) == IF_THEN_ELSE) 8749 dest = XEXP (dest, 1); 8750 8751 return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0))); 8752 } 8753 8754 /* Return nonzero if REG is not used after INSN. 8755 We assume REG is a reload reg, and therefore does 8756 not live past labels. It may live past calls or jumps though. 
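   For instance (illustrative), if a later call insn carries a delay-slot
   insn that reads REG, then REG counts as still used after INSN even
   though the call pattern itself never mentions it; the SEQUENCE
   handling below deals with exactly that case.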
*/ 8757 bool 8758 reg_unused_after (rtx reg, rtx_insn *insn) 8759 { 8760 /* If the reg is set by this instruction, then it is safe for our 8761 case. Disregard the case where this is a store to memory, since 8762 we are checking a register used in the store address. */ 8763 rtx set = single_set (insn); 8764 if (set && !MEM_P (SET_DEST (set)) 8765 && reg_overlap_mentioned_p (reg, SET_DEST (set))) 8766 return true; 8767 8768 while ((insn = NEXT_INSN (insn))) 8769 { 8770 if (!INSN_P (insn)) 8771 continue; 8772 8773 rtx_code code = GET_CODE (insn); 8774 8775 #if 0 8776 /* If this is a label that existed before reload, then the register 8777 is dead here. However, if this is a label added by reorg, then 8778 the register may still be live here. We can't tell the difference, 8779 so we just ignore labels completely. */ 8780 if (code == CODE_LABEL) 8781 return 1; 8782 /* else */ 8783 #endif 8784 8785 if (code == JUMP_INSN) 8786 return false; 8787 8788 /* If this is a sequence, we must handle them all at once. 8789 We could have for instance a call that sets the target register, 8790 and an insn in a delay slot that uses the register. In this case, 8791 we must return 0. */ 8792 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) 8793 { 8794 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn)); 8795 bool retval = false; 8796 8797 for (int i = 0; i < seq->len (); i++) 8798 { 8799 rtx_insn *this_insn = seq->insn (i); 8800 rtx set = single_set (this_insn); 8801 8802 if (CALL_P (this_insn)) 8803 code = CALL_INSN; 8804 else if (JUMP_P (this_insn)) 8805 { 8806 if (INSN_ANNULLED_BRANCH_P (this_insn)) 8807 return false; 8808 code = JUMP_INSN; 8809 } 8810 8811 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) 8812 return false; 8813 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) 8814 { 8815 if (!MEM_P (SET_DEST (set))) 8816 retval = true; 8817 else 8818 return false; 8819 } 8820 if (set == NULL_RTX 8821 && reg_overlap_mentioned_p (reg, PATTERN (this_insn))) 8822 return false; 8823 } 8824 if (retval) 8825 return true; 8826 else if (code == JUMP_INSN) 8827 return false; 8828 } 8829 8830 rtx set = single_set (insn); 8831 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) 8832 return false; 8833 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) 8834 return !MEM_P (SET_DEST (set)); 8835 if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn))) 8836 return false; 8837 8838 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)]) 8839 return true; 8840 } 8841 return true; 8842 } 8843 8844 8845 static GTY(()) rtx t_reg_rtx; 8846 rtx 8847 get_t_reg_rtx (void) 8848 { 8849 if (! 
t_reg_rtx) 8850 t_reg_rtx = gen_rtx_REG (SImode, T_REG); 8851 return t_reg_rtx; 8852 } 8853 8854 static GTY(()) tree fpscr_values; 8855 8856 static void 8857 emit_fpu_switch (rtx scratch, int index) 8858 { 8859 if (fpscr_values == NULL) 8860 { 8861 tree t = build_index_type (integer_one_node); 8862 t = build_array_type (integer_type_node, t); 8863 t = build_decl (BUILTINS_LOCATION, 8864 VAR_DECL, get_identifier ("__fpscr_values"), t); 8865 DECL_ARTIFICIAL (t) = 1; 8866 DECL_IGNORED_P (t) = 1; 8867 DECL_EXTERNAL (t) = 1; 8868 TREE_STATIC (t) = 1; 8869 TREE_PUBLIC (t) = 1; 8870 TREE_USED (t) = 1; 8871 8872 fpscr_values = t; 8873 } 8874 8875 rtx src = DECL_RTL (fpscr_values); 8876 if (!can_create_pseudo_p ()) 8877 { 8878 emit_move_insn (scratch, XEXP (src, 0)); 8879 if (index != 0) 8880 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4))); 8881 src = adjust_automodify_address (src, SImode, scratch, index * 4); 8882 } 8883 else 8884 src = adjust_address (src, SImode, index * 4); 8885 8886 emit_insn (gen_lds_fpscr (src)); 8887 } 8888 8889 static rtx get_free_reg (HARD_REG_SET); 8890 8891 /* This function returns a register to use to load the address to load 8892 the fpscr from. Currently it always returns r1 or r7, but when we are 8893 able to use pseudo registers after combine, or have a better mechanism 8894 for choosing a register, it should be done here. */ 8895 /* REGS_LIVE is the liveness information for the point for which we 8896 need this allocation. In some bare-bones exit blocks, r1 is live at the 8897 start. We can even have all of r0..r3 being live: 8898 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; } 8899 INSN before which new insns are placed with will clobber the register 8900 we return. If a basic block consists only of setting the return value 8901 register to a pseudo and using that register, the return value is not 8902 live before or after this block, yet we we'll insert our insns right in 8903 the middle. */ 8904 static rtx 8905 get_free_reg (HARD_REG_SET regs_live) 8906 { 8907 if (! TEST_HARD_REG_BIT (regs_live, 1)) 8908 return gen_rtx_REG (Pmode, 1); 8909 8910 /* Hard reg 1 is live; since this is a small register classes target, 8911 there shouldn't be anything but a jump before the function end. */ 8912 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7)); 8913 return gen_rtx_REG (Pmode, 7); 8914 } 8915 8916 /* This function will set the fpscr from memory. 8917 MODE is the mode we are setting it to. */ 8918 void 8919 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live) 8920 { 8921 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode; 8922 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE); 8923 8924 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX; 8925 emit_fpu_switch (addr_reg, fp_mode == norm_mode); 8926 } 8927 8928 /* Is the given character a logical line separator for the assembler? */ 8929 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR 8930 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';') 8931 #endif 8932 8933 static bool 8934 sequence_insn_p (rtx_insn *insn) 8935 { 8936 rtx_insn* prev = PREV_INSN (insn); 8937 if (prev == NULL) 8938 return false; 8939 8940 rtx_insn* next = NEXT_INSN (prev); 8941 if (next == NULL) 8942 return false; 8943 8944 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE; 8945 } 8946 8947 int 8948 sh_insn_length_adjustment (rtx_insn *insn) 8949 { 8950 /* Instructions with unfilled delay slots take up an extra two bytes for 8951 the nop in the delay slot. 
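   For example (illustrative), a conditional branch whose delay slot
   could not be filled is output together with a trailing nop, so its
   length is increased by two bytes here to keep the computed insn
   lengths, and thus branch shortening decisions, accurate.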
*/ 8952 if (((NONJUMP_INSN_P (insn) 8953 && GET_CODE (PATTERN (insn)) != USE 8954 && GET_CODE (PATTERN (insn)) != CLOBBER) 8955 || CALL_P (insn) || JUMP_P (insn)) 8956 && ! sequence_insn_p (insn) 8957 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES) 8958 return 2; 8959 8960 /* Increase the insn length of a cbranch without a delay slot insn to 8961 force a delay slot which will be stuffed with a nop. */ 8962 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2 8963 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH 8964 && ! sequence_insn_p (insn)) 8965 return 2; 8966 8967 /* sh-dsp parallel processing insn take four bytes instead of two. */ 8968 8969 if (NONJUMP_INSN_P (insn)) 8970 { 8971 int sum = 0; 8972 rtx body = PATTERN (insn); 8973 const char *templ; 8974 char c; 8975 bool maybe_label = true; 8976 8977 if (GET_CODE (body) == ASM_INPUT) 8978 templ = XSTR (body, 0); 8979 else if (asm_noperands (body) >= 0) 8980 templ 8981 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL); 8982 else 8983 return 0; 8984 do 8985 { 8986 int ppi_adjust = 0; 8987 8988 do 8989 c = *templ++; 8990 while (c == ' ' || c == '\t'); 8991 /* all sh-dsp parallel-processing insns start with p. 8992 The only non-ppi sh insn starting with p is pref. 8993 The only ppi starting with pr is prnd. */ 8994 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2)) 8995 ppi_adjust = 2; 8996 /* The repeat pseudo-insn expands two three insns, a total of 8997 six bytes in size. */ 8998 else if ((c == 'r' || c == 'R') 8999 && ! strncasecmp ("epeat", templ, 5)) 9000 ppi_adjust = 4; 9001 while (c && c != '\n' 9002 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ)) 9003 { 9004 /* If this is a label, it is obviously not a ppi insn. */ 9005 if (c == ':' && maybe_label) 9006 { 9007 ppi_adjust = 0; 9008 break; 9009 } 9010 else if (c == '\'' || c == '"') 9011 maybe_label = false; 9012 c = *templ++; 9013 } 9014 sum += ppi_adjust; 9015 maybe_label = c != ':'; 9016 } 9017 while (c); 9018 return sum; 9019 } 9020 return 0; 9021 } 9022 9023 /* Return TRUE for a valid displacement for the REG+disp addressing 9024 with MODE. */ 9025 bool 9026 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a, 9027 bool allow_zero) 9028 { 9029 if (! CONST_INT_P (op)) 9030 return false; 9031 9032 { 9033 const HOST_WIDE_INT offset = INTVAL (op); 9034 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a); 9035 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a); 9036 9037 /* If the mode does not support any displacement always return false. 9038 Even though an index of '0' is actually always valid, it will cause 9039 troubles when e.g. a DFmode move is split into two SFmode moves, 9040 where one SFmode move will have index '0' and the other move will 9041 have index '4'. */ 9042 if (!allow_zero && max_disp < 1) 9043 return false; 9044 9045 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0; 9046 } 9047 } 9048 9049 /* Recognize an RTL expression that is a valid memory address for 9050 an instruction. 9051 The MODE argument is the machine mode for the MEM expression 9052 that wants to use this address. 
9053 Allow REG 9054 REG+disp 9055 REG+r0 9056 REG++ 9057 --REG 9058 GBR 9059 GBR+disp */ 9060 static bool 9061 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict) 9062 { 9063 if (REG_P (x) && REGNO (x) == GBR_REG) 9064 return true; 9065 9066 if (MAYBE_BASE_REGISTER_RTX_P (x, strict)) 9067 return true; 9068 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC) 9069 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict)) 9070 return true; 9071 else if (GET_CODE (x) == PLUS) 9072 { 9073 rtx xop0 = XEXP (x, 0); 9074 rtx xop1 = XEXP (x, 1); 9075 9076 if (REG_P (xop0) && REGNO (xop0) == GBR_REG) 9077 return gbr_displacement (xop1, mode); 9078 9079 if (GET_MODE_SIZE (mode) <= 8 9080 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict) 9081 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false)) 9082 return true; 9083 9084 if (GET_MODE_SIZE (mode) <= 4 9085 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode)) 9086 { 9087 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict) 9088 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict)) 9089 return true; 9090 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict) 9091 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)) 9092 return true; 9093 } 9094 } 9095 9096 return false; 9097 } 9098 9099 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol 9100 isn't protected by a PIC unspec. */ 9101 bool 9102 nonpic_symbol_mentioned_p (rtx x) 9103 { 9104 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF 9105 || GET_CODE (x) == PC) 9106 return true; 9107 9108 /* We don't want to look into the possible MEM location of a 9109 CONST_DOUBLE, since we're not going to use it, in general. */ 9110 if (GET_CODE (x) == CONST_DOUBLE) 9111 return false; 9112 9113 if (GET_CODE (x) == UNSPEC 9114 && (XINT (x, 1) == UNSPEC_PIC 9115 || XINT (x, 1) == UNSPEC_GOT 9116 || XINT (x, 1) == UNSPEC_GOTOFF 9117 || XINT (x, 1) == UNSPEC_GOTPLT 9118 || XINT (x, 1) == UNSPEC_GOTTPOFF 9119 || XINT (x, 1) == UNSPEC_DTPOFF 9120 || XINT (x, 1) == UNSPEC_TPOFF 9121 || XINT (x, 1) == UNSPEC_PLT 9122 || XINT (x, 1) == UNSPEC_PCREL 9123 || XINT (x, 1) == UNSPEC_SYMOFF 9124 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF 9125 || XINT (x, 1) == UNSPEC_GOTFUNCDESC 9126 || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC)) 9127 return false; 9128 9129 const char* fmt = GET_RTX_FORMAT (GET_CODE (x)); 9130 for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 9131 { 9132 if (fmt[i] == 'E') 9133 { 9134 for (int j = XVECLEN (x, i) - 1; j >= 0; j--) 9135 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j))) 9136 return true; 9137 } 9138 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i))) 9139 return true; 9140 } 9141 9142 return false; 9143 } 9144 9145 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or 9146 @GOTOFF in `reg'. */ 9147 rtx 9148 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg) 9149 { 9150 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE) 9151 return orig; 9152 9153 if (GET_CODE (orig) == LABEL_REF 9154 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig))) 9155 { 9156 if (reg == NULL_RTX) 9157 reg = gen_reg_rtx (Pmode); 9158 9159 if (TARGET_FDPIC 9160 && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig)) 9161 { 9162 /* Weak functions may be NULL which doesn't work with 9163 GOTOFFFUNCDESC because the runtime offset is not known. 
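In that case the code below falls back to going through the GOT function descriptor (symGOTFUNCDESC2reg) instead of the GOT-relative form.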
*/ 9164 if (SYMBOL_REF_WEAK (orig)) 9165 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig)); 9166 else 9167 emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig)); 9168 } 9169 else if (TARGET_FDPIC 9170 && (GET_CODE (orig) == LABEL_REF 9171 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig) 9172 && (TREE_READONLY (SYMBOL_REF_DECL (orig)) 9173 || SYMBOL_REF_EXTERNAL_P (orig) 9174 || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig)))))) 9175 /* In FDPIC, GOTOFF can only be used for writable data. */ 9176 emit_insn (gen_symGOT2reg (reg, orig)); 9177 else 9178 emit_insn (gen_symGOTOFF2reg (reg, orig)); 9179 return reg; 9180 } 9181 else if (GET_CODE (orig) == SYMBOL_REF) 9182 { 9183 if (reg == NULL_RTX) 9184 reg = gen_reg_rtx (Pmode); 9185 9186 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig)) 9187 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig)); 9188 else 9189 emit_insn (gen_symGOT2reg (reg, orig)); 9190 return reg; 9191 } 9192 return orig; 9193 } 9194 9195 /* Given a (logical) mode size and an offset in bytes, try to find the 9196 appropriate displacement value for a mov insn. On SH the displacements 9197 are limited to at most 60 bytes in SImode, 30 bytes in HImode and 9198 15 bytes in QImode. To compensate for this we create a new base address by 9199 adding an adjustment value to it. 9200 9201 If the originally requested offset is greater than 127 we prefer using 9202 values 124..127 over 128..131 to increase opportunities to use the 9203 add #imm, Rn insn. 9204 9205 In some cases it is possible that a requested offset might seem unaligned 9206 or inappropriate for the mode size, like offset = 2 and mode size = 4. 9207 This is compensated by adjusting the base address so that the effective 9208 address of the displacement move insn will be aligned. 9209 9210 This is not the best possible way of rebasing the base address, as it 9211 does not look at other displacement addressings present around it. 9212 In some cases this can create more base address adjustments than would 9213 actually be necessary. */ 9214 struct disp_adjust 9215 { 9216 rtx offset_adjust; 9217 rtx mov_disp; 9218 }; 9219 9220 static struct disp_adjust 9221 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset) 9222 { 9223 struct disp_adjust res = { NULL_RTX, NULL_RTX }; 9224 9225 /* Do not try to use SH2A's large displacements here, because this would 9226 effectively disable the small displacement insns. */ 9227 const int mode_sz = GET_MODE_SIZE (mode); 9228 const int mov_insn_sz = mov_insn_size (mode, false); 9229 const int max_disp = sh_max_mov_insn_displacement (mode, false); 9230 const int max_disp_next = max_disp + mov_insn_sz; 9231 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0; 9232 HOST_WIDE_INT offset_adjust; 9233 9234 /* In some cases this actually does happen and we must check for it. */ 9235 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1) 9236 return res; 9237 9238 /* Keeps the previous behavior for QImode displacement addressing. 9239 This just decides how the offset is re-based. Removing this special 9240 case will result in slightly bigger code on average, but it's not that 9241 bad actually.
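As a rough worked example of the computation below (assuming the usual non-SH2A SImode limits of mov_insn_sz = 4 and max_disp = 60): a requested SImode offset of 68 yields offset_adjust = 64 and mov_disp = 4, so the base is rebased with an add #64 and the access becomes a mov.l @(4,Rn).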
*/ 9242 if (mov_insn_sz == 1) 9243 align_modifier = 0; 9244 9245 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier; 9246 9247 if (mode_sz + offset - offset_adjust <= max_disp_next) 9248 { 9249 res.offset_adjust = GEN_INT (offset_adjust); 9250 res.mov_disp = GEN_INT (offset - offset_adjust); 9251 } 9252 9253 return res; 9254 } 9255 9256 /* Try to modify an illegitimate address and make it legitimate. 9257 If we find one, return the new, valid address. 9258 Otherwise, return the original address. */ 9259 static rtx 9260 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode) 9261 { 9262 if (flag_pic) 9263 x = legitimize_pic_address (oldx, mode, NULL_RTX); 9264 9265 if ((TARGET_FPU_DOUBLE && mode == DFmode) 9266 || (TARGET_SH2E && mode == SFmode)) 9267 return x; 9268 9269 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)) 9270 && BASE_REGISTER_RTX_P (XEXP (x, 0))) 9271 { 9272 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, 9273 INTVAL (XEXP (x, 1))); 9274 9275 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX) 9276 { 9277 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0), 9278 adj.offset_adjust, NULL_RTX, 0, 9279 OPTAB_LIB_WIDEN); 9280 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp); 9281 } 9282 } 9283 return x; 9284 } 9285 9286 /* Attempt to replace *p, which is an address that needs reloading, with 9287 a valid memory address for an operand of mode MODE. 9288 Like for sh_legitimize_address, for the SH we try to get a normal form 9289 of the address. That will allow inheritance of the address reloads. */ 9290 bool 9291 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum, 9292 int itype) 9293 { 9294 enum reload_type type = (enum reload_type) itype; 9295 const int mode_sz = GET_MODE_SIZE (mode); 9296 9297 if (sh_lra_p ()) 9298 return false; 9299 9300 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1)) 9301 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)) 9302 { 9303 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1)); 9304 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset); 9305 9306 if (TARGET_SH2A && mode == DFmode && (offset & 0x7)) 9307 { 9308 push_reload (*p, NULL_RTX, p, NULL, 9309 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); 9310 return true; 9311 } 9312 9313 if (TARGET_SH2E && mode == SFmode) 9314 { 9315 *p = copy_rtx (*p); 9316 push_reload (*p, NULL_RTX, p, NULL, 9317 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); 9318 return true; 9319 } 9320 9321 /* FIXME: Do not allow to legitimize QImode and HImode displacement 9322 moves because then reload has a problem figuring the constraint 9323 that the move insn target/source reg must be R0. 9324 Or maybe some handling is wrong in sh_secondary_reload for this 9325 to work properly? */ 9326 if ((mode_sz == 4 || mode_sz == 8) 9327 && ! (TARGET_SH4 && mode == DFmode) 9328 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX) 9329 { 9330 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust); 9331 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp); 9332 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL, 9333 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); 9334 return true; 9335 } 9336 } 9337 9338 /* We must re-recognize what we created before. */ 9339 if (GET_CODE (*p) == PLUS 9340 && (mode_sz == 4 || mode_sz == 8) 9341 && GET_CODE (XEXP (*p, 0)) == PLUS 9342 && CONST_INT_P (XEXP (XEXP (*p, 0), 1)) 9343 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true) 9344 && CONST_INT_P (XEXP (*p, 1)) 9345 && ! 
(TARGET_SH2E && mode == SFmode)) 9346 { 9347 /* Because this address is so complex, we know it must have 9348 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus, 9349 it is already unshared, and needs no further unsharing. */ 9350 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, 9351 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); 9352 return true; 9353 } 9354 9355 return false; 9356 } 9357 9358 /* In the name of slightly smaller debug output, and to cater to 9359 general assembler lossage, recognize various UNSPEC sequences 9360 and turn them back into a direct symbol reference. */ 9361 static rtx 9362 sh_delegitimize_address (rtx orig_x) 9363 { 9364 orig_x = delegitimize_mem_from_attrs (orig_x); 9365 9366 rtx x = orig_x; 9367 if (MEM_P (x)) 9368 x = XEXP (x, 0); 9369 if (GET_CODE (x) == CONST) 9370 { 9371 rtx y = XEXP (x, 0); 9372 if (GET_CODE (y) == UNSPEC) 9373 { 9374 if (XINT (y, 1) == UNSPEC_GOT 9375 || XINT (y, 1) == UNSPEC_GOTOFF 9376 || XINT (y, 1) == UNSPEC_SYMOFF) 9377 return XVECEXP (y, 0, 0); 9378 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF) 9379 { 9380 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST) 9381 { 9382 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0); 9383 9384 if (GET_CODE (symplt) == UNSPEC 9385 && (XINT (symplt, 1) == UNSPEC_PLT 9386 || XINT (symplt, 1) == UNSPEC_PCREL)) 9387 return XVECEXP (symplt, 0, 0); 9388 } 9389 } 9390 } 9391 } 9392 9393 return orig_x; 9394 } 9395 9396 /* Mark the use of a constant in the literal table. If the constant 9397 has multiple labels, make it unique. */ 9398 static rtx 9399 mark_constant_pool_use (rtx x) 9400 { 9401 if (x == NULL_RTX) 9402 return x; 9403 9404 switch (GET_CODE (x)) 9405 { 9406 case LABEL_REF: 9407 x = XEXP (x, 0); 9408 case CODE_LABEL: 9409 break; 9410 default: 9411 return x; 9412 } 9413 9414 /* Get the first label in the list of labels for the same constant 9415 and delete another labels in the list. */ 9416 rtx_insn* lab = as_a <rtx_insn*> (x); 9417 for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn)) 9418 { 9419 if (!LABEL_P (insn) 9420 || LABEL_REFS (insn) != NEXT_INSN (insn)) 9421 break; 9422 lab = insn; 9423 } 9424 9425 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn)) 9426 as_a<rtx_insn *> (insn)->set_deleted (); 9427 9428 /* Mark constants in a window. */ 9429 for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; 9430 insn = NEXT_INSN (insn)) 9431 { 9432 if (!NONJUMP_INSN_P (insn)) 9433 continue; 9434 9435 rtx pattern = PATTERN (insn); 9436 if (GET_CODE (pattern) != UNSPEC_VOLATILE) 9437 continue; 9438 9439 switch (XINT (pattern, 1)) 9440 { 9441 case UNSPECV_CONST2: 9442 case UNSPECV_CONST4: 9443 case UNSPECV_CONST8: 9444 XVECEXP (pattern, 0, 1) = const1_rtx; 9445 break; 9446 case UNSPECV_WINDOW_END: 9447 if (XVECEXP (pattern, 0, 0) == x) 9448 return lab; 9449 break; 9450 case UNSPECV_CONST_END: 9451 return lab; 9452 default: 9453 break; 9454 } 9455 } 9456 9457 return lab; 9458 } 9459 9460 /* Return true if it's possible to redirect BRANCH1 to the destination 9461 of an unconditional jump BRANCH2. We only want to do this if the 9462 resulting branch will have a short displacement. */ 9463 static bool 9464 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2) 9465 { 9466 /* Don't follow if BRANCH2 is possible to be a jump crossing between 9467 hot and cold partitions. 
*/ 9468 if (flag_reorder_blocks_and_partition 9469 && simplejump_p (branch2) 9470 && CROSSING_JUMP_P (branch2)) 9471 return false; 9472 9473 if (flag_expensive_optimizations && simplejump_p (branch2)) 9474 { 9475 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0); 9476 rtx_insn *insn; 9477 int distance; 9478 9479 for (distance = 0, insn = NEXT_INSN (branch1); 9480 insn && distance < 256; 9481 insn = PREV_INSN (insn)) 9482 { 9483 if (insn == dest) 9484 return true; 9485 else 9486 distance += get_attr_length (insn); 9487 } 9488 for (distance = 0, insn = NEXT_INSN (branch1); 9489 insn && distance < 256; 9490 insn = NEXT_INSN (insn)) 9491 { 9492 if (insn == dest) 9493 return true; 9494 else 9495 distance += get_attr_length (insn); 9496 } 9497 } 9498 return false; 9499 } 9500 9501 /* Return nonzero if register old_reg can be renamed to register new_reg. */ 9502 bool 9503 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED, 9504 unsigned int new_reg) 9505 { 9506 /* Interrupt functions can only use registers that have already been 9507 saved by the prologue, even if they would normally be 9508 call-clobbered. */ 9509 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg)) 9510 return false; 9511 9512 return true; 9513 } 9514 9515 /* Function to update the integer COST 9516 based on the relationship between INSN that is dependent on 9517 DEP_INSN through the dependence LINK. The default is to make no 9518 adjustment to COST. This can be used for example to specify to 9519 the scheduler that an output- or anti-dependence does not incur 9520 the same cost as a data-dependence. The return value should be 9521 the new value for COST. */ 9522 static int 9523 sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, 9524 unsigned int) 9525 { 9526 rtx reg, use_pat; 9527 9528 if (dep_type == 0) 9529 { 9530 if (recog_memoized (insn) < 0 9531 || recog_memoized (dep_insn) < 0) 9532 return cost; 9533 9534 rtx dep_set = single_set (dep_insn); 9535 9536 /* The latency that we specify in the scheduling description refers 9537 to the actual output, not to an auto-increment register; for that, 9538 the latency is one. */ 9539 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1) 9540 { 9541 rtx set = single_set (insn); 9542 9543 if (set 9544 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set)) 9545 && (!MEM_P (SET_DEST (set)) 9546 || !reg_mentioned_p (SET_DEST (dep_set), 9547 XEXP (SET_DEST (set), 0)))) 9548 cost = 1; 9549 } 9550 /* The only input for a call that is timing-critical is the 9551 function's address. */ 9552 if (CALL_P (insn)) 9553 { 9554 rtx call = get_call_rtx_from (insn); 9555 if (call 9556 /* sibcalli_thunk uses a symbol_ref in an unspec. */ 9557 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC 9558 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))) 9559 cost -= TARGET_SH4_300 ? 3 : 6; 9560 } 9561 /* Likewise, the most timing critical input for an sfuncs call 9562 is the function address. However, sfuncs typically start 9563 using their arguments pretty quickly. 9564 Assume a four cycle delay for SH4 before they are needed. 9565 Cached ST40-300 calls are quicker, so assume only a one 9566 cycle delay there. 9567 ??? Maybe we should encode the delays till input registers 9568 are needed by sfuncs into the sfunc call insn. */ 9569 /* All sfunc calls are parallels with at least four components. 9570 Exploit this to avoid unnecessary calls to sfunc_uses_reg. 
*/ 9571 else if (GET_CODE (PATTERN (insn)) == PARALLEL 9572 && XVECLEN (PATTERN (insn), 0) >= 4 9573 && (reg = sfunc_uses_reg (insn))) 9574 { 9575 if (! reg_set_p (reg, dep_insn)) 9576 cost -= TARGET_SH4_300 ? 1 : 4; 9577 } 9578 if (TARGET_HARD_SH4 && !TARGET_SH4_300) 9579 { 9580 attr_type dep_type = get_attr_type (dep_insn); 9581 attr_type type; 9582 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD) 9583 cost--; 9584 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI) 9585 && (type = get_attr_type (insn)) != TYPE_CALL 9586 && type != TYPE_SFUNC) 9587 cost--; 9588 /* When the preceding instruction loads the shift amount of 9589 the following SHAD/SHLD, the latency of the load is increased 9590 by 1 cycle. */ 9591 if (get_attr_type (insn) == TYPE_DYN_SHIFT 9592 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES 9593 && reg_overlap_mentioned_p (SET_DEST (dep_set), 9594 XEXP (SET_SRC (single_set (insn)), 9595 1))) 9596 cost++; 9597 /* When an LS group instruction with a latency of less than 9598 3 cycles is followed by a double-precision floating-point 9599 instruction, FIPR, or FTRV, the latency of the first 9600 instruction is increased to 3 cycles. */ 9601 else if (cost < 3 9602 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP 9603 && get_attr_dfp_comp (insn) == DFP_COMP_YES) 9604 cost = 3; 9605 /* The lsw register of a double-precision computation is ready one 9606 cycle earlier. */ 9607 else if (reload_completed 9608 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES 9609 && (use_pat = single_set (insn)) 9610 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))), 9611 SET_SRC (use_pat))) 9612 cost -= 1; 9613 9614 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES 9615 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES) 9616 cost -= 1; 9617 } 9618 else if (TARGET_SH4_300) 9619 { 9620 /* Stores need their input register two cycles later. */ 9621 attr_type type; 9622 if (dep_set && cost >= 1 9623 && ((type = get_attr_type (insn)) == TYPE_STORE 9624 || type == TYPE_PSTORE 9625 || type == TYPE_FSTORE || type == TYPE_MAC_MEM)) 9626 { 9627 rtx set = single_set (insn); 9628 9629 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0)) 9630 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set))) 9631 { 9632 cost -= 2; 9633 /* But don't reduce the cost below 1 if the address depends 9634 on a side effect of dep_insn. */ 9635 if (cost < 1 9636 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn)) 9637 cost = 1; 9638 } 9639 } 9640 } 9641 } 9642 /* An anti-dependence penalty of two applies if the first insn is a double 9643 precision fadd / fsub / fmul. */ 9644 else if (!TARGET_SH4_300 9645 && dep_type == REG_DEP_ANTI 9646 && recog_memoized (dep_insn) >= 0 9647 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH 9648 || get_attr_type (dep_insn) == TYPE_DFP_MUL) 9649 /* A lot of alleged anti-flow dependences are fake, 9650 so check this one is real. */ 9651 && flow_dependent_p (dep_insn, insn)) 9652 cost = 2; 9653 9654 return cost; 9655 } 9656 9657 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check 9658 if DEP_INSN is anti-flow dependent on INSN. */ 9659 static bool 9660 flow_dependent_p (rtx insn, rtx dep_insn) 9661 { 9662 rtx tmp = PATTERN (insn); 9663 9664 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp); 9665 return tmp == NULL_RTX; 9666 } 9667 9668 /* A helper function for flow_dependent_p called through note_stores. 
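flow_dependent_p seeds DATA with the pattern of INSN; note_stores then invokes this callback for everything DEP_INSN stores, and we clear DATA as soon as a stored register is referenced by that pattern, so a NULL result indicates a real flow dependence.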
*/ 9669 static void 9670 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data) 9671 { 9672 rtx * pinsn = (rtx *) data; 9673 9674 if (*pinsn && reg_referenced_p (x, *pinsn)) 9675 *pinsn = NULL_RTX; 9676 } 9677 9678 /* For use by sh_allocate_initial_value. Note that sh.md contains some 9679 'special function' patterns (type sfunc) that clobber pr, but that 9680 do not look like function calls to leaf_function_p. Hence we must 9681 do this extra check. */ 9682 static int 9683 sh_pr_n_sets (void) 9684 { 9685 return DF_REG_DEF_COUNT (PR_REG); 9686 } 9687 9688 /* Return where to allocate pseudo for a given hard register initial 9689 value. */ 9690 static rtx 9691 sh_allocate_initial_value (rtx hard_reg) 9692 { 9693 if (REGNO (hard_reg) == PR_REG) 9694 { 9695 if (crtl->is_leaf && ! sh_pr_n_sets ()) 9696 return hard_reg; 9697 else 9698 return gen_frame_mem (Pmode, return_address_pointer_rtx); 9699 } 9700 9701 return NULL_RTX; 9702 } 9703 9704 /* This function returns "2" to indicate dual issue for the SH4 9705 processor. To be used by the DFA pipeline description. */ 9706 static int 9707 sh_issue_rate (void) 9708 { 9709 if (TARGET_SUPERSCALAR) 9710 return 2; 9711 else 9712 return 1; 9713 } 9714 9715 /* Functions for ready queue reordering for sched1. */ 9716 9717 /* Get weight for mode for a set x. */ 9718 static short 9719 find_set_regmode_weight (rtx x, machine_mode mode) 9720 { 9721 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode)) 9722 return 1; 9723 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode)) 9724 { 9725 if (REG_P (SET_DEST (x))) 9726 { 9727 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x))) 9728 return 1; 9729 else 9730 return 0; 9731 } 9732 return 1; 9733 } 9734 return 0; 9735 } 9736 9737 /* Get regmode weight for insn. */ 9738 static short 9739 find_insn_regmode_weight (rtx insn, machine_mode mode) 9740 { 9741 /* Increment weight for each register born here. */ 9742 rtx x = PATTERN (insn); 9743 short reg_weight = find_set_regmode_weight (x, mode); 9744 if (GET_CODE (x) == PARALLEL) 9745 { 9746 int j; 9747 for (j = XVECLEN (x, 0) - 1; j >= 0; j--) 9748 { 9749 x = XVECEXP (PATTERN (insn), 0, j); 9750 reg_weight += find_set_regmode_weight (x, mode); 9751 } 9752 } 9753 /* Decrement weight for each register that dies here. */ 9754 for (x = REG_NOTES (insn); x; x = XEXP (x, 1)) 9755 { 9756 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED) 9757 { 9758 rtx note = XEXP (x, 0); 9759 if (REG_P (note) && GET_MODE (note) == mode) 9760 reg_weight--; 9761 } 9762 } 9763 return reg_weight; 9764 } 9765 9766 /* Calculate regmode weights for all insns of a basic block. */ 9767 static void 9768 find_regmode_weight (basic_block b, machine_mode mode) 9769 { 9770 rtx_insn *insn, *next_tail, *head, *tail; 9771 9772 get_ebb_head_tail (b, b, &head, &tail); 9773 next_tail = NEXT_INSN (tail); 9774 9775 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) 9776 { 9777 /* Handle register life information. */ 9778 if (!INSN_P (insn)) 9779 continue; 9780 9781 if (mode == SFmode) 9782 INSN_REGMODE_WEIGHT (insn, mode) = 9783 find_insn_regmode_weight (insn, mode) 9784 + 2 * find_insn_regmode_weight (insn, DFmode); 9785 else if (mode == SImode) 9786 INSN_REGMODE_WEIGHT (insn, mode) = 9787 find_insn_regmode_weight (insn, mode) 9788 + 2 * find_insn_regmode_weight (insn, DImode); 9789 } 9790 } 9791 9792 /* Comparison function for ready queue sorting. 
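Note that X and Y are read swapped below; assuming the scheduler's usual convention of issuing from the back of the ready vector, this places the lowest-LUID (earliest) insns where they will be issued first.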
*/ 9793 static int 9794 rank_for_reorder (const void *x, const void *y) 9795 { 9796 rtx_insn *tmp = *(rtx_insn * const *) y; 9797 rtx_insn *tmp2 = *(rtx_insn * const *) x; 9798 9799 /* The insn in a schedule group should be issued the first. */ 9800 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2)) 9801 return SCHED_GROUP_P (tmp2) ? 1 : -1; 9802 9803 /* If insns are equally good, sort by INSN_LUID (original insn order), This 9804 minimizes instruction movement, thus minimizing sched's effect on 9805 register pressure. */ 9806 return INSN_LUID (tmp) - INSN_LUID (tmp2); 9807 } 9808 9809 /* Resort the array A in which only element at index N may be out of order. */ 9810 static void 9811 swap_reorder (rtx_insn **a, int n) 9812 { 9813 rtx_insn *insn = a[n - 1]; 9814 int i = n - 2; 9815 9816 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0) 9817 { 9818 a[i + 1] = a[i]; 9819 i -= 1; 9820 } 9821 a[i + 1] = insn; 9822 } 9823 9824 /* Sort the ready list by ascending priority. */ 9825 static void 9826 ready_reorder (rtx_insn **ready, int nready) 9827 { 9828 if (nready == 2) 9829 swap_reorder (ready, nready); 9830 else if (nready > 2) 9831 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder); 9832 } 9833 9834 /* Count life regions of r0 for a block. */ 9835 static int 9836 find_r0_life_regions (basic_block b) 9837 { 9838 bool live; 9839 int set; 9840 int death = 0; 9841 9842 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG)) 9843 { 9844 set = 1; 9845 live = true; 9846 } 9847 else 9848 { 9849 set = 0; 9850 live = false; 9851 } 9852 9853 rtx_insn* insn = BB_HEAD (b); 9854 rtx_insn* end = BB_END (b); 9855 rtx r0_reg = gen_rtx_REG (SImode, R0_REG); 9856 while (1) 9857 { 9858 if (INSN_P (insn)) 9859 { 9860 if (find_regno_note (insn, REG_DEAD, R0_REG)) 9861 { 9862 death++; 9863 live = false; 9864 } 9865 9866 rtx pset; 9867 if (!live 9868 && (pset = single_set (insn)) 9869 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset)) 9870 && !find_regno_note (insn, REG_UNUSED, R0_REG)) 9871 { 9872 set++; 9873 live = true; 9874 } 9875 } 9876 if (insn == end) 9877 break; 9878 insn = NEXT_INSN (insn); 9879 } 9880 return set - death; 9881 } 9882 9883 /* Calculate regmode weights for all insns of all basic block. */ 9884 static void 9885 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED, 9886 int verbose ATTRIBUTE_UNUSED, 9887 int old_max_uid) 9888 { 9889 basic_block b; 9890 9891 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short)); 9892 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short)); 9893 r0_life_regions = 0; 9894 9895 FOR_EACH_BB_REVERSE_FN (b, cfun) 9896 { 9897 find_regmode_weight (b, SImode); 9898 find_regmode_weight (b, SFmode); 9899 if (!reload_completed) 9900 r0_life_regions += find_r0_life_regions (b); 9901 } 9902 9903 CURR_REGMODE_PRESSURE (SImode) = 0; 9904 CURR_REGMODE_PRESSURE (SFmode) = 0; 9905 } 9906 9907 /* Cleanup. */ 9908 static void 9909 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED, 9910 int verbose ATTRIBUTE_UNUSED) 9911 { 9912 if (regmode_weight[0]) 9913 { 9914 free (regmode_weight[0]); 9915 regmode_weight[0] = NULL; 9916 } 9917 if (regmode_weight[1]) 9918 { 9919 free (regmode_weight[1]); 9920 regmode_weight[1] = NULL; 9921 } 9922 } 9923 9924 /* Cache the can_issue_more so that we can return it from reorder2. Also, 9925 keep count of register pressures on SImode and SFmode. 
*/ 9926 static int 9927 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED, 9928 int sched_verbose ATTRIBUTE_UNUSED, 9929 rtx_insn *insn, 9930 int can_issue_more) 9931 { 9932 if (GET_CODE (PATTERN (insn)) != USE 9933 && GET_CODE (PATTERN (insn)) != CLOBBER) 9934 cached_can_issue_more = can_issue_more - 1; 9935 else 9936 cached_can_issue_more = can_issue_more; 9937 9938 if (reload_completed) 9939 return cached_can_issue_more; 9940 9941 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode); 9942 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode); 9943 9944 return cached_can_issue_more; 9945 } 9946 9947 static void 9948 sh_md_init (FILE *dump ATTRIBUTE_UNUSED, 9949 int verbose ATTRIBUTE_UNUSED, 9950 int veclen ATTRIBUTE_UNUSED) 9951 { 9952 CURR_REGMODE_PRESSURE (SImode) = 0; 9953 CURR_REGMODE_PRESSURE (SFmode) = 0; 9954 } 9955 9956 /* Some magic numbers. */ 9957 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for 9958 functions that already have high pressure on r0. */ 9959 #define R0_MAX_LIFE_REGIONS 2 9960 /* Register Pressure thresholds for SImode and SFmode registers. */ 9961 #define SIMODE_MAX_WEIGHT 5 9962 #define SFMODE_MAX_WEIGHT 10 9963 9964 /* Return true if the pressure is high for MODE. */ 9965 static bool 9966 high_pressure (machine_mode mode) 9967 { 9968 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for 9969 functions that already have high pressure on r0. */ 9970 if (r0_life_regions >= R0_MAX_LIFE_REGIONS) 9971 return true; 9972 9973 if (mode == SFmode) 9974 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT); 9975 else 9976 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT); 9977 } 9978 9979 /* Reorder ready queue if register pressure is high. */ 9980 static int 9981 sh_reorder (FILE *dump ATTRIBUTE_UNUSED, 9982 int sched_verbose ATTRIBUTE_UNUSED, 9983 rtx_insn **ready, 9984 int *n_readyp, 9985 int clock_var ATTRIBUTE_UNUSED) 9986 { 9987 if (reload_completed) 9988 return sh_issue_rate (); 9989 9990 if (high_pressure (SFmode) || high_pressure (SImode)) 9991 { 9992 ready_reorder (ready, *n_readyp); 9993 } 9994 9995 return sh_issue_rate (); 9996 } 9997 9998 /* Skip cycles if the current register pressure is high. */ 9999 static int 10000 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED, 10001 int sched_verbose ATTRIBUTE_UNUSED, 10002 rtx_insn **ready ATTRIBUTE_UNUSED, 10003 int *n_readyp ATTRIBUTE_UNUSED, 10004 int clock_var ATTRIBUTE_UNUSED) 10005 { 10006 if (reload_completed) 10007 return cached_can_issue_more; 10008 10009 if (high_pressure(SFmode) || high_pressure (SImode)) 10010 skip_cycles = 1; 10011 10012 return cached_can_issue_more; 10013 } 10014 10015 /* Skip cycles without sorting the ready queue. This will move insns from 10016 Q->R. If this is the last cycle we are skipping, allow sorting of the 10017 ready queue by sh_reorder. */ 10018 10019 /* Generally, skipping this many cycles is sufficient for all insns to move 10020 from Q -> R. */ 10021 #define MAX_SKIPS 8 10022 10023 static int 10024 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED, 10025 int sched_verbose ATTRIBUTE_UNUSED, 10026 rtx_insn *insn ATTRIBUTE_UNUSED, 10027 int last_clock_var, 10028 int clock_var, 10029 int *sort_p) 10030 { 10031 if (reload_completed) 10032 return 0; 10033 10034 if (skip_cycles) 10035 { 10036 if ((clock_var - last_clock_var) < MAX_SKIPS) 10037 { 10038 *sort_p = 0; 10039 return 1; 10040 } 10041 /* If this is the last cycle we are skipping, allow reordering of R.
*/ 10042 if ((clock_var - last_clock_var) == MAX_SKIPS) 10043 { 10044 *sort_p = 1; 10045 return 1; 10046 } 10047 } 10048 10049 skip_cycles = 0; 10050 10051 return 0; 10052 } 10053 10054 static bool 10055 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED) 10056 { 10057 return TARGET_HITACHI || sh_attr_renesas_p (record_type); 10058 } 10059 10060 /* 10061 On the SH1..SH4, the trampoline looks like 10062 2 0002 D202 mov.l l2,r2 10063 1 0000 D301 mov.l l1,r3 10064 3 0004 422B jmp @r2 10065 4 0006 0009 nop 10066 5 0008 00000000 l1: .long area 10067 6 000c 00000000 l2: .long function 10068 10069 FDPIC needs a form that includes a function descriptor and 10070 code to load the GOT register: 10071 0 0000 00000000 .long l0 10072 1 0004 00000000 .long gotval 10073 2 0008 D302 l0: mov.l l1,r3 10074 3 000a D203 mov.l l2,r2 10075 4 000c 6122 mov.l @r2,r1 10076 5 000e 5C21 mov.l @(4,r2),r12 10077 6 0010 412B jmp @r1 10078 7 0012 0009 nop 10079 8 0014 00000000 l1: .long area 10080 9 0018 00000000 l2: .long function 10081 10082 SH5 (compact) uses r1 instead of r3 for the static chain. */ 10083 10084 /* Emit insns to store a value at memory address + offset. */ 10085 static void 10086 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value) 10087 { 10088 gcc_assert ((offset & 3) == 0); 10089 emit_move_insn (offset == 0 10090 ? change_address (addr, SImode, NULL_RTX) 10091 : adjust_address (addr, SImode, offset), value); 10092 } 10093 10094 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */ 10095 static void 10096 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1) 10097 { 10098 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN 10099 ? (w0 | (w1 << 16)) 10100 : (w1 | (w0 << 16)), SImode)); 10101 } 10102 10103 /* Emit RTL insns to initialize the variable parts of a trampoline. 10104 FNADDR is an RTX for the address of the function's pure code. 10105 CXT is an RTX for the static chain value for the function. */ 10106 static void 10107 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt) 10108 { 10109 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 10110 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0)); 10111 10112 if (TARGET_FDPIC) 10113 { 10114 rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8)); 10115 10116 sh_emit_storesi (tramp_mem, 0, a); 10117 sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ()); 10118 10119 sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203); 10120 sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21); 10121 sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009); 10122 10123 sh_emit_storesi (tramp_mem, 20, cxt); 10124 sh_emit_storesi (tramp_mem, 24, fnaddr); 10125 } 10126 else 10127 { 10128 sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301); 10129 sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009); 10130 10131 sh_emit_storesi (tramp_mem, 8, cxt); 10132 sh_emit_storesi (tramp_mem, 12, fnaddr); 10133 } 10134 if (TARGET_HARD_SH4) 10135 { 10136 if (!TARGET_INLINE_IC_INVALIDATE 10137 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE)) 10138 emit_library_call (function_symbol (NULL, "__ic_invalidate", 10139 FUNCTION_ORDINARY).sym, 10140 LCT_NORMAL, VOIDmode, tramp, SImode); 10141 else 10142 emit_insn (gen_ic_invalidate_line (tramp)); 10143 } 10144 } 10145 10146 /* On SH5, trampolines are SHmedia code, so add 1 to the address. 
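(SH5 / SHmedia support has since been removed, so the hook below simply returns the address unchanged.)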
*/ 10147 static rtx 10148 sh_trampoline_adjust_address (rtx tramp) 10149 { 10150 return tramp; 10151 } 10152 10153 /* If PIC, we cannot make sibling calls to global functions 10154 because the PLT requires r12 to be live. */ 10155 static bool 10156 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 10157 { 10158 return (1 10159 && ! sh_cfun_interrupt_handler_p () 10160 && (! flag_pic || TARGET_FDPIC 10161 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl))) 10162 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT))); 10163 } 10164 10165 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */ 10166 void 10167 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p) 10168 { 10169 const_tree decl = SYMBOL_REF_DECL (sym); 10170 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl)); 10171 10172 if (!is_weak && SYMBOL_REF_LOCAL_P (sym)) 10173 emit_insn (gen_sym_label2reg (reg, sym, lab)); 10174 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym)) 10175 emit_insn (gen_symPCREL_label2reg (reg, sym, lab)); 10176 else 10177 emit_insn (gen_symPLT_label2reg (reg, sym, lab)); 10178 } 10179 10180 /* Machine specific built-in functions. */ 10181 10182 struct builtin_description 10183 { 10184 bool (* const is_enabled) (void); 10185 const enum insn_code icode; 10186 const char *const name; 10187 int signature; 10188 tree fndecl; 10189 }; 10190 10191 /* This function can be used if there are any built-ins that are not for 10192 SHmedia. It's commented out to avoid the defined-but-unused warning. */ 10193 static bool 10194 sh1_builtin_p (void) 10195 { 10196 return TARGET_SH1; 10197 } 10198 10199 /* describe number and signedness of arguments; arg[0] == result 10200 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */ 10201 /* 9: 64-bit pointer, 10: 32-bit pointer */ 10202 static const char signature_args[][4] = 10203 { 10204 #define SH_BLTIN_V2SI2 0 10205 { 4, 4 }, 10206 #define SH_BLTIN_V4HI2 1 10207 { 4, 4 }, 10208 #define SH_BLTIN_V2SI3 2 10209 { 4, 4, 4 }, 10210 #define SH_BLTIN_V4HI3 3 10211 { 4, 4, 4 }, 10212 #define SH_BLTIN_V8QI3 4 10213 { 4, 4, 4 }, 10214 #define SH_BLTIN_MAC_HISI 5 10215 { 1, 4, 4, 1 }, 10216 #define SH_BLTIN_SH_HI 6 10217 { 4, 4, 1 }, 10218 #define SH_BLTIN_SH_SI 7 10219 { 4, 4, 1 }, 10220 #define SH_BLTIN_V4HI2V2SI 8 10221 { 4, 4, 4 }, 10222 #define SH_BLTIN_V4HI2V8QI 9 10223 { 4, 4, 4 }, 10224 #define SH_BLTIN_SISF 10 10225 { 4, 2 }, 10226 #define SH_BLTIN_LDUA_L 11 10227 { 2, 10 }, 10228 #define SH_BLTIN_LDUA_Q 12 10229 { 1, 10 }, 10230 #define SH_BLTIN_STUA_L 13 10231 { 0, 10, 2 }, 10232 #define SH_BLTIN_STUA_Q 14 10233 { 0, 10, 1 }, 10234 #define SH_BLTIN_LDUA_L64 15 10235 { 2, 9 }, 10236 #define SH_BLTIN_LDUA_Q64 16 10237 { 1, 9 }, 10238 #define SH_BLTIN_STUA_L64 17 10239 { 0, 9, 2 }, 10240 #define SH_BLTIN_STUA_Q64 18 10241 { 0, 9, 1 }, 10242 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19 10243 #define SH_BLTIN_2 19 10244 #define SH_BLTIN_SU 19 10245 { 1, 2 }, 10246 #define SH_BLTIN_3 20 10247 #define SH_BLTIN_SUS 20 10248 { 2, 2, 1 }, 10249 #define SH_BLTIN_PSSV 21 10250 { 0, 8, 2, 2 }, 10251 #define SH_BLTIN_XXUU 22 10252 #define SH_BLTIN_UUUU 22 10253 { 1, 1, 1, 1 }, 10254 #define SH_BLTIN_PV 23 10255 { 0, 8 }, 10256 #define SH_BLTIN_VP 24 10257 { 8, 0 }, 10258 #define SH_BLTIN_UV 25 10259 { 1, 0 }, 10260 #define SH_BLTIN_VU 26 10261 { 0, 1 }, 10262 }; 10263 /* mcmv: operands considered unsigned. */ 10264 /* mmulsum_wq, msad_ubq: result considered unsigned long long. 
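As an illustration of the signature encoding above: SH_BLTIN_UV == { 1, 0 } describes __builtin_sh_get_fpscr (unsigned result, no arguments), while SH_BLTIN_VU == { 0, 1 } describes __builtin_sh_set_fpscr (void result, one unsigned argument).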
*/ 10265 /* mperm: control value considered unsigned int. */ 10266 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */ 10267 /* mshards_q: returns signed short. */ 10268 /* nsb: takes long long arg, returns unsigned char. */ 10269 static struct builtin_description bdesc[] = 10270 { 10271 { sh1_builtin_p, 10272 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 }, 10273 { sh1_builtin_p, 10274 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 }, 10275 }; 10276 10277 static tree sh_builtin_get_fpscr; 10278 static tree sh_builtin_set_fpscr; 10279 10280 static void 10281 sh_init_builtins (void) 10282 { 10283 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES]; 10284 memset (shared, 0, sizeof shared); 10285 10286 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di) 10287 { 10288 builtin_description* d = &bdesc[di]; 10289 10290 if (!d->is_enabled ()) 10291 continue; 10292 10293 tree type, arg_type = NULL_TREE; 10294 int signature = d->signature; 10295 10296 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature]) 10297 type = shared[signature]; 10298 else 10299 { 10300 int has_result = signature_args[signature][0] != 0; 10301 tree args[3]; 10302 10303 if (! TARGET_FPU_ANY 10304 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode)) 10305 continue; 10306 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++) 10307 args[i] = NULL_TREE; 10308 for (int i = 3; ; i--) 10309 { 10310 int arg = signature_args[signature][i]; 10311 int opno = i - 1 + has_result; 10312 10313 if (arg & 8) 10314 arg_type = ptr_type_node; 10315 else if (arg) 10316 arg_type = (*lang_hooks.types.type_for_mode) 10317 (insn_data[d->icode].operand[opno].mode, (arg & 1)); 10318 else if (i) 10319 continue; 10320 else 10321 arg_type = void_type_node; 10322 if (i == 0) 10323 break; 10324 args[i-1] = arg_type; 10325 } 10326 type = build_function_type_list (arg_type, args[0], args[1], 10327 args[2], NULL_TREE); 10328 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES) 10329 shared[signature] = type; 10330 } 10331 d->fndecl = 10332 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD, 10333 NULL, NULL_TREE); 10334 /* Recode {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */ 10335 if (d->icode == CODE_FOR_sts_fpscr) 10336 sh_builtin_get_fpscr = d->fndecl; 10337 else if (d->icode == CODE_FOR_set_fpscr) 10338 sh_builtin_set_fpscr = d->fndecl; 10339 } 10340 } 10341 10342 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ 10343 10344 static void 10345 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 10346 { 10347 const unsigned SH_FE_INVALID = 64; 10348 const unsigned SH_FE_DIVBYZERO = 32; 10349 const unsigned SH_FE_OVERFLOW = 16; 10350 const unsigned SH_FE_UNDERFLOW = 8; 10351 const unsigned SH_FE_INEXACT = 4; 10352 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID 10353 | SH_FE_DIVBYZERO 10354 | SH_FE_OVERFLOW 10355 | SH_FE_UNDERFLOW 10356 | SH_FE_INEXACT); 10357 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5; 10358 tree fenv_var, mask, ld_fenv, masked_fenv; 10359 tree new_fenv_var, reload_fenv, restore_fnenv; 10360 tree update_call, atomic_feraiseexcept, hold_fnclex; 10361 10362 if (! 
TARGET_FPU_ANY) 10363 return; 10364 10365 /* Generate the equivalent of : 10366 unsigned int fenv_var; 10367 fenv_var = __builtin_sh_get_fpscr (); 10368 10369 unsigned int masked_fenv; 10370 masked_fenv = fenv_var & mask; 10371 10372 __builtin_sh_set_fpscr (masked_fenv); */ 10373 10374 fenv_var = create_tmp_var_raw (unsigned_type_node); 10375 mask = build_int_cst (unsigned_type_node, 10376 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT) 10377 | SH_FE_ALL_EXCEPT)); 10378 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node, 10379 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0)); 10380 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask); 10381 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv); 10382 fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, 10383 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, 10384 ld_fenv), 10385 NULL_TREE, NULL_TREE); 10386 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex); 10387 10388 /* Store the value of masked_fenv to clear the exceptions: 10389 __builtin_sh_set_fpscr (masked_fenv); */ 10390 10391 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv); 10392 10393 /* Generate the equivalent of : 10394 unsigned int new_fenv_var; 10395 new_fenv_var = __builtin_sh_get_fpscr (); 10396 10397 __builtin_sh_set_fpscr (fenv_var); 10398 10399 __atomic_feraiseexcept (new_fenv_var); */ 10400 10401 new_fenv_var = create_tmp_var_raw (unsigned_type_node); 10402 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var, 10403 build_call_expr (sh_builtin_get_fpscr, 0)); 10404 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var); 10405 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); 10406 update_call = build_call_expr (atomic_feraiseexcept, 1, 10407 fold_convert (integer_type_node, 10408 new_fenv_var)); 10409 *update = build2 (COMPOUND_EXPR, void_type_node, 10410 build2 (COMPOUND_EXPR, void_type_node, 10411 reload_fenv, restore_fnenv), update_call); 10412 } 10413 10414 /* Implements target hook vector_mode_supported_p. */ 10415 bool 10416 sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED) 10417 { 10418 return false; 10419 } 10420 10421 bool 10422 sh_frame_pointer_required (void) 10423 { 10424 /* If needed override this in other tm.h files to cope with various OS 10425 lossage requiring a frame pointer. */ 10426 if (SUBTARGET_FRAME_POINTER_REQUIRED) 10427 return true; 10428 10429 if (crtl->profile) 10430 return true; 10431 10432 return false; 10433 } 10434 10435 /* Implements target hook dwarf_calling_convention. Return an enum 10436 of dwarf_calling_convention. */ 10437 int 10438 sh_dwarf_calling_convention (const_tree func) 10439 { 10440 if (sh_attr_renesas_p (func)) 10441 return DW_CC_GNU_renesas_sh; 10442 10443 return DW_CC_normal; 10444 } 10445 10446 /* Returns the sh builtin decl for CODE. */ 10447 static tree 10448 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 10449 { 10450 if (code >= ARRAY_SIZE (bdesc)) 10451 return error_mark_node; 10452 10453 if (!bdesc[code].is_enabled ()) 10454 return error_mark_node; 10455 10456 return bdesc[code].fndecl; 10457 } 10458 10459 /* Expand an expression EXP that calls a built-in function, 10460 with result going to TARGET if that's convenient 10461 (and in mode MODE if that's convenient). 10462 SUBTARGET may be used as the target for computing one of EXP's operands. 10463 IGNORE is nonzero if the value is to be ignored. 
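As a rough sketch of the flow for one of the entries above: a call to __builtin_sh_set_fpscr (x) arrives here with FCODE indexing its bdesc entry; its signature (SH_BLTIN_VU) says there is no result and one operand, so the loop below expands X, coerces it to the operand's predicate mode if necessary and emits the CODE_FOR_set_fpscr pattern.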
*/ 10464 static rtx 10465 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, 10466 machine_mode mode ATTRIBUTE_UNUSED, int ignore) 10467 { 10468 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 10469 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 10470 const struct builtin_description *d = &bdesc[fcode]; 10471 enum insn_code icode = d->icode; 10472 int signature = d->signature; 10473 int nop = 0; 10474 rtx op[4]; 10475 10476 if (signature_args[signature][0]) 10477 { 10478 if (ignore) 10479 return NULL_RTX; 10480 10481 machine_mode tmode = insn_data[icode].operand[0].mode; 10482 if (! target || GET_MODE (target) != tmode 10483 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 10484 target = gen_reg_rtx (tmode); 10485 op[nop++] = target; 10486 } 10487 else 10488 target = NULL_RTX; 10489 10490 for (int i = 1; i <= 3; i++, nop++) 10491 { 10492 if (! signature_args[signature][i]) 10493 break; 10494 tree arg = CALL_EXPR_ARG (exp, i - 1); 10495 if (arg == error_mark_node) 10496 return const0_rtx; 10497 10498 machine_mode opmode; 10499 tree optype; 10500 if (signature_args[signature][i] & 8) 10501 { 10502 opmode = ptr_mode; 10503 optype = ptr_type_node; 10504 } 10505 else 10506 { 10507 opmode = insn_data[icode].operand[nop].mode; 10508 optype = (*lang_hooks.types.type_for_mode) (opmode, 0); 10509 } 10510 10511 machine_mode argmode = TYPE_MODE (TREE_TYPE (arg)); 10512 if (argmode != opmode) 10513 arg = build1 (NOP_EXPR, optype, arg); 10514 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL); 10515 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode)) 10516 op[nop] = copy_to_mode_reg (opmode, op[nop]); 10517 } 10518 10519 rtx pat = NULL_RTX; 10520 10521 switch (nop) 10522 { 10523 case 1: 10524 pat = (*insn_data[d->icode].genfun) (op[0]); 10525 break; 10526 case 2: 10527 pat = (*insn_data[d->icode].genfun) (op[0], op[1]); 10528 break; 10529 case 3: 10530 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]); 10531 break; 10532 case 4: 10533 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]); 10534 break; 10535 default: 10536 gcc_unreachable (); 10537 } 10538 if (! pat) 10539 return NULL_RTX; 10540 emit_insn (pat); 10541 return target; 10542 } 10543 10544 /* Implement TARGET_HARD_REGNO_NREGS. On the SH all but the XD regs are 10545 UNITS_PER_WORD bits wide. */ 10546 10547 static unsigned int 10548 sh_hard_regno_nregs (unsigned int regno, machine_mode mode) 10549 { 10550 if (XD_REGISTER_P (regno)) 10551 return CEIL (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD); 10552 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); 10553 } 10554 10555 /* Implement TARGET_HARD_REGNO_MODE_OK. 10556 10557 We can allow any mode in any general register. The special registers 10558 only allow SImode. Don't allow any mode in the PR. 10559 10560 We cannot hold DCmode values in the XD registers because alter_reg 10561 handles subregs of them incorrectly. We could work around this by 10562 spacing the XD registers like the DR registers, but this would require 10563 additional memory in every compilation to hold larger register vectors. 10564 We could hold SFmode / SCmode values in XD registers, but that 10565 would require a tertiary reload when reloading from / to memory, 10566 and a secondary reload to reload from / to general regs; that 10567 seems to be a losing proposition. 10568 10569 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode, 10570 it won't be ferried through GP registers first. 
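For instance, under the checks below a DFmode value may only live in an even/odd FP register pair (fr0/fr1, fr2/fr3, ...), and TImode in the FP bank must start on a four-register boundary.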
*/ 10571 static bool 10572 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode) 10573 { 10574 if (SPECIAL_REGISTER_P (regno)) 10575 return mode == SImode; 10576 10577 if (regno == FPUL_REG) 10578 return (mode == SImode || mode == SFmode); 10579 10580 if (FP_REGISTER_P (regno) && mode == SFmode) 10581 return true; 10582 10583 if (mode == V2SFmode) 10584 { 10585 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0) 10586 || GENERAL_REGISTER_P (regno))) 10587 return true; 10588 else 10589 return false; 10590 } 10591 10592 if (mode == V4SFmode) 10593 { 10594 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0) 10595 || GENERAL_REGISTER_P (regno)) 10596 return true; 10597 else 10598 return false; 10599 } 10600 10601 if (mode == V16SFmode) 10602 return regno == FIRST_XD_REG; 10603 10604 if (FP_REGISTER_P (regno)) 10605 { 10606 if (mode == SFmode 10607 || mode == SImode 10608 || ((TARGET_SH2E) && mode == SCmode) 10609 || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode) 10610 && ((regno - FIRST_FP_REG) & 1) == 0) 10611 || (TARGET_SH4 && mode == TImode 10612 && ((regno - FIRST_FP_REG) & 3) == 0)) 10613 return true; 10614 else 10615 return false; 10616 } 10617 10618 if (XD_REGISTER_P (regno)) 10619 return mode == DFmode; 10620 10621 if (regno == PR_REG) 10622 return mode == SImode; 10623 10624 if (regno == FPSCR_REG) 10625 return mode == SImode; 10626 10627 return true; 10628 } 10629 10630 /* Implement TARGET_MODES_TIEABLE_P. 10631 10632 If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1 10633 and MODE2, for any hard reg, then this must be false for correct output. 10634 That's the case for xd registers: we don't hold SFmode values in 10635 them, so we can't tie an SFmode pseudos with one in another 10636 floating-point mode. */ 10637 10638 static bool 10639 sh_modes_tieable_p (machine_mode mode1, machine_mode mode2) 10640 { 10641 return (mode1 == mode2 10642 || (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2) 10643 && (mode1 != SFmode && mode2 != SFmode))); 10644 } 10645 10646 /* Specify the modes required to caller save a given hard regno. 10647 choose_hard_reg_mode chooses mode based on TARGET_HARD_REGNO_MODE_OK 10648 and returns ?Imode for float regs when sh_hard_regno_mode_ok 10649 permits integer modes on them. That makes LRA's split process 10650 unhappy. See PR55212. 10651 */ 10652 machine_mode 10653 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs, 10654 machine_mode mode) 10655 { 10656 if (FP_REGISTER_P (regno) 10657 && (mode == SFmode 10658 || mode == SCmode 10659 || ((mode == DFmode || mode == DCmode) 10660 && ((regno - FIRST_FP_REG) & 1) == 0))) 10661 return mode; 10662 10663 return choose_hard_reg_mode (regno, nregs, false); 10664 } 10665 10666 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ 10667 static bool 10668 sh_can_change_mode_class (machine_mode from, machine_mode to, 10669 reg_class_t rclass) 10670 { 10671 /* We want to enable the use of SUBREGs as a means to 10672 VEC_SELECT a single element of a vector. */ 10673 10674 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs. 10675 This can be problematic when SFmode vector subregs need to be accessed 10676 on the stack with displacement addressing, as it happens with -O0. 10677 Thus we disallow the mode change for -O0. */ 10678 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode) 10679 return optimize ? 
!reg_classes_intersect_p (GENERAL_REGS, rclass) : true; 10680 10681 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to)) 10682 { 10683 if (TARGET_LITTLE_ENDIAN) 10684 { 10685 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8) 10686 return !reg_classes_intersect_p (DF_REGS, rclass); 10687 } 10688 else 10689 { 10690 if (GET_MODE_SIZE (from) < 8) 10691 return !reg_classes_intersect_p (DF_REGS, rclass); 10692 } 10693 } 10694 return true; 10695 } 10696 10697 /* Return true if registers in machine mode MODE will likely be 10698 allocated to registers in small register classes. */ 10699 bool 10700 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED) 10701 { 10702 return true; 10703 } 10704 10705 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times 10706 that label is used. */ 10707 void 10708 sh_mark_label (rtx address, int nuses) 10709 { 10710 if (GOTOFF_P (address)) 10711 { 10712 /* Extract the label or symbol. */ 10713 address = XEXP (address, 0); 10714 if (GET_CODE (address) == PLUS) 10715 address = XEXP (address, 0); 10716 address = XVECEXP (address, 0, 0); 10717 } 10718 if (GET_CODE (address) == LABEL_REF 10719 && LABEL_P (XEXP (address, 0))) 10720 LABEL_NUSES (XEXP (address, 0)) += nuses; 10721 } 10722 10723 /* Compute extra cost of moving data between one register class 10724 and another. 10725 10726 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass 10727 uses this information. Hence, the general register <-> floating point 10728 register information here is not used for SFmode. */ 10729 static int 10730 sh_register_move_cost (machine_mode mode, 10731 reg_class_t srcclass, reg_class_t dstclass) 10732 { 10733 if (dstclass == T_REGS || dstclass == PR_REGS) 10734 return 10; 10735 10736 if (dstclass == MAC_REGS && srcclass == MAC_REGS) 10737 return 4; 10738 10739 if (mode == SImode && TARGET_FMOVD 10740 && REGCLASS_HAS_FP_REG (srcclass) 10741 && REGCLASS_HAS_FP_REG (dstclass)) 10742 return 4; 10743 10744 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS) 10745 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7); 10746 10747 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS) 10748 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass))) 10749 return 9; 10750 10751 if ((REGCLASS_HAS_FP_REG (dstclass) 10752 && REGCLASS_HAS_GENERAL_REG (srcclass)) 10753 || (REGCLASS_HAS_GENERAL_REG (dstclass) 10754 && REGCLASS_HAS_FP_REG (srcclass))) 10755 { 10756 /* Discourage trying to use fp regs for a pointer. This also 10757 discourages fp regs with SImode because Pmode is an alias 10758 of SImode on this target. See PR target/48596. */ 10759 int addend = (mode == Pmode) ? 40 : 0; 10760 10761 return ((TARGET_FMOVD ? 8 : 12) + addend) 10762 * ((GET_MODE_SIZE (mode) + 7) / 8U); 10763 } 10764 10765 if ((dstclass == FPUL_REGS 10766 && REGCLASS_HAS_GENERAL_REG (srcclass)) 10767 || (srcclass == FPUL_REGS 10768 && REGCLASS_HAS_GENERAL_REG (dstclass))) 10769 return 5; 10770 10771 if ((dstclass == FPUL_REGS 10772 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS)) 10773 || (srcclass == FPUL_REGS 10774 && (dstclass == PR_REGS || dstclass == MAC_REGS))) 10775 return 7; 10776 10777 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass)) 10778 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass))) 10779 return 4; 10780 10781 if (TARGET_FMOVD 10782 && ! REGCLASS_HAS_GENERAL_REG (srcclass) 10783 && ! 
REGCLASS_HAS_GENERAL_REG (dstclass)) 10784 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U); 10785 10786 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U); 10787 } 10788 10789 static rtx 10790 emit_load_ptr (rtx reg, rtx addr) 10791 { 10792 rtx mem = gen_const_mem (ptr_mode, addr); 10793 10794 if (Pmode != ptr_mode) 10795 mem = gen_rtx_SIGN_EXTEND (Pmode, mem); 10796 return emit_move_insn (reg, mem); 10797 } 10798 10799 static void 10800 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, 10801 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 10802 tree function) 10803 { 10804 CUMULATIVE_ARGS cum; 10805 int structure_value_byref = 0; 10806 rtx this_rtx, this_value, sibcall, funexp; 10807 rtx_insn *insns; 10808 tree funtype = TREE_TYPE (function); 10809 int simple_add = CONST_OK_FOR_ADD (delta); 10810 int did_load = 0; 10811 rtx scratch0, scratch1, scratch2; 10812 10813 reload_completed = 1; 10814 epilogue_completed = 1; 10815 crtl->uses_only_leaf_regs = 1; 10816 10817 emit_note (NOTE_INSN_PROLOGUE_END); 10818 10819 /* Find the "this" pointer. We have such a wide range of ABIs for the 10820 SH that it's best to do this completely machine independently. 10821 "this" is passed as first argument, unless a structure return pointer 10822 comes first, in which case "this" comes second. */ 10823 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1); 10824 #ifndef PCC_STATIC_STRUCT_RETURN 10825 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 10826 structure_value_byref = 1; 10827 #endif /* not PCC_STATIC_STRUCT_RETURN */ 10828 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0) 10829 { 10830 tree ptype = build_pointer_type (TREE_TYPE (funtype)); 10831 10832 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true); 10833 } 10834 this_rtx 10835 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true); 10836 10837 /* For SHcompact, we only have r0 for a scratch register: r1 is the 10838 static chain pointer (even if you can't have nested virtual functions 10839 right now, someone might implement them sometime), and the rest of the 10840 registers are used for argument passing, are callee-saved, or reserved. */ 10841 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg / 10842 -ffixed-reg has been used. */ 10843 if (! call_used_regs[0] || fixed_regs[0]) 10844 error ("r0 needs to be available as a call-clobbered register"); 10845 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0); 10846 10847 { 10848 if (call_used_regs[1] && ! fixed_regs[1]) 10849 scratch1 = gen_rtx_REG (ptr_mode, 1); 10850 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer 10851 pointing where to return struct values. */ 10852 if (call_used_regs[3] && ! fixed_regs[3]) 10853 scratch2 = gen_rtx_REG (Pmode, 3); 10854 } 10855 10856 this_value = plus_constant (Pmode, this_rtx, delta); 10857 if (vcall_offset 10858 && (simple_add || scratch0 != scratch1) 10859 && strict_memory_address_p (ptr_mode, this_value)) 10860 { 10861 emit_load_ptr (scratch0, this_value); 10862 did_load = 1; 10863 } 10864 10865 if (!delta) 10866 ; /* Do nothing. 
*/ 10867 else if (simple_add) 10868 emit_move_insn (this_rtx, this_value); 10869 else 10870 { 10871 emit_move_insn (scratch1, GEN_INT (delta)); 10872 emit_insn (gen_add2_insn (this_rtx, scratch1)); 10873 } 10874 10875 if (vcall_offset) 10876 { 10877 rtx offset_addr; 10878 10879 if (!did_load) 10880 emit_load_ptr (scratch0, this_rtx); 10881 10882 offset_addr = plus_constant (Pmode, scratch0, vcall_offset); 10883 if (strict_memory_address_p (ptr_mode, offset_addr)) 10884 ; /* Do nothing. */ 10885 else if (scratch0 != scratch1) 10886 { 10887 /* scratch0 != scratch1, and we have indexed loads. Get better 10888 schedule by loading the offset into r1 and using an indexed 10889 load - then the load of r1 can issue before the load from 10890 (this_rtx + delta) finishes. */ 10891 emit_move_insn (scratch1, GEN_INT (vcall_offset)); 10892 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1); 10893 } 10894 else if (CONST_OK_FOR_ADD (vcall_offset)) 10895 { 10896 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset))); 10897 offset_addr = scratch0; 10898 } 10899 else 10900 gcc_unreachable (); /* FIXME */ 10901 emit_load_ptr (scratch0, offset_addr); 10902 10903 if (Pmode != ptr_mode) 10904 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0); 10905 emit_insn (gen_add2_insn (this_rtx, scratch0)); 10906 } 10907 10908 /* Generate a tail call to the target function. */ 10909 if (! TREE_USED (function)) 10910 { 10911 assemble_external (function); 10912 TREE_USED (function) = 1; 10913 } 10914 funexp = XEXP (DECL_RTL (function), 0); 10915 /* If the function is overridden, so is the thunk, hence we don't 10916 need GOT addressing even if this is a public symbol. */ 10917 #if 0 10918 if (TARGET_SH1 && ! flag_weak) 10919 sibcall = gen_sibcalli_thunk (funexp, const0_rtx); 10920 else 10921 #endif 10922 if (TARGET_SH2 && flag_pic) 10923 { 10924 if (TARGET_FDPIC) 10925 { 10926 sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx); 10927 XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2; 10928 } 10929 else 10930 { 10931 sibcall = gen_sibcall_pcrel (funexp, const0_rtx); 10932 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2; 10933 } 10934 } 10935 else 10936 { 10937 emit_move_insn (scratch2, funexp); 10938 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2); 10939 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX); 10940 } 10941 sibcall = emit_call_insn (sibcall); 10942 SIBLING_CALL_P (sibcall) = 1; 10943 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx); 10944 emit_barrier (); 10945 10946 /* Run just enough of rest_of_compilation to do scheduling and get 10947 the insns emitted. Note that use_thunk calls 10948 assemble_start_function and assemble_end_function. */ 10949 10950 insns = get_insns (); 10951 10952 if (optimize > 0) 10953 { 10954 if (! cfun->cfg) 10955 init_flow (cfun); 10956 split_all_insns_noflow (); 10957 } 10958 10959 sh_reorg (); 10960 shorten_branches (insns); 10961 final_start_function (insns, file, 1); 10962 final (insns, file, 1); 10963 final_end_function (); 10964 10965 reload_completed = 0; 10966 epilogue_completed = 0; 10967 } 10968 10969 /* Return an RTX pair for the address and call site label of a function 10970 NAME of kind KIND, placing the result in TARGET if not NULL. For 10971 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to 10972 (const_int 0) if jsr should be used, or a label_ref if bsrf should 10973 be used. 
For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the 10974 address of the function itself, not a function descriptor, so they 10975 can only be used with functions not using the FDPIC register that 10976 are known to be called directly without a PLT entry. */ 10977 10978 function_symbol_result 10979 function_symbol (rtx target, const char *name, sh_function_kind kind) 10980 { 10981 /* If this is not an ordinary function, the name usually comes from a 10982 string literal or an sprintf buffer. Make sure we use the same 10983 string consistently, so that cse will be able to unify address loads. */ 10984 if (kind != FUNCTION_ORDINARY) 10985 name = IDENTIFIER_POINTER (get_identifier (name)); 10986 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name); 10987 rtx lab = const0_rtx; 10988 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION; 10989 if (flag_pic) 10990 switch (kind) 10991 { 10992 case FUNCTION_ORDINARY: 10993 break; 10994 case SFUNC_GOT: 10995 { 10996 rtx reg = target ? target : gen_reg_rtx (Pmode); 10997 10998 emit_insn (gen_symGOT2reg (reg, sym)); 10999 sym = reg; 11000 break; 11001 } 11002 case SFUNC_STATIC: 11003 { 11004 rtx reg = target ? target : gen_reg_rtx (Pmode); 11005 11006 if (TARGET_FDPIC) 11007 { 11008 /* We use PC-relative calls, since GOTOFF can only refer 11009 to writable data. This works along with sh_sfunc_call. */ 11010 lab = PATTERN (gen_call_site ()); 11011 emit_insn (gen_sym_label2reg (reg, sym, lab)); 11012 } 11013 else 11014 { 11015 /* ??? To allow cse to work, we use GOTOFF relocations. 11016 We could add combiner patterns to transform this into 11017 straight pc-relative calls with sym2PIC / bsrf when 11018 label load and function call are still 1:1 and in the 11019 same basic block during combine. */ 11020 emit_insn (gen_symGOTOFF2reg (reg, sym)); 11021 } 11022 11023 sym = reg; 11024 break; 11025 } 11026 } 11027 if (target && sym != target) 11028 { 11029 emit_move_insn (target, sym); 11030 return function_symbol_result (target, lab); 11031 } 11032 return function_symbol_result (sym, lab); 11033 } 11034 11035 /* Find the number of the first general purpose register in S that 11036 is not set. */ 11037 static int 11038 scavenge_reg (HARD_REG_SET *s) 11039 { 11040 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++) 11041 if (TEST_HARD_REG_BIT (*s, r)) 11042 return r; 11043 return -1; 11044 } 11045 11046 rtx 11047 sh_get_pr_initial_val (void) 11048 { 11049 /* If we haven't finished rtl generation, there might be a nonlocal label 11050 that we haven't seen yet. 11051 ??? get_hard_reg_initial_val fails if it is called after register 11052 allocation has started, unless it has been called before for the 11053 same register. And even then, we end in trouble if we didn't use 11054 the register in the same basic block before. So call 11055 get_hard_reg_initial_val now and wrap it in an unspec if we might 11056 need to replace it. */ 11057 /* ??? We also must do this for TARGET_SH1 in general, because otherwise 11058 combine can put the pseudo returned by get_hard_reg_initial_val into 11059 instructions that need a general purpose register, which will fail to 11060 be recognized when the pseudo becomes allocated to PR.
*/ 11061 rtx val = get_hard_reg_initial_val (Pmode, PR_REG); 11062 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA); 11063 } 11064 11065 bool 11066 sh_expand_t_scc (rtx operands[]) 11067 { 11068 enum rtx_code code = GET_CODE (operands[1]); 11069 rtx target = operands[0]; 11070 rtx op0 = operands[2]; 11071 rtx op1 = operands[3]; 11072 rtx result = target; 11073 11074 if (!REG_P (op0) || REGNO (op0) != T_REG 11075 || !CONST_INT_P (op1)) 11076 return false; 11077 if (!REG_P (result)) 11078 result = gen_reg_rtx (SImode); 11079 HOST_WIDE_INT val = INTVAL (op1); 11080 if ((code == EQ && val == 1) || (code == NE && val == 0)) 11081 emit_insn (gen_movt (result, get_t_reg_rtx ())); 11082 else if ((code == EQ && val == 0) || (code == NE && val == 1)) 11083 emit_insn (gen_movnegt (result, get_t_reg_rtx ())); 11084 else if (code == EQ || code == NE) 11085 emit_insn (gen_move_insn (result, GEN_INT (code == NE))); 11086 else 11087 return false; 11088 if (result != target) 11089 emit_move_insn (target, result); 11090 return true; 11091 } 11092 11093 /* INSN is an sfunc; return the rtx that describes the address used. */ 11094 static rtx 11095 extract_sfunc_addr (rtx insn) 11096 { 11097 rtx pattern = PATTERN (insn); 11098 const int len = XVECLEN (pattern, 0); 11099 for (int i = 0; i < len; i++) 11100 { 11101 rtx part = XVECEXP (pattern, 0, i); 11102 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode 11103 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0)))) 11104 return XEXP (part, 0); 11105 } 11106 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE); 11107 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1); 11108 } 11109 11110 /* Verify that the register in use_sfunc_addr still agrees with the address 11111 used in the sfunc. This prevents fill_slots_from_thread from changing 11112 use_sfunc_addr. 11113 INSN is the use_sfunc_addr instruction, and REG is the register it 11114 guards. */ 11115 bool 11116 check_use_sfunc_addr (rtx_insn *insn, rtx reg) 11117 { 11118 /* Search for the sfunc. It should really come right after INSN. */ 11119 while ((insn = NEXT_INSN (insn))) 11120 { 11121 if (LABEL_P (insn) || JUMP_P (insn)) 11122 break; 11123 if (! INSN_P (insn)) 11124 continue; 11125 11126 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn))) 11127 insn = seq->insn (0); 11128 if (GET_CODE (PATTERN (insn)) != PARALLEL 11129 || get_attr_type (insn) != TYPE_SFUNC) 11130 continue; 11131 return rtx_equal_p (extract_sfunc_addr (insn), reg); 11132 } 11133 gcc_unreachable (); 11134 } 11135 11136 /* This function returns a constant rtx that represents 2**15 / pi in 11137 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction 11138 of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */ 11139 static GTY(()) rtx sh_fsca_sf2int_rtx; 11140 11141 rtx 11142 sh_fsca_sf2int (void) 11143 { 11144 if (! sh_fsca_sf2int_rtx) 11145 { 11146 REAL_VALUE_TYPE rv; 11147 11148 real_from_string (&rv, "10430.378350470453"); 11149 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode); 11150 } 11151 11152 return sh_fsca_sf2int_rtx; 11153 } 11154 11155 /* This function returns a constant rtx that represents pi / 2**15 in 11156 SFmode. It's used to scale SFmode angles, in radians, to a 11157 fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi 11158 maps to 0x10000. */ 11159 static GTY(()) rtx sh_fsca_int2sf_rtx; 11160 11161 rtx 11162 sh_fsca_int2sf (void) 11163 { 11164 if (! 
sh_fsca_int2sf_rtx) 11165 { 11166 REAL_VALUE_TYPE rv; 11167 11168 real_from_string (&rv, "9.587379924285257e-5"); 11169 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode); 11170 } 11171 11172 return sh_fsca_int2sf_rtx; 11173 } 11174 11175 /* Initialize the CUMULATIVE_ARGS structure. */ 11176 void 11177 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum, 11178 tree fntype, 11179 rtx libname ATTRIBUTE_UNUSED, 11180 tree fndecl, 11181 signed int n_named_args, 11182 machine_mode mode) 11183 { 11184 pcum->arg_count [(int) SH_ARG_FLOAT] = 0; 11185 pcum->free_single_fp_reg = 0; 11186 pcum->outgoing = n_named_args != -1; 11187 11188 /* FIXME: Should we check TARGET_HITACHI here ??? */ 11189 pcum->renesas_abi = sh_attr_renesas_p (fntype); 11190 11191 if (fntype) 11192 { 11193 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi) 11194 && aggregate_value_p (TREE_TYPE (fntype), fndecl)); 11195 pcum->prototype_p = prototype_p (fntype); 11196 pcum->arg_count [(int) SH_ARG_INT] = false; 11197 } 11198 else 11199 { 11200 pcum->arg_count [(int) SH_ARG_INT] = 0; 11201 pcum->prototype_p = false; 11202 if (mode != VOIDmode) 11203 { 11204 /* If the default ABI is the Renesas ABI then all library 11205 calls must assume that the library will be using the 11206 Renesas ABI. So if the function would return its result 11207 in memory then we must force the address of this memory 11208 block onto the stack. Ideally we would like to call 11209 targetm.calls.return_in_memory() here but we do not have 11210 the TYPE or the FNDECL available so we synthesize the 11211 contents of that function as best we can. */ 11212 pcum->force_mem = 11213 (TARGET_DEFAULT & MASK_HITACHI) 11214 && (mode == BLKmode 11215 || (GET_MODE_SIZE (mode) > 4 11216 && !(mode == DFmode 11217 && TARGET_FPU_DOUBLE))); 11218 } 11219 else 11220 pcum->force_mem = false; 11221 } 11222 } 11223 11224 rtx 11225 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext) 11226 { 11227 enum rtx_code code = TRUNCATE; 11228 11229 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND) 11230 { 11231 rtx inner = XEXP (x, 0); 11232 machine_mode inner_mode = GET_MODE (inner); 11233 11234 if (inner_mode == mode) 11235 return inner; 11236 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode)) 11237 x = inner; 11238 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode) 11239 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND)) 11240 { 11241 code = GET_CODE (x); 11242 x = inner; 11243 } 11244 } 11245 return gen_rtx_fmt_e (code, mode, x); 11246 } 11247 11248 /* Load and store depend on the highpart of the address. However, 11249 set_attr_alternative does not give well-defined results before reload, 11250 so we must look at the rtl ourselves to see if any of the feeding 11251 registers is used in a memref. 11252 11253 Return true iff INSN contains a MEM. */ 11254 bool 11255 sh_contains_memref_p (rtx insn) 11256 { 11257 subrtx_iterator::array_type array; 11258 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) 11259 if (MEM_P (*iter)) 11260 return true; 11261 return false; 11262 } 11263 11264 /* Return true iff INSN loads a banked register. */ 11265 bool 11266 sh_loads_bankedreg_p (rtx insn) 11267 { 11268 if (GET_CODE (PATTERN (insn)) == SET) 11269 { 11270 rtx op = SET_DEST (PATTERN(insn)); 11271 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op))) 11272 return true; 11273 } 11274 11275 return false; 11276 } 11277 11278 /* Implement TARGET_PREFERRED_RELOAD_CLASS. 
*/ 11279 static reg_class_t 11280 sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass) 11281 { 11282 return rclass; 11283 } 11284 11285 /* Implement TARGET_SECONDARY_RELOAD. */ 11286 static reg_class_t 11287 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, 11288 machine_mode mode, secondary_reload_info *sri) 11289 { 11290 enum reg_class rclass = (enum reg_class) rclass_i; 11291 11292 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS 11293 && REG_P (XEXP (XEXP (x, 0), 0)) 11294 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG) 11295 return rclass == R0_REGS ? NO_REGS : R0_REGS; 11296 11297 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG) 11298 return rclass == R0_REGS ? NO_REGS : R0_REGS; 11299 11300 if (REG_P (x) && REGNO (x) == GBR_REG) 11301 return NO_REGS; 11302 11303 if (in_p) 11304 { 11305 if (REGCLASS_HAS_FP_REG (rclass) 11306 && immediate_operand ((x), mode) 11307 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode)) 11308 switch (mode) 11309 { 11310 case E_SFmode: 11311 sri->icode = CODE_FOR_reload_insf__frn; 11312 return NO_REGS; 11313 case E_DFmode: 11314 sri->icode = CODE_FOR_reload_indf__frn; 11315 return NO_REGS; 11316 case E_SImode: 11317 /* ??? If we knew that we are in the appropriate mode - 11318 single precision - we could use a reload pattern directly. */ 11319 return FPUL_REGS; 11320 default: 11321 abort (); 11322 } 11323 if (rclass == FPUL_REGS 11324 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG 11325 || REGNO (x) == T_REG)) 11326 || GET_CODE (x) == PLUS)) 11327 return GENERAL_REGS; 11328 if (rclass == FPUL_REGS && immediate_operand (x, mode)) 11329 { 11330 if (satisfies_constraint_I08 (x) || fp_zero_operand (x)) 11331 return GENERAL_REGS; 11332 else if (mode == SFmode) 11333 return FP_REGS; 11334 sri->icode = CODE_FOR_reload_insi__i_fpul; 11335 return NO_REGS; 11336 } 11337 if (rclass == FPSCR_REGS 11338 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER) 11339 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS))) 11340 return GENERAL_REGS; 11341 } /* end of input-only processing. */ 11342 11343 if (((REGCLASS_HAS_FP_REG (rclass) 11344 && (REG_P (x) 11345 && (GENERAL_OR_AP_REGISTER_P (REGNO (x)) 11346 || (FP_REGISTER_P (REGNO (x)) && mode == SImode 11347 && TARGET_FMOVD)))) 11348 || (REGCLASS_HAS_GENERAL_REG (rclass) 11349 && REG_P (x) 11350 && FP_REGISTER_P (REGNO (x)))) 11351 && (mode == SFmode || mode == SImode)) 11352 return FPUL_REGS; 11353 if ((rclass == FPUL_REGS 11354 || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode)) 11355 && (MEM_P (x) 11356 || (REG_P (x) 11357 && (REGNO (x) >= FIRST_PSEUDO_REGISTER 11358 || REGNO (x) == T_REG 11359 || system_reg_operand (x, VOIDmode))))) 11360 { 11361 if (rclass == FPUL_REGS) 11362 return GENERAL_REGS; 11363 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS; 11364 } 11365 11366 if ((rclass == MAC_REGS || rclass == PR_REGS) 11367 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x)) 11368 && rclass != REGNO_REG_CLASS (REGNO (x))) 11369 return GENERAL_REGS; 11370 11371 /* If here fall back to loading FPUL register through general registers. 11372 This case can happen when movsi_ie insn is picked initially to 11373 load/store the FPUL register from/to another register, and then the 11374 other register is allocated on the stack. */ 11375 if (rclass == FPUL_REGS && true_regnum (x) == -1) 11376 return GENERAL_REGS; 11377 11378 /* Force mov.b / mov.w displacement addressing insn to use R0 as 11379 the other operand. 
11380 On SH2A we could also just leave it alone here, which would result in a 11381 4 byte move insn being generated instead. However, for this to work 11382 the insns must have the appropriate alternatives. */ 11383 if ((mode == QImode || mode == HImode) && rclass != R0_REGS 11384 && satisfies_constraint_Sdd (x) 11385 && sh_disp_addr_displacement (x) 11386 <= sh_max_mov_insn_displacement (mode, false)) 11387 return R0_REGS; 11388 11389 /* When reload is trying to address a QImode or HImode subreg on the stack, 11390 force any subreg byte into R0_REGS, as this is going to become a 11391 displacement address. 11392 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg 11393 is on the stack, the memref to it might already require a displacement 11394 and that has to be added to the final address. At this point we don't 11395 know the cumulative displacement so we assume the worst case. */ 11396 if ((mode == QImode || mode == HImode) && rclass != R0_REGS 11397 && GET_CODE (x) == SUBREG && true_regnum (x) == -1) 11398 return R0_REGS; 11399 11400 return NO_REGS; 11401 } 11402 11403 /* Return true if SUBST can't safely replace its equivalent during RA. */ 11404 static bool 11405 sh_cannot_substitute_mem_equiv_p (rtx) 11406 { 11407 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn 11408 uses R0 and may cause spill failure when R0 is already used. 11409 We have to return true for that case at least. 11410 Moreover SH has strong R0 parity and also does not have enough of 11411 the hard registers to make the equiv substitution win in the size 11412 and the speed on average working sets. The pseudos produced to 11413 hold the equiv values can't get good hard registers for bad cases 11414 and end up as memory save/restore insns which make the code worse. */ 11415 return true; 11416 } 11417 11418 /* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */ 11419 static bool 11420 sh_legitimize_address_displacement (rtx *offset1, rtx *offset2, 11421 poly_int64 orig_offset, 11422 machine_mode mode) 11423 { 11424 if ((TARGET_FPU_DOUBLE && mode == DFmode) 11425 || (TARGET_SH2E && mode == SFmode)) 11426 return false; 11427 11428 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, orig_offset); 11429 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX) 11430 { 11431 *offset1 = adj.offset_adjust; 11432 *offset2 = adj.mov_disp; 11433 return true; 11434 } 11435 11436 return false; 11437 } 11438 11439 /* Return true if movsf insn should be split with an additional 11440 register. */ 11441 bool 11442 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2) 11443 { 11444 /* op0 == op1 */ 11445 if (rtx_equal_p (op0, op1)) 11446 return true; 11447 /* fy, FQ, reg */ 11448 if (GET_CODE (op1) == CONST_DOUBLE 11449 && ! satisfies_constraint_G (op1) 11450 && ! satisfies_constraint_H (op1) 11451 && REG_P (op0) 11452 && REG_P (op2)) 11453 return true; 11454 /* f, r, y */ 11455 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0)) 11456 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1)) 11457 && REG_P (op2) && (REGNO (op2) == FPUL_REG)) 11458 return true; 11459 /* r, f, y */ 11460 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1)) 11461 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0)) 11462 && REG_P (op2) && (REGNO (op2) == FPUL_REG)) 11463 return true; 11464 11465 return false; 11466 } 11467 11468 static void 11469 sh_conditional_register_usage (void) 11470 { 11471 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++) 11472 if (!
VALID_REGISTER_P (regno)) 11473 fixed_regs[regno] = call_used_regs[regno] = 1; 11474 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */ 11475 if (flag_pic) 11476 { 11477 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 11478 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 11479 } 11480 if (TARGET_FDPIC) 11481 { 11482 fixed_regs[PIC_REG] = 1; 11483 call_used_regs[PIC_REG] = 1; 11484 call_really_used_regs[PIC_REG] = 1; 11485 } 11486 /* Renesas saves and restores mac registers on call. */ 11487 if (TARGET_HITACHI && ! TARGET_NOMACSAVE) 11488 { 11489 call_really_used_regs[MACH_REG] = 0; 11490 call_really_used_regs[MACL_REG] = 0; 11491 } 11492 11493 for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++) 11494 if (! fixed_regs[regno] && call_really_used_regs[regno]) 11495 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno); 11496 11497 call_really_used_regs[FPSCR_MODES_REG] = 0; 11498 call_really_used_regs[FPSCR_STAT_REG] = 0; 11499 } 11500 11501 /* Implement TARGET_LEGITIMATE_CONSTANT_P 11502 11503 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */ 11504 static bool 11505 sh_legitimate_constant_p (machine_mode mode, rtx x) 11506 { 11507 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) 11508 { 11509 rtx base, offset; 11510 split_const (x, &base, &offset); 11511 11512 if (GET_CODE (base) == SYMBOL_REF 11513 && !offset_within_block_p (base, INTVAL (offset))) 11514 return false; 11515 } 11516 11517 if (TARGET_FDPIC 11518 && (SYMBOLIC_CONST_P (x) 11519 || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS 11520 && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0))))) 11521 return false; 11522 11523 return GET_CODE (x) != CONST_DOUBLE 11524 || mode == DFmode || mode == SFmode 11525 || mode == DImode || GET_MODE (x) == VOIDmode; 11526 } 11527 11528 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT; 11529 11530 static void 11531 sh_init_sync_libfuncs (void) 11532 { 11533 init_sync_libfuncs (UNITS_PER_WORD); 11534 } 11535 11536 /* Return true if it is appropriate to emit `ret' instructions in the 11537 body of a function. */ 11538 bool 11539 sh_can_use_simple_return_p (void) 11540 { 11541 if (! reload_completed || frame_pointer_needed) 11542 return false; 11543 11544 /* Moving prologue around does't reduce the size. */ 11545 if (optimize_function_for_size_p (cfun)) 11546 return false; 11547 11548 /* Finally, allow for pr save. */ 11549 HARD_REG_SET live_regs_mask; 11550 int d = calc_live_regs (&live_regs_mask); 11551 11552 if (rounded_frame_size (d) > 4) 11553 return false; 11554 11555 return true; 11556 } 11557 11558 /*------------------------------------------------------------------------------ 11559 Address mode optimization support code 11560 */ 11561 11562 typedef HOST_WIDE_INT disp_t; 11563 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN; 11564 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX; 11565 static const disp_t INVALID_DISP = MAX_DISP; 11566 11567 /* A memory reference which is described by a base register and a 11568 displacement. 
*/ 11569 class base_reg_disp 11570 { 11571 public: 11572 base_reg_disp (rtx br, disp_t d); 11573 11574 bool is_reg (void) const; 11575 bool is_disp (void) const; 11576 rtx reg (void) const; 11577 disp_t disp (void) const; 11578 11579 private: 11580 rtx reg_; 11581 disp_t disp_; 11582 }; 11583 11584 inline 11585 base_reg_disp::base_reg_disp (rtx br, disp_t d) 11586 : reg_ (br), disp_ (d) 11587 { 11588 } 11589 11590 inline bool 11591 base_reg_disp::is_reg (void) const 11592 { 11593 return reg_ != NULL_RTX && disp_ != INVALID_DISP; 11594 } 11595 11596 inline bool 11597 base_reg_disp::is_disp (void) const 11598 { 11599 return reg_ == NULL_RTX && disp_ != INVALID_DISP; 11600 } 11601 11602 inline rtx 11603 base_reg_disp::reg (void) const 11604 { 11605 return reg_; 11606 } 11607 11608 inline disp_t 11609 base_reg_disp::disp (void) const 11610 { 11611 return disp_; 11612 } 11613 11614 /* Find the base register and calculate the displacement for a given 11615 address rtx 'x'. */ 11616 static base_reg_disp 11617 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0, 11618 rtx base_reg = NULL) 11619 { 11620 if (REG_P (x)) 11621 { 11622 if (REGNO (x) == GBR_REG) 11623 return base_reg_disp (x, disp); 11624 11625 /* We've reached a hard-reg. This is probably the point where 11626 function args are copied to pseudos. Do not go any further and 11627 stick to the pseudo. If the original mem addr was in a hard reg 11628 from the beginning, it will become the base reg. */ 11629 if (REGNO (x) < FIRST_PSEUDO_REGISTER) 11630 return base_reg_disp (base_reg != NULL ? base_reg : x, disp); 11631 11632 /* Find the def of the reg and trace it. If there are more than one 11633 defs and they are not the same, assume it's not safe to proceed. */ 11634 rtx_insn* last_i = NULL; 11635 rtx last_set = NULL; 11636 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL; 11637 d = DF_REF_NEXT_REG (d)) 11638 { 11639 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d))); 11640 11641 /* Accept multiple defs, as long as they are equal. */ 11642 if (last_set == NULL || rtx_equal_p (last_set, set)) 11643 { 11644 last_i = DF_REF_INSN (d); 11645 last_set = set; 11646 } 11647 else 11648 { 11649 last_i = NULL; 11650 last_set = NULL; 11651 break; 11652 } 11653 } 11654 11655 if (last_set != NULL && last_i != NULL) 11656 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp, 11657 XEXP (last_set, 0)); 11658 11659 /* When here, no previous insn was found that sets the reg. 11660 The input reg is already the base reg. */ 11661 return base_reg_disp (x, disp); 11662 } 11663 11664 else if (GET_CODE (x) == PLUS) 11665 { 11666 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0)); 11667 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1)); 11668 11669 /* Either left or right val must be a reg. 11670 We don't handle the case of 'reg + reg' here. */ 11671 if (left_val.is_reg () && right_val.is_disp ()) 11672 return base_reg_disp (left_val.reg (), left_val.disp () 11673 + right_val.disp () + disp); 11674 else if (right_val.is_reg () && left_val.is_disp ()) 11675 return base_reg_disp (right_val.reg (), right_val.disp () 11676 + left_val.disp () + disp); 11677 else 11678 return base_reg_disp (base_reg, disp); 11679 } 11680 11681 else if (CONST_INT_P (x)) 11682 return base_reg_disp (NULL, disp + INTVAL (x)); 11683 11684 /* Didn't find anything useful. 
*/ 11685 return base_reg_disp (base_reg, disp); 11686 } 11687 11688 /* Given an insn and a memory operand, try to find an equivalent GBR 11689 based memory address and return the corresponding new memory address. 11690 Return NULL_RTX if not found. */ 11691 rtx 11692 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem) 11693 { 11694 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem))) 11695 return NULL_RTX; 11696 11697 /* Leave post/pre inc/dec or any other side effect addresses alone. */ 11698 if (side_effects_p (XEXP (mem, 0))) 11699 return NULL_RTX; 11700 11701 /* When not optimizing, there might be no dataflow available. */ 11702 if (df == NULL) 11703 return NULL_RTX; 11704 11705 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0)); 11706 11707 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG) 11708 { 11709 /* If GBR is marked as call clobbered we bail out if we see a call. 11710 FIXME: Actually should check if this mem refers to the gbr value 11711 before or after the call. If there is a store_gbr preceding this 11712 mem, it's safe to use GBR for this mem. 11713 11714 If GBR is not marked as call clobbered, but there is some other 11715 def than a call, it's probably a load_gbr upon which we also 11716 bail out to be on the safe side. 11717 FIXME: Should check if we have a use-after-def case, such as 11718 the call case above. */ 11719 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL; 11720 d = DF_REF_NEXT_REG (d)) 11721 { 11722 if (CALL_P (DF_REF_INSN (d))) 11723 { 11724 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)) 11725 return NULL_RTX; 11726 else 11727 continue; 11728 } 11729 else 11730 return NULL_RTX; 11731 } 11732 11733 rtx disp = GEN_INT (gbr_disp.disp ()); 11734 if (gbr_displacement (disp, GET_MODE (mem))) 11735 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp); 11736 } 11737 11738 return NULL_RTX; 11739 } 11740 11741 /*------------------------------------------------------------------------------ 11742 Manual insn combine support code. 11743 */ 11744 11745 /* Return true if the specified insn contains any UNSPECs or 11746 UNSPEC_VOLATILEs. */ 11747 static bool 11748 sh_unspec_insn_p (rtx x) 11749 { 11750 subrtx_iterator::array_type array; 11751 FOR_EACH_SUBRTX (i, array, x, ALL) 11752 if (*i != NULL 11753 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE)) 11754 return true; 11755 11756 return false; 11757 } 11758 11759 /* Return true if the register operands of the specified insn are modified 11760 between the specified from and to insns (exclusive of those two). */ 11761 bool 11762 sh_insn_operands_modified_between_p (rtx_insn* operands_insn, 11763 const rtx_insn* from, 11764 const rtx_insn* to) 11765 { 11766 /* FIXME: Return true for multiple sets for now. */ 11767 rtx s = single_set (operands_insn); 11768 if (s == NULL_RTX) 11769 return true; 11770 11771 subrtx_iterator::array_type array; 11772 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL) 11773 if (*i != NULL && 11774 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to))) 11775 return true; 11776 11777 return false; 11778 } 11779 11780 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that 11781 negates the T bit and stores the result in the T bit.
*/ 11782 bool 11783 sh_is_nott_insn (const rtx_insn* i) 11784 { 11785 return i != NULL && GET_CODE (PATTERN (i)) == SET 11786 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode) 11787 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode); 11788 } 11789 11790 rtx 11791 sh_movt_set_dest (const rtx_insn* i) 11792 { 11793 return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i)); 11794 } 11795 11796 rtx 11797 sh_movt_set_dest (const_rtx pat) 11798 { 11799 return GET_CODE (pat) == SET 11800 && arith_reg_dest (XEXP (pat, 0), SImode) 11801 && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL; 11802 } 11803 11804 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn 11805 that stores the negated T bit in a register, and return the destination 11806 register rtx, or null. */ 11807 rtx 11808 sh_movrt_set_dest (const rtx_insn* i) 11809 { 11810 return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i)); 11811 } 11812 11813 rtx 11814 sh_movrt_set_dest (const_rtx pat) 11815 { 11816 /* The negc movrt replacement is inside a parallel. */ 11817 if (GET_CODE (pat) == PARALLEL) 11818 pat = XVECEXP (pat, 0, 0); 11819 11820 return GET_CODE (pat) == SET 11821 && arith_reg_dest (XEXP (pat, 0), SImode) 11822 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL; 11823 11824 } 11825 11826 /* Given an insn and a reg number, tell whether the reg dies or is unused 11827 after the insn. */ 11828 bool 11829 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno) 11830 { 11831 return find_regno_note (i, REG_DEAD, regno) != NULL 11832 || find_regno_note (i, REG_UNUSED, regno) != NULL; 11833 } 11834 11835 /* Given an insn and a reg number, remove reg dead or reg unused notes to 11836 mark it as being used after the insn. */ 11837 void 11838 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno) 11839 { 11840 if (rtx n = find_regno_note (i, REG_DEAD, regno)) 11841 remove_note (i, n); 11842 if (rtx n = find_regno_note (i, REG_UNUSED, regno)) 11843 remove_note (i, n); 11844 } 11845 11846 /* Given an insn, check if it contains any post/pre inc/dec mem operands and 11847 add the REG_INC notes accordingly. 11848 FIXME: This function is very similar to lra.c (add_auto_inc_notes). 11849 FIXME: This function is currently used by peephole2 patterns because 11850 the peephole2 pass does not preserve REG_INC notes. If the notes 11851 are dropped the following passes will do wrong things. */ 11852 rtx_insn* 11853 sh_check_add_incdec_notes (rtx_insn* i) 11854 { 11855 struct for_each_inc_dec_clb 11856 { 11857 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED, 11858 rtx dest, rtx src ATTRIBUTE_UNUSED, 11859 rtx srcoff ATTRIBUTE_UNUSED, void* arg) 11860 { 11861 gcc_assert (REG_P (dest)); 11862 11863 rtx_insn* i = (rtx_insn*)arg; 11864 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL) 11865 add_reg_note (i, REG_INC, dest); 11866 11867 return 0; 11868 } 11869 }; 11870 11871 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i); 11872 return i; 11873 } 11874 11875 /* Given a move insn destination and a source, make sure that the move source 11876 operand is not a post-inc mem load with the same address reg as the 11877 destination. Returns the modified source operand with the post-inc removed 11878 if necessary.
*/ 11879 rtx 11880 sh_remove_overlapping_post_inc (rtx dst, rtx src) 11881 { 11882 if (!MEM_P (src)) 11883 return src; 11884 11885 rtx addr = XEXP (src, 0); 11886 11887 if (GET_CODE (addr) == POST_INC 11888 && reg_overlap_mentioned_p (XEXP (addr, 0), dst)) 11889 return replace_equiv_address (src, XEXP (addr, 0)); 11890 11891 gcc_assert (GET_CODE (addr) != POST_MODIFY); 11892 return src; 11893 } 11894 11895 /* Emit a move insn that is safe to be used in peephole patterns. */ 11896 rtx_insn* 11897 sh_peephole_emit_move_insn (rtx dst, rtx src) 11898 { 11899 return sh_check_add_incdec_notes ( 11900 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src))); 11901 } 11902 11903 /* Given an op rtx and an insn, try to find out whether the result of the 11904 specified op consists only of logical operations on T bit stores. */ 11905 bool 11906 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn) 11907 { 11908 if (!logical_operator (op, SImode)) 11909 return false; 11910 11911 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) }; 11912 int op_is_t_count = 0; 11913 11914 for (int i = 0; i < 2; ++i) 11915 { 11916 if (t_reg_operand (ops[i], VOIDmode) 11917 || negt_reg_operand (ops[i], VOIDmode)) 11918 op_is_t_count++; 11919 11920 else 11921 { 11922 set_of_reg op_set = sh_find_set_of_reg 11923 (ops[i], insn, prev_nonnote_nondebug_insn_bb); 11924 if (op_set.set_src == NULL_RTX) 11925 continue; 11926 11927 if (t_reg_operand (op_set.set_src, VOIDmode) 11928 || negt_reg_operand (op_set.set_src, VOIDmode) 11929 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn)) 11930 op_is_t_count++; 11931 } 11932 } 11933 11934 return op_is_t_count == 2; 11935 } 11936 11937 /* Given the operand that is extended in a sign/zero extend insn, and the 11938 insn, try to figure out whether the sign/zero extension can be replaced 11939 by a simple reg-reg copy. If so, the replacement reg rtx is returned, 11940 NULL_RTX otherwise. */ 11941 rtx 11942 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn) 11943 { 11944 if (REG_P (extended_op)) 11945 extended_op = extended_op; 11946 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op))) 11947 extended_op = SUBREG_REG (extended_op); 11948 else 11949 return NULL_RTX; 11950 11951 /* Reg moves must be of the same mode. */ 11952 if (GET_MODE (extended_op) != SImode) 11953 return NULL_RTX; 11954 11955 set_of_reg s = sh_find_set_of_reg (extended_op, insn, 11956 prev_nonnote_nondebug_insn_bb); 11957 if (s.set_src == NULL_RTX) 11958 return NULL_RTX; 11959 11960 if (t_reg_operand (s.set_src, VOIDmode) 11961 || negt_reg_operand (s.set_src, VOIDmode)) 11962 return extended_op; 11963 11964 /* If the zero extended reg was formed by a logical operation, check the 11965 operands of the logical operation. If both originated from T bit 11966 stores the zero extension can be eliminated. */ 11967 else if (sh_is_logical_t_store_expr (s.set_src, s.insn)) 11968 return extended_op; 11969 11970 return NULL_RTX; 11971 } 11972 11973 /* Given the current insn, which is assumed to be a movrt_negc insn, try to 11974 figure out whether it should be converted into a movt-xor sequence in 11975 the movrt_negc splitter. 11976 Returns true if insns have been modified and the splitter has succeeded. 
*/ 11977 bool 11978 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[]) 11979 { 11980 /* In cases such as 11981 tst r4,r4 11982 mov #-1,r1 11983 negc r1,r1 11984 tst r4,r4 11985 we can replace the T bit clobbering negc with a movt-xor sequence and 11986 eliminate the redundant comparison. 11987 Because the xor insn depends on register allocation results, allow this 11988 only before reload. */ 11989 if (!can_create_pseudo_p ()) 11990 return false; 11991 11992 set_of_reg t_before_negc = sh_find_set_of_reg 11993 (get_t_reg_rtx (), curr_insn, prev_nonnote_nondebug_insn_bb); 11994 set_of_reg t_after_negc = sh_find_set_of_reg 11995 (get_t_reg_rtx (), curr_insn, next_nonnote_nondebug_insn_bb); 11996 11997 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX 11998 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx) 11999 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn) 12000 && !sh_insn_operands_modified_between_p (t_before_negc.insn, 12001 t_before_negc.insn, 12002 t_after_negc.insn) 12003 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn) 12004 && !sh_unspec_insn_p (t_after_negc.insn) 12005 && !volatile_insn_p (PATTERN (t_after_negc.insn)) 12006 && !side_effects_p (PATTERN (t_after_negc.insn)) 12007 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn))) 12008 { 12009 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ())); 12010 set_insn_deleted (t_after_negc.insn); 12011 return true; 12012 } 12013 else 12014 return false; 12015 } 12016 12017 /* Given a reg and the current insn, see if the value of the reg originated 12018 from a sign or zero extension and return the discovered information. */ 12019 sh_extending_set_of_reg 12020 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn) 12021 { 12022 if (reg == NULL) 12023 return sh_extending_set_of_reg (curr_insn); 12024 12025 if (SUBREG_P (reg)) 12026 reg = SUBREG_REG (reg); 12027 12028 if (!REG_P (reg)) 12029 return sh_extending_set_of_reg (curr_insn); 12030 12031 /* FIXME: Also search the predecessor basic blocks. It seems that checking 12032 only the adjacent predecessor blocks would cover most of the cases. 12033 Also try to look through the first extension that we hit. There are some 12034 cases, where a zero_extend is followed an (implicit) sign_extend, and it 12035 fails to see the sign_extend. */ 12036 sh_extending_set_of_reg result = sh_find_set_of_reg 12037 (reg, curr_insn, prev_nonnote_nondebug_insn_bb, true); 12038 12039 if (result.set_src != NULL) 12040 { 12041 if (GET_CODE (result.set_src) == SIGN_EXTEND 12042 || GET_CODE (result.set_src) == ZERO_EXTEND) 12043 { 12044 if (dump_file) 12045 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is " 12046 "explicitly sign/zero extended in insn %d\n", 12047 REGNO (reg), INSN_UID (result.insn)); 12048 result.from_mode = GET_MODE (XEXP (result.set_src, 0)); 12049 result.ext_code = GET_CODE (result.set_src); 12050 } 12051 else if (MEM_P (result.set_src) 12052 && (GET_MODE (result.set_src) == QImode 12053 || GET_MODE (result.set_src) == HImode) 12054 && !sh_unspec_insn_p (result.insn)) 12055 { 12056 /* On SH QIHImode memory loads always sign extend. However, in 12057 some cases where it seems that the higher bits are not 12058 interesting, the loads will not be expanded as sign extending 12059 insns, but as QIHImode loads into QIHImode regs. We report that 12060 the reg has been sign extended by the mem load. 
When it is used 12061 as such, we must convert the mem load into a sign extending insn, 12062 see also sh_extending_set_of_reg::use_as_extended_reg. */ 12063 if (dump_file) 12064 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is " 12065 "implicitly sign extended in insn %d\n", 12066 REGNO (reg), INSN_UID (result.insn)); 12067 result.from_mode = GET_MODE (result.set_src); 12068 result.ext_code = SIGN_EXTEND; 12069 } 12070 } 12071 12072 return result; 12073 } 12074 12075 /* Given a reg that is known to be sign or zero extended at some insn, 12076 take the appropriate measures so that the extended value can be used as 12077 a reg at the specified insn and return the resulting reg rtx. */ 12078 rtx 12079 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const 12080 { 12081 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL); 12082 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND); 12083 gcc_assert (from_mode == QImode || from_mode == HImode); 12084 12085 if (MEM_P (set_src) && ext_code == SIGN_EXTEND) 12086 { 12087 if (dump_file) 12088 fprintf (dump_file, 12089 "use_as_extended_reg: converting non-extending mem load in " 12090 "insn %d into sign-extending load\n", INSN_UID (insn)); 12091 12092 rtx r = gen_reg_rtx (SImode); 12093 rtx_insn* i0; 12094 if (from_mode == QImode) 12095 i0 = sh_check_add_incdec_notes ( 12096 emit_insn_after (gen_extendqisi2 (r, set_src), insn)); 12097 else if (from_mode == HImode) 12098 i0 = sh_check_add_incdec_notes ( 12099 emit_insn_after (gen_extendhisi2 (r, set_src), insn)); 12100 else 12101 gcc_unreachable (); 12102 12103 emit_insn_after ( 12104 gen_move_insn (XEXP (set_rtx, 0), 12105 gen_lowpart (GET_MODE (set_src), r)), i0); 12106 set_insn_deleted (insn); 12107 return r; 12108 } 12109 else 12110 { 12111 rtx extension_dst = XEXP (set_rtx, 0); 12112 if (GET_MODE (extension_dst) != SImode) 12113 extension_dst = simplify_gen_subreg (SImode, extension_dst, 12114 GET_MODE (extension_dst), 0); 12115 if (modified_between_p (extension_dst, insn, use_at_insn)) 12116 { 12117 if (dump_file) 12118 fprintf (dump_file, 12119 "use_as_extended_reg: dest reg %d of extending insn %d is " 12120 "modified, inserting a reg-reg copy\n", 12121 REGNO (extension_dst), INSN_UID (insn)); 12122 12123 rtx r = gen_reg_rtx (SImode); 12124 emit_insn_after (gen_move_insn (r, extension_dst), insn); 12125 return r; 12126 } 12127 else 12128 { 12129 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst)); 12130 return extension_dst; 12131 } 12132 } 12133 } 12134 12135 bool 12136 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const 12137 { 12138 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND) 12139 && (from_mode == QImode || from_mode == HImode) 12140 && set_src != NULL) 12141 return arith_reg_operand (XEXP (set_src, 0), from_mode); 12142 else 12143 return false; 12144 } 12145 12146 rtx 12147 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const 12148 { 12149 gcc_assert (can_use_as_unextended_reg ()); 12150 12151 rtx r = XEXP (set_src, 0); 12152 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0); 12153 12154 if (modified_between_p (r, insn, use_at_insn)) 12155 { 12156 rtx r1 = gen_reg_rtx (SImode); 12157 emit_insn_after (gen_move_insn (r1, r0), insn); 12158 return r1; 12159 } 12160 else 12161 { 12162 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r) 12163 ? 
REGNO (SUBREG_REG (r)) 12164 : REGNO (r)); 12165 return r0; 12166 } 12167 } 12168 12169 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn, 12170 perform the necessary checks on the operands and split it accordingly. */ 12171 void 12172 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode, 12173 int subreg_offset, rtx operands[]) 12174 { 12175 gcc_assert (subreg_mode == QImode || subreg_mode == HImode); 12176 12177 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0], 12178 curr_insn); 12179 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1], 12180 curr_insn); 12181 12182 /* If one of the operands is known to be zero extended, that's already 12183 sufficient to mask out the unwanted high bits. */ 12184 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode) 12185 { 12186 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn), 12187 operands[1])); 12188 return; 12189 } 12190 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode) 12191 { 12192 emit_insn (gen_tstsi_t (operands[0], 12193 eop1.use_as_extended_reg (curr_insn))); 12194 return; 12195 } 12196 12197 /* None of the operands seem to be zero extended. 12198 If both are sign extended it's OK, too. */ 12199 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND 12200 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode) 12201 { 12202 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn), 12203 eop1.use_as_extended_reg (curr_insn))); 12204 return; 12205 } 12206 12207 /* Otherwise we have to insert a zero extension on one of the operands to 12208 mask out the unwanted high bits. 12209 Prefer the operand that has no known extension. */ 12210 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN) 12211 std::swap (operands[0], operands[1]); 12212 12213 rtx tmp0 = gen_reg_rtx (SImode); 12214 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0], 12215 GET_MODE (operands[0]), subreg_offset); 12216 emit_insn (subreg_mode == QImode 12217 ? gen_zero_extendqisi2 (tmp0, tmp1) 12218 : gen_zero_extendhisi2 (tmp0, tmp1)); 12219 emit_insn (gen_tstsi_t (tmp0, operands[1])); 12220 } 12221 12222 /* A helper class to increment/decrement a counter variable each time a 12223 function is entered/left. */ 12224 class scope_counter 12225 { 12226 public: 12227 scope_counter (int& counter) : m_counter (counter) { ++m_counter; } 12228 12229 ~scope_counter (void) 12230 { 12231 --m_counter; 12232 gcc_assert (m_counter >= 0); 12233 } 12234 12235 int count (void) const { return m_counter; } 12236 12237 private: 12238 int& m_counter; 12239 }; 12240 12241 /* Given an rtx x, determine whether the expression can be used to create 12242 an insn that calculates x and stores the result in the T bit. 12243 This is used by the 'treg_set_expr' predicate to construct insn sequences 12244 where T bit results are fed into other insns, such as addc, subc, negc 12245 insns. 12246 12247 FIXME: The patterns that expand 'treg_set_expr' operands tend to 12248 distinguish between 'positive' and 'negative' forms. For now this has to 12249 be done in the preparation code. We could also introduce 12250 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write 12251 two different patterns for the 'positive' and 'negative' forms.
However, 12252 the total amount of lines of code seems to be about the same and the 12253 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the 12254 recog function would need to look inside the expression by temporarily 12255 splitting it. */ 12256 static int sh_recog_treg_set_expr_reent_count = 0; 12257 12258 bool 12259 sh_recog_treg_set_expr (rtx op, machine_mode mode) 12260 { 12261 scope_counter recursion (sh_recog_treg_set_expr_reent_count); 12262 12263 /* Limit the recursion count to avoid nested expressions which we can't 12264 resolve to a single treg set insn. */ 12265 if (recursion.count () > 1) 12266 return false; 12267 12268 /* Early accept known possible operands before doing recog. */ 12269 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode) 12270 || negt_reg_operand (op, mode)) 12271 return true; 12272 12273 /* Early reject impossible operands before doing recog. 12274 There are some (set ((t) (subreg ...))) patterns, but we must be careful 12275 not to allow any invalid reg-reg or mem-reg moves, or else other passes 12276 such as lower-subreg will bail out. Some insns such as SH4A movua are 12277 done with UNSPEC, so must reject those, too, or else it would result 12278 in an invalid reg -> treg move. */ 12279 if (CONST_INT_P (op) || register_operand (op, mode) 12280 || memory_operand (op, mode) || sh_unspec_insn_p (op)) 12281 return false; 12282 12283 if (!can_create_pseudo_p ()) 12284 return false; 12285 12286 /* expand_debug_locations may call this to compute rtx costs at 12287 very early stage. In that case, don't make new insns here to 12288 avoid codegen differences with -g. */ 12289 if (currently_expanding_to_rtl) 12290 return false; 12291 12292 /* We are going to invoke recog in a re-entrant way and thus 12293 have to capture its current state and restore it afterwards. */ 12294 recog_data_d prev_recog_data = recog_data; 12295 12296 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op)); 12297 SET_PREV_INSN (i) = NULL; 12298 SET_NEXT_INSN (i) = NULL; 12299 12300 /* If the comparison op doesn't have a result mode, set it to SImode. */ 12301 machine_mode prev_op_mode = GET_MODE (op); 12302 if (COMPARISON_P (op) && prev_op_mode == VOIDmode) 12303 PUT_MODE (op, SImode); 12304 12305 int result = recog (PATTERN (i), i, 0); 12306 12307 /* It seems there is no insn like that. Create a negated version and 12308 try again. If we hit a negated form, we'll allow that and append a 12309 nott sequence when splitting out the insns. Insns that do the split 12310 can then remove the trailing nott if they know how to deal with it. */ 12311 if (result < 0 && COMPARISON_P (op)) 12312 { 12313 machine_mode cmp_mode = GET_MODE (XEXP (op, 0)); 12314 if (cmp_mode == VOIDmode) 12315 cmp_mode = GET_MODE (XEXP (op, 1)); 12316 12317 rtx_code prev_code = GET_CODE (op); 12318 PUT_CODE (op, reverse_condition (GET_CODE (op))); 12319 result = recog (PATTERN (i), i, 0); 12320 PUT_CODE (op, prev_code); 12321 } 12322 12323 PUT_MODE (op, prev_op_mode); 12324 recog_data = prev_recog_data; 12325 return result >= 0; 12326 } 12327 12328 /* Returns true when recog of a 'treg_set_expr' is currently in progress. 12329 This can be used as a condition for insn/split patterns to allow certain 12330 T bit setting patters only to be matched as sub expressions of other 12331 patterns. 
*/ 12332 bool 12333 sh_in_recog_treg_set_expr (void) 12334 { 12335 return sh_recog_treg_set_expr_reent_count > 0; 12336 } 12337 12338 /* Given an rtx x, which is assumed to be some expression that has been 12339 matched by the 'treg_set_expr' predicate before, split and emit the 12340 insns that are necessary to calculate the expression and store the result 12341 in the T bit. 12342 The splitting is done recursively similar to 'try_split' in emit-rt.c. 12343 Unfortunately we can't use 'try_split' here directly, as it tries to invoke 12344 'delete_insn' which then causes the DF parts to bail out, because we 12345 currently are inside another gen_split* function and would invoke 12346 'try_split' in a reentrant way. */ 12347 static std::pair<rtx_insn*, rtx_insn*> 12348 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0) 12349 { 12350 if (dump_file) 12351 { 12352 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n); 12353 print_rtl_single (dump_file, i); 12354 fprintf (dump_file, "\n"); 12355 } 12356 12357 rtx_insn* seq = split_insns (PATTERN (i), curr_insn); 12358 12359 if (seq == NULL) 12360 return std::make_pair (i, i); 12361 12362 /* Avoid infinite splitter loops if any insn of the result matches 12363 the original pattern. */ 12364 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s)) 12365 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i))) 12366 return std::make_pair (i, i); 12367 12368 unshare_all_rtl_in_chain (seq); 12369 12370 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in 12371 a linked list, replace the single insn with the new insns. */ 12372 rtx_insn* seqlast = seq; 12373 while (NEXT_INSN (seqlast) != NULL) 12374 seqlast = NEXT_INSN (seqlast); 12375 12376 if (rtx_insn* iprev = PREV_INSN (i)) 12377 SET_NEXT_INSN (iprev) = seq; 12378 if (rtx_insn* inext = NEXT_INSN (i)) 12379 SET_PREV_INSN (inext) = seqlast; 12380 12381 SET_PREV_INSN (seq) = PREV_INSN (i); 12382 SET_NEXT_INSN (seqlast) = NEXT_INSN (i); 12383 12384 SET_PREV_INSN (i) = NULL; 12385 SET_NEXT_INSN (i) = NULL; 12386 12387 /* Recursively split all insns. */ 12388 for (i = seq; ; i = NEXT_INSN (i)) 12389 { 12390 std::pair<rtx_insn*, rtx_insn*> ii = 12391 sh_try_split_insn_simple (i, curr_insn, n + 1); 12392 if (i == seq) 12393 seq = ii.first; 12394 if (i == seqlast) 12395 { 12396 seqlast = ii.second; 12397 break; 12398 } 12399 i = ii.first; 12400 } 12401 12402 return std::make_pair (seq, seqlast); 12403 } 12404 12405 sh_treg_insns 12406 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn) 12407 { 12408 if (t_reg_operand (x, VOIDmode)) 12409 return sh_treg_insns (); 12410 12411 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count); 12412 12413 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x)); 12414 SET_PREV_INSN (i) = NULL; 12415 SET_NEXT_INSN (i) = NULL; 12416 12417 if (dump_file) 12418 { 12419 fprintf (dump_file, "split_treg_set_expr insn:\n"); 12420 print_rtl (dump_file, i); 12421 fprintf (dump_file, "\n"); 12422 } 12423 12424 /* If the insn is not found, we will try a negated form and append 12425 a nott. */ 12426 bool append_nott = false; 12427 12428 /* We are going to invoke recog/split_insns in a re-entrant way and thus 12429 have to capture its current state and restore it afterwards. */ 12430 recog_data_d prev_recog_data = recog_data; 12431 12432 if (negt_reg_operand (x, GET_MODE (x))) 12433 { 12434 /* This is a normal movt followed by a nott. It will be converted 12435 into a movrt after initial expansion. 
*/ 12436 XEXP (PATTERN (i), 1) = get_t_reg_rtx (); 12437 append_nott = true; 12438 } 12439 else 12440 { 12441 /* If the comparison op doesn't have a mode set, set it to SImode. */ 12442 if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode) 12443 PUT_MODE (x, SImode); 12444 12445 int insn_code = recog (PATTERN (i), i, 0); 12446 12447 if (insn_code < 0 && COMPARISON_P (x)) 12448 { 12449 machine_mode cmp_mode = GET_MODE (XEXP (x, 0)); 12450 if (cmp_mode == VOIDmode) 12451 cmp_mode = GET_MODE (XEXP (x, 1)); 12452 12453 PUT_CODE (x, reverse_condition (GET_CODE (x))); 12454 insn_code = recog (PATTERN (i), i, 0); 12455 append_nott = true; 12456 } 12457 12458 gcc_assert (insn_code >= 0); 12459 } 12460 12461 /* Try to recursively split the insn. Some insns might refuse to split 12462 any further while we are in the treg_set_expr splitting phase. They 12463 will be emitted as part of the outer insn and then split again. */ 12464 std::pair<rtx_insn*, rtx_insn*> insnlist = 12465 sh_try_split_insn_simple (i, curr_insn); 12466 12467 /* Restore recog state. */ 12468 recog_data = prev_recog_data; 12469 12470 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second) 12471 ? insnlist.second 12472 : NULL; 12473 if (dump_file) 12474 { 12475 fprintf (dump_file, "split_treg_set_expr insnlist:\n"); 12476 print_rtl (dump_file, insnlist.first); 12477 fprintf (dump_file, "\n"); 12478 12479 if (nott_insn != NULL) 12480 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn)); 12481 } 12482 12483 emit_insn (insnlist.first); 12484 12485 if (nott_insn != NULL && append_nott) 12486 { 12487 if (dump_file) 12488 fprintf (dump_file, "removing trailing nott\n"); 12489 remove_insn (nott_insn); 12490 nott_insn = NULL; 12491 append_nott = false; 12492 } 12493 12494 if (append_nott) 12495 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ())); 12496 12497 rtx_insn* first_insn = get_insns (); 12498 12499 if (dump_file) 12500 { 12501 fprintf (dump_file, "resulting insns:\n"); 12502 print_rtl (dump_file, first_insn); 12503 fprintf (dump_file, "\n"); 12504 } 12505 12506 return sh_treg_insns (first_insn, nott_insn); 12507 } 12508 12509 /*------------------------------------------------------------------------------ 12510 Mode switching support code. 12511 */ 12512 12513 static void 12514 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode, 12515 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED) 12516 { 12517 if ((TARGET_SH4A_FP || TARGET_FPU_SH4_300) 12518 && prev_mode != FP_MODE_NONE && prev_mode != mode) 12519 { 12520 emit_insn (gen_toggle_pr ()); 12521 if (TARGET_FMOVD) 12522 emit_insn (gen_toggle_sz ()); 12523 } 12524 else if (mode != FP_MODE_NONE) 12525 { 12526 rtx tmp = gen_reg_rtx (SImode); 12527 emit_insn (gen_sts_fpscr (tmp)); 12528 rtx i = NULL; 12529 12530 const unsigned HOST_WIDE_INT fpbits = 12531 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR; 12532 12533 if (prev_mode != FP_MODE_NONE && prev_mode != mode) 12534 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits))); 12535 else if (mode == FP_MODE_SINGLE) 12536 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits))); 12537 else if (mode == FP_MODE_DOUBLE) 12538 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits))); 12539 else 12540 gcc_unreachable (); 12541 12542 emit_insn (i); 12543 emit_insn (gen_lds_fpscr (tmp)); 12544 } 12545 } 12546 12547 static int 12548 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn) 12549 { 12550 return recog_memoized (insn) >= 0 ? 
get_attr_fp_mode (insn) : FP_MODE_NONE; 12551 } 12552 12553 static int 12554 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn) 12555 { 12556 if (TARGET_HITACHI && recog_memoized (insn) >= 0 && 12557 get_attr_fp_set (insn) != FP_SET_NONE) 12558 return (int) get_attr_fp_set (insn); 12559 else 12560 return mode; 12561 } 12562 12563 static int 12564 sh_mode_entry (int entity ATTRIBUTE_UNUSED) 12565 { 12566 return NORMAL_MODE (entity); 12567 } 12568 12569 static int 12570 sh_mode_exit (int entity ATTRIBUTE_UNUSED) 12571 { 12572 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity); 12573 } 12574 12575 static int 12576 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n) 12577 { 12578 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE); 12579 } 12580 12581 /*------------------------------------------------------------------------------ 12582 Misc 12583 */ 12584 12585 /* Return true if we use LRA instead of reload pass. */ 12586 bool 12587 sh_lra_p (void) 12588 { 12589 return sh_lra_flag; 12590 } 12591 12592 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ 12593 12594 static bool 12595 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, 12596 unsigned int align, 12597 enum by_pieces_operation op, 12598 bool speed_p) 12599 { 12600 switch (op) 12601 { 12602 case MOVE_BY_PIECES: 12603 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op) 12604 < (!speed_p ? 2 : (align >= 32) ? 16 : 2); 12605 case STORE_BY_PIECES: 12606 case SET_BY_PIECES: 12607 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op) 12608 < (!speed_p ? 2 : (align >= 32) ? 16 : 2); 12609 default: 12610 return default_use_by_pieces_infrastructure_p (size, align, 12611 op, speed_p); 12612 } 12613 } 12614 12615 bool 12616 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED, 12617 rtx x ATTRIBUTE_UNUSED) 12618 { 12619 return TARGET_FDPIC; 12620 } 12621 12622 /* Emit insns to load the function address from FUNCDESC (an FDPIC 12623 function descriptor) into r1 and the GOT address into r12, 12624 returning an rtx for r1. */ 12625 12626 rtx 12627 sh_load_function_descriptor (rtx funcdesc) 12628 { 12629 rtx r1 = gen_rtx_REG (Pmode, R1_REG); 12630 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG); 12631 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc); 12632 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4)); 12633 12634 emit_move_insn (r1, fnaddr); 12635 /* The ABI requires the entry point address to be loaded first, so 12636 prevent the load from being moved after that of the GOT 12637 address. */ 12638 emit_insn (gen_blockage ()); 12639 emit_move_insn (pic_reg, gotaddr); 12640 return r1; 12641 } 12642 12643 /* Return an rtx holding the initial value of the FDPIC register (the 12644 FDPIC pointer passed in from the caller). */ 12645 12646 rtx 12647 sh_get_fdpic_reg_initial_val (void) 12648 { 12649 return get_hard_reg_initial_val (Pmode, PIC_REG); 12650 } 12651 12652 #include "gt-sh.h" 12653
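/* The block below is an illustrative sketch only; it is not part of the
   original sh.c and is not used by the compiler.  It is kept under '#if 0'
   so it does not affect the build.  It shows how the two scaling constants
   returned by sh_fsca_sf2int () and sh_fsca_int2sf () above relate to each
   other: an angle in radians is scaled to a signed fixed-point fraction of
   a full circle (0x10000 == 2*pi) and back, assuming ordinary IEEE single
   precision arithmetic on the host.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const double pi = 3.14159265358979323846;

  /* 2**15 / pi, the constant behind sh_fsca_sf2int ().  */
  const float sf2int = 10430.378350470453f;
  /* pi / 2**15, the constant behind sh_fsca_int2sf ().  */
  const float int2sf = 9.587379924285257e-5f;

  /* A quarter circle: pi/2 radians should map to about 0x4000 fixed-point
     units, since the full circle 2*pi maps to 0x10000.  */
  float angle = (float) (pi / 2);
  float fixed = angle * sf2int;	/* expect ~16384.0 (0x4000).  */
  float back = fixed * int2sf;	/* expect ~pi/2 again.  */

  printf ("fixed = %f  back = %f  pi/2 = %f\n", fixed, back, pi / 2);
  return 0;
}
#endif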