/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "expr.h"
#include "optabs.h"
#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "toplev.h"
#include "recog.h"
#include "integrate.h"
#include "dwarf2.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "real.h"
#include "langhooks.h"
#include "basic-block.h"
#include "df.h"
#include "cfglayout.h"
#include "intl.h"
#include "sched-int.h"
#include "params.h"
#include "ggc.h"
#include "gimple.h"
#include "cfgloop.h"
#include "alloc-pool.h"
#include "tm-constrs.h"


int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)

/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))

/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
		  : DECL_ATTRIBUTES (decl) \
		  ? (DECL_ATTRIBUTES (decl)) \
		  : TYPE_ATTRIBUTES (TREE_TYPE (decl))

/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.
   This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Unique number for UNSPEC_BBR pattern.  */
static unsigned int unspec_bbr_uid = 1;

/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;

static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
						 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
							   tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static enum reg_class sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static tree sh_builtin_decl (unsigned, bool);
static void sh_media_init_builtins (void);
static tree sh_media_builtin_decl (unsigned, bool);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *, bool);
static int sh_address_cost (rtx, bool);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static rtx sh_function_value (const_tree, const_tree, bool);
static rtx sh_libcall_value (enum machine_mode, const_rtx);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool sh_promote_prototypes (const_tree);
static enum machine_mode sh_promote_function_mode (const_tree type,
						   enum machine_mode,
						   int *punsignedp,
						   const_tree funtype,
						   int for_return);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      const_tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static bool sh_scalar_mode_supported_p (enum machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static int sh2a_function_vector_p (tree);
static void sh_trampoline_init (rtx, tree, rtx);
static rtx sh_trampoline_adjust_address (rtx);

static const struct attribute_spec sh_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
  { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
  { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
  { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
  { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
#ifdef SYMBIAN
  /* Symbian support adds three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
  { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate

/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order), but such an insn might be in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks, much like what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   (Q)->(R).

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sh_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sh_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sh_libcall_value
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies

#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info

#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class

#endif /* SYMBIAN */

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sh_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address

/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)

struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
    case OPT_m4_340:
    case OPT_m4_400:
    case OPT_m4_500:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}

/* Set default optimization options.  */
void
sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  if (level)
    {
      flag_omit_frame_pointer = 2;
      if (!size)
	sh_div_str = "inv:minlat";
    }
  if (size)
    {
      target_flags |= MASK_SMALLCODE;
      sh_div_str = SH_DIV_STR_FOR_SIZE;
    }
  else
    TARGET_CBRANCHDI4 = 1;
  /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
     haven't been parsed yet, hence we'd read only the default.
     sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
     it's OK to always set flag_branch_target_load_optimize.  */
  if (level > 1)
    {
      flag_branch_target_load_optimize = 1;
      if (!size)
	target_flags |= MASK_SAVE_ALL_TARGET_REGS;
    }
  /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
     here, so leave it to OVERRIDE_OPTIONS to set
     flag_finite_math_only.  We set it to 2 here so we know if the user
     explicitly requested this to be on or off.  */
  flag_finite_math_only = 2;
  /* If flag_schedule_insns is 1, we set it to 2 here so we know if
     the user explicitly requested this to be on or off.  */
  if (flag_schedule_insns > 0)
    flag_schedule_insns = 2;

  set_param_value ("simultaneous-prefetches", 2);
}

/* Implement OVERRIDE_OPTIONS macro.  Validate and override various
   options, and do some machine dependent initialization.  */
void
sh_override_options (void)
{
  int regno;

  SUBTARGET_OVERRIDE_OPTIONS;
  if (flag_finite_math_only == 2)
    flag_finite_math_only
      = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
  if (TARGET_SH2E && !flag_finite_math_only)
    target_flags |= MASK_IEEE;
  sh_cpu = PROCESSOR_SH1;
  assembler_dialect = 0;
  if (TARGET_SH2)
    sh_cpu = PROCESSOR_SH2;
  if (TARGET_SH2E)
    sh_cpu = PROCESSOR_SH2E;
  if (TARGET_SH2A)
    sh_cpu = PROCESSOR_SH2A;
  if (TARGET_SH3)
    sh_cpu = PROCESSOR_SH3;
  if (TARGET_SH3E)
    sh_cpu = PROCESSOR_SH3E;
  if (TARGET_SH4)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4;
    }
  if (TARGET_SH4A_ARCH)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4A;
    }
  if (TARGET_SH5)
    {
      sh_cpu = PROCESSOR_SH5;
      target_flags |= MASK_ALIGN_DOUBLE;
      if (TARGET_SHMEDIA_FPU)
	target_flags |= MASK_FMOVD;
      if (TARGET_SHMEDIA)
	{
	  /* There are no delay slots on SHmedia.  */
	  flag_delayed_branch = 0;
	  /* Relaxation isn't yet supported for SHmedia.  */
	  target_flags &= ~MASK_RELAX;
	  /* After reload, if-conversion does little good but can cause
	     ICEs:
	     - find_if_block doesn't do anything for SH because we don't
	       have conditional execution patterns.  (We use conditional
	       move patterns, which are handled differently, and only
	       before reload).
	     - find_cond_trap doesn't do anything for the SH because we
	       don't have conditional traps.
	     - find_if_case_1 uses redirect_edge_and_branch_force in
	       the only path that does an optimization, and this causes
	       an ICE when branch targets are in registers.
	     - find_if_case_2 doesn't do anything for the SHmedia after
	       reload except when it can redirect a tablejump - and
	       that's rather rare.  */
	  flag_if_conversion2 = 0;
	  if (! strcmp (sh_div_str, "call"))
	    sh_div_strategy = SH_DIV_CALL;
	  else if (! strcmp (sh_div_str, "call2"))
	    sh_div_strategy = SH_DIV_CALL2;
	  if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
	    sh_div_strategy = SH_DIV_FP;
	  else if (! strcmp (sh_div_str, "inv"))
	    sh_div_strategy = SH_DIV_INV;
	  else if (! strcmp (sh_div_str, "inv:minlat"))
	    sh_div_strategy = SH_DIV_INV_MINLAT;
	  else if (! strcmp (sh_div_str, "inv20u"))
	    sh_div_strategy = SH_DIV_INV20U;
	  else if (! strcmp (sh_div_str, "inv20l"))
	    sh_div_strategy = SH_DIV_INV20L;
	  else if (! strcmp (sh_div_str, "inv:call2"))
	    sh_div_strategy = SH_DIV_INV_CALL2;
	  else if (! strcmp (sh_div_str, "inv:call"))
	    sh_div_strategy = SH_DIV_INV_CALL;
	  else if (! strcmp (sh_div_str, "inv:fp"))
	    {
	      if (TARGET_FPU_ANY)
		sh_div_strategy = SH_DIV_INV_FP;
	      else
		sh_div_strategy = SH_DIV_INV;
	    }
	  TARGET_CBRANCHDI4 = 0;
	  /* Assembler CFI isn't yet fully supported for SHmedia.  */
	  flag_dwarf2_cfi_asm = 0;
	}
    }
  else
    {
      /* Only the sh64-elf assembler fully supports .quad properly.  */
      targetm.asm_out.aligned_op.di = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
  if (TARGET_SH1)
    {
      if (! strcmp (sh_div_str, "call-div1"))
	sh_div_strategy = SH_DIV_CALL_DIV1;
      else if (! strcmp (sh_div_str, "call-fp")
	       && (TARGET_FPU_DOUBLE
		   || (TARGET_HARD_SH4 && TARGET_SH2E)
		   || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
	sh_div_strategy = SH_DIV_CALL_FP;
      else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
	sh_div_strategy = SH_DIV_CALL_TABLE;
      else
	/* Pick one that makes most sense for the target in general.
	   It is not much good to use different functions depending
	   on -Os, since then we'll end up with two different functions
	   when some of the code is compiled for size, and some for
	   speed.  */

	/* SH4 tends to emphasize speed.  */
	if (TARGET_HARD_SH4)
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	/* These have their own way of doing things.  */
	else if (TARGET_SH2A)
	  sh_div_strategy = SH_DIV_INTRINSIC;
	/* ??? Should we use the integer SHmedia function instead?  */
	else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
	  sh_div_strategy = SH_DIV_CALL_FP;
	/* SH1 .. SH3 cores often go into small-footprint systems, so
	   default to the smallest implementation available.  */
	else if (TARGET_SH2)	/* ??? EXPERIMENTAL */
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	else
	  sh_div_strategy = SH_DIV_CALL_DIV1;
    }
  if (!TARGET_SH1)
    TARGET_PRETEND_CMOVE = 0;
  if (sh_divsi3_libfunc[0])
    ; /* User supplied - leave it alone.  */
  else if (TARGET_DIVIDE_CALL_FP)
    sh_divsi3_libfunc = "__sdivsi3_i4";
  else if (TARGET_DIVIDE_CALL_TABLE)
    sh_divsi3_libfunc = "__sdivsi3_i4i";
  else if (TARGET_SH5)
    sh_divsi3_libfunc = "__sdivsi3_1";
  else
    sh_divsi3_libfunc = "__sdivsi3";
  if (sh_branch_cost == -1)
    sh_branch_cost
      = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      sh_register_names[regno][0] = '\0';

  for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
    if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
      sh_additional_register_names[regno][0] = '\0';

  if (flag_omit_frame_pointer == 2)
    {
      /* The debugging information is sufficient,
	 but gdb doesn't implement this yet.  */
      if (0)
	flag_omit_frame_pointer
	  = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
      else
	flag_omit_frame_pointer = 0;
    }

  if ((flag_pic && ! TARGET_PREFERGOT)
      || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
    flag_no_function_cse = 1;

  if (SMALL_REGISTER_CLASSES)
    {
      /* Never run scheduling before reload, since that can
	 break global alloc, and generates slower code anyway due
	 to the pressure on R0.  */
      /* Enable sched1 for SH4 if the user explicitly requests.
	 When sched1 is enabled, the ready queue will be reordered by
	 the target hooks if pressure is high.  We cannot do this for
	 PIC, SH3 and lower as they give spill failures for R0.  */
      if (!TARGET_HARD_SH4 || flag_pic)
	flag_schedule_insns = 0;
      /* ??? Current exception handling places basic block boundaries
	 after call_insns.  This causes high pressure on R0 and gives
	 spill failures for R0 in reload.  See PR 22553 and the thread
	 on gcc-patches
	 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
      else if (flag_exceptions)
	{
	  if (flag_schedule_insns == 1)
	    warning (0, "ignoring -fschedule-insns because of exception handling bug");
	  flag_schedule_insns = 0;
	}
      else if (flag_schedule_insns == 2)
	flag_schedule_insns = 0;
    }

  /* Unwinding with -freorder-blocks-and-partition does not work on this
     architecture, because it requires far jumps to labels crossing between
     hot/cold sections, which are rejected on this architecture.  */
  if (flag_reorder_blocks_and_partition)
    {
      if (flag_exceptions)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not work with "
		  "exceptions on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
      else if (flag_unwind_tables)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not support unwind "
		  "info on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
    }

  if (align_loops == 0)
    align_loops = 1 << (TARGET_SH5 ? 3 : 2);
  if (align_jumps == 0)
    align_jumps = 1 << CACHE_LOG;
  else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
    align_jumps = TARGET_SHMEDIA ? 4 : 2;

  /* Allocation boundary (in *bytes*) for the code of a function.
     SH1: 32 bit alignment is faster, because instructions are always
     fetched as a pair from a longword boundary.
     SH2 .. SH5 : align to cache line start.  */
  if (align_functions == 0)
    align_functions
      = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
  /* The linker relaxation code breaks when a function contains
     alignments that are larger than that at the start of a
     compilation unit.  */
  if (TARGET_RELAX)
    {
      int min_align
	= align_loops > align_jumps ? align_loops : align_jumps;

      /* Also take possible .long constants / mova tables into account.  */
      if (min_align < 4)
	min_align = 4;
      if (align_functions < min_align)
	align_functions = min_align;
    }

  if (sh_fixed_range_str)
    sh_fix_range (sh_fixed_range_str);
}

/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}

/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
        otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */

void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * LSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * MSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;

    case 't':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  break;
	default:
	  break;
	}
      break;

    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      if (TARGET_SHMEDIA)
	{
	  if (MEM_P (x)
	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && (REG_P (XEXP (XEXP (x, 0), 1))
		  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	    fputc ('x', stream);
	}
      else
	{
	  if (MEM_P (x))
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;

    case 'm':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'd':
      gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (CONST_INT_P (x))
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (CONST_INT_P (inner))
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& REG_P (SUBREG_REG (inner)))
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
	      abort ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }
	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg;
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && REG_P (SUBREG_REG (x)));

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (REG_P (x)
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	default:
	  if (TARGET_SH1)
	    fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}


/* Encode symbol attributes of a SYMBOL_REF into its
   SYMBOL_REF_FLAGS.  */
static void
sh_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && sh2a_function_vector_p (decl) && TARGET_SH2A)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */

int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
      else
	return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}

/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

int
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (MEM_P (operands[0]))
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1),
	     except that we can't use that function because it is static.  */
	  rtx new_rtx = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
	  operands[0] = new_rtx;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	       && MEM_P (operands[0])
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && REG_P (XEXP (XEXP (operands[0], 0), 1)))
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
	      != TLS_MODEL_NONE))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures for R0.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_use (gen_rtx_REG (SImode, PIC_REG));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }

  return 0;
}
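
/* Added summary comment: canonicalize the operands of a compare-and-branch
   expander.  OPERANDS[1] and OPERANDS[2] are the values to compare; swap
   them and adjust constant operands where that turns the comparison into
   one the SH can do more cheaply, and load operands into registers where
   the comparison patterns require it (using the scratch register in
   OPERANDS[4] when one was supplied).  Return the possibly-modified
   comparison code.  */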
GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
	}
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	{
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
	}
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	{
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);
	}
    }
  op1 = operands[1];
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (!CONST_INT_P (operands[2])
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
	{
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
	}
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}

void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
}

/* ??? How should we distribute probabilities when more than one branch
   is generated.  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
1834 - If items are searched in an ordered tree from the root, we can expect 1835 the highpart to be unequal about half of the time; operation should be 1836 an inequality comparison, operands non-constant, and overall probability 1837 about 50%. Likewise for quicksort. 1838 - Range checks will be often made against constants. Even if we assume for 1839 simplicity an even distribution of the non-constant operand over a 1840 sub-range here, the same probability could be generated with differently 1841 wide sub-ranges - as long as the ratio of the part of the subrange that 1842 is before the threshold to the part that comes after the threshold stays 1843 the same. Thus, we can't really tell anything here; 1844 assuming random distribution is at least simple. 1845 */ 1846 1847 bool 1848 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) 1849 { 1850 enum rtx_code msw_taken, msw_skip, lsw_taken; 1851 rtx skip_label = NULL_RTX; 1852 rtx op1h, op1l, op2h, op2l; 1853 int num_branches; 1854 int prob, rev_prob; 1855 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1; 1856 rtx scratch = operands[4]; 1857 1858 comparison = prepare_cbranch_operands (operands, DImode, comparison); 1859 op1h = gen_highpart_mode (SImode, DImode, operands[1]); 1860 op2h = gen_highpart_mode (SImode, DImode, operands[2]); 1861 op1l = gen_lowpart (SImode, operands[1]); 1862 op2l = gen_lowpart (SImode, operands[2]); 1863 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE; 1864 prob = split_branch_probability; 1865 rev_prob = REG_BR_PROB_BASE - prob; 1866 switch (comparison) 1867 { 1868 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons? 1869 That costs 1 cycle more when the first branch can be predicted taken, 1870 but saves us mispredicts because only one branch needs prediction. 1871 It also enables generating the cmpeqdi_t-1 pattern. */ 1872 case EQ: 1873 if (TARGET_CMPEQDI_T) 1874 { 1875 emit_insn (gen_cmpeqdi_t (operands[1], operands[2])); 1876 emit_jump_insn (gen_branch_true (operands[3])); 1877 return true; 1878 } 1879 msw_skip = NE; 1880 lsw_taken = EQ; 1881 if (prob >= 0) 1882 { 1883 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) . 1884 */ 1885 msw_skip_prob = rev_prob; 1886 if (REG_BR_PROB_BASE <= 65535) 1887 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0; 1888 else 1889 { 1890 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64); 1891 lsw_taken_prob 1892 = (prob 1893 ? (REG_BR_PROB_BASE 1894 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob 1895 / ((HOST_WIDEST_INT) prob << 32))) 1896 : 0); 1897 } 1898 } 1899 break; 1900 case NE: 1901 if (TARGET_CMPEQDI_T) 1902 { 1903 emit_insn (gen_cmpeqdi_t (operands[1], operands[2])); 1904 emit_jump_insn (gen_branch_false (operands[3])); 1905 return true; 1906 } 1907 msw_taken = NE; 1908 msw_taken_prob = prob; 1909 lsw_taken = NE; 1910 lsw_taken_prob = 0; 1911 break; 1912 case GTU: case GT: 1913 msw_taken = comparison; 1914 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1) 1915 break; 1916 if (comparison != GTU || op2h != CONST0_RTX (SImode)) 1917 msw_skip = swap_condition (msw_taken); 1918 lsw_taken = GTU; 1919 break; 1920 case GEU: case GE: 1921 if (op2l == CONST0_RTX (SImode)) 1922 msw_taken = comparison; 1923 else 1924 { 1925 msw_taken = comparison == GE ? 
GT : GTU; 1926 msw_skip = swap_condition (msw_taken); 1927 lsw_taken = GEU; 1928 } 1929 break; 1930 case LTU: case LT: 1931 msw_taken = comparison; 1932 if (op2l == CONST0_RTX (SImode)) 1933 break; 1934 msw_skip = swap_condition (msw_taken); 1935 lsw_taken = LTU; 1936 break; 1937 case LEU: case LE: 1938 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1) 1939 msw_taken = comparison; 1940 else 1941 { 1942 lsw_taken = LEU; 1943 if (comparison == LE) 1944 msw_taken = LT; 1945 else if (op2h != CONST0_RTX (SImode)) 1946 msw_taken = LTU; 1947 else 1948 { 1949 msw_skip = swap_condition (LTU); 1950 break; 1951 } 1952 msw_skip = swap_condition (msw_taken); 1953 } 1954 break; 1955 default: return false; 1956 } 1957 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE) 1958 + (msw_skip != LAST_AND_UNUSED_RTX_CODE) 1959 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE)); 1960 if (comparison != EQ && comparison != NE && num_branches > 1) 1961 { 1962 if (!CONSTANT_P (operands[2]) 1963 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U) 1964 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U)) 1965 { 1966 msw_taken_prob = prob / 2U; 1967 msw_skip_prob 1968 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob); 1969 lsw_taken_prob = prob; 1970 } 1971 else 1972 { 1973 msw_taken_prob = prob; 1974 msw_skip_prob = REG_BR_PROB_BASE; 1975 /* ??? If we have a constant op2h, should we use that when 1976 calculating lsw_taken_prob? */ 1977 lsw_taken_prob = prob; 1978 } 1979 } 1980 operands[1] = op1h; 1981 operands[2] = op2h; 1982 operands[4] = NULL_RTX; 1983 if (reload_completed 1984 && ! arith_reg_or_0_operand (op2h, SImode) 1985 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)) 1986 && (msw_taken != LAST_AND_UNUSED_RTX_CODE 1987 || msw_skip != LAST_AND_UNUSED_RTX_CODE)) 1988 { 1989 emit_move_insn (scratch, operands[2]); 1990 operands[2] = scratch; 1991 } 1992 if (msw_taken != LAST_AND_UNUSED_RTX_CODE) 1993 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob); 1994 if (msw_skip != LAST_AND_UNUSED_RTX_CODE) 1995 { 1996 rtx taken_label = operands[3]; 1997 1998 /* Operands were possibly modified, but msw_skip doesn't expect this. 1999 Always use the original ones. */ 2000 if (msw_taken != LAST_AND_UNUSED_RTX_CODE) 2001 { 2002 operands[1] = op1h; 2003 operands[2] = op2h; 2004 if (reload_completed 2005 && ! arith_reg_or_0_operand (op2h, SImode) 2006 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))) 2007 { 2008 emit_move_insn (scratch, operands[2]); 2009 operands[2] = scratch; 2010 } 2011 } 2012 2013 operands[3] = skip_label = gen_label_rtx (); 2014 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob); 2015 operands[3] = taken_label; 2016 } 2017 operands[1] = op1l; 2018 operands[2] = op2l; 2019 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE) 2020 { 2021 if (reload_completed 2022 && ! arith_reg_or_0_operand (op2l, SImode) 2023 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE))) 2024 { 2025 emit_move_insn (scratch, operands[2]); 2026 operands[2] = scratch; 2027 } 2028 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob); 2029 } 2030 if (msw_skip != LAST_AND_UNUSED_RTX_CODE) 2031 emit_label (skip_label); 2032 return true; 2033 } 2034 2035 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. 
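   The USE makes the insn's dependence on the current FPSCR precision /
   size mode explicit, so that it is not moved across code that changes
   the FPSCR mode.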
*/ 2036 2037 static void 2038 sh_emit_set_t_insn (rtx insn, enum machine_mode mode) 2039 { 2040 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT) 2041 { 2042 insn = gen_rtx_PARALLEL (VOIDmode, 2043 gen_rtvec (2, insn, 2044 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))); 2045 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn); 2046 } 2047 else 2048 emit_insn (insn); 2049 } 2050 2051 /* Prepare the operands for an scc instruction; make sure that the 2052 compare has been done and the result is in T_REG. */ 2053 void 2054 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1) 2055 { 2056 rtx t_reg = gen_rtx_REG (SImode, T_REG); 2057 enum rtx_code oldcode = code; 2058 enum machine_mode mode; 2059 2060 /* First need a compare insn. */ 2061 switch (code) 2062 { 2063 case NE: 2064 /* It isn't possible to handle this case. */ 2065 gcc_unreachable (); 2066 case LT: 2067 code = GT; 2068 break; 2069 case LE: 2070 code = GE; 2071 break; 2072 case LTU: 2073 code = GTU; 2074 break; 2075 case LEU: 2076 code = GEU; 2077 break; 2078 default: 2079 break; 2080 } 2081 if (code != oldcode) 2082 { 2083 rtx tmp = op0; 2084 op0 = op1; 2085 op1 = tmp; 2086 } 2087 2088 mode = GET_MODE (op0); 2089 if (mode == VOIDmode) 2090 mode = GET_MODE (op1); 2091 2092 op0 = force_reg (mode, op0); 2093 if ((code != EQ && code != NE 2094 && (op1 != const0_rtx 2095 || code == GTU || code == GEU || code == LTU || code == LEU)) 2096 || (mode == DImode && op1 != const0_rtx) 2097 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) 2098 op1 = force_reg (mode, op1); 2099 2100 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg, 2101 gen_rtx_fmt_ee (code, SImode, op0, op1)), 2102 mode); 2103 } 2104 2105 rtx 2106 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code, 2107 rtx op0, rtx op1) 2108 { 2109 rtx target = gen_reg_rtx (SImode); 2110 rtx tmp; 2111 2112 gcc_assert (TARGET_SHMEDIA); 2113 switch (code) 2114 { 2115 case EQ: 2116 case GT: 2117 case LT: 2118 case UNORDERED: 2119 case GTU: 2120 case LTU: 2121 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1); 2122 emit_insn (gen_cstore4_media (target, tmp, op0, op1)); 2123 code = NE; 2124 break; 2125 2126 case NE: 2127 case GE: 2128 case LE: 2129 case ORDERED: 2130 case GEU: 2131 case LEU: 2132 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1); 2133 emit_insn (gen_cstore4_media (target, tmp, op0, op1)); 2134 code = EQ; 2135 break; 2136 2137 case UNEQ: 2138 case UNGE: 2139 case UNGT: 2140 case UNLE: 2141 case UNLT: 2142 case LTGT: 2143 return NULL_RTX; 2144 2145 default: 2146 gcc_unreachable (); 2147 } 2148 2149 if (mode == DImode) 2150 { 2151 rtx t2 = gen_reg_rtx (DImode); 2152 emit_insn (gen_extendsidi2 (t2, target)); 2153 target = t2; 2154 } 2155 2156 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx); 2157 } 2158 2159 /* Called from the md file, set up the operands of a compare instruction. */ 2160 2161 void 2162 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode) 2163 { 2164 enum rtx_code code = GET_CODE (operands[0]); 2165 enum rtx_code branch_code; 2166 rtx op0 = operands[1]; 2167 rtx op1 = operands[2]; 2168 rtx insn, tem; 2169 bool need_ccmpeq = false; 2170 2171 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT) 2172 { 2173 op0 = force_reg (mode, op0); 2174 op1 = force_reg (mode, op1); 2175 } 2176 else 2177 { 2178 if (code != EQ || mode == DImode) 2179 { 2180 /* Force args into regs, since we can't use constants here. 
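	       Only cmp/eq has an immediate form, and only against r0;
	       the ordering compares are register-register, apart from
	       cmp/pz and cmp/pl, which compare a register against zero.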
*/ 2181 op0 = force_reg (mode, op0); 2182 if (op1 != const0_rtx || code == GTU || code == GEU) 2183 op1 = force_reg (mode, op1); 2184 } 2185 } 2186 2187 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 2188 { 2189 if (code == LT 2190 || (code == LE && TARGET_IEEE && TARGET_SH2E) 2191 || (code == GE && !(TARGET_IEEE && TARGET_SH2E))) 2192 { 2193 tem = op0, op0 = op1, op1 = tem; 2194 code = swap_condition (code); 2195 } 2196 2197 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */ 2198 if (code == GE) 2199 { 2200 gcc_assert (TARGET_IEEE && TARGET_SH2E); 2201 need_ccmpeq = true; 2202 code = GT; 2203 } 2204 2205 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed 2206 to EQ/GT respectively. */ 2207 gcc_assert (code == EQ || code == GT || code == NE || code == LE); 2208 } 2209 2210 switch (code) 2211 { 2212 case EQ: 2213 case GT: 2214 case GE: 2215 case GTU: 2216 case GEU: 2217 branch_code = code; 2218 break; 2219 case NE: 2220 case LT: 2221 case LE: 2222 case LTU: 2223 case LEU: 2224 branch_code = reverse_condition (code); 2225 break; 2226 default: 2227 gcc_unreachable (); 2228 } 2229 2230 insn = gen_rtx_SET (VOIDmode, 2231 gen_rtx_REG (SImode, T_REG), 2232 gen_rtx_fmt_ee (branch_code, SImode, op0, op1)); 2233 2234 sh_emit_set_t_insn (insn, mode); 2235 if (need_ccmpeq) 2236 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode); 2237 2238 if (branch_code == code) 2239 emit_jump_insn (gen_branch_true (operands[3])); 2240 else 2241 emit_jump_insn (gen_branch_false (operands[3])); 2242 } 2243 2244 void 2245 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode) 2246 { 2247 enum rtx_code code = GET_CODE (operands[1]); 2248 rtx op0 = operands[2]; 2249 rtx op1 = operands[3]; 2250 rtx lab = NULL_RTX; 2251 bool invert = false; 2252 rtx tem; 2253 2254 op0 = force_reg (mode, op0); 2255 if ((code != EQ && code != NE 2256 && (op1 != const0_rtx 2257 || code == GTU || code == GEU || code == LTU || code == LEU)) 2258 || (mode == DImode && op1 != const0_rtx) 2259 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) 2260 op1 = force_reg (mode, op1); 2261 2262 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 2263 { 2264 if (code == LT || code == LE) 2265 { 2266 code = swap_condition (code); 2267 tem = op0, op0 = op1, op1 = tem; 2268 } 2269 if (code == GE) 2270 { 2271 if (TARGET_IEEE) 2272 { 2273 lab = gen_label_rtx (); 2274 sh_emit_scc_to_t (EQ, op0, op1); 2275 emit_jump_insn (gen_branch_true (lab)); 2276 code = GT; 2277 } 2278 else 2279 { 2280 code = LT; 2281 invert = true; 2282 } 2283 } 2284 } 2285 2286 if (code == NE) 2287 { 2288 code = EQ; 2289 invert = true; 2290 } 2291 2292 sh_emit_scc_to_t (code, op0, op1); 2293 if (lab) 2294 emit_label (lab); 2295 if (invert) 2296 emit_insn (gen_movnegt (operands[0])); 2297 else 2298 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG)); 2299 } 2300 2301 /* Functions to output assembly code. */ 2302 2303 /* Return a sequence of instructions to perform DI or DF move. 2304 2305 Since the SH cannot move a DI or DF in one instruction, we have 2306 to take care when we see overlapping source and dest registers. 
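   In the templates below, %R and %S print the least and most significant
   word of a two-word operand, and %T prints the following word (see the
   'R', 'S' and 'T' cases in print_operand).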
*/ 2307 2308 const char * 2309 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[], 2310 enum machine_mode mode) 2311 { 2312 rtx dst = operands[0]; 2313 rtx src = operands[1]; 2314 2315 if (MEM_P (dst) 2316 && GET_CODE (XEXP (dst, 0)) == PRE_DEC) 2317 return "mov.l %T1,%0\n\tmov.l %1,%0"; 2318 2319 if (register_operand (dst, mode) 2320 && register_operand (src, mode)) 2321 { 2322 if (REGNO (src) == MACH_REG) 2323 return "sts mach,%S0\n\tsts macl,%R0"; 2324 2325 /* When mov.d r1,r2 do r2->r3 then r1->r2; 2326 when mov.d r1,r0 do r1->r0 then r2->r1. */ 2327 2328 if (REGNO (src) + 1 == REGNO (dst)) 2329 return "mov %T1,%T0\n\tmov %1,%0"; 2330 else 2331 return "mov %1,%0\n\tmov %T1,%T0"; 2332 } 2333 else if (CONST_INT_P (src)) 2334 { 2335 if (INTVAL (src) < 0) 2336 output_asm_insn ("mov #-1,%S0", operands); 2337 else 2338 output_asm_insn ("mov #0,%S0", operands); 2339 2340 return "mov %1,%R0"; 2341 } 2342 else if (MEM_P (src)) 2343 { 2344 int ptrreg = -1; 2345 int dreg = REGNO (dst); 2346 rtx inside = XEXP (src, 0); 2347 2348 switch (GET_CODE (inside)) 2349 { 2350 case REG: 2351 ptrreg = REGNO (inside); 2352 break; 2353 2354 case SUBREG: 2355 ptrreg = subreg_regno (inside); 2356 break; 2357 2358 case PLUS: 2359 ptrreg = REGNO (XEXP (inside, 0)); 2360 /* ??? A r0+REG address shouldn't be possible here, because it isn't 2361 an offsettable address. Unfortunately, offsettable addresses use 2362 QImode to check the offset, and a QImode offsettable address 2363 requires r0 for the other operand, which is not currently 2364 supported, so we can't use the 'o' constraint. 2365 Thus we must check for and handle r0+REG addresses here. 2366 We punt for now, since this is likely very rare. */ 2367 gcc_assert (!REG_P (XEXP (inside, 1))); 2368 break; 2369 2370 case LABEL_REF: 2371 return "mov.l %1,%0\n\tmov.l %1+4,%T0"; 2372 case POST_INC: 2373 return "mov.l %1,%0\n\tmov.l %1,%T0"; 2374 default: 2375 gcc_unreachable (); 2376 } 2377 2378 /* Work out the safe way to copy. Copy into the second half first. */ 2379 if (dreg == ptrreg) 2380 return "mov.l %T1,%T0\n\tmov.l %1,%0"; 2381 } 2382 2383 return "mov.l %1,%0\n\tmov.l %T1,%T0"; 2384 } 2385 2386 /* Print an instruction which would have gone into a delay slot after 2387 another instruction, but couldn't because the other instruction expanded 2388 into a sequence where putting the slot insn at the end wouldn't work. */ 2389 2390 static void 2391 print_slot (rtx insn) 2392 { 2393 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL); 2394 2395 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1; 2396 } 2397 2398 const char * 2399 output_far_jump (rtx insn, rtx op) 2400 { 2401 struct { rtx lab, reg, op; } this_jmp; 2402 rtx braf_base_lab = NULL_RTX; 2403 const char *jump; 2404 int far; 2405 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); 2406 rtx prev; 2407 2408 this_jmp.lab = gen_label_rtx (); 2409 2410 if (TARGET_SH2 2411 && offset >= -32764 2412 && offset - get_attr_length (insn) <= 32766) 2413 { 2414 far = 0; 2415 jump = "mov.w %O0,%1; braf %1"; 2416 } 2417 else 2418 { 2419 far = 1; 2420 if (flag_pic) 2421 { 2422 if (TARGET_SH2) 2423 jump = "mov.l %O0,%1; braf %1"; 2424 else 2425 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1"; 2426 } 2427 else 2428 jump = "mov.l %O0,%1; jmp @%1"; 2429 } 2430 /* If we have a scratch register available, use it. 
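     The scratch is advertised by an indirect_jump_scratch insn that the
     machine dependent reorg pass places just before the jump; if there is
     none, we have to preserve r13 (or MACL on SH5) around the jump
     ourselves, as the else branch below does.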
*/ 2431 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn))) 2432 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) 2433 { 2434 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0)); 2435 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2) 2436 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1"; 2437 output_asm_insn (jump, &this_jmp.lab); 2438 if (dbr_sequence_length ()) 2439 print_slot (final_sequence); 2440 else 2441 output_asm_insn ("nop", 0); 2442 } 2443 else 2444 { 2445 /* Output the delay slot insn first if any. */ 2446 if (dbr_sequence_length ()) 2447 print_slot (final_sequence); 2448 2449 this_jmp.reg = gen_rtx_REG (SImode, 13); 2450 /* We must keep the stack aligned to 8-byte boundaries on SH5. 2451 Fortunately, MACL is fixed and call-clobbered, and we never 2452 need its value across jumps, so save r13 in it instead of in 2453 the stack. */ 2454 if (TARGET_SH5) 2455 output_asm_insn ("lds r13, macl", 0); 2456 else 2457 output_asm_insn ("mov.l r13,@-r15", 0); 2458 output_asm_insn (jump, &this_jmp.lab); 2459 if (TARGET_SH5) 2460 output_asm_insn ("sts macl, r13", 0); 2461 else 2462 output_asm_insn ("mov.l @r15+,r13", 0); 2463 } 2464 if (far && flag_pic && TARGET_SH2) 2465 { 2466 braf_base_lab = gen_label_rtx (); 2467 (*targetm.asm_out.internal_label) (asm_out_file, "L", 2468 CODE_LABEL_NUMBER (braf_base_lab)); 2469 } 2470 if (far) 2471 output_asm_insn (".align 2", 0); 2472 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab)); 2473 this_jmp.op = op; 2474 if (far && flag_pic) 2475 { 2476 if (TARGET_SH2) 2477 this_jmp.lab = braf_base_lab; 2478 output_asm_insn (".long %O2-%O0", &this_jmp.lab); 2479 } 2480 else 2481 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab); 2482 return ""; 2483 } 2484 2485 /* Local label counter, used for constants in the pool and inside 2486 pattern branches. */ 2487 2488 static int lf = 100; 2489 2490 /* Output code for ordinary branches. */ 2491 2492 const char * 2493 output_branch (int logic, rtx insn, rtx *operands) 2494 { 2495 switch (get_attr_length (insn)) 2496 { 2497 case 6: 2498 /* This can happen if filling the delay slot has caused a forward 2499 branch to exceed its range (we could reverse it, but only 2500 when we know we won't overextend other branches; this should 2501 best be handled by relaxation). 2502 It can also happen when other condbranches hoist delay slot insn 2503 from their destination, thus leading to code size increase. 2504 But the branch will still be in the range -4092..+4098 bytes. */ 2505 2506 if (! TARGET_RELAX) 2507 { 2508 int label = lf++; 2509 /* The call to print_slot will clobber the operands. */ 2510 rtx op0 = operands[0]; 2511 2512 /* If the instruction in the delay slot is annulled (true), then 2513 there is no delay slot where we can put it now. The only safe 2514 place for it is after the label. final will do that by default. */ 2515 2516 if (final_sequence 2517 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)) 2518 && get_attr_length (XVECEXP (final_sequence, 0, 1))) 2519 { 2520 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t", 2521 ASSEMBLER_DIALECT ? "/" : ".", label); 2522 print_slot (final_sequence); 2523 } 2524 else 2525 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? 
"f" : "t", label); 2526 2527 output_asm_insn ("bra\t%l0", &op0); 2528 fprintf (asm_out_file, "\tnop\n"); 2529 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label); 2530 2531 return ""; 2532 } 2533 /* When relaxing, handle this like a short branch. The linker 2534 will fix it up if it still doesn't fit after relaxation. */ 2535 case 2: 2536 return logic ? "bt%.\t%l0" : "bf%.\t%l0"; 2537 2538 /* These are for SH2e, in which we have to account for the 2539 extra nop because of the hardware bug in annulled branches. */ 2540 case 8: 2541 if (! TARGET_RELAX) 2542 { 2543 int label = lf++; 2544 2545 gcc_assert (!final_sequence 2546 || !(INSN_ANNULLED_BRANCH_P 2547 (XVECEXP (final_sequence, 0, 0)))); 2548 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n", 2549 logic ? "f" : "t", 2550 ASSEMBLER_DIALECT ? "/" : ".", label); 2551 fprintf (asm_out_file, "\tnop\n"); 2552 output_asm_insn ("bra\t%l0", operands); 2553 fprintf (asm_out_file, "\tnop\n"); 2554 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label); 2555 2556 return ""; 2557 } 2558 /* When relaxing, fall through. */ 2559 case 4: 2560 { 2561 char buffer[10]; 2562 2563 sprintf (buffer, "b%s%ss\t%%l0", 2564 logic ? "t" : "f", 2565 ASSEMBLER_DIALECT ? "/" : "."); 2566 output_asm_insn (buffer, &operands[0]); 2567 return "nop"; 2568 } 2569 2570 default: 2571 /* There should be no longer branches now - that would 2572 indicate that something has destroyed the branches set 2573 up in machine_dependent_reorg. */ 2574 gcc_unreachable (); 2575 } 2576 } 2577 2578 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before, 2579 fill in operands 9 as a label to the successor insn. 2580 We try to use jump threading where possible. 2581 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump, 2582 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means 2583 follow jmp and bt, if the address is in range. */ 2584 const char * 2585 output_branchy_insn (enum rtx_code code, const char *templ, 2586 rtx insn, rtx *operands) 2587 { 2588 rtx next_insn = NEXT_INSN (insn); 2589 2590 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn)) 2591 { 2592 rtx src = SET_SRC (PATTERN (next_insn)); 2593 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code) 2594 { 2595 /* Following branch not taken */ 2596 operands[9] = gen_label_rtx (); 2597 emit_label_after (operands[9], next_insn); 2598 INSN_ADDRESSES_NEW (operands[9], 2599 INSN_ADDRESSES (INSN_UID (next_insn)) 2600 + get_attr_length (next_insn)); 2601 return templ; 2602 } 2603 else 2604 { 2605 int offset = (branch_dest (next_insn) 2606 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4); 2607 if (offset >= -252 && offset <= 258) 2608 { 2609 if (GET_CODE (src) == IF_THEN_ELSE) 2610 /* branch_true */ 2611 src = XEXP (src, 1); 2612 operands[9] = src; 2613 return templ; 2614 } 2615 } 2616 } 2617 operands[9] = gen_label_rtx (); 2618 emit_label_after (operands[9], insn); 2619 INSN_ADDRESSES_NEW (operands[9], 2620 INSN_ADDRESSES (INSN_UID (insn)) 2621 + get_attr_length (insn)); 2622 return templ; 2623 } 2624 2625 const char * 2626 output_ieee_ccmpeq (rtx insn, rtx *operands) 2627 { 2628 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0", 2629 insn, operands); 2630 } 2631 2632 /* Output the start of the assembler file. */ 2633 2634 static void 2635 sh_file_start (void) 2636 { 2637 default_file_start (); 2638 2639 #ifdef SYMBIAN 2640 /* Declare the .directive section before it is used. 
*/ 2641 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file); 2642 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file); 2643 #endif 2644 2645 if (TARGET_ELF) 2646 /* We need to show the text section with the proper 2647 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out 2648 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS 2649 will complain. We can teach GAS specifically about the 2650 default attributes for our choice of text section, but 2651 then we would have to change GAS again if/when we change 2652 the text section name. */ 2653 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP); 2654 else 2655 /* Switch to the data section so that the coffsem symbol 2656 isn't in the text section. */ 2657 switch_to_section (data_section); 2658 2659 if (TARGET_LITTLE_ENDIAN) 2660 fputs ("\t.little\n", asm_out_file); 2661 2662 if (!TARGET_ELF) 2663 { 2664 if (TARGET_SHCOMPACT) 2665 fputs ("\t.mode\tSHcompact\n", asm_out_file); 2666 else if (TARGET_SHMEDIA) 2667 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n", 2668 TARGET_SHMEDIA64 ? 64 : 32); 2669 } 2670 } 2671 2672 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */ 2673 2674 static bool 2675 unspec_caller_rtx_p (rtx pat) 2676 { 2677 rtx base, offset; 2678 int i; 2679 2680 split_const (pat, &base, &offset); 2681 if (GET_CODE (base) == UNSPEC) 2682 { 2683 if (XINT (base, 1) == UNSPEC_CALLER) 2684 return true; 2685 for (i = 0; i < XVECLEN (base, 0); i++) 2686 if (unspec_caller_rtx_p (XVECEXP (base, 0, i))) 2687 return true; 2688 } 2689 return false; 2690 } 2691 2692 /* Indicate that INSN cannot be duplicated. This is true for insn 2693 that generates a unique label. */ 2694 2695 static bool 2696 sh_cannot_copy_insn_p (rtx insn) 2697 { 2698 rtx pat; 2699 2700 if (!reload_completed || !flag_pic) 2701 return false; 2702 2703 if (!NONJUMP_INSN_P (insn)) 2704 return false; 2705 if (asm_noperands (insn) >= 0) 2706 return false; 2707 2708 pat = PATTERN (insn); 2709 if (GET_CODE (pat) != SET) 2710 return false; 2711 pat = SET_SRC (pat); 2712 2713 if (unspec_caller_rtx_p (pat)) 2714 return true; 2715 2716 return false; 2717 } 2718 2719 /* Actual number of instructions used to make a shift by N. */ 2720 static const char ashiftrt_insns[] = 2721 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2}; 2722 2723 /* Left shift and logical right shift are the same. */ 2724 static const char shift_insns[] = 2725 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; 2726 2727 /* Individual shift amounts needed to get the above length sequences. 2728 One bit right shifts clobber the T bit, so when possible, put one bit 2729 shifts in the middle of the sequence, so the ends are eligible for 2730 branch delay slots. */ 2731 static const short shift_amounts[32][5] = { 2732 {0}, {1}, {2}, {2, 1}, 2733 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2}, 2734 {8}, {8, 1}, {8, 2}, {8, 1, 2}, 2735 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8}, 2736 {16}, {16, 1}, {16, 2}, {16, 1, 2}, 2737 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, 2738 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, 2739 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; 2740 2741 /* Likewise, but for shift amounts < 16, up to three highmost bits 2742 might be clobbered. This is typically used when combined with some 2743 kind of sign or zero extension. 
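   For example, a left shift by 6 can be done as
	shll8	rn
	shlr2	rn
   (the {8, -2} entry below): two insns instead of the three shll2's a
   plain shift sequence needs, at the cost of clearing the two highest
   bits of the result instead of shifting the corresponding source bits
   into them.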
*/ 2744 2745 static const char ext_shift_insns[] = 2746 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; 2747 2748 static const short ext_shift_amounts[32][4] = { 2749 {0}, {1}, {2}, {2, 1}, 2750 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1}, 2751 {8}, {8, 1}, {8, 2}, {8, 1, 2}, 2752 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1}, 2753 {16}, {16, 1}, {16, 2}, {16, 1, 2}, 2754 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, 2755 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, 2756 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; 2757 2758 /* Assuming we have a value that has been sign-extended by at least one bit, 2759 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift 2760 to shift it by N without data loss, and quicker than by other means? */ 2761 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15) 2762 2763 /* This is used in length attributes in sh.md to help compute the length 2764 of arbitrary constant shift instructions. */ 2765 2766 int 2767 shift_insns_rtx (rtx insn) 2768 { 2769 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); 2770 int shift_count = INTVAL (XEXP (set_src, 1)) & 31; 2771 enum rtx_code shift_code = GET_CODE (set_src); 2772 2773 switch (shift_code) 2774 { 2775 case ASHIFTRT: 2776 return ashiftrt_insns[shift_count]; 2777 case LSHIFTRT: 2778 case ASHIFT: 2779 return shift_insns[shift_count]; 2780 default: 2781 gcc_unreachable (); 2782 } 2783 } 2784 2785 /* Return the cost of a shift. */ 2786 2787 static inline int 2788 shiftcosts (rtx x) 2789 { 2790 int value; 2791 2792 if (TARGET_SHMEDIA) 2793 return 1; 2794 2795 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) 2796 { 2797 if (GET_MODE (x) == DImode 2798 && CONST_INT_P (XEXP (x, 1)) 2799 && INTVAL (XEXP (x, 1)) == 1) 2800 return 2; 2801 2802 /* Everything else is invalid, because there is no pattern for it. */ 2803 return MAX_COST; 2804 } 2805 /* If shift by a non constant, then this will be expensive. */ 2806 if (!CONST_INT_P (XEXP (x, 1))) 2807 return SH_DYNAMIC_SHIFT_COST; 2808 2809 /* Otherwise, return the true cost in instructions. Cope with out of range 2810 shift counts more or less arbitrarily. */ 2811 value = INTVAL (XEXP (x, 1)) & 31; 2812 2813 if (GET_CODE (x) == ASHIFTRT) 2814 { 2815 int cost = ashiftrt_insns[value]; 2816 /* If SH3, then we put the constant in a reg and use shad. */ 2817 if (cost > 1 + SH_DYNAMIC_SHIFT_COST) 2818 cost = 1 + SH_DYNAMIC_SHIFT_COST; 2819 return cost; 2820 } 2821 else 2822 return shift_insns[value]; 2823 } 2824 2825 /* Return the cost of an AND operation. */ 2826 2827 static inline int 2828 andcosts (rtx x) 2829 { 2830 int i; 2831 2832 /* Anding with a register is a single cycle and instruction. */ 2833 if (!CONST_INT_P (XEXP (x, 1))) 2834 return 1; 2835 2836 i = INTVAL (XEXP (x, 1)); 2837 2838 if (TARGET_SHMEDIA) 2839 { 2840 if (satisfies_constraint_I10 (XEXP (x, 1)) 2841 || satisfies_constraint_J16 (XEXP (x, 1))) 2842 return 1; 2843 else 2844 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size); 2845 } 2846 2847 /* These constants are single cycle extu.[bw] instructions. */ 2848 if (i == 0xff || i == 0xffff) 2849 return 1; 2850 /* Constants that can be used in an and immediate instruction in a single 2851 cycle, but this requires r0, so make it a little more expensive. */ 2852 if (CONST_OK_FOR_K08 (i)) 2853 return 2; 2854 /* Constants that can be loaded with a mov immediate and an and. 2855 This case is probably unnecessary. 
*/ 2856 if (CONST_OK_FOR_I08 (i)) 2857 return 2; 2858 /* Any other constants requires a 2 cycle pc-relative load plus an and. 2859 This case is probably unnecessary. */ 2860 return 3; 2861 } 2862 2863 /* Return the cost of an addition or a subtraction. */ 2864 2865 static inline int 2866 addsubcosts (rtx x) 2867 { 2868 /* Adding a register is a single cycle insn. */ 2869 if (REG_P (XEXP (x, 1)) 2870 || GET_CODE (XEXP (x, 1)) == SUBREG) 2871 return 1; 2872 2873 /* Likewise for small constants. */ 2874 if (CONST_INT_P (XEXP (x, 1)) 2875 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1)))) 2876 return 1; 2877 2878 if (TARGET_SHMEDIA) 2879 switch (GET_CODE (XEXP (x, 1))) 2880 { 2881 case CONST: 2882 case LABEL_REF: 2883 case SYMBOL_REF: 2884 return TARGET_SHMEDIA64 ? 5 : 3; 2885 2886 case CONST_INT: 2887 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)))) 2888 return 2; 2889 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16)) 2890 return 3; 2891 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16)) 2892 return 4; 2893 2894 /* Fall through. */ 2895 default: 2896 return 5; 2897 } 2898 2899 /* Any other constant requires a 2 cycle pc-relative load plus an 2900 addition. */ 2901 return 3; 2902 } 2903 2904 /* Return the cost of a multiply. */ 2905 static inline int 2906 multcosts (rtx x ATTRIBUTE_UNUSED) 2907 { 2908 if (sh_multcost >= 0) 2909 return sh_multcost; 2910 if (TARGET_SHMEDIA) 2911 /* ??? We have a mul insn, but it has a latency of three, and doesn't 2912 accept constants. Ideally, we would use a cost of one or two and 2913 add the cost of the operand, but disregard the latter when inside loops 2914 and loop invariant code motion is still to follow. 2915 Using a multiply first and splitting it later if it's a loss 2916 doesn't work because of different sign / zero extension semantics 2917 of multiplies vs. shifts. */ 2918 return TARGET_SMALLCODE ? 2 : 3; 2919 2920 if (TARGET_SH2) 2921 { 2922 /* We have a mul insn, so we can never take more than the mul and the 2923 read of the mac reg, but count more because of the latency and extra 2924 reg usage. */ 2925 if (TARGET_SMALLCODE) 2926 return 2; 2927 return 3; 2928 } 2929 2930 /* If we're aiming at small code, then just count the number of 2931 insns in a multiply call sequence. */ 2932 if (TARGET_SMALLCODE) 2933 return 5; 2934 2935 /* Otherwise count all the insns in the routine we'd be calling too. */ 2936 return 20; 2937 } 2938 2939 /* Compute a (partial) cost for rtx X. Return true if the complete 2940 cost has been computed, and false if subexpressions should be 2941 scanned. In either case, *TOTAL contains the cost result. 
*/ 2942 2943 static bool 2944 sh_rtx_costs (rtx x, int code, int outer_code, int *total, 2945 bool speed ATTRIBUTE_UNUSED) 2946 { 2947 switch (code) 2948 { 2949 case CONST_INT: 2950 if (TARGET_SHMEDIA) 2951 { 2952 if (INTVAL (x) == 0) 2953 *total = 0; 2954 else if (outer_code == AND && and_operand ((x), DImode)) 2955 *total = 0; 2956 else if ((outer_code == IOR || outer_code == XOR 2957 || outer_code == PLUS) 2958 && CONST_OK_FOR_I10 (INTVAL (x))) 2959 *total = 0; 2960 else if (CONST_OK_FOR_I16 (INTVAL (x))) 2961 *total = COSTS_N_INSNS (outer_code != SET); 2962 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16)) 2963 *total = COSTS_N_INSNS ((outer_code != SET) + 1); 2964 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16)) 2965 *total = COSTS_N_INSNS ((outer_code != SET) + 2); 2966 else 2967 *total = COSTS_N_INSNS ((outer_code != SET) + 3); 2968 return true; 2969 } 2970 if (CONST_OK_FOR_I08 (INTVAL (x))) 2971 *total = 0; 2972 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR) 2973 && CONST_OK_FOR_K08 (INTVAL (x))) 2974 *total = 1; 2975 /* prepare_cmp_insn will force costly constants int registers before 2976 the cbranch[sd]i4 patterns can see them, so preserve potentially 2977 interesting ones not covered by I08 above. */ 2978 else if (outer_code == COMPARE 2979 && ((unsigned HOST_WIDE_INT) INTVAL (x) 2980 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1 2981 || INTVAL (x) == 0x7fffffff 2982 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81)) 2983 *total = 1; 2984 else 2985 *total = 8; 2986 return true; 2987 2988 case CONST: 2989 case LABEL_REF: 2990 case SYMBOL_REF: 2991 if (TARGET_SHMEDIA64) 2992 *total = COSTS_N_INSNS (4); 2993 else if (TARGET_SHMEDIA32) 2994 *total = COSTS_N_INSNS (2); 2995 else 2996 *total = 5; 2997 return true; 2998 2999 case CONST_DOUBLE: 3000 if (TARGET_SHMEDIA) 3001 *total = COSTS_N_INSNS (4); 3002 /* prepare_cmp_insn will force costly constants int registers before 3003 the cbranchdi4 pattern can see them, so preserve potentially 3004 interesting ones. */ 3005 else if (outer_code == COMPARE && GET_MODE (x) == DImode) 3006 *total = 1; 3007 else 3008 *total = 10; 3009 return true; 3010 case CONST_VECTOR: 3011 if (x == CONST0_RTX (GET_MODE (x))) 3012 *total = 0; 3013 else if (sh_1el_vec (x, VOIDmode)) 3014 *total = outer_code != SET; 3015 if (sh_rep_vec (x, VOIDmode)) 3016 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4 3017 + (outer_code != SET)); 3018 *total = COSTS_N_INSNS (3) + (outer_code != SET); 3019 return true; 3020 3021 case PLUS: 3022 case MINUS: 3023 *total = COSTS_N_INSNS (addsubcosts (x)); 3024 return true; 3025 3026 case AND: 3027 *total = COSTS_N_INSNS (andcosts (x)); 3028 return true; 3029 3030 case MULT: 3031 *total = COSTS_N_INSNS (multcosts (x)); 3032 return true; 3033 3034 case ASHIFT: 3035 case ASHIFTRT: 3036 case LSHIFTRT: 3037 *total = COSTS_N_INSNS (shiftcosts (x)); 3038 return true; 3039 3040 case DIV: 3041 case UDIV: 3042 case MOD: 3043 case UMOD: 3044 *total = COSTS_N_INSNS (20); 3045 return true; 3046 3047 case PARALLEL: 3048 if (sh_1el_vec (x, VOIDmode)) 3049 *total = outer_code != SET; 3050 if (sh_rep_vec (x, VOIDmode)) 3051 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4 3052 + (outer_code != SET)); 3053 *total = COSTS_N_INSNS (3) + (outer_code != SET); 3054 return true; 3055 3056 case FLOAT: 3057 case FIX: 3058 *total = 100; 3059 return true; 3060 3061 default: 3062 return false; 3063 } 3064 } 3065 3066 /* Compute the cost of an address. For the SH, all valid addresses are 3067 the same cost. 
Use a slightly higher cost for reg + reg addressing, 3068 since it increases pressure on r0. */ 3069 3070 static int 3071 sh_address_cost (rtx X, 3072 bool speed ATTRIBUTE_UNUSED) 3073 { 3074 return (GET_CODE (X) == PLUS 3075 && ! CONSTANT_P (XEXP (X, 1)) 3076 && ! TARGET_SHMEDIA ? 1 : 0); 3077 } 3078 3079 /* Code to expand a shift. */ 3080 3081 void 3082 gen_ashift (int type, int n, rtx reg) 3083 { 3084 /* Negative values here come from the shift_amounts array. */ 3085 if (n < 0) 3086 { 3087 if (type == ASHIFT) 3088 type = LSHIFTRT; 3089 else 3090 type = ASHIFT; 3091 n = -n; 3092 } 3093 3094 switch (type) 3095 { 3096 case ASHIFTRT: 3097 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n))); 3098 break; 3099 case LSHIFTRT: 3100 if (n == 1) 3101 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n))); 3102 else 3103 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n))); 3104 break; 3105 case ASHIFT: 3106 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n))); 3107 break; 3108 } 3109 } 3110 3111 /* Same for HImode */ 3112 3113 void 3114 gen_ashift_hi (int type, int n, rtx reg) 3115 { 3116 /* Negative values here come from the shift_amounts array. */ 3117 if (n < 0) 3118 { 3119 if (type == ASHIFT) 3120 type = LSHIFTRT; 3121 else 3122 type = ASHIFT; 3123 n = -n; 3124 } 3125 3126 switch (type) 3127 { 3128 case ASHIFTRT: 3129 case LSHIFTRT: 3130 /* We don't have HImode right shift operations because using the 3131 ordinary 32 bit shift instructions for that doesn't generate proper 3132 zero/sign extension. 3133 gen_ashift_hi is only called in contexts where we know that the 3134 sign extension works out correctly. */ 3135 { 3136 int offset = 0; 3137 if (GET_CODE (reg) == SUBREG) 3138 { 3139 offset = SUBREG_BYTE (reg); 3140 reg = SUBREG_REG (reg); 3141 } 3142 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset)); 3143 break; 3144 } 3145 case ASHIFT: 3146 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n))); 3147 break; 3148 } 3149 } 3150 3151 /* Output RTL to split a constant shift into its component SH constant 3152 shift instructions. */ 3153 3154 void 3155 gen_shifty_op (int code, rtx *operands) 3156 { 3157 int value = INTVAL (operands[2]); 3158 int max, i; 3159 3160 /* Truncate the shift count in case it is out of bounds. */ 3161 value = value & 31; 3162 3163 if (value == 31) 3164 { 3165 if (code == LSHIFTRT) 3166 { 3167 emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); 3168 emit_insn (gen_movt (operands[0])); 3169 return; 3170 } 3171 else if (code == ASHIFT) 3172 { 3173 /* There is a two instruction sequence for 31 bit left shifts, 3174 but it requires r0. */ 3175 if (REG_P (operands[0]) && REGNO (operands[0]) == 0) 3176 { 3177 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); 3178 emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); 3179 return; 3180 } 3181 } 3182 } 3183 else if (value == 0) 3184 { 3185 /* This can happen even when optimizing, if there were subregs before 3186 reload. Don't output a nop here, as this is never optimized away; 3187 use a no-op move instead. */ 3188 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0])); 3189 return; 3190 } 3191 3192 max = shift_insns[value]; 3193 for (i = 0; i < max; i++) 3194 gen_ashift (code, shift_amounts[value][i], operands[0]); 3195 } 3196 3197 /* Same as above, but optimized for values where the topmost bits don't 3198 matter. 
*/

void
gen_shifty_hi_op (int code, rtx *operands)
{
  int value = INTVAL (operands[2]);
  int max, i;
  void (*gen_fun) (int, int, rtx);

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */
  value &= 31;
  if (value == 0)
    {
      emit_insn (gen_nop ());
      return;
    }

  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
  if (code == ASHIFT)
    {
      max = ext_shift_insns[value];
      for (i = 0; i < max; i++)
	gen_fun (code, ext_shift_amounts[value][i], operands[0]);
    }
  else
    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_shift_insns[value] - 1; i >= 0; i--)
      gen_fun (code, ext_shift_amounts[value][i], operands[0]);
}

/* Output RTL for an arithmetic right shift.  */

/* ??? Rewrite to use super-optimizer sequences.  */

int
expand_ashiftrt (rtx *operands)
{
  rtx wrk;
  char func[18];
  int value;

  if (TARGET_SH3)
    {
      if (!CONST_INT_P (operands[2]))
	{
	  rtx count = copy_to_mode_reg (SImode, operands[2]);
	  emit_insn (gen_negsi2 (count, count));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return 1;
	}
      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
	       > 1 + SH_DYNAMIC_SHIFT_COST)
	{
	  rtx count
	    = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return 1;
	}
    }
  if (!CONST_INT_P (operands[2]))
    return 0;

  value = INTVAL (operands[2]) & 31;

  if (value == 31)
    {
      /* If we are called from abs expansion, arrange things so that we
	 can use a single MT instruction that doesn't clobber the source,
	 if LICM can hoist out the load of the constant zero.  */
      if (currently_expanding_to_rtl)
	{
	  emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
				    operands[1]));
	  emit_insn (gen_mov_neg_si_t (operands[0]));
	  return 1;
	}
      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
      return 1;
    }
  else if (value >= 16 && value <= 19)
    {
      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
      value -= 16;
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }
  /* Expand a short sequence inline; for longer ones, call a magic routine.  */
  else if (value <= 5)
    {
      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.
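     The helpers are the __ashiftrt_r4_<n> routines from the runtime
     library; they take the value in r4 and leave the result in r4.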
*/ 3303 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); 3304 sprintf (func, "__ashiftrt_r4_%d", value); 3305 function_symbol (wrk, func, SFUNC_STATIC); 3306 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk)); 3307 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4)); 3308 return 1; 3309 } 3310 3311 int 3312 sh_dynamicalize_shift_p (rtx count) 3313 { 3314 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST; 3315 } 3316 3317 /* Try to find a good way to implement the combiner pattern 3318 [(set (match_operand:SI 0 "register_operand" "r") 3319 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") 3320 (match_operand:SI 2 "const_int_operand" "n")) 3321 (match_operand:SI 3 "const_int_operand" "n"))) . 3322 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3. 3323 return 0 for simple right / left or left/right shift combination. 3324 return 1 for a combination of shifts with zero_extend. 3325 return 2 for a combination of shifts with an AND that needs r0. 3326 return 3 for a combination of shifts with an AND that needs an extra 3327 scratch register, when the three highmost bits of the AND mask are clear. 3328 return 4 for a combination of shifts with an AND that needs an extra 3329 scratch register, when any of the three highmost bits of the AND mask 3330 is set. 3331 If ATTRP is set, store an initial right shift width in ATTRP[0], 3332 and the instruction length in ATTRP[1] . These values are not valid 3333 when returning 0. 3334 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into 3335 shift_amounts for the last shift value that is to be used before the 3336 sign extend. */ 3337 int 3338 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp) 3339 { 3340 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2; 3341 int left = INTVAL (left_rtx), right; 3342 int best = 0; 3343 int cost, best_cost = 10000; 3344 int best_right = 0, best_len = 0; 3345 int i; 3346 int can_ext; 3347 3348 if (left < 0 || left > 31) 3349 return 0; 3350 if (CONST_INT_P (mask_rtx)) 3351 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left; 3352 else 3353 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left; 3354 /* Can this be expressed as a right shift / left shift pair? */ 3355 lsb = ((mask ^ (mask - 1)) >> 1) + 1; 3356 right = exact_log2 (lsb); 3357 mask2 = ~(mask + lsb - 1); 3358 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1; 3359 /* mask has no zeroes but trailing zeroes <==> ! mask2 */ 3360 if (! mask2) 3361 best_cost = shift_insns[right] + shift_insns[right + left]; 3362 /* mask has no trailing zeroes <==> ! right */ 3363 else if (! right && mask2 == ~(lsb2 - 1)) 3364 { 3365 int late_right = exact_log2 (lsb2); 3366 best_cost = shift_insns[left + late_right] + shift_insns[late_right]; 3367 } 3368 /* Try to use zero extend. */ 3369 if (mask2 == ~(lsb2 - 1)) 3370 { 3371 int width, first; 3372 3373 for (width = 8; width <= 16; width += 8) 3374 { 3375 /* Can we zero-extend right away? */ 3376 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width) 3377 { 3378 cost 3379 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right]; 3380 if (cost < best_cost) 3381 { 3382 best = 1; 3383 best_cost = cost; 3384 best_right = right; 3385 best_len = cost; 3386 if (attrp) 3387 attrp[2] = -1; 3388 } 3389 continue; 3390 } 3391 /* ??? Could try to put zero extend into initial right shift, 3392 or even shift a bit left before the right shift. */ 3393 /* Determine value of first part of left shift, to get to the 3394 zero extend cut-off point. 
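	     I.e. choose FIRST so that, after shifting right by RIGHT and
	     left by FIRST, the highest mask bit sits at bit WIDTH - 1,
	     just below where the zero extension cuts.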
*/ 3395 first = width - exact_log2 (lsb2) + right; 3396 if (first >= 0 && right + left - first >= 0) 3397 { 3398 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1 3399 + ext_shift_insns[right + left - first]; 3400 if (cost < best_cost) 3401 { 3402 best = 1; 3403 best_cost = cost; 3404 best_right = right; 3405 best_len = cost; 3406 if (attrp) 3407 attrp[2] = first; 3408 } 3409 } 3410 } 3411 } 3412 /* Try to use r0 AND pattern */ 3413 for (i = 0; i <= 2; i++) 3414 { 3415 if (i > right) 3416 break; 3417 if (! CONST_OK_FOR_K08 (mask >> i)) 3418 continue; 3419 cost = (i != 0) + 2 + ext_shift_insns[left + i]; 3420 if (cost < best_cost) 3421 { 3422 best = 2; 3423 best_cost = cost; 3424 best_right = i; 3425 best_len = cost - 1; 3426 } 3427 } 3428 /* Try to use a scratch register to hold the AND operand. */ 3429 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0; 3430 for (i = 0; i <= 2; i++) 3431 { 3432 if (i > right) 3433 break; 3434 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3) 3435 + (can_ext ? ext_shift_insns : shift_insns)[left + i]; 3436 if (cost < best_cost) 3437 { 3438 best = 4 - can_ext; 3439 best_cost = cost; 3440 best_right = i; 3441 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i); 3442 } 3443 } 3444 3445 if (attrp) 3446 { 3447 attrp[0] = best_right; 3448 attrp[1] = best_len; 3449 } 3450 return best; 3451 } 3452 3453 /* This is used in length attributes of the unnamed instructions 3454 corresponding to shl_and_kind return values of 1 and 2. */ 3455 int 3456 shl_and_length (rtx insn) 3457 { 3458 rtx set_src, left_rtx, mask_rtx; 3459 int attributes[3]; 3460 3461 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); 3462 left_rtx = XEXP (XEXP (set_src, 0), 1); 3463 mask_rtx = XEXP (set_src, 1); 3464 shl_and_kind (left_rtx, mask_rtx, attributes); 3465 return attributes[1]; 3466 } 3467 3468 /* This is used in length attribute of the and_shl_scratch instruction. */ 3469 3470 int 3471 shl_and_scr_length (rtx insn) 3472 { 3473 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); 3474 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31]; 3475 rtx op = XEXP (set_src, 0); 3476 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1; 3477 op = XEXP (XEXP (op, 0), 0); 3478 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31]; 3479 } 3480 3481 /* Generate rtl for instructions for which shl_and_kind advised a particular 3482 method of generating them, i.e. returned zero. */ 3483 3484 int 3485 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source) 3486 { 3487 int attributes[3]; 3488 unsigned HOST_WIDE_INT mask; 3489 int kind = shl_and_kind (left_rtx, mask_rtx, attributes); 3490 int right, total_shift; 3491 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op; 3492 3493 right = attributes[0]; 3494 total_shift = INTVAL (left_rtx) + right; 3495 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift; 3496 switch (kind) 3497 { 3498 default: 3499 return -1; 3500 case 1: 3501 { 3502 int first = attributes[2]; 3503 rtx operands[3]; 3504 3505 if (first < 0) 3506 { 3507 emit_insn ((mask << right) <= 0xff 3508 ? 
gen_zero_extendqisi2 (dest, 3509 gen_lowpart (QImode, source)) 3510 : gen_zero_extendhisi2 (dest, 3511 gen_lowpart (HImode, source))); 3512 source = dest; 3513 } 3514 if (source != dest) 3515 emit_insn (gen_movsi (dest, source)); 3516 operands[0] = dest; 3517 if (right) 3518 { 3519 operands[2] = GEN_INT (right); 3520 gen_shifty_hi_op (LSHIFTRT, operands); 3521 } 3522 if (first > 0) 3523 { 3524 operands[2] = GEN_INT (first); 3525 gen_shifty_hi_op (ASHIFT, operands); 3526 total_shift -= first; 3527 mask <<= first; 3528 } 3529 if (first >= 0) 3530 emit_insn (mask <= 0xff 3531 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest)) 3532 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest))); 3533 if (total_shift > 0) 3534 { 3535 operands[2] = GEN_INT (total_shift); 3536 gen_shifty_hi_op (ASHIFT, operands); 3537 } 3538 break; 3539 } 3540 case 4: 3541 shift_gen_fun = gen_shifty_op; 3542 case 3: 3543 /* If the topmost bit that matters is set, set the topmost bits 3544 that don't matter. This way, we might be able to get a shorter 3545 signed constant. */ 3546 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift))) 3547 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift); 3548 case 2: 3549 /* Don't expand fine-grained when combining, because that will 3550 make the pattern fail. */ 3551 if (currently_expanding_to_rtl 3552 || reload_in_progress || reload_completed) 3553 { 3554 rtx operands[3]; 3555 3556 /* Cases 3 and 4 should be handled by this split 3557 only while combining */ 3558 gcc_assert (kind <= 2); 3559 if (right) 3560 { 3561 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right))); 3562 source = dest; 3563 } 3564 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask))); 3565 if (total_shift) 3566 { 3567 operands[0] = dest; 3568 operands[1] = dest; 3569 operands[2] = GEN_INT (total_shift); 3570 shift_gen_fun (ASHIFT, operands); 3571 } 3572 break; 3573 } 3574 else 3575 { 3576 int neg = 0; 3577 if (kind != 4 && total_shift < 16) 3578 { 3579 neg = -ext_shift_amounts[total_shift][1]; 3580 if (neg > 0) 3581 neg -= ext_shift_amounts[total_shift][2]; 3582 else 3583 neg = 0; 3584 } 3585 emit_insn (gen_and_shl_scratch (dest, source, 3586 GEN_INT (right), 3587 GEN_INT (mask), 3588 GEN_INT (total_shift + neg), 3589 GEN_INT (neg))); 3590 emit_insn (gen_movsi (dest, dest)); 3591 break; 3592 } 3593 } 3594 return 0; 3595 } 3596 3597 /* Try to find a good way to implement the combiner pattern 3598 [(set (match_operand:SI 0 "register_operand" "=r") 3599 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") 3600 (match_operand:SI 2 "const_int_operand" "n") 3601 (match_operand:SI 3 "const_int_operand" "n") 3602 (const_int 0))) 3603 (clobber (reg:SI T_REG))] 3604 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3. 3605 return 0 for simple left / right shift combination. 3606 return 1 for left shift / 8 bit sign extend / left shift. 3607 return 2 for left shift / 16 bit sign extend / left shift. 3608 return 3 for left shift / 8 bit sign extend / shift / sign extend. 3609 return 4 for left shift / 16 bit sign extend / shift / sign extend. 3610 return 5 for left shift / 16 bit sign extend / right shift 3611 return 6 for < 8 bit sign extend / left shift. 3612 return 7 for < 8 bit sign extend / left shift / single right shift. 3613 If COSTP is nonzero, assign the calculated cost to *COSTP. 
*/ 3614 3615 int 3616 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp) 3617 { 3618 int left, size, insize, ext; 3619 int cost = 0, best_cost; 3620 int kind; 3621 3622 left = INTVAL (left_rtx); 3623 size = INTVAL (size_rtx); 3624 insize = size - left; 3625 gcc_assert (insize > 0); 3626 /* Default to left / right shift. */ 3627 kind = 0; 3628 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size]; 3629 if (size <= 16) 3630 { 3631 /* 16 bit shift / sign extend / 16 bit shift */ 3632 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size]; 3633 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden 3634 below, by alternative 3 or something even better. */ 3635 if (cost < best_cost) 3636 { 3637 kind = 5; 3638 best_cost = cost; 3639 } 3640 } 3641 /* Try a plain sign extend between two shifts. */ 3642 for (ext = 16; ext >= insize; ext -= 8) 3643 { 3644 if (ext <= size) 3645 { 3646 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext]; 3647 if (cost < best_cost) 3648 { 3649 kind = ext / (unsigned) 8; 3650 best_cost = cost; 3651 } 3652 } 3653 /* Check if we can do a sloppy shift with a final signed shift 3654 restoring the sign. */ 3655 if (EXT_SHIFT_SIGNED (size - ext)) 3656 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1; 3657 /* If not, maybe it's still cheaper to do the second shift sloppy, 3658 and do a final sign extend? */ 3659 else if (size <= 16) 3660 cost = ext_shift_insns[ext - insize] + 1 3661 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1; 3662 else 3663 continue; 3664 if (cost < best_cost) 3665 { 3666 kind = ext / (unsigned) 8 + 2; 3667 best_cost = cost; 3668 } 3669 } 3670 /* Check if we can sign extend in r0 */ 3671 if (insize < 8) 3672 { 3673 cost = 3 + shift_insns[left]; 3674 if (cost < best_cost) 3675 { 3676 kind = 6; 3677 best_cost = cost; 3678 } 3679 /* Try the same with a final signed shift. */ 3680 if (left < 31) 3681 { 3682 cost = 3 + ext_shift_insns[left + 1] + 1; 3683 if (cost < best_cost) 3684 { 3685 kind = 7; 3686 best_cost = cost; 3687 } 3688 } 3689 } 3690 if (TARGET_SH3) 3691 { 3692 /* Try to use a dynamic shift. */ 3693 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST; 3694 if (cost < best_cost) 3695 { 3696 kind = 0; 3697 best_cost = cost; 3698 } 3699 } 3700 if (costp) 3701 *costp = cost; 3702 return kind; 3703 } 3704 3705 /* Function to be used in the length attribute of the instructions 3706 implementing this pattern. */ 3707 3708 int 3709 shl_sext_length (rtx insn) 3710 { 3711 rtx set_src, left_rtx, size_rtx; 3712 int cost; 3713 3714 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); 3715 left_rtx = XEXP (XEXP (set_src, 0), 1); 3716 size_rtx = XEXP (set_src, 1); 3717 shl_sext_kind (left_rtx, size_rtx, &cost); 3718 return cost; 3719 } 3720 3721 /* Generate rtl for this pattern */ 3722 3723 int 3724 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source) 3725 { 3726 int kind; 3727 int left, size, insize, cost; 3728 rtx operands[3]; 3729 3730 kind = shl_sext_kind (left_rtx, size_rtx, &cost); 3731 left = INTVAL (left_rtx); 3732 size = INTVAL (size_rtx); 3733 insize = size - left; 3734 switch (kind) 3735 { 3736 case 1: 3737 case 2: 3738 case 3: 3739 case 4: 3740 { 3741 int ext = kind & 1 ? 8 : 16; 3742 int shift2 = size - ext; 3743 3744 /* Don't expand fine-grained when combining, because that will 3745 make the pattern fail. */ 3746 if (! currently_expanding_to_rtl 3747 && ! reload_in_progress && ! 
reload_completed) 3748 { 3749 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); 3750 emit_insn (gen_movsi (dest, source)); 3751 break; 3752 } 3753 if (dest != source) 3754 emit_insn (gen_movsi (dest, source)); 3755 operands[0] = dest; 3756 if (ext - insize) 3757 { 3758 operands[2] = GEN_INT (ext - insize); 3759 gen_shifty_hi_op (ASHIFT, operands); 3760 } 3761 emit_insn (kind & 1 3762 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) 3763 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); 3764 if (kind <= 2) 3765 { 3766 if (shift2) 3767 { 3768 operands[2] = GEN_INT (shift2); 3769 gen_shifty_op (ASHIFT, operands); 3770 } 3771 } 3772 else 3773 { 3774 if (shift2 > 0) 3775 { 3776 if (EXT_SHIFT_SIGNED (shift2)) 3777 { 3778 operands[2] = GEN_INT (shift2 + 1); 3779 gen_shifty_op (ASHIFT, operands); 3780 operands[2] = const1_rtx; 3781 gen_shifty_op (ASHIFTRT, operands); 3782 break; 3783 } 3784 operands[2] = GEN_INT (shift2); 3785 gen_shifty_hi_op (ASHIFT, operands); 3786 } 3787 else if (shift2) 3788 { 3789 operands[2] = GEN_INT (-shift2); 3790 gen_shifty_hi_op (LSHIFTRT, operands); 3791 } 3792 emit_insn (size <= 8 3793 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) 3794 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); 3795 } 3796 break; 3797 } 3798 case 5: 3799 { 3800 int i = 16 - size; 3801 if (! currently_expanding_to_rtl 3802 && ! reload_in_progress && ! reload_completed) 3803 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); 3804 else 3805 { 3806 operands[0] = dest; 3807 operands[2] = GEN_INT (16 - insize); 3808 gen_shifty_hi_op (ASHIFT, operands); 3809 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); 3810 } 3811 /* Don't use gen_ashrsi3 because it generates new pseudos. */ 3812 while (--i >= 0) 3813 gen_ashift (ASHIFTRT, 1, dest); 3814 break; 3815 } 3816 case 6: 3817 case 7: 3818 /* Don't expand fine-grained when combining, because that will 3819 make the pattern fail. */ 3820 if (! currently_expanding_to_rtl 3821 && ! reload_in_progress && ! reload_completed) 3822 { 3823 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); 3824 emit_insn (gen_movsi (dest, source)); 3825 break; 3826 } 3827 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1))); 3828 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1)))); 3829 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1)))); 3830 operands[0] = dest; 3831 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx; 3832 gen_shifty_op (ASHIFT, operands); 3833 if (kind == 7) 3834 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx)); 3835 break; 3836 default: 3837 return -1; 3838 } 3839 return 0; 3840 } 3841 3842 /* Prefix a symbol_ref name with "datalabel". */ 3843 3844 rtx 3845 gen_datalabel_ref (rtx sym) 3846 { 3847 const char *str; 3848 3849 if (GET_CODE (sym) == LABEL_REF) 3850 return gen_rtx_CONST (GET_MODE (sym), 3851 gen_rtx_UNSPEC (GET_MODE (sym), 3852 gen_rtvec (1, sym), 3853 UNSPEC_DATALABEL)); 3854 3855 gcc_assert (GET_CODE (sym) == SYMBOL_REF); 3856 3857 str = XSTR (sym, 0); 3858 /* Share all SYMBOL_REF strings with the same value - that is important 3859 for cse. 
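Going through get_identifier gives us the canonical copy of the string, so equal names end up pointer-equal.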
*/ 3860 str = IDENTIFIER_POINTER (get_identifier (str)); 3861 XSTR (sym, 0) = str; 3862 3863 return sym; 3864 } 3865 3866 3867 static alloc_pool label_ref_list_pool; 3868 3869 typedef struct label_ref_list_d 3870 { 3871 rtx label; 3872 struct label_ref_list_d *next; 3873 } *label_ref_list_t; 3874 3875 /* The SH cannot load a large constant into a register, constants have to 3876 come from a pc relative load. The reference of a pc relative load 3877 instruction must be less than 1k in front of the instruction. This 3878 means that we often have to dump a constant inside a function, and 3879 generate code to branch around it. 3880 3881 It is important to minimize this, since the branches will slow things 3882 down and make things bigger. 3883 3884 Worst case code looks like: 3885 3886 mov.l L1,rn 3887 bra L2 3888 nop 3889 align 3890 L1: .long value 3891 L2: 3892 .. 3893 3894 mov.l L3,rn 3895 bra L4 3896 nop 3897 align 3898 L3: .long value 3899 L4: 3900 .. 3901 3902 We fix this by performing a scan before scheduling, which notices which 3903 instructions need to have their operands fetched from the constant table 3904 and builds the table. 3905 3906 The algorithm is: 3907 3908 scan, find an instruction which needs a pcrel move. Look forward, find the 3909 last barrier which is within MAX_COUNT bytes of the requirement. 3910 If there isn't one, make one. Process all the instructions between 3911 the find and the barrier. 3912 3913 In the above example, we can tell that L3 is within 1k of L1, so 3914 the first move can be shrunk from the 3 insn+constant sequence into 3915 just 1 insn, and the constant moved to L3 to make: 3916 3917 mov.l L1,rn 3918 .. 3919 mov.l L3,rn 3920 bra L4 3921 nop 3922 align 3923 L3:.long value 3924 L4:.long value 3925 3926 Then the second move becomes the target for the shortening process. */ 3927 3928 typedef struct 3929 { 3930 rtx value; /* Value in table. */ 3931 rtx label; /* Label of value. */ 3932 label_ref_list_t wend; /* End of window. */ 3933 enum machine_mode mode; /* Mode of value. */ 3934 3935 /* True if this constant is accessed as part of a post-increment 3936 sequence. Note that HImode constants are never accessed in this way. */ 3937 bool part_of_sequence_p; 3938 } pool_node; 3939 3940 /* The maximum number of constants that can fit into one pool, since 3941 constants in the range 0..510 are at least 2 bytes long, and in the 3942 range from there to 1018 at least 4 bytes. */ 3943 3944 #define MAX_POOL_SIZE 372 3945 static pool_node pool_vector[MAX_POOL_SIZE]; 3946 static int pool_size; 3947 static rtx pool_window_label; 3948 static int pool_window_last; 3949 3950 static int max_labelno_before_reorg; 3951 3952 /* ??? If we need a constant in HImode which is the truncated value of a 3953 constant we need in SImode, we could combine the two entries thus saving 3954 two bytes. Is this common enough to be worth the effort of implementing 3955 it? */ 3956 3957 /* ??? This stuff should be done at the same time that we shorten branches. 3958 As it is now, we must assume that all branches are the maximum size, and 3959 this causes us to almost always output constant pools sooner than 3960 necessary. */ 3961 3962 /* Add a constant to the pool and return its label. */ 3963 3964 static rtx 3965 add_constant (rtx x, enum machine_mode mode, rtx last_value) 3966 { 3967 int i; 3968 rtx lab, new_rtx; 3969 label_ref_list_t ref, newref; 3970 3971 /* First see if we've already got it. 
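An existing entry is only reused when the rtx code and the mode match (and, for labels, the label number as well) and the values compare equal under rtx_equal_p.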
*/ 3972 for (i = 0; i < pool_size; i++) 3973 { 3974 if (x->code == pool_vector[i].value->code 3975 && mode == pool_vector[i].mode) 3976 { 3977 if (x->code == CODE_LABEL) 3978 { 3979 if (XINT (x, 3) != XINT (pool_vector[i].value, 3)) 3980 continue; 3981 } 3982 if (rtx_equal_p (x, pool_vector[i].value)) 3983 { 3984 lab = new_rtx = 0; 3985 if (! last_value 3986 || ! i 3987 || ! rtx_equal_p (last_value, pool_vector[i-1].value)) 3988 { 3989 new_rtx = gen_label_rtx (); 3990 LABEL_REFS (new_rtx) = pool_vector[i].label; 3991 pool_vector[i].label = lab = new_rtx; 3992 } 3993 if (lab && pool_window_label) 3994 { 3995 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool); 3996 newref->label = pool_window_label; 3997 ref = pool_vector[pool_window_last].wend; 3998 newref->next = ref; 3999 pool_vector[pool_window_last].wend = newref; 4000 } 4001 if (new_rtx) 4002 pool_window_label = new_rtx; 4003 pool_window_last = i; 4004 return lab; 4005 } 4006 } 4007 } 4008 4009 /* Need a new one. */ 4010 pool_vector[pool_size].value = x; 4011 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value)) 4012 { 4013 lab = 0; 4014 pool_vector[pool_size - 1].part_of_sequence_p = true; 4015 } 4016 else 4017 lab = gen_label_rtx (); 4018 pool_vector[pool_size].mode = mode; 4019 pool_vector[pool_size].label = lab; 4020 pool_vector[pool_size].wend = NULL; 4021 pool_vector[pool_size].part_of_sequence_p = (lab == 0); 4022 if (lab && pool_window_label) 4023 { 4024 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool); 4025 newref->label = pool_window_label; 4026 ref = pool_vector[pool_window_last].wend; 4027 newref->next = ref; 4028 pool_vector[pool_window_last].wend = newref; 4029 } 4030 if (lab) 4031 pool_window_label = lab; 4032 pool_window_last = pool_size; 4033 pool_size++; 4034 return lab; 4035 } 4036 4037 /* Output the literal table. START, if nonzero, is the first instruction 4038 this table is needed for, and also indicates that there is at least one 4039 casesi_worker_2 instruction; We have to emit the operand3 labels from 4040 these insns at a 4-byte aligned position. BARRIER is the barrier 4041 after which we are to place the table. */ 4042 4043 static void 4044 dump_table (rtx start, rtx barrier) 4045 { 4046 rtx scan = barrier; 4047 int i; 4048 int need_align = 1; 4049 rtx lab; 4050 label_ref_list_t ref; 4051 int have_df = 0; 4052 4053 /* Do two passes, first time dump out the HI sized constants. 
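The later passes emit the SImode/SFmode and DFmode/DImode constants, inserting alignment directives where the 4 and 8 byte values require them.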
*/ 4054 4055 for (i = 0; i < pool_size; i++) 4056 { 4057 pool_node *p = &pool_vector[i]; 4058 4059 if (p->mode == HImode) 4060 { 4061 if (need_align) 4062 { 4063 scan = emit_insn_after (gen_align_2 (), scan); 4064 need_align = 0; 4065 } 4066 for (lab = p->label; lab; lab = LABEL_REFS (lab)) 4067 scan = emit_label_after (lab, scan); 4068 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx), 4069 scan); 4070 for (ref = p->wend; ref; ref = ref->next) 4071 { 4072 lab = ref->label; 4073 scan = emit_insn_after (gen_consttable_window_end (lab), scan); 4074 } 4075 } 4076 else if (p->mode == DFmode) 4077 have_df = 1; 4078 } 4079 4080 need_align = 1; 4081 4082 if (start) 4083 { 4084 scan = emit_insn_after (gen_align_4 (), scan); 4085 need_align = 0; 4086 for (; start != barrier; start = NEXT_INSN (start)) 4087 if (NONJUMP_INSN_P (start) 4088 && recog_memoized (start) == CODE_FOR_casesi_worker_2) 4089 { 4090 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0)); 4091 rtx lab = XEXP (XVECEXP (src, 0, 3), 0); 4092 4093 scan = emit_label_after (lab, scan); 4094 } 4095 } 4096 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df) 4097 { 4098 rtx align_insn = NULL_RTX; 4099 4100 scan = emit_label_after (gen_label_rtx (), scan); 4101 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan); 4102 need_align = 0; 4103 4104 for (i = 0; i < pool_size; i++) 4105 { 4106 pool_node *p = &pool_vector[i]; 4107 4108 switch (p->mode) 4109 { 4110 case HImode: 4111 break; 4112 case SImode: 4113 case SFmode: 4114 if (align_insn && !p->part_of_sequence_p) 4115 { 4116 for (lab = p->label; lab; lab = LABEL_REFS (lab)) 4117 emit_label_before (lab, align_insn); 4118 emit_insn_before (gen_consttable_4 (p->value, const0_rtx), 4119 align_insn); 4120 for (ref = p->wend; ref; ref = ref->next) 4121 { 4122 lab = ref->label; 4123 emit_insn_before (gen_consttable_window_end (lab), 4124 align_insn); 4125 } 4126 delete_insn (align_insn); 4127 align_insn = NULL_RTX; 4128 continue; 4129 } 4130 else 4131 { 4132 for (lab = p->label; lab; lab = LABEL_REFS (lab)) 4133 scan = emit_label_after (lab, scan); 4134 scan = emit_insn_after (gen_consttable_4 (p->value, 4135 const0_rtx), scan); 4136 need_align = ! 
need_align; 4137 } 4138 break; 4139 case DFmode: 4140 if (need_align) 4141 { 4142 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan); 4143 align_insn = scan; 4144 need_align = 0; 4145 } 4146 case DImode: 4147 for (lab = p->label; lab; lab = LABEL_REFS (lab)) 4148 scan = emit_label_after (lab, scan); 4149 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx), 4150 scan); 4151 break; 4152 default: 4153 gcc_unreachable (); 4154 } 4155 4156 if (p->mode != HImode) 4157 { 4158 for (ref = p->wend; ref; ref = ref->next) 4159 { 4160 lab = ref->label; 4161 scan = emit_insn_after (gen_consttable_window_end (lab), 4162 scan); 4163 } 4164 } 4165 } 4166 4167 pool_size = 0; 4168 } 4169 4170 for (i = 0; i < pool_size; i++) 4171 { 4172 pool_node *p = &pool_vector[i]; 4173 4174 switch (p->mode) 4175 { 4176 case HImode: 4177 break; 4178 case SImode: 4179 case SFmode: 4180 if (need_align) 4181 { 4182 need_align = 0; 4183 scan = emit_label_after (gen_label_rtx (), scan); 4184 scan = emit_insn_after (gen_align_4 (), scan); 4185 } 4186 for (lab = p->label; lab; lab = LABEL_REFS (lab)) 4187 scan = emit_label_after (lab, scan); 4188 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx), 4189 scan); 4190 break; 4191 case DFmode: 4192 case DImode: 4193 if (need_align) 4194 { 4195 need_align = 0; 4196 scan = emit_label_after (gen_label_rtx (), scan); 4197 scan = emit_insn_after (gen_align_4 (), scan); 4198 } 4199 for (lab = p->label; lab; lab = LABEL_REFS (lab)) 4200 scan = emit_label_after (lab, scan); 4201 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx), 4202 scan); 4203 break; 4204 default: 4205 gcc_unreachable (); 4206 } 4207 4208 if (p->mode != HImode) 4209 { 4210 for (ref = p->wend; ref; ref = ref->next) 4211 { 4212 lab = ref->label; 4213 scan = emit_insn_after (gen_consttable_window_end (lab), scan); 4214 } 4215 } 4216 } 4217 4218 scan = emit_insn_after (gen_consttable_end (), scan); 4219 scan = emit_barrier_after (scan); 4220 pool_size = 0; 4221 pool_window_label = NULL_RTX; 4222 pool_window_last = 0; 4223 } 4224 4225 /* Return nonzero if constant would be an ok source for a 4226 mov.w instead of a mov.l. */ 4227 4228 static int 4229 hi_const (rtx src) 4230 { 4231 return (CONST_INT_P (src) 4232 && INTVAL (src) >= -32768 4233 && INTVAL (src) <= 32767); 4234 } 4235 4236 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0) 4237 4238 /* Nonzero if the insn is a move instruction which needs to be fixed. */ 4239 4240 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the 4241 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't 4242 need to fix it if the input value is CONST_OK_FOR_I08. */ 4243 4244 static int 4245 broken_move (rtx insn) 4246 { 4247 if (NONJUMP_INSN_P (insn)) 4248 { 4249 rtx pat = PATTERN (insn); 4250 if (GET_CODE (pat) == PARALLEL) 4251 pat = XVECEXP (pat, 0, 0); 4252 if (GET_CODE (pat) == SET 4253 /* We can load any 8-bit value if we don't care what the high 4254 order bits end up as. */ 4255 && GET_MODE (SET_DEST (pat)) != QImode 4256 && (CONSTANT_P (SET_SRC (pat)) 4257 /* Match mova_const. */ 4258 || (GET_CODE (SET_SRC (pat)) == UNSPEC 4259 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA 4260 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST)) 4261 && ! (TARGET_SH2E 4262 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE 4263 && (fp_zero_operand (SET_SRC (pat)) 4264 || fp_one_operand (SET_SRC (pat))) 4265 /* In general we don't know the current setting of fpscr, so disable fldi. 
4266 There is an exception if this was a register-register move 4267 before reload - and hence it was ascertained that we have 4268 single precision setting - and in a post-reload optimization 4269 we changed this to do a constant load. In that case 4270 we don't have an r0 clobber, hence we must use fldi. */ 4271 && (TARGET_FMOVD 4272 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0)) 4273 == SCRATCH)) 4274 && REG_P (SET_DEST (pat)) 4275 && FP_REGISTER_P (REGNO (SET_DEST (pat)))) 4276 && ! (TARGET_SH2A 4277 && GET_MODE (SET_DEST (pat)) == SImode 4278 && (satisfies_constraint_I20 (SET_SRC (pat)) 4279 || satisfies_constraint_I28 (SET_SRC (pat)))) 4280 && ! satisfies_constraint_I08 (SET_SRC (pat))) 4281 return 1; 4282 } 4283 4284 return 0; 4285 } 4286 4287 static int 4288 mova_p (rtx insn) 4289 { 4290 return (NONJUMP_INSN_P (insn) 4291 && GET_CODE (PATTERN (insn)) == SET 4292 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC 4293 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA 4294 /* Don't match mova_const. */ 4295 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF); 4296 } 4297 4298 /* Fix up a mova from a switch that went out of range. */ 4299 static void 4300 fixup_mova (rtx mova) 4301 { 4302 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode); 4303 if (! flag_pic) 4304 { 4305 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova); 4306 INSN_CODE (mova) = -1; 4307 } 4308 else 4309 { 4310 rtx worker = mova; 4311 rtx lab = gen_label_rtx (); 4312 rtx wpat, wpat0, wpat1, wsrc, target, base, diff; 4313 4314 do 4315 { 4316 worker = NEXT_INSN (worker); 4317 gcc_assert (worker 4318 && !LABEL_P (worker) 4319 && !JUMP_P (worker)); 4320 } while (NOTE_P (worker) 4321 || recog_memoized (worker) != CODE_FOR_casesi_worker_1); 4322 wpat = PATTERN (worker); 4323 wpat0 = XVECEXP (wpat, 0, 0); 4324 wpat1 = XVECEXP (wpat, 0, 1); 4325 wsrc = SET_SRC (wpat0); 4326 PATTERN (worker) = (gen_casesi_worker_2 4327 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1), 4328 XEXP (XVECEXP (wsrc, 0, 2), 0), lab, 4329 XEXP (wpat1, 0))); 4330 INSN_CODE (worker) = -1; 4331 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); 4332 base = gen_rtx_LABEL_REF (Pmode, lab); 4333 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF); 4334 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff); 4335 INSN_CODE (mova) = -1; 4336 } 4337 } 4338 4339 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update 4340 *num_mova, and check if the new mova is not nested within the first one. 4341 return 0 if *first_mova was replaced, 1 if new_mova was replaced, 4342 2 if new_mova has been assigned to *first_mova, -1 otherwise.. */ 4343 static int 4344 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova) 4345 { 4346 int n_addr = 0; /* Initialization to shut up spurious warning. */ 4347 int f_target, n_target = 0; /* Likewise. */ 4348 4349 if (optimize) 4350 { 4351 /* If NEW_MOVA has no address yet, it will be handled later. */ 4352 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova)) 4353 return -1; 4354 4355 n_addr = INSN_ADDRESSES (INSN_UID (new_mova)); 4356 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0))); 4357 if (n_addr > n_target || n_addr + 1022 < n_target) 4358 { 4359 /* Change the mova into a load. 4360 broken_move will then return true for it. 
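The value is then placed in the constant pool and reloaded like any other pc-relative load.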
*/ 4361 fixup_mova (new_mova); 4362 return 1; 4363 } 4364 } 4365 if (!(*num_mova)++) 4366 { 4367 *first_mova = new_mova; 4368 return 2; 4369 } 4370 if (!optimize 4371 || ((f_target 4372 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0)))) 4373 >= n_target)) 4374 return -1; 4375 4376 (*num_mova)--; 4377 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova)) 4378 > n_target - n_addr) 4379 { 4380 fixup_mova (*first_mova); 4381 return 0; 4382 } 4383 else 4384 { 4385 fixup_mova (new_mova); 4386 return 1; 4387 } 4388 } 4389 4390 /* Find the last barrier from insn FROM which is close enough to hold the 4391 constant pool. If we can't find one, then create one near the end of 4392 the range. */ 4393 4394 static rtx 4395 find_barrier (int num_mova, rtx mova, rtx from) 4396 { 4397 int count_si = 0; 4398 int count_hi = 0; 4399 int found_hi = 0; 4400 int found_si = 0; 4401 int found_di = 0; 4402 int hi_align = 2; 4403 int si_align = 2; 4404 int leading_mova = num_mova; 4405 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0; 4406 int si_limit; 4407 int hi_limit; 4408 rtx orig = from; 4409 rtx last_got = NULL_RTX; 4410 rtx last_symoff = NULL_RTX; 4411 4412 /* For HImode: range is 510, add 4 because pc counts from address of 4413 second instruction after this one, subtract 2 for the jump instruction 4414 that we may need to emit before the table, subtract 2 for the instruction 4415 that fills the jump delay slot (in very rare cases, reorg will take an 4416 instruction from after the constant pool or will leave the delay slot 4417 empty). This gives 510. 4418 For SImode: range is 1020, add 4 because pc counts from address of 4419 second instruction after this one, subtract 2 in case pc is 2 byte 4420 aligned, subtract 2 for the jump instruction that we may need to emit 4421 before the table, subtract 2 for the instruction that fills the jump 4422 delay slot. This gives 1018. */ 4423 4424 /* The branch will always be shortened now that the reference address for 4425 forward branches is the successor address, thus we need no longer make 4426 adjustments to the [sh]i_limit for -O0. */ 4427 4428 si_limit = 1018; 4429 hi_limit = 510; 4430 4431 while (from && count_si < si_limit && count_hi < hi_limit) 4432 { 4433 int inc = get_attr_length (from); 4434 int new_align = 1; 4435 4436 /* If this is a label that existed at the time of the compute_alignments 4437 call, determine the alignment. N.B. When find_barrier recurses for 4438 an out-of-reach mova, we might see labels at the start of previously 4439 inserted constant tables. */ 4440 if (LABEL_P (from) 4441 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg) 4442 { 4443 if (optimize) 4444 new_align = 1 << label_to_alignment (from); 4445 else if (BARRIER_P (prev_nonnote_insn (from))) 4446 new_align = 1 << barrier_align (from); 4447 else 4448 new_align = 1; 4449 inc = 0; 4450 } 4451 /* In case we are scanning a constant table because of recursion, check 4452 for explicit alignments. If the table is long, we might be forced 4453 to emit the new table in front of it; the length of the alignment 4454 might be the last straw. */ 4455 else if (NONJUMP_INSN_P (from) 4456 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE 4457 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN) 4458 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0)); 4459 /* When we find the end of a constant table, paste the new constant 4460 at the end. 
That is better than putting it in front because 4461 this way, we don't need extra alignment for adding a 4-byte-aligned 4462 mov(a) label to a 2/4 or 8/4 byte aligned table. */ 4463 else if (NONJUMP_INSN_P (from) 4464 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE 4465 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END) 4466 return from; 4467 4468 if (BARRIER_P (from)) 4469 { 4470 rtx next; 4471 4472 found_barrier = from; 4473 4474 /* If we are at the end of the function, or in front of an alignment 4475 instruction, we need not insert an extra alignment. We prefer 4476 this kind of barrier. */ 4477 if (barrier_align (from) > 2) 4478 good_barrier = from; 4479 4480 /* If we are at the end of a hot/cold block, dump the constants 4481 here. */ 4482 next = NEXT_INSN (from); 4483 if (next 4484 && NOTE_P (next) 4485 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS) 4486 break; 4487 } 4488 4489 if (broken_move (from)) 4490 { 4491 rtx pat, src, dst; 4492 enum machine_mode mode; 4493 4494 pat = PATTERN (from); 4495 if (GET_CODE (pat) == PARALLEL) 4496 pat = XVECEXP (pat, 0, 0); 4497 src = SET_SRC (pat); 4498 dst = SET_DEST (pat); 4499 mode = GET_MODE (dst); 4500 4501 /* GOT pcrelat setting comes in pair of 4502 mova .L8,r0 4503 mov.l .L8,r12 4504 instructions. (plus add r0,r12). 4505 Remember if we see one without the other. */ 4506 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0))) 4507 last_got = last_got ? NULL_RTX : from; 4508 else if (PIC_ADDR_P (src)) 4509 last_got = last_got ? NULL_RTX : from; 4510 4511 /* We must explicitly check the mode, because sometimes the 4512 front end will generate code to load unsigned constants into 4513 HImode targets without properly sign extending them. */ 4514 if (mode == HImode 4515 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG)) 4516 { 4517 found_hi += 2; 4518 /* We put the short constants before the long constants, so 4519 we must count the length of short constants in the range 4520 for the long constants. */ 4521 /* ??? This isn't optimal, but is easy to do. */ 4522 si_limit -= 2; 4523 } 4524 else 4525 { 4526 /* We dump DF/DI constants before SF/SI ones, because 4527 the limit is the same, but the alignment requirements 4528 are higher. We may waste up to 4 additional bytes 4529 for alignment, and the DF/DI constant may have 4530 another SF/SI constant placed before it. */ 4531 if (TARGET_SHCOMPACT 4532 && ! found_di 4533 && (mode == DFmode || mode == DImode)) 4534 { 4535 found_di = 1; 4536 si_limit -= 8; 4537 } 4538 while (si_align > 2 && found_si + si_align - 2 > count_si) 4539 si_align >>= 1; 4540 if (found_si > count_si) 4541 count_si = found_si; 4542 found_si += GET_MODE_SIZE (mode); 4543 if (num_mova) 4544 si_limit -= GET_MODE_SIZE (mode); 4545 } 4546 } 4547 4548 if (mova_p (from)) 4549 { 4550 switch (untangle_mova (&num_mova, &mova, from)) 4551 { 4552 case 1: 4553 if (flag_pic) 4554 { 4555 rtx src = SET_SRC (PATTERN (from)); 4556 if (GET_CODE (src) == CONST 4557 && GET_CODE (XEXP (src, 0)) == UNSPEC 4558 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF) 4559 last_symoff = from; 4560 } 4561 break; 4562 case 0: return find_barrier (0, 0, mova); 4563 case 2: 4564 { 4565 leading_mova = 0; 4566 barrier_before_mova 4567 = good_barrier ? 
good_barrier : found_barrier; 4568 } 4569 default: break; 4570 } 4571 if (found_si > count_si) 4572 count_si = found_si; 4573 } 4574 else if (JUMP_TABLE_DATA_P (from)) 4575 { 4576 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode) 4577 || (num_mova 4578 && (prev_nonnote_insn (from) 4579 == XEXP (MOVA_LABELREF (mova), 0)))) 4580 num_mova--; 4581 if (barrier_align (next_real_insn (from)) == align_jumps_log) 4582 { 4583 /* We have just passed the barrier in front of the 4584 ADDR_DIFF_VEC, which is stored in found_barrier. Since 4585 the ADDR_DIFF_VEC is accessed as data, just like our pool 4586 constants, this is a good opportunity to accommodate what 4587 we have gathered so far. 4588 If we waited any longer, we could end up at a barrier in 4589 front of code, which gives worse cache usage for separated 4590 instruction / data caches. */ 4591 good_barrier = found_barrier; 4592 break; 4593 } 4594 else 4595 { 4596 rtx body = PATTERN (from); 4597 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body)); 4598 } 4599 } 4600 /* For the SH1, we generate alignments even after jumps-around-jumps. */ 4601 else if (JUMP_P (from) 4602 && ! TARGET_SH2 4603 && ! TARGET_SMALLCODE) 4604 new_align = 4; 4605 4606 /* There is a possibility that a bf is transformed into a bf/s by the 4607 delay slot scheduler. */ 4608 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from) 4609 && get_attr_type (from) == TYPE_CBRANCH 4610 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE) 4611 inc += 2; 4612 4613 if (found_si) 4614 { 4615 count_si += inc; 4616 if (new_align > si_align) 4617 { 4618 si_limit -= (count_si - 1) & (new_align - si_align); 4619 si_align = new_align; 4620 } 4621 count_si = (count_si + new_align - 1) & -new_align; 4622 } 4623 if (found_hi) 4624 { 4625 count_hi += inc; 4626 if (new_align > hi_align) 4627 { 4628 hi_limit -= (count_hi - 1) & (new_align - hi_align); 4629 hi_align = new_align; 4630 } 4631 count_hi = (count_hi + new_align - 1) & -new_align; 4632 } 4633 from = NEXT_INSN (from); 4634 } 4635 4636 if (num_mova) 4637 { 4638 if (leading_mova) 4639 { 4640 /* Try as we might, the leading mova is out of range. Change 4641 it into a load (which will become a pcload) and retry. */ 4642 fixup_mova (mova); 4643 return find_barrier (0, 0, mova); 4644 } 4645 else 4646 { 4647 /* Insert the constant pool table before the mova instruction, 4648 to prevent the mova label reference from going out of range. */ 4649 from = mova; 4650 good_barrier = found_barrier = barrier_before_mova; 4651 } 4652 } 4653 4654 if (found_barrier) 4655 { 4656 if (good_barrier && next_real_insn (found_barrier)) 4657 found_barrier = good_barrier; 4658 } 4659 else 4660 { 4661 /* We didn't find a barrier in time to dump our stuff, 4662 so we'll make one. */ 4663 rtx label = gen_label_rtx (); 4664 4665 /* Don't emit a constant table in the middle of insns for 4666 casesi_worker_2. This is a bit overkill but is enough 4667 because casesi_worker_2 wouldn't appear so frequently. */ 4668 if (last_symoff) 4669 from = last_symoff; 4670 4671 /* If we exceeded the range, then we must back up over the last 4672 instruction we looked at. Otherwise, we just need to undo the 4673 NEXT_INSN at the end of the loop. 
*/ 4674 if (PREV_INSN (from) != orig 4675 && (count_hi > hi_limit || count_si > si_limit)) 4676 from = PREV_INSN (PREV_INSN (from)); 4677 else 4678 from = PREV_INSN (from); 4679 4680 /* Don't emit a constant table int the middle of global pointer setting, 4681 since that that would move the addressing base GOT into another table. 4682 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_ 4683 in the pool anyway, so just move up the whole constant pool. */ 4684 if (last_got) 4685 from = PREV_INSN (last_got); 4686 4687 /* Don't insert the constant pool table at the position which 4688 may be the landing pad. */ 4689 if (flag_exceptions 4690 && CALL_P (from) 4691 && find_reg_note (from, REG_EH_REGION, NULL_RTX)) 4692 from = PREV_INSN (from); 4693 4694 /* Walk back to be just before any jump or label. 4695 Putting it before a label reduces the number of times the branch 4696 around the constant pool table will be hit. Putting it before 4697 a jump makes it more likely that the bra delay slot will be 4698 filled. */ 4699 while (NOTE_P (from) || JUMP_P (from) 4700 || LABEL_P (from)) 4701 from = PREV_INSN (from); 4702 4703 from = emit_jump_insn_after (gen_jump (label), from); 4704 JUMP_LABEL (from) = label; 4705 LABEL_NUSES (label) = 1; 4706 found_barrier = emit_barrier_after (from); 4707 emit_label_after (label, found_barrier); 4708 } 4709 4710 return found_barrier; 4711 } 4712 4713 /* If the instruction INSN is implemented by a special function, and we can 4714 positively find the register that is used to call the sfunc, and this 4715 register is not used anywhere else in this instruction - except as the 4716 destination of a set, return this register; else, return 0. */ 4717 rtx 4718 sfunc_uses_reg (rtx insn) 4719 { 4720 int i; 4721 rtx pattern, part, reg_part, reg; 4722 4723 if (!NONJUMP_INSN_P (insn)) 4724 return 0; 4725 pattern = PATTERN (insn); 4726 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC) 4727 return 0; 4728 4729 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--) 4730 { 4731 part = XVECEXP (pattern, 0, i); 4732 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode) 4733 reg_part = part; 4734 } 4735 if (! reg_part) 4736 return 0; 4737 reg = XEXP (reg_part, 0); 4738 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--) 4739 { 4740 part = XVECEXP (pattern, 0, i); 4741 if (part == reg_part || GET_CODE (part) == CLOBBER) 4742 continue; 4743 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET 4744 && REG_P (SET_DEST (part))) 4745 ? SET_SRC (part) : part))) 4746 return 0; 4747 } 4748 return reg; 4749 } 4750 4751 /* See if the only way in which INSN uses REG is by calling it, or by 4752 setting it while calling it. Set *SET to a SET rtx if the register 4753 is set by INSN. */ 4754 4755 static int 4756 noncall_uses_reg (rtx reg, rtx insn, rtx *set) 4757 { 4758 rtx pattern, reg2; 4759 4760 *set = NULL_RTX; 4761 4762 reg2 = sfunc_uses_reg (insn); 4763 if (reg2 && REGNO (reg2) == REGNO (reg)) 4764 { 4765 pattern = single_set (insn); 4766 if (pattern 4767 && REG_P (SET_DEST (pattern)) 4768 && REGNO (reg) == REGNO (SET_DEST (pattern))) 4769 *set = pattern; 4770 return 0; 4771 } 4772 if (!CALL_P (insn)) 4773 { 4774 /* We don't use rtx_equal_p because we don't care if the mode is 4775 different. 
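Comparing the register numbers is sufficient here.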
*/ 4776 pattern = single_set (insn); 4777 if (pattern 4778 && REG_P (SET_DEST (pattern)) 4779 && REGNO (reg) == REGNO (SET_DEST (pattern))) 4780 { 4781 rtx par, part; 4782 int i; 4783 4784 *set = pattern; 4785 par = PATTERN (insn); 4786 if (GET_CODE (par) == PARALLEL) 4787 for (i = XVECLEN (par, 0) - 1; i >= 0; i--) 4788 { 4789 part = XVECEXP (par, 0, i); 4790 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part)) 4791 return 1; 4792 } 4793 return reg_mentioned_p (reg, SET_SRC (pattern)); 4794 } 4795 4796 return 1; 4797 } 4798 4799 pattern = PATTERN (insn); 4800 4801 if (GET_CODE (pattern) == PARALLEL) 4802 { 4803 int i; 4804 4805 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--) 4806 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i))) 4807 return 1; 4808 pattern = XVECEXP (pattern, 0, 0); 4809 } 4810 4811 if (GET_CODE (pattern) == SET) 4812 { 4813 if (reg_mentioned_p (reg, SET_DEST (pattern))) 4814 { 4815 /* We don't use rtx_equal_p, because we don't care if the 4816 mode is different. */ 4817 if (!REG_P (SET_DEST (pattern)) 4818 || REGNO (reg) != REGNO (SET_DEST (pattern))) 4819 return 1; 4820 4821 *set = pattern; 4822 } 4823 4824 pattern = SET_SRC (pattern); 4825 } 4826 4827 if (GET_CODE (pattern) != CALL 4828 || !MEM_P (XEXP (pattern, 0)) 4829 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0))) 4830 return 1; 4831 4832 return 0; 4833 } 4834 4835 /* Given a X, a pattern of an insn or a part of it, return a mask of used 4836 general registers. Bits 0..15 mean that the respective registers 4837 are used as inputs in the instruction. Bits 16..31 mean that the 4838 registers 0..15, respectively, are used as outputs, or are clobbered. 4839 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */ 4840 int 4841 regs_used (rtx x, int is_dest) 4842 { 4843 enum rtx_code code; 4844 const char *fmt; 4845 int i, used = 0; 4846 4847 if (! x) 4848 return used; 4849 code = GET_CODE (x); 4850 switch (code) 4851 { 4852 case REG: 4853 if (REGNO (x) < 16) 4854 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) 4855 << (REGNO (x) + is_dest)); 4856 return 0; 4857 case SUBREG: 4858 { 4859 rtx y = SUBREG_REG (x); 4860 4861 if (!REG_P (y)) 4862 break; 4863 if (REGNO (y) < 16) 4864 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) 4865 << (REGNO (y) + 4866 subreg_regno_offset (REGNO (y), 4867 GET_MODE (y), 4868 SUBREG_BYTE (x), 4869 GET_MODE (x)) + is_dest)); 4870 return 0; 4871 } 4872 case SET: 4873 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16); 4874 case RETURN: 4875 /* If there was a return value, it must have been indicated with USE. */ 4876 return 0x00ffff00; 4877 case CLOBBER: 4878 is_dest = 1; 4879 break; 4880 case MEM: 4881 is_dest = 0; 4882 break; 4883 case CALL: 4884 used |= 0x00ff00f0; 4885 break; 4886 default: 4887 break; 4888 } 4889 4890 fmt = GET_RTX_FORMAT (code); 4891 4892 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 4893 { 4894 if (fmt[i] == 'E') 4895 { 4896 register int j; 4897 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 4898 used |= regs_used (XVECEXP (x, i, j), is_dest); 4899 } 4900 else if (fmt[i] == 'e') 4901 used |= regs_used (XEXP (x, i), is_dest); 4902 } 4903 return used; 4904 } 4905 4906 /* Create an instruction that prevents redirection of a conditional branch 4907 to the destination of the JUMP with address ADDR. 4908 If the branch needs to be implemented as an indirect jump, try to find 4909 a scratch register for it. 4910 If NEED_BLOCK is 0, don't do anything unless we need a scratch register. 
4911 If any preceding insn that doesn't fit into a delay slot is good enough, 4912 pass 1. Pass 2 if a definite blocking insn is needed. 4913 -1 is used internally to avoid deep recursion. 4914 If a blocking instruction is made or recognized, return it. */ 4915 4916 static rtx 4917 gen_block_redirect (rtx jump, int addr, int need_block) 4918 { 4919 int dead = 0; 4920 rtx prev = prev_nonnote_insn (jump); 4921 rtx dest; 4922 4923 /* First, check if we already have an instruction that satisfies our need. */ 4924 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev)) 4925 { 4926 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) 4927 return prev; 4928 if (GET_CODE (PATTERN (prev)) == USE 4929 || GET_CODE (PATTERN (prev)) == CLOBBER 4930 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) 4931 prev = jump; 4932 else if ((need_block &= ~1) < 0) 4933 return prev; 4934 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect) 4935 need_block = 0; 4936 } 4937 if (GET_CODE (PATTERN (jump)) == RETURN) 4938 { 4939 if (! need_block) 4940 return prev; 4941 /* Reorg even does nasty things with return insns that cause branches 4942 to go out of range - see find_end_label and callers. */ 4943 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump); 4944 } 4945 /* We can't use JUMP_LABEL here because it might be undefined 4946 when not optimizing. */ 4947 dest = XEXP (SET_SRC (PATTERN (jump)), 0); 4948 /* If the branch is out of range, try to find a scratch register for it. */ 4949 if (optimize 4950 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092 4951 > 4092 + 4098)) 4952 { 4953 rtx scan; 4954 /* Don't look for the stack pointer as a scratch register, 4955 it would cause trouble if an interrupt occurred. */ 4956 unsigned attempt = 0x7fff, used; 4957 int jump_left = flag_expensive_optimizations + 1; 4958 4959 /* It is likely that the most recent eligible instruction is wanted for 4960 the delay slot. Therefore, find out which registers it uses, and 4961 try to avoid using them. */ 4962 4963 for (scan = jump; (scan = PREV_INSN (scan)); ) 4964 { 4965 enum rtx_code code; 4966 4967 if (INSN_DELETED_P (scan)) 4968 continue; 4969 code = GET_CODE (scan); 4970 if (code == CODE_LABEL || code == JUMP_INSN) 4971 break; 4972 if (code == INSN 4973 && GET_CODE (PATTERN (scan)) != USE 4974 && GET_CODE (PATTERN (scan)) != CLOBBER 4975 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES) 4976 { 4977 attempt &= ~regs_used (PATTERN (scan), 0); 4978 break; 4979 } 4980 } 4981 for (used = dead = 0, scan = JUMP_LABEL (jump); 4982 (scan = NEXT_INSN (scan)); ) 4983 { 4984 enum rtx_code code; 4985 4986 if (INSN_DELETED_P (scan)) 4987 continue; 4988 code = GET_CODE (scan); 4989 if (INSN_P (scan)) 4990 { 4991 used |= regs_used (PATTERN (scan), 0); 4992 if (code == CALL_INSN) 4993 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0); 4994 dead |= (used >> 16) & ~used; 4995 if (dead & attempt) 4996 { 4997 dead &= attempt; 4998 break; 4999 } 5000 if (code == JUMP_INSN) 5001 { 5002 if (jump_left-- && simplejump_p (scan)) 5003 scan = JUMP_LABEL (scan); 5004 else 5005 break; 5006 } 5007 } 5008 } 5009 /* Mask out the stack pointer again, in case it was 5010 the only 'free' register we have found. */ 5011 dead &= 0x7fff; 5012 } 5013 /* If the immediate destination is still in range, check for possible 5014 threading with a jump beyond the delay slot insn. 5015 Don't check if we are called recursively; the jump has been or will be 5016 checked in a different invocation then. 
*/ 5017 5018 else if (optimize && need_block >= 0) 5019 { 5020 rtx next = next_active_insn (next_active_insn (dest)); 5021 if (next && JUMP_P (next) 5022 && GET_CODE (PATTERN (next)) == SET 5023 && recog_memoized (next) == CODE_FOR_jump_compact) 5024 { 5025 dest = JUMP_LABEL (next); 5026 if (dest 5027 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092 5028 > 4092 + 4098)) 5029 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1); 5030 } 5031 } 5032 5033 if (dead) 5034 { 5035 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead)); 5036 5037 /* It would be nice if we could convert the jump into an indirect 5038 jump / far branch right now, and thus exposing all constituent 5039 instructions to further optimization. However, reorg uses 5040 simplejump_p to determine if there is an unconditional jump where 5041 it should try to schedule instructions from the target of the 5042 branch; simplejump_p fails for indirect jumps even if they have 5043 a JUMP_LABEL. */ 5044 rtx insn = emit_insn_before (gen_indirect_jump_scratch 5045 (reg, GEN_INT (unspec_bbr_uid++)), 5046 jump); 5047 /* ??? We would like this to have the scope of the jump, but that 5048 scope will change when a delay slot insn of an inner scope is added. 5049 Hence, after delay slot scheduling, we'll have to expect 5050 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and 5051 the jump. */ 5052 5053 INSN_LOCATOR (insn) = INSN_LOCATOR (jump); 5054 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch; 5055 return insn; 5056 } 5057 else if (need_block) 5058 /* We can't use JUMP_LABEL here because it might be undefined 5059 when not optimizing. */ 5060 return emit_insn_before (gen_block_branch_redirect 5061 (GEN_INT (unspec_bbr_uid++)), 5062 jump); 5063 return prev; 5064 } 5065 5066 #define CONDJUMP_MIN -252 5067 #define CONDJUMP_MAX 262 5068 struct far_branch 5069 { 5070 /* A label (to be placed) in front of the jump 5071 that jumps to our ultimate destination. */ 5072 rtx near_label; 5073 /* Where we are going to insert it if we cannot move the jump any farther, 5074 or the jump itself if we have picked up an existing jump. */ 5075 rtx insert_place; 5076 /* The ultimate destination. */ 5077 rtx far_label; 5078 struct far_branch *prev; 5079 /* If the branch has already been created, its address; 5080 else the address of its first prospective user. */ 5081 int address; 5082 }; 5083 5084 static void gen_far_branch (struct far_branch *); 5085 enum mdep_reorg_phase_e mdep_reorg_phase; 5086 static void 5087 gen_far_branch (struct far_branch *bp) 5088 { 5089 rtx insn = bp->insert_place; 5090 rtx jump; 5091 rtx label = gen_label_rtx (); 5092 int ok; 5093 5094 emit_label_after (label, insn); 5095 if (bp->far_label) 5096 { 5097 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn); 5098 LABEL_NUSES (bp->far_label)++; 5099 } 5100 else 5101 jump = emit_jump_insn_after (gen_return (), insn); 5102 /* Emit a barrier so that reorg knows that any following instructions 5103 are not reachable via a fall-through path. 5104 But don't do this when not optimizing, since we wouldn't suppress the 5105 alignment for the barrier then, and could end up with out-of-range 5106 pc-relative loads. 
*/ 5107 if (optimize) 5108 emit_barrier_after (jump); 5109 emit_label_after (bp->near_label, insn); 5110 JUMP_LABEL (jump) = bp->far_label; 5111 ok = invert_jump (insn, label, 1); 5112 gcc_assert (ok); 5113 5114 /* If we are branching around a jump (rather than a return), prevent 5115 reorg from using an insn from the jump target as the delay slot insn - 5116 when reorg did this, it pessimized code (we rather hide the delay slot) 5117 and it could cause branches to go out of range. */ 5118 if (bp->far_label) 5119 (emit_insn_after 5120 (gen_stuff_delay_slot 5121 (GEN_INT (unspec_bbr_uid++), 5122 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)), 5123 insn)); 5124 /* Prevent reorg from undoing our splits. */ 5125 gen_block_redirect (jump, bp->address += 2, 2); 5126 } 5127 5128 /* Fix up ADDR_DIFF_VECs. */ 5129 void 5130 fixup_addr_diff_vecs (rtx first) 5131 { 5132 rtx insn; 5133 5134 for (insn = first; insn; insn = NEXT_INSN (insn)) 5135 { 5136 rtx vec_lab, pat, prev, prevpat, x, braf_label; 5137 5138 if (!JUMP_P (insn) 5139 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) 5140 continue; 5141 pat = PATTERN (insn); 5142 vec_lab = XEXP (XEXP (pat, 0), 0); 5143 5144 /* Search the matching casesi_jump_2. */ 5145 for (prev = vec_lab; ; prev = PREV_INSN (prev)) 5146 { 5147 if (!JUMP_P (prev)) 5148 continue; 5149 prevpat = PATTERN (prev); 5150 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2) 5151 continue; 5152 x = XVECEXP (prevpat, 0, 1); 5153 if (GET_CODE (x) != USE) 5154 continue; 5155 x = XEXP (x, 0); 5156 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab) 5157 break; 5158 } 5159 /* FIXME: This is a bug in the optimizer, but it seems harmless 5160 to just avoid panicing. */ 5161 if (!prev) 5162 continue; 5163 5164 /* Emit the reference label of the braf where it belongs, right after 5165 the casesi_jump_2 (i.e. braf). */ 5166 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0); 5167 emit_label_after (braf_label, prev); 5168 5169 /* Fix up the ADDR_DIF_VEC to be relative 5170 to the reference address of the braf. */ 5171 XEXP (XEXP (pat, 0), 0) = braf_label; 5172 } 5173 } 5174 5175 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following 5176 a barrier. Return the base 2 logarithm of the desired alignment. */ 5177 int 5178 barrier_align (rtx barrier_or_label) 5179 { 5180 rtx next = next_real_insn (barrier_or_label), pat, prev; 5181 int slot, credit, jump_to_next = 0; 5182 5183 if (! next) 5184 return 0; 5185 5186 pat = PATTERN (next); 5187 5188 if (GET_CODE (pat) == ADDR_DIFF_VEC) 5189 return 2; 5190 5191 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN) 5192 /* This is a barrier in front of a constant table. */ 5193 return 0; 5194 5195 prev = prev_real_insn (barrier_or_label); 5196 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC) 5197 { 5198 pat = PATTERN (prev); 5199 /* If this is a very small table, we want to keep the alignment after 5200 the table to the minimum for proper code alignment. */ 5201 return ((TARGET_SMALLCODE 5202 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat)) 5203 <= (unsigned) 1 << (CACHE_LOG - 2))) 5204 ? 1 << TARGET_SHMEDIA : align_jumps_log); 5205 } 5206 5207 if (TARGET_SMALLCODE) 5208 return 0; 5209 5210 if (! TARGET_SH2 || ! optimize) 5211 return align_jumps_log; 5212 5213 /* When fixing up pcloads, a constant table might be inserted just before 5214 the basic block that ends with the barrier. Thus, we can't trust the 5215 instruction lengths before that. 
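Hence the branch heuristic below is only used after the SH_FIXUP_PCLOAD phase.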
*/ 5216 if (mdep_reorg_phase > SH_FIXUP_PCLOAD) 5217 { 5218 /* Check if there is an immediately preceding branch to the insn beyond 5219 the barrier. We must weight the cost of discarding useful information 5220 from the current cache line when executing this branch and there is 5221 an alignment, against that of fetching unneeded insn in front of the 5222 branch target when there is no alignment. */ 5223 5224 /* There are two delay_slot cases to consider. One is the simple case 5225 where the preceding branch is to the insn beyond the barrier (simple 5226 delay slot filling), and the other is where the preceding branch has 5227 a delay slot that is a duplicate of the insn after the barrier 5228 (fill_eager_delay_slots) and the branch is to the insn after the insn 5229 after the barrier. */ 5230 5231 /* PREV is presumed to be the JUMP_INSN for the barrier under 5232 investigation. Skip to the insn before it. */ 5233 prev = prev_real_insn (prev); 5234 5235 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2; 5236 credit >= 0 && prev && NONJUMP_INSN_P (prev); 5237 prev = prev_real_insn (prev)) 5238 { 5239 jump_to_next = 0; 5240 if (GET_CODE (PATTERN (prev)) == USE 5241 || GET_CODE (PATTERN (prev)) == CLOBBER) 5242 continue; 5243 if (GET_CODE (PATTERN (prev)) == SEQUENCE) 5244 { 5245 prev = XVECEXP (PATTERN (prev), 0, 1); 5246 if (INSN_UID (prev) == INSN_UID (next)) 5247 { 5248 /* Delay slot was filled with insn at jump target. */ 5249 jump_to_next = 1; 5250 continue; 5251 } 5252 } 5253 5254 if (slot && 5255 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) 5256 slot = 0; 5257 credit -= get_attr_length (prev); 5258 } 5259 if (prev 5260 && JUMP_P (prev) 5261 && JUMP_LABEL (prev)) 5262 { 5263 rtx x; 5264 if (jump_to_next 5265 || next_real_insn (JUMP_LABEL (prev)) == next 5266 /* If relax_delay_slots() decides NEXT was redundant 5267 with some previous instruction, it will have 5268 redirected PREV's jump to the following insn. */ 5269 || JUMP_LABEL (prev) == next_nonnote_insn (next) 5270 /* There is no upper bound on redundant instructions 5271 that might have been skipped, but we must not put an 5272 alignment where none had been before. */ 5273 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))), 5274 (INSN_P (x) 5275 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect 5276 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch 5277 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot)))) 5278 { 5279 rtx pat = PATTERN (prev); 5280 if (GET_CODE (pat) == PARALLEL) 5281 pat = XVECEXP (pat, 0, 0); 5282 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0)) 5283 return 0; 5284 } 5285 } 5286 } 5287 5288 return align_jumps_log; 5289 } 5290 5291 /* If we are inside a phony loop, almost any kind of label can turn up as the 5292 first one in the loop. Aligning a braf label causes incorrect switch 5293 destination addresses; we can detect braf labels because they are 5294 followed by a BARRIER. 5295 Applying loop alignment to small constant or switch tables is a waste 5296 of space, so we suppress this too. */ 5297 int 5298 sh_loop_align (rtx label) 5299 { 5300 rtx next = label; 5301 5302 do 5303 next = next_nonnote_insn (next); 5304 while (next && LABEL_P (next)); 5305 5306 if (! next 5307 || ! INSN_P (next) 5308 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC 5309 || recog_memoized (next) == CODE_FOR_consttable_2) 5310 return 0; 5311 5312 return align_loops_log; 5313 } 5314 5315 /* Do a final pass over the function, just before delayed branch 5316 scheduling. 
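It inserts use labels for linker relaxation, fixes up out-of-range movas, dumps the constant pool tables and splits out-of-range branches.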
*/ 5317 5318 static void 5319 sh_reorg (void) 5320 { 5321 rtx first, insn, mova = NULL_RTX; 5322 int num_mova; 5323 rtx r0_rtx = gen_rtx_REG (Pmode, 0); 5324 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx); 5325 5326 first = get_insns (); 5327 max_labelno_before_reorg = max_label_num (); 5328 5329 /* We must split call insns before introducing `mova's. If we're 5330 optimizing, they'll have already been split. Otherwise, make 5331 sure we don't split them too late. */ 5332 if (! optimize) 5333 split_all_insns_noflow (); 5334 5335 if (TARGET_SHMEDIA) 5336 return; 5337 5338 /* If relaxing, generate pseudo-ops to associate function calls with 5339 the symbols they call. It does no harm to not generate these 5340 pseudo-ops. However, when we can generate them, it enables to 5341 linker to potentially relax the jsr to a bsr, and eliminate the 5342 register load and, possibly, the constant pool entry. */ 5343 5344 mdep_reorg_phase = SH_INSERT_USES_LABELS; 5345 if (TARGET_RELAX) 5346 { 5347 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our 5348 own purposes. This works because none of the remaining passes 5349 need to look at them. 5350 5351 ??? But it may break in the future. We should use a machine 5352 dependent REG_NOTE, or some other approach entirely. */ 5353 for (insn = first; insn; insn = NEXT_INSN (insn)) 5354 { 5355 if (INSN_P (insn)) 5356 { 5357 rtx note; 5358 5359 while ((note = find_reg_note (insn, REG_LABEL_OPERAND, 5360 NULL_RTX)) != 0) 5361 remove_note (insn, note); 5362 } 5363 } 5364 5365 for (insn = first; insn; insn = NEXT_INSN (insn)) 5366 { 5367 rtx pattern, reg, link, set, scan, dies, label; 5368 int rescan = 0, foundinsn = 0; 5369 5370 if (CALL_P (insn)) 5371 { 5372 pattern = PATTERN (insn); 5373 5374 if (GET_CODE (pattern) == PARALLEL) 5375 pattern = XVECEXP (pattern, 0, 0); 5376 if (GET_CODE (pattern) == SET) 5377 pattern = SET_SRC (pattern); 5378 5379 if (GET_CODE (pattern) != CALL 5380 || !MEM_P (XEXP (pattern, 0))) 5381 continue; 5382 5383 reg = XEXP (XEXP (pattern, 0), 0); 5384 } 5385 else 5386 { 5387 reg = sfunc_uses_reg (insn); 5388 if (! reg) 5389 continue; 5390 } 5391 5392 if (!REG_P (reg)) 5393 continue; 5394 5395 /* Try scanning backward to find where the register is set. */ 5396 link = NULL; 5397 for (scan = PREV_INSN (insn); 5398 scan && !LABEL_P (scan); 5399 scan = PREV_INSN (scan)) 5400 { 5401 if (! INSN_P (scan)) 5402 continue; 5403 5404 if (! reg_mentioned_p (reg, scan)) 5405 continue; 5406 5407 if (noncall_uses_reg (reg, scan, &set)) 5408 break; 5409 5410 if (set) 5411 { 5412 link = scan; 5413 break; 5414 } 5415 } 5416 5417 if (! link) 5418 continue; 5419 5420 /* The register is set at LINK. */ 5421 5422 /* We can only optimize the function call if the register is 5423 being set to a symbol. In theory, we could sometimes 5424 optimize calls to a constant location, but the assembler 5425 and linker do not support that at present. */ 5426 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF 5427 && GET_CODE (SET_SRC (set)) != LABEL_REF) 5428 continue; 5429 5430 /* Scan forward from LINK to the place where REG dies, and 5431 make sure that the only insns which use REG are 5432 themselves function calls. */ 5433 5434 /* ??? This doesn't work for call targets that were allocated 5435 by reload, since there may not be a REG_DEAD note for the 5436 register. 
*/ 5437 5438 dies = NULL_RTX; 5439 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan)) 5440 { 5441 rtx scanset; 5442 5443 /* Don't try to trace forward past a CODE_LABEL if we haven't 5444 seen INSN yet. Ordinarily, we will only find the setting insn 5445 if it is in the same basic block. However, 5446 cross-jumping can insert code labels in between the load and 5447 the call, and can result in situations where a single call 5448 insn may have two targets depending on where we came from. */ 5449 5450 if (LABEL_P (scan) && ! foundinsn) 5451 break; 5452 5453 if (! INSN_P (scan)) 5454 continue; 5455 5456 /* Don't try to trace forward past a JUMP. To optimize 5457 safely, we would have to check that all the 5458 instructions at the jump destination did not use REG. */ 5459 5460 if (JUMP_P (scan)) 5461 break; 5462 5463 if (! reg_mentioned_p (reg, scan)) 5464 continue; 5465 5466 if (noncall_uses_reg (reg, scan, &scanset)) 5467 break; 5468 5469 if (scan == insn) 5470 foundinsn = 1; 5471 5472 if (scan != insn 5473 && (CALL_P (scan) || sfunc_uses_reg (scan))) 5474 { 5475 /* There is a function call to this register other 5476 than the one we are checking. If we optimize 5477 this call, we need to rescan again below. */ 5478 rescan = 1; 5479 } 5480 5481 /* ??? We shouldn't have to worry about SCANSET here. 5482 We should just be able to check for a REG_DEAD note 5483 on a function call. However, the REG_DEAD notes are 5484 apparently not dependable around libcalls; c-torture 5485 execute/920501-2 is a test case. If SCANSET is set, 5486 then this insn sets the register, so it must have 5487 died earlier. Unfortunately, this will only handle 5488 the cases in which the register is, in fact, set in a 5489 later insn. */ 5490 5491 /* ??? We shouldn't have to use FOUNDINSN here. 5492 This dates back to when we used LOG_LINKS to find 5493 the most recent insn which sets the register. */ 5494 5495 if (foundinsn 5496 && (scanset 5497 || find_reg_note (scan, REG_DEAD, reg))) 5498 { 5499 dies = scan; 5500 break; 5501 } 5502 } 5503 5504 if (! dies) 5505 { 5506 /* Either there was a branch, or some insn used REG 5507 other than as a function call address. */ 5508 continue; 5509 } 5510 5511 /* Create a code label, and put it in a REG_LABEL_OPERAND note 5512 on the insn which sets the register, and on each call insn 5513 which uses the register. In final_prescan_insn we look for 5514 the REG_LABEL_OPERAND notes, and output the appropriate label 5515 or pseudo-op. */ 5516 5517 label = gen_label_rtx (); 5518 add_reg_note (link, REG_LABEL_OPERAND, label); 5519 add_reg_note (insn, REG_LABEL_OPERAND, label); 5520 if (rescan) 5521 { 5522 scan = link; 5523 do 5524 { 5525 rtx reg2; 5526 5527 scan = NEXT_INSN (scan); 5528 if (scan != insn 5529 && ((CALL_P (scan) 5530 && reg_mentioned_p (reg, scan)) 5531 || ((reg2 = sfunc_uses_reg (scan)) 5532 && REGNO (reg2) == REGNO (reg)))) 5533 add_reg_note (scan, REG_LABEL_OPERAND, label); 5534 } 5535 while (scan != dies); 5536 } 5537 } 5538 } 5539 5540 if (TARGET_SH2) 5541 fixup_addr_diff_vecs (first); 5542 5543 if (optimize) 5544 { 5545 mdep_reorg_phase = SH_SHORTEN_BRANCHES0; 5546 shorten_branches (first); 5547 } 5548 5549 /* Scan the function looking for move instructions which have to be 5550 changed to pc-relative loads and insert the literal tables. 
*/ 5551 label_ref_list_pool = create_alloc_pool ("label references list", 5552 sizeof (struct label_ref_list_d), 5553 30); 5554 mdep_reorg_phase = SH_FIXUP_PCLOAD; 5555 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn)) 5556 { 5557 if (mova_p (insn)) 5558 { 5559 /* ??? basic block reordering can move a switch table dispatch 5560 below the switch table. Check if that has happened. 5561 We only have the addresses available when optimizing; but then, 5562 this check shouldn't be needed when not optimizing. */ 5563 if (!untangle_mova (&num_mova, &mova, insn)) 5564 { 5565 insn = mova; 5566 num_mova = 0; 5567 } 5568 } 5569 else if (JUMP_P (insn) 5570 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC 5571 && num_mova 5572 /* ??? loop invariant motion can also move a mova out of a 5573 loop. Since loop does this code motion anyway, maybe we 5574 should wrap UNSPEC_MOVA into a CONST, so that reload can 5575 move it back. */ 5576 && ((num_mova > 1 5577 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode) 5578 || (prev_nonnote_insn (insn) 5579 == XEXP (MOVA_LABELREF (mova), 0)))) 5580 { 5581 rtx scan; 5582 int total; 5583 5584 num_mova--; 5585 5586 /* Some code might have been inserted between the mova and 5587 its ADDR_DIFF_VEC. Check if the mova is still in range. */ 5588 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan)) 5589 total += get_attr_length (scan); 5590 5591 /* range of mova is 1020, add 4 because pc counts from address of 5592 second instruction after this one, subtract 2 in case pc is 2 5593 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC 5594 cancels out with alignment effects of the mova itself. */ 5595 if (total > 1022) 5596 { 5597 /* Change the mova into a load, and restart scanning 5598 there. broken_move will then return true for mova. */ 5599 fixup_mova (mova); 5600 insn = mova; 5601 } 5602 } 5603 if (broken_move (insn) 5604 || (NONJUMP_INSN_P (insn) 5605 && recog_memoized (insn) == CODE_FOR_casesi_worker_2)) 5606 { 5607 rtx scan; 5608 /* Scan ahead looking for a barrier to stick the constant table 5609 behind. */ 5610 rtx barrier = find_barrier (num_mova, mova, insn); 5611 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL; 5612 int need_aligned_label = 0; 5613 5614 if (num_mova && ! mova_p (mova)) 5615 { 5616 /* find_barrier had to change the first mova into a 5617 pcload; thus, we have to start with this new pcload. */ 5618 insn = mova; 5619 num_mova = 0; 5620 } 5621 /* Now find all the moves between the points and modify them. 
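Each broken_move between INSN and BARRIER is redirected to the constant pool (through a label reference, or through r0 for loads into FP registers), and dump_table then emits the pool after BARRIER.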
*/ 5622 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan)) 5623 { 5624 if (LABEL_P (scan)) 5625 last_float = 0; 5626 if (NONJUMP_INSN_P (scan) 5627 && recog_memoized (scan) == CODE_FOR_casesi_worker_2) 5628 need_aligned_label = 1; 5629 if (broken_move (scan)) 5630 { 5631 rtx *patp = &PATTERN (scan), pat = *patp; 5632 rtx src, dst; 5633 rtx lab; 5634 rtx newsrc; 5635 enum machine_mode mode; 5636 5637 if (GET_CODE (pat) == PARALLEL) 5638 patp = &XVECEXP (pat, 0, 0), pat = *patp; 5639 src = SET_SRC (pat); 5640 dst = SET_DEST (pat); 5641 mode = GET_MODE (dst); 5642 5643 if (mode == SImode && hi_const (src) 5644 && REGNO (dst) != FPUL_REG) 5645 { 5646 int offset = 0; 5647 5648 mode = HImode; 5649 while (GET_CODE (dst) == SUBREG) 5650 { 5651 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)), 5652 GET_MODE (SUBREG_REG (dst)), 5653 SUBREG_BYTE (dst), 5654 GET_MODE (dst)); 5655 dst = SUBREG_REG (dst); 5656 } 5657 dst = gen_rtx_REG (HImode, REGNO (dst) + offset); 5658 } 5659 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst))) 5660 { 5661 /* This must be an insn that clobbers r0. */ 5662 rtx *clobberp = &XVECEXP (PATTERN (scan), 0, 5663 XVECLEN (PATTERN (scan), 0) 5664 - 1); 5665 rtx clobber = *clobberp; 5666 5667 gcc_assert (GET_CODE (clobber) == CLOBBER 5668 && rtx_equal_p (XEXP (clobber, 0), r0_rtx)); 5669 5670 if (last_float 5671 && reg_set_between_p (r0_rtx, last_float_move, scan)) 5672 last_float = 0; 5673 if (last_float 5674 && TARGET_SHCOMPACT 5675 && GET_MODE_SIZE (mode) != 4 5676 && GET_MODE_SIZE (GET_MODE (last_float)) == 4) 5677 last_float = 0; 5678 lab = add_constant (src, mode, last_float); 5679 if (lab) 5680 emit_insn_before (gen_mova (lab), scan); 5681 else 5682 { 5683 /* There will be a REG_UNUSED note for r0 on 5684 LAST_FLOAT_MOVE; we have to change it to REG_INC, 5685 lest reorg:mark_target_live_regs will not 5686 consider r0 to be used, and we end up with delay 5687 slot insn in front of SCAN that clobbers r0. */ 5688 rtx note 5689 = find_regno_note (last_float_move, REG_UNUSED, 0); 5690 5691 /* If we are not optimizing, then there may not be 5692 a note. */ 5693 if (note) 5694 PUT_REG_NOTE_KIND (note, REG_INC); 5695 5696 *last_float_addr = r0_inc_rtx; 5697 } 5698 last_float_move = scan; 5699 last_float = src; 5700 newsrc = gen_const_mem (mode, 5701 (((TARGET_SH4 && ! TARGET_FMOVD) 5702 || REGNO (dst) == FPUL_REG) 5703 ? r0_inc_rtx 5704 : r0_rtx)); 5705 last_float_addr = &XEXP (newsrc, 0); 5706 5707 /* Remove the clobber of r0. */ 5708 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber), 5709 gen_rtx_SCRATCH (Pmode)); 5710 } 5711 /* This is a mova needing a label. Create it. */ 5712 else if (GET_CODE (src) == UNSPEC 5713 && XINT (src, 1) == UNSPEC_MOVA 5714 && GET_CODE (XVECEXP (src, 0, 0)) == CONST) 5715 { 5716 lab = add_constant (XVECEXP (src, 0, 0), mode, 0); 5717 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); 5718 newsrc = gen_rtx_UNSPEC (SImode, 5719 gen_rtvec (1, newsrc), 5720 UNSPEC_MOVA); 5721 } 5722 else 5723 { 5724 lab = add_constant (src, mode, 0); 5725 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); 5726 newsrc = gen_const_mem (mode, newsrc); 5727 } 5728 *patp = gen_rtx_SET (VOIDmode, dst, newsrc); 5729 INSN_CODE (scan) = -1; 5730 } 5731 } 5732 dump_table (need_aligned_label ? 
insn : 0, barrier); 5733 insn = barrier; 5734 } 5735 } 5736 free_alloc_pool (label_ref_list_pool); 5737 for (insn = first; insn; insn = NEXT_INSN (insn)) 5738 PUT_MODE (insn, VOIDmode); 5739 5740 mdep_reorg_phase = SH_SHORTEN_BRANCHES1; 5741 INSN_ADDRESSES_FREE (); 5742 split_branches (first); 5743 5744 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it 5745 also has an effect on the register that holds the address of the sfunc. 5746 Insert an extra dummy insn in front of each sfunc that pretends to 5747 use this register. */ 5748 if (flag_delayed_branch) 5749 { 5750 for (insn = first; insn; insn = NEXT_INSN (insn)) 5751 { 5752 rtx reg = sfunc_uses_reg (insn); 5753 5754 if (! reg) 5755 continue; 5756 emit_insn_before (gen_use_sfunc_addr (reg), insn); 5757 } 5758 } 5759 #if 0 5760 /* fpscr is not actually a user variable, but we pretend it is for the 5761 sake of the previous optimization passes, since we want it handled like 5762 one. However, we don't have any debugging information for it, so turn 5763 it into a non-user variable now. */ 5764 if (TARGET_SH4) 5765 REG_USERVAR_P (get_fpscr_rtx ()) = 0; 5766 #endif 5767 mdep_reorg_phase = SH_AFTER_MDEP_REORG; 5768 } 5769 5770 int 5771 get_dest_uid (rtx label, int max_uid) 5772 { 5773 rtx dest = next_real_insn (label); 5774 int dest_uid; 5775 if (! dest) 5776 /* This can happen for an undefined label. */ 5777 return 0; 5778 dest_uid = INSN_UID (dest); 5779 /* If this is a newly created branch redirection blocking instruction, 5780 we cannot index the branch_uid or insn_addresses arrays with its 5781 uid. But then, we won't need to, because the actual destination is 5782 the following branch. */ 5783 while (dest_uid >= max_uid) 5784 { 5785 dest = NEXT_INSN (dest); 5786 dest_uid = INSN_UID (dest); 5787 } 5788 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN) 5789 return 0; 5790 return dest_uid; 5791 } 5792 5793 /* Split condbranches that are out of range. Also add clobbers for 5794 scratch registers that are needed in far jumps. 5795 We do this before delay slot scheduling, so that it can take our 5796 newly created instructions into account. It also allows us to 5797 find branches with common targets more easily. */ 5798 5799 static void 5800 split_branches (rtx first) 5801 { 5802 rtx insn; 5803 struct far_branch **uid_branch, *far_branch_list = 0; 5804 int max_uid = get_max_uid (); 5805 int ok; 5806 5807 /* Find out which branches are out of range. */ 5808 shorten_branches (first); 5809 5810 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch); 5811 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch); 5812 5813 for (insn = first; insn; insn = NEXT_INSN (insn)) 5814 if (! INSN_P (insn)) 5815 continue; 5816 else if (INSN_DELETED_P (insn)) 5817 { 5818 /* Shorten_branches would split this instruction again, 5819 so transform it into a note. 
*/ 5820 SET_INSN_DELETED (insn); 5821 } 5822 else if (JUMP_P (insn) 5823 /* Don't mess with ADDR_DIFF_VEC */ 5824 && (GET_CODE (PATTERN (insn)) == SET 5825 || GET_CODE (PATTERN (insn)) == RETURN)) 5826 { 5827 enum attr_type type = get_attr_type (insn); 5828 if (type == TYPE_CBRANCH) 5829 { 5830 rtx next, beyond; 5831 5832 if (get_attr_length (insn) > 4) 5833 { 5834 rtx src = SET_SRC (PATTERN (insn)); 5835 rtx olabel = XEXP (XEXP (src, 1), 0); 5836 int addr = INSN_ADDRESSES (INSN_UID (insn)); 5837 rtx label = 0; 5838 int dest_uid = get_dest_uid (olabel, max_uid); 5839 struct far_branch *bp = uid_branch[dest_uid]; 5840 5841 /* redirect_jump needs a valid JUMP_LABEL, and it might delete 5842 the label if the LABEL_NUSES count drops to zero. There is 5843 always a jump_optimize pass that sets these values, but it 5844 proceeds to delete unreferenced code, and then if not 5845 optimizing, to un-delete the deleted instructions, thus 5846 leaving labels with too low uses counts. */ 5847 if (! optimize) 5848 { 5849 JUMP_LABEL (insn) = olabel; 5850 LABEL_NUSES (olabel)++; 5851 } 5852 if (! bp) 5853 { 5854 bp = (struct far_branch *) alloca (sizeof *bp); 5855 uid_branch[dest_uid] = bp; 5856 bp->prev = far_branch_list; 5857 far_branch_list = bp; 5858 bp->far_label 5859 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0); 5860 LABEL_NUSES (bp->far_label)++; 5861 } 5862 else 5863 { 5864 label = bp->near_label; 5865 if (! label && bp->address - addr >= CONDJUMP_MIN) 5866 { 5867 rtx block = bp->insert_place; 5868 5869 if (GET_CODE (PATTERN (block)) == RETURN) 5870 block = PREV_INSN (block); 5871 else 5872 block = gen_block_redirect (block, 5873 bp->address, 2); 5874 label = emit_label_after (gen_label_rtx (), 5875 PREV_INSN (block)); 5876 bp->near_label = label; 5877 } 5878 else if (label && ! NEXT_INSN (label)) 5879 { 5880 if (addr + 2 - bp->address <= CONDJUMP_MAX) 5881 bp->insert_place = insn; 5882 else 5883 gen_far_branch (bp); 5884 } 5885 } 5886 if (! label 5887 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN)) 5888 { 5889 bp->near_label = label = gen_label_rtx (); 5890 bp->insert_place = insn; 5891 bp->address = addr; 5892 } 5893 ok = redirect_jump (insn, label, 0); 5894 gcc_assert (ok); 5895 } 5896 else 5897 { 5898 /* get_attr_length (insn) == 2 */ 5899 /* Check if we have a pattern where reorg wants to redirect 5900 the branch to a label from an unconditional branch that 5901 is too far away. */ 5902 /* We can't use JUMP_LABEL here because it might be undefined 5903 when not optimizing. */ 5904 /* A syntax error might cause beyond to be NULL_RTX. 
*/ 5905 beyond 5906 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 5907 0)); 5908 5909 if (beyond 5910 && (JUMP_P (beyond) 5911 || ((beyond = next_active_insn (beyond)) 5912 && JUMP_P (beyond))) 5913 && GET_CODE (PATTERN (beyond)) == SET 5914 && recog_memoized (beyond) == CODE_FOR_jump_compact 5915 && ((INSN_ADDRESSES 5916 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))) 5917 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252) 5918 > 252 + 258 + 2)) 5919 gen_block_redirect (beyond, 5920 INSN_ADDRESSES (INSN_UID (beyond)), 1); 5921 } 5922 5923 next = next_active_insn (insn); 5924 5925 if (next 5926 && (JUMP_P (next) 5927 || ((next = next_active_insn (next)) 5928 && JUMP_P (next))) 5929 && GET_CODE (PATTERN (next)) == SET 5930 && recog_memoized (next) == CODE_FOR_jump_compact 5931 && ((INSN_ADDRESSES 5932 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))) 5933 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252) 5934 > 252 + 258 + 2)) 5935 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1); 5936 } 5937 else if (type == TYPE_JUMP || type == TYPE_RETURN) 5938 { 5939 int addr = INSN_ADDRESSES (INSN_UID (insn)); 5940 rtx far_label = 0; 5941 int dest_uid = 0; 5942 struct far_branch *bp; 5943 5944 if (type == TYPE_JUMP) 5945 { 5946 far_label = XEXP (SET_SRC (PATTERN (insn)), 0); 5947 dest_uid = get_dest_uid (far_label, max_uid); 5948 if (! dest_uid) 5949 { 5950 /* Parse errors can lead to labels outside 5951 the insn stream. */ 5952 if (! NEXT_INSN (far_label)) 5953 continue; 5954 5955 if (! optimize) 5956 { 5957 JUMP_LABEL (insn) = far_label; 5958 LABEL_NUSES (far_label)++; 5959 } 5960 redirect_jump (insn, NULL_RTX, 1); 5961 far_label = 0; 5962 } 5963 } 5964 bp = uid_branch[dest_uid]; 5965 if (! bp) 5966 { 5967 bp = (struct far_branch *) alloca (sizeof *bp); 5968 uid_branch[dest_uid] = bp; 5969 bp->prev = far_branch_list; 5970 far_branch_list = bp; 5971 bp->near_label = 0; 5972 bp->far_label = far_label; 5973 if (far_label) 5974 LABEL_NUSES (far_label)++; 5975 } 5976 else if (bp->near_label && ! NEXT_INSN (bp->near_label)) 5977 if (addr - bp->address <= CONDJUMP_MAX) 5978 emit_label_after (bp->near_label, PREV_INSN (insn)); 5979 else 5980 { 5981 gen_far_branch (bp); 5982 bp->near_label = 0; 5983 } 5984 else 5985 bp->near_label = 0; 5986 bp->address = addr; 5987 bp->insert_place = insn; 5988 if (! far_label) 5989 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn); 5990 else 5991 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0); 5992 } 5993 } 5994 /* Generate all pending far branches, 5995 and free our references to the far labels. */ 5996 while (far_branch_list) 5997 { 5998 if (far_branch_list->near_label 5999 && ! NEXT_INSN (far_branch_list->near_label)) 6000 gen_far_branch (far_branch_list); 6001 if (optimize 6002 && far_branch_list->far_label 6003 && ! --LABEL_NUSES (far_branch_list->far_label)) 6004 delete_insn (far_branch_list->far_label); 6005 far_branch_list = far_branch_list->prev; 6006 } 6007 6008 /* Instruction length information is no longer valid due to the new 6009 instructions that have been generated. */ 6010 init_insn_lengths (); 6011 } 6012 6013 /* Dump out instruction addresses, which is useful for debugging the 6014 constant pool table stuff. 6015 6016 If relaxing, output the label and pseudo-ops used to link together 6017 calls and the instruction which set the registers. */ 6018 6019 /* ??? 
The addresses printed by this routine for insns are nonsense for 6020 insns which are inside of a sequence where none of the inner insns have 6021 variable length. This is because the second pass of shorten_branches 6022 does not bother to update them. */ 6023 6024 void 6025 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED, 6026 int noperands ATTRIBUTE_UNUSED) 6027 { 6028 if (TARGET_DUMPISIZE) 6029 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn))); 6030 6031 if (TARGET_RELAX) 6032 { 6033 rtx note; 6034 6035 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX); 6036 if (note) 6037 { 6038 rtx pattern; 6039 6040 pattern = PATTERN (insn); 6041 if (GET_CODE (pattern) == PARALLEL) 6042 pattern = XVECEXP (pattern, 0, 0); 6043 switch (GET_CODE (pattern)) 6044 { 6045 case SET: 6046 if (GET_CODE (SET_SRC (pattern)) != CALL 6047 && get_attr_type (insn) != TYPE_SFUNC) 6048 { 6049 targetm.asm_out.internal_label 6050 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0))); 6051 break; 6052 } 6053 /* else FALLTHROUGH */ 6054 case CALL: 6055 asm_fprintf (asm_out_file, "\t.uses %LL%d\n", 6056 CODE_LABEL_NUMBER (XEXP (note, 0))); 6057 break; 6058 6059 default: 6060 gcc_unreachable (); 6061 } 6062 } 6063 } 6064 } 6065 6066 /* Dump out any constants accumulated in the final pass. These will 6067 only be labels. */ 6068 6069 const char * 6070 output_jump_label_table (void) 6071 { 6072 int i; 6073 6074 if (pool_size) 6075 { 6076 fprintf (asm_out_file, "\t.align 2\n"); 6077 for (i = 0; i < pool_size; i++) 6078 { 6079 pool_node *p = &pool_vector[i]; 6080 6081 (*targetm.asm_out.internal_label) (asm_out_file, "L", 6082 CODE_LABEL_NUMBER (p->label)); 6083 output_asm_insn (".long %O0", &p->value); 6084 } 6085 pool_size = 0; 6086 } 6087 6088 return ""; 6089 } 6090 6091 /* A full frame looks like: 6092 6093 arg-5 6094 arg-4 6095 [ if current_function_anonymous_args 6096 arg-3 6097 arg-2 6098 arg-1 6099 arg-0 ] 6100 saved-fp 6101 saved-r10 6102 saved-r11 6103 saved-r12 6104 saved-pr 6105 local-n 6106 .. 6107 local-1 6108 local-0 <- fp points here. */ 6109 6110 /* Number of bytes pushed for anonymous args, used to pass information 6111 between expand_prologue and expand_epilogue. */ 6112 6113 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be 6114 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's 6115 for an epilogue and a negative value means that it's for a sibcall 6116 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of 6117 all the registers that are about to be restored, and hence dead. */ 6118 6119 static void 6120 output_stack_adjust (int size, rtx reg, int epilogue_p, 6121 HARD_REG_SET *live_regs_mask, bool frame_p) 6122 { 6123 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn; 6124 if (size) 6125 { 6126 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT; 6127 6128 /* This test is bogus, as output_stack_adjust is used to re-align the 6129 stack. */ 6130 #if 0 6131 gcc_assert (!(size % align)); 6132 #endif 6133 6134 if (CONST_OK_FOR_ADD (size)) 6135 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size))); 6136 /* Try to do it with two partial adjustments; however, we must make 6137 sure that the stack is properly aligned at all times, in case 6138 an interrupt occurs between the two partial adjustments. 
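For example, an adjustment of 200 bytes with a 4-byte alignment is emitted as two additions of 100 bytes each; the first step (size / 2 & -align) is rounded to a multiple of the alignment, so the intermediate stack pointer value stays aligned.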
*/ 6139 else if (CONST_OK_FOR_ADD (size / 2 & -align) 6140 && CONST_OK_FOR_ADD (size - (size / 2 & -align))) 6141 { 6142 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align))); 6143 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align)))); 6144 } 6145 else 6146 { 6147 rtx const_reg; 6148 rtx insn; 6149 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1); 6150 int i; 6151 6152 /* If TEMP is invalid, we could temporarily save a general 6153 register to MACL. However, there is currently no need 6154 to handle this case, so just die when we see it. */ 6155 if (epilogue_p < 0 6156 || current_function_interrupt 6157 || ! call_really_used_regs[temp] || fixed_regs[temp]) 6158 temp = -1; 6159 if (temp < 0 && ! current_function_interrupt 6160 && (TARGET_SHMEDIA || epilogue_p >= 0)) 6161 { 6162 HARD_REG_SET temps; 6163 COPY_HARD_REG_SET (temps, call_used_reg_set); 6164 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set); 6165 if (epilogue_p > 0) 6166 { 6167 int nreg = 0; 6168 if (crtl->return_rtx) 6169 { 6170 enum machine_mode mode; 6171 mode = GET_MODE (crtl->return_rtx); 6172 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG) 6173 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode); 6174 } 6175 for (i = 0; i < nreg; i++) 6176 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i); 6177 if (crtl->calls_eh_return) 6178 { 6179 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO); 6180 for (i = 0; i <= 3; i++) 6181 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i)); 6182 } 6183 } 6184 if (TARGET_SHMEDIA && epilogue_p < 0) 6185 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++) 6186 CLEAR_HARD_REG_BIT (temps, i); 6187 if (epilogue_p <= 0) 6188 { 6189 for (i = FIRST_PARM_REG; 6190 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++) 6191 CLEAR_HARD_REG_BIT (temps, i); 6192 if (cfun->static_chain_decl != NULL) 6193 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM); 6194 } 6195 temp = scavenge_reg (&temps); 6196 } 6197 if (temp < 0 && live_regs_mask) 6198 { 6199 HARD_REG_SET temps; 6200 6201 COPY_HARD_REG_SET (temps, *live_regs_mask); 6202 CLEAR_HARD_REG_BIT (temps, REGNO (reg)); 6203 temp = scavenge_reg (&temps); 6204 } 6205 if (temp < 0) 6206 { 6207 rtx adj_reg, tmp_reg, mem; 6208 6209 /* If we reached here, the most likely case is the (sibcall) 6210 epilogue for non SHmedia. Put a special push/pop sequence 6211 for such case as the last resort. This looks lengthy but 6212 would not be problem because it seems to be very 6213 rare. */ 6214 6215 gcc_assert (!TARGET_SHMEDIA && epilogue_p); 6216 6217 6218 /* ??? There is still the slight possibility that r4 or 6219 r5 have been reserved as fixed registers or assigned 6220 as global registers, and they change during an 6221 interrupt. There are possible ways to handle this: 6222 6223 - If we are adjusting the frame pointer (r14), we can do 6224 with a single temp register and an ordinary push / pop 6225 on the stack. 6226 - Grab any call-used or call-saved registers (i.e. not 6227 fixed or globals) for the temps we need. We might 6228 also grab r14 if we are adjusting the stack pointer. 6229 If we can't find enough available registers, issue 6230 a diagnostic and die - the user must have reserved 6231 way too many registers. 6232 But since all this is rather unlikely to happen and 6233 would require extra testing, we just die if r4 / r5 6234 are not available. 
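The fallback sequence below uses r4 as the address temporary and r5 as the data temporary, saving and restoring both around the adjustment.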
*/ 6235 gcc_assert (!fixed_regs[4] && !fixed_regs[5] 6236 && !global_regs[4] && !global_regs[5]); 6237 6238 adj_reg = gen_rtx_REG (GET_MODE (reg), 4); 6239 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5); 6240 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg); 6241 emit_insn (GEN_MOV (adj_reg, GEN_INT (size))); 6242 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg)); 6243 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg)); 6244 emit_move_insn (mem, tmp_reg); 6245 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg)); 6246 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg)); 6247 emit_move_insn (mem, tmp_reg); 6248 emit_move_insn (reg, adj_reg); 6249 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg)); 6250 emit_move_insn (adj_reg, mem); 6251 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg)); 6252 emit_move_insn (tmp_reg, mem); 6253 /* Tell flow the insns that pop r4/r5 aren't dead. */ 6254 emit_use (tmp_reg); 6255 emit_use (adj_reg); 6256 return; 6257 } 6258 const_reg = gen_rtx_REG (GET_MODE (reg), temp); 6259 6260 /* If SIZE is negative, subtract the positive value. 6261 This sometimes allows a constant pool entry to be shared 6262 between prologue and epilogue code. */ 6263 if (size < 0) 6264 { 6265 emit_insn (GEN_MOV (const_reg, GEN_INT (-size))); 6266 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg)); 6267 } 6268 else 6269 { 6270 emit_insn (GEN_MOV (const_reg, GEN_INT (size))); 6271 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg)); 6272 } 6273 if (! epilogue_p) 6274 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 6275 gen_rtx_SET (VOIDmode, reg, 6276 gen_rtx_PLUS (SImode, reg, 6277 GEN_INT (size)))); 6278 } 6279 } 6280 } 6281 6282 static rtx 6283 frame_insn (rtx x) 6284 { 6285 x = emit_insn (x); 6286 RTX_FRAME_RELATED_P (x) = 1; 6287 return x; 6288 } 6289 6290 /* Output RTL to push register RN onto the stack. */ 6291 6292 static rtx 6293 push (int rn) 6294 { 6295 rtx x; 6296 if (rn == FPUL_REG) 6297 x = gen_push_fpul (); 6298 else if (rn == FPSCR_REG) 6299 x = gen_push_fpscr (); 6300 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE 6301 && FP_OR_XD_REGISTER_P (rn)) 6302 { 6303 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1) 6304 return NULL_RTX; 6305 x = gen_push_4 (gen_rtx_REG (DFmode, rn)); 6306 } 6307 else if (TARGET_SH2E && FP_REGISTER_P (rn)) 6308 x = gen_push_e (gen_rtx_REG (SFmode, rn)); 6309 else 6310 x = gen_push (gen_rtx_REG (SImode, rn)); 6311 6312 x = frame_insn (x); 6313 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM)); 6314 return x; 6315 } 6316 6317 /* Output RTL to pop register RN from the stack. */ 6318 6319 static void 6320 pop (int rn) 6321 { 6322 rtx x; 6323 if (rn == FPUL_REG) 6324 x = gen_pop_fpul (); 6325 else if (rn == FPSCR_REG) 6326 x = gen_pop_fpscr (); 6327 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE 6328 && FP_OR_XD_REGISTER_P (rn)) 6329 { 6330 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1) 6331 return; 6332 x = gen_pop_4 (gen_rtx_REG (DFmode, rn)); 6333 } 6334 else if (TARGET_SH2E && FP_REGISTER_P (rn)) 6335 x = gen_pop_e (gen_rtx_REG (SFmode, rn)); 6336 else 6337 x = gen_pop (gen_rtx_REG (SImode, rn)); 6338 6339 x = emit_insn (x); 6340 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM)); 6341 } 6342 6343 /* Generate code to push the regs specified in the mask. */ 6344 6345 static void 6346 push_regs (HARD_REG_SET *mask, int interrupt_handler) 6347 { 6348 int i = interrupt_handler ? 
LAST_BANKED_REG + 1 : 0; 6349 int skip_fpscr = 0; 6350 6351 /* Push PR last; this gives better latencies after the prologue, and 6352 candidates for the return delay slot when there are no general 6353 registers pushed. */ 6354 for (; i < FIRST_PSEUDO_REGISTER; i++) 6355 { 6356 /* If this is an interrupt handler, and the SZ bit varies, 6357 and we have to push any floating point register, we need 6358 to switch to the correct precision first. */ 6359 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD 6360 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS])) 6361 { 6362 HARD_REG_SET unsaved; 6363 6364 push (FPSCR_REG); 6365 COMPL_HARD_REG_SET (unsaved, *mask); 6366 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved); 6367 skip_fpscr = 1; 6368 } 6369 if (i != PR_REG 6370 && (i != FPSCR_REG || ! skip_fpscr) 6371 && TEST_HARD_REG_BIT (*mask, i)) 6372 { 6373 /* If the ISR has RESBANK attribute assigned, don't push any of 6374 the following registers - R0-R14, MACH, MACL and GBR. */ 6375 if (! (sh_cfun_resbank_handler_p () 6376 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG) 6377 || i == MACH_REG 6378 || i == MACL_REG 6379 || i == GBR_REG))) 6380 push (i); 6381 } 6382 } 6383 6384 /* Push banked registers last to improve delay slot opportunities. */ 6385 if (interrupt_handler) 6386 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) 6387 if (TEST_HARD_REG_BIT (*mask, i)) 6388 push (i); 6389 6390 /* Don't push PR register for an ISR with RESBANK attribute assigned. */ 6391 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ()) 6392 push (PR_REG); 6393 } 6394 6395 /* Calculate how much extra space is needed to save all callee-saved 6396 target registers. 6397 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */ 6398 6399 static int 6400 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask) 6401 { 6402 int reg; 6403 int stack_space = 0; 6404 int interrupt_handler = sh_cfun_interrupt_handler_p (); 6405 6406 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--) 6407 if ((! call_really_used_regs[reg] || interrupt_handler) 6408 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg)) 6409 /* Leave space to save this target register on the stack, 6410 in case target register allocation wants to use it. */ 6411 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg)); 6412 return stack_space; 6413 } 6414 6415 /* Decide whether we should reserve space for callee-save target registers, 6416 in case target register allocation wants to use them. REGS_SAVED is 6417 the space, in bytes, that is already required for register saves. 6418 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */ 6419 6420 static int 6421 shmedia_reserve_space_for_target_registers_p (int regs_saved, 6422 HARD_REG_SET *live_regs_mask) 6423 { 6424 if (optimize_size) 6425 return 0; 6426 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved; 6427 } 6428 6429 /* Decide how much space to reserve for callee-save target registers 6430 in case target register allocation wants to use them. 6431 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */ 6432 6433 static int 6434 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask) 6435 { 6436 if (shmedia_space_reserved_for_target_registers) 6437 return shmedia_target_regs_stack_space (live_regs_mask); 6438 else 6439 return 0; 6440 } 6441 6442 /* Work out the registers which need to be saved, both as a mask and a 6443 count of saved words. Return the count. 
6444 6445 If doing a pragma interrupt function, then push all regs used by the 6446 function, and if we call another function (we can tell by looking at PR), 6447 make sure that all the regs it clobbers are safe too. */ 6448 6449 static int 6450 calc_live_regs (HARD_REG_SET *live_regs_mask) 6451 { 6452 unsigned int reg; 6453 int count; 6454 tree attrs; 6455 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler; 6456 bool nosave_low_regs; 6457 int pr_live, has_call; 6458 6459 attrs = DECL_ATTRIBUTES (current_function_decl); 6460 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p (); 6461 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE; 6462 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler; 6463 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE; 6464 6465 CLEAR_HARD_REG_SET (*live_regs_mask); 6466 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler 6467 && df_regs_ever_live_p (FPSCR_REG)) 6468 target_flags &= ~MASK_FPU_SINGLE; 6469 /* If we can save a lot of saves by switching to double mode, do that. */ 6470 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE) 6471 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2) 6472 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1) 6473 && (! call_really_used_regs[reg] 6474 || interrupt_handler) 6475 && ++count > 2) 6476 { 6477 target_flags &= ~MASK_FPU_SINGLE; 6478 break; 6479 } 6480 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already 6481 knows how to use it. That means the pseudo originally allocated for 6482 the initial value can become the PR_MEDIA_REG hard register, as seen for 6483 execute/20010122-1.c:test9. */ 6484 if (TARGET_SHMEDIA) 6485 /* ??? this function is called from initial_elimination_offset, hence we 6486 can't use the result of sh_media_register_for_return here. */ 6487 pr_live = sh_pr_n_sets (); 6488 else 6489 { 6490 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG); 6491 pr_live = (pr_initial 6492 ? (!REG_P (pr_initial) 6493 || REGNO (pr_initial) != (PR_REG)) 6494 : df_regs_ever_live_p (PR_REG)); 6495 /* For Shcompact, if not optimizing, we end up with a memory reference 6496 using the return address pointer for __builtin_return_address even 6497 though there is no actual need to put the PR register on the stack. */ 6498 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM); 6499 } 6500 /* Force PR to be live if the prologue has to call the SHmedia 6501 argument decoder or register saver. */ 6502 if (TARGET_SHCOMPACT 6503 && ((crtl->args.info.call_cookie 6504 & ~ CALL_COOKIE_RET_TRAMP (1)) 6505 || crtl->saves_all_registers)) 6506 pr_live = 1; 6507 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live; 6508 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; ) 6509 { 6510 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG) 6511 ? pr_live 6512 : interrupt_handler 6513 ? (/* Need to save all the regs ever live. */ 6514 (df_regs_ever_live_p (reg) 6515 || (call_really_used_regs[reg] 6516 && (! 
fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG 6517 || reg == PIC_OFFSET_TABLE_REGNUM) 6518 && has_call) 6519 || (TARGET_SHMEDIA && has_call 6520 && REGISTER_NATURAL_MODE (reg) == SImode 6521 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg)))) 6522 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM 6523 && reg != RETURN_ADDRESS_POINTER_REGNUM 6524 && reg != T_REG && reg != GBR_REG 6525 /* Push fpscr only on targets which have FPU */ 6526 && (reg != FPSCR_REG || TARGET_FPU_ANY)) 6527 : (/* Only push those regs which are used and need to be saved. */ 6528 (TARGET_SHCOMPACT 6529 && flag_pic 6530 && crtl->args.info.call_cookie 6531 && reg == PIC_OFFSET_TABLE_REGNUM) 6532 || (df_regs_ever_live_p (reg) 6533 && ((!call_really_used_regs[reg] 6534 && !(reg != PIC_OFFSET_TABLE_REGNUM 6535 && fixed_regs[reg] && call_used_regs[reg])) 6536 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY))) 6537 || (crtl->calls_eh_return 6538 && (reg == EH_RETURN_DATA_REGNO (0) 6539 || reg == EH_RETURN_DATA_REGNO (1) 6540 || reg == EH_RETURN_DATA_REGNO (2) 6541 || reg == EH_RETURN_DATA_REGNO (3))) 6542 || ((reg == MACL_REG || reg == MACH_REG) 6543 && df_regs_ever_live_p (reg) 6544 && sh_cfun_attr_renesas_p ()) 6545 )) 6546 { 6547 SET_HARD_REG_BIT (*live_regs_mask, reg); 6548 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg)); 6549 6550 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD 6551 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT) 6552 { 6553 if (FP_REGISTER_P (reg)) 6554 { 6555 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1)) 6556 { 6557 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1)); 6558 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1)); 6559 } 6560 } 6561 else if (XD_REGISTER_P (reg)) 6562 { 6563 /* Must switch to double mode to access these registers. */ 6564 target_flags &= ~MASK_FPU_SINGLE; 6565 } 6566 } 6567 } 6568 if (nosave_low_regs && reg == R8_REG) 6569 break; 6570 } 6571 /* If we have a target register optimization pass after prologue / epilogue 6572 threading, we need to assume all target registers will be live even if 6573 they aren't now. */ 6574 if (flag_branch_target_load_optimize2 6575 && TARGET_SAVE_ALL_TARGET_REGS 6576 && shmedia_space_reserved_for_target_registers) 6577 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--) 6578 if ((! call_really_used_regs[reg] || interrupt_handler) 6579 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg)) 6580 { 6581 SET_HARD_REG_BIT (*live_regs_mask, reg); 6582 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg)); 6583 } 6584 /* If this is an interrupt handler, we don't have any call-clobbered 6585 registers we can conveniently use for target register save/restore. 6586 Make sure we save at least one general purpose register when we need 6587 to save target registers. */ 6588 if (interrupt_handler 6589 && hard_reg_set_intersect_p (*live_regs_mask, 6590 reg_class_contents[TARGET_REGS]) 6591 && ! hard_reg_set_intersect_p (*live_regs_mask, 6592 reg_class_contents[GENERAL_REGS])) 6593 { 6594 SET_HARD_REG_BIT (*live_regs_mask, R0_REG); 6595 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG)); 6596 } 6597 6598 return count; 6599 } 6600 6601 /* Code to generate prologue and epilogue sequences */ 6602 6603 /* PUSHED is the number of bytes that are being pushed on the 6604 stack for register saves. Return the frame size, padded 6605 appropriately so that the stack stays properly aligned. 
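For example, with an 8-byte stack boundary, 12 bytes of pushed registers and a 10-byte frame, the result is 12: the frame is padded by two bytes so that the 24-byte total keeps the stack aligned.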
*/ 6606 static HOST_WIDE_INT 6607 rounded_frame_size (int pushed) 6608 { 6609 HOST_WIDE_INT size = get_frame_size (); 6610 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT; 6611 6612 return ((size + pushed + align - 1) & -align) - pushed; 6613 } 6614 6615 /* Choose a call-clobbered target-branch register that remains 6616 unchanged along the whole function. We set it up as the return 6617 value in the prologue. */ 6618 int 6619 sh_media_register_for_return (void) 6620 { 6621 int regno; 6622 int tr0_used; 6623 6624 if (! current_function_is_leaf) 6625 return -1; 6626 if (lookup_attribute ("interrupt_handler", 6627 DECL_ATTRIBUTES (current_function_decl))) 6628 return -1; 6629 if (sh_cfun_interrupt_handler_p ()) 6630 return -1; 6631 6632 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM); 6633 6634 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++) 6635 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno)) 6636 return regno; 6637 6638 return -1; 6639 } 6640 6641 /* The maximum registers we need to save are: 6642 - 62 general purpose registers (r15 is stack pointer, r63 is zero) 6643 - 32 floating point registers (for each pair, we save none, 6644 one single precision value, or a double precision value). 6645 - 8 target registers 6646 - add 1 entry for a delimiter. */ 6647 #define MAX_SAVED_REGS (62+32+8) 6648 6649 typedef struct save_entry_s 6650 { 6651 unsigned char reg; 6652 unsigned char mode; 6653 short offset; 6654 } save_entry; 6655 6656 #define MAX_TEMPS 4 6657 6658 /* There will be a delimiter entry with VOIDmode both at the start and the 6659 end of a filled in schedule. The end delimiter has the offset of the 6660 save with the smallest (i.e. most negative) offset. */ 6661 typedef struct save_schedule_s 6662 { 6663 save_entry entries[MAX_SAVED_REGS + 2]; 6664 int temps[MAX_TEMPS+1]; 6665 } save_schedule; 6666 6667 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero, 6668 use reverse order. Returns the last entry written to (not counting 6669 the delimiter). OFFSET_BASE is a number to be added to all offset 6670 entries. */ 6671 6672 static save_entry * 6673 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule, 6674 int offset_base) 6675 { 6676 int align, i; 6677 save_entry *entry = schedule->entries; 6678 int tmpx = 0; 6679 int offset; 6680 6681 if (! current_function_interrupt) 6682 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++) 6683 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG 6684 && ! FUNCTION_ARG_REGNO_P (i) 6685 && i != FIRST_RET_REG 6686 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM) 6687 && ! (crtl->calls_eh_return 6688 && (i == EH_RETURN_STACKADJ_REGNO 6689 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0) 6690 && (unsigned) i <= EH_RETURN_DATA_REGNO (3))))) 6691 schedule->temps[tmpx++] = i; 6692 entry->reg = -1; 6693 entry->mode = VOIDmode; 6694 entry->offset = offset_base; 6695 entry++; 6696 /* We loop twice: first, we save 8-byte aligned registers in the 6697 higher addresses, that are known to be aligned. Then, we 6698 proceed to saving 32-bit registers that don't need 8-byte 6699 alignment. 6700 If this is an interrupt function, all registers that need saving 6701 need to be saved in full. moreover, we need to postpone saving 6702 target registers till we have saved some general purpose registers 6703 we can then use as scratch registers. 
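The resulting schedule is walked forwards by sh_expand_prologue and backwards by sh_expand_epilogue.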
*/ 6704 offset = offset_base; 6705 for (align = 1; align >= 0; align--) 6706 { 6707 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--) 6708 if (TEST_HARD_REG_BIT (*live_regs_mask, i)) 6709 { 6710 enum machine_mode mode = REGISTER_NATURAL_MODE (i); 6711 int reg = i; 6712 6713 if (current_function_interrupt) 6714 { 6715 if (TARGET_REGISTER_P (i)) 6716 continue; 6717 if (GENERAL_REGISTER_P (i)) 6718 mode = DImode; 6719 } 6720 if (mode == SFmode && (i % 2) == 1 6721 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i) 6722 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1)))) 6723 { 6724 mode = DFmode; 6725 i--; 6726 reg--; 6727 } 6728 6729 /* If we're doing the aligned pass and this is not aligned, 6730 or we're doing the unaligned pass and this is aligned, 6731 skip it. */ 6732 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0) 6733 != align) 6734 continue; 6735 6736 if (current_function_interrupt 6737 && GENERAL_REGISTER_P (i) 6738 && tmpx < MAX_TEMPS) 6739 schedule->temps[tmpx++] = i; 6740 6741 offset -= GET_MODE_SIZE (mode); 6742 entry->reg = i; 6743 entry->mode = mode; 6744 entry->offset = offset; 6745 entry++; 6746 } 6747 if (align && current_function_interrupt) 6748 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--) 6749 if (TEST_HARD_REG_BIT (*live_regs_mask, i)) 6750 { 6751 offset -= GET_MODE_SIZE (DImode); 6752 entry->reg = i; 6753 entry->mode = DImode; 6754 entry->offset = offset; 6755 entry++; 6756 } 6757 } 6758 entry->reg = -1; 6759 entry->mode = VOIDmode; 6760 entry->offset = offset; 6761 schedule->temps[tmpx] = -1; 6762 return entry - 1; 6763 } 6764 6765 void 6766 sh_expand_prologue (void) 6767 { 6768 HARD_REG_SET live_regs_mask; 6769 int d, i; 6770 int d_rounding = 0; 6771 int save_flags = target_flags; 6772 int pretend_args; 6773 tree sp_switch_attr 6774 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)); 6775 6776 current_function_interrupt = sh_cfun_interrupt_handler_p (); 6777 6778 /* We have pretend args if we had an object sent partially in registers 6779 and partially on the stack, e.g. a large structure. */ 6780 pretend_args = crtl->args.pretend_args_size; 6781 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl) 6782 && (NPARM_REGS(SImode) 6783 > crtl->args.info.arg_count[(int) SH_ARG_INT])) 6784 pretend_args = 0; 6785 /* Dwarf2 module doesn't expect frame related insns here. */ 6786 output_stack_adjust (-pretend_args 6787 - crtl->args.info.stack_regs * 8, 6788 stack_pointer_rtx, 0, NULL, false); 6789 6790 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie) 6791 /* We're going to use the PIC register to load the address of the 6792 incoming-argument decoder and/or of the return trampoline from 6793 the GOT, so make sure the PIC register is preserved and 6794 initialized. */ 6795 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 6796 6797 if (TARGET_SHCOMPACT 6798 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1))) 6799 { 6800 int reg; 6801 6802 /* First, make all registers with incoming arguments that will 6803 be pushed onto the stack live, so that register renaming 6804 doesn't overwrite them. 
*/ 6805 for (reg = 0; reg < NPARM_REGS (SImode); reg++) 6806 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie) 6807 >= NPARM_REGS (SImode) - reg) 6808 for (; reg < NPARM_REGS (SImode); reg++) 6809 emit_insn (gen_shcompact_preserve_incoming_args 6810 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg))); 6811 else if (CALL_COOKIE_INT_REG_GET 6812 (crtl->args.info.call_cookie, reg) == 1) 6813 emit_insn (gen_shcompact_preserve_incoming_args 6814 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg))); 6815 6816 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG), 6817 stack_pointer_rtx); 6818 emit_move_insn (gen_rtx_REG (SImode, R0_REG), 6819 GEN_INT (crtl->args.info.call_cookie)); 6820 emit_move_insn (gen_rtx_REG (SImode, MACH_REG), 6821 gen_rtx_REG (SImode, R0_REG)); 6822 } 6823 else if (TARGET_SHMEDIA) 6824 { 6825 int tr = sh_media_register_for_return (); 6826 6827 if (tr >= 0) 6828 emit_move_insn (gen_rtx_REG (DImode, tr), 6829 gen_rtx_REG (DImode, PR_MEDIA_REG)); 6830 } 6831 6832 /* Emit the code for SETUP_VARARGS. */ 6833 if (cfun->stdarg) 6834 { 6835 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)) 6836 { 6837 /* Push arg regs as if they'd been provided by caller in stack. */ 6838 for (i = 0; i < NPARM_REGS(SImode); i++) 6839 { 6840 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1; 6841 rtx insn; 6842 6843 if (i >= (NPARM_REGS(SImode) 6844 - crtl->args.info.arg_count[(int) SH_ARG_INT] 6845 )) 6846 break; 6847 insn = push (rn); 6848 } 6849 } 6850 } 6851 6852 /* If we're supposed to switch stacks at function entry, do so now. */ 6853 if (sp_switch_attr) 6854 { 6855 rtx lab, newsrc; 6856 /* The argument specifies a variable holding the address of the 6857 stack the interrupt function should switch to/from at entry/exit. */ 6858 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr)); 6859 const char *s 6860 = ggc_strdup (TREE_STRING_POINTER (arg)); 6861 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s); 6862 6863 lab = add_constant (sp_switch, SImode, 0); 6864 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); 6865 newsrc = gen_const_mem (SImode, newsrc); 6866 6867 emit_insn (gen_sp_switch_1 (newsrc)); 6868 } 6869 6870 d = calc_live_regs (&live_regs_mask); 6871 /* ??? Maybe we could save some switching if we can move a mode switch 6872 that already happens to be at the function start into the prologue. */ 6873 if (target_flags != save_flags && ! current_function_interrupt) 6874 emit_insn (gen_toggle_sz ()); 6875 6876 if (TARGET_SH5) 6877 { 6878 int offset_base, offset; 6879 rtx r0 = NULL_RTX; 6880 int offset_in_r0 = -1; 6881 int sp_in_r0 = 0; 6882 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask); 6883 int total_size, save_size; 6884 save_schedule schedule; 6885 save_entry *entry; 6886 int *tmp_pnt; 6887 6888 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG] 6889 && ! current_function_interrupt) 6890 r0 = gen_rtx_REG (Pmode, R0_REG); 6891 6892 /* D is the actual number of bytes that we need for saving registers, 6893 however, in initial_elimination_offset we have committed to using 6894 an additional TREGS_SPACE amount of bytes - in order to keep both 6895 addresses to arguments supplied by the caller and local variables 6896 valid, we must keep this gap. Place it between the incoming 6897 arguments and the actually saved registers in a bid to optimize 6898 locality of reference. 
*/ 6899 total_size = d + tregs_space; 6900 total_size += rounded_frame_size (total_size); 6901 save_size = total_size - rounded_frame_size (d); 6902 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT)) 6903 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT) 6904 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT)); 6905 6906 /* If adjusting the stack in a single step costs nothing extra, do so. 6907 I.e. either if a single addi is enough, or we need a movi anyway, 6908 and we don't exceed the maximum offset range (the test for the 6909 latter is conservative for simplicity). */ 6910 if (TARGET_SHMEDIA 6911 && (CONST_OK_FOR_I10 (-total_size) 6912 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding)) 6913 && total_size <= 2044))) 6914 d_rounding = total_size - save_size; 6915 6916 offset_base = d + d_rounding; 6917 6918 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx, 6919 0, NULL, true); 6920 6921 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base); 6922 tmp_pnt = schedule.temps; 6923 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++) 6924 { 6925 enum machine_mode mode = (enum machine_mode) entry->mode; 6926 unsigned int reg = entry->reg; 6927 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX; 6928 rtx orig_reg_rtx; 6929 6930 offset = entry->offset; 6931 6932 reg_rtx = gen_rtx_REG (mode, reg); 6933 6934 mem_rtx = gen_frame_mem (mode, 6935 gen_rtx_PLUS (Pmode, 6936 stack_pointer_rtx, 6937 GEN_INT (offset))); 6938 6939 if (!memory_address_p (mode, XEXP (mem_rtx, 0))) 6940 { 6941 gcc_assert (r0); 6942 mem_rtx = NULL_RTX; 6943 } 6944 6945 if (HAVE_PRE_DECREMENT 6946 && (offset_in_r0 - offset == GET_MODE_SIZE (mode) 6947 || mem_rtx == NULL_RTX 6948 || reg == PR_REG || SPECIAL_REGISTER_P (reg))) 6949 { 6950 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0)); 6951 6952 if (!memory_address_p (mode, XEXP (pre_dec, 0))) 6953 pre_dec = NULL_RTX; 6954 else 6955 { 6956 mem_rtx = NULL_RTX; 6957 offset += GET_MODE_SIZE (mode); 6958 } 6959 } 6960 6961 if (mem_rtx != NULL_RTX) 6962 goto addr_ok; 6963 6964 if (offset_in_r0 == -1) 6965 { 6966 emit_move_insn (r0, GEN_INT (offset)); 6967 offset_in_r0 = offset; 6968 } 6969 else if (offset != offset_in_r0) 6970 { 6971 emit_move_insn (r0, 6972 gen_rtx_PLUS 6973 (Pmode, r0, 6974 GEN_INT (offset - offset_in_r0))); 6975 offset_in_r0 += offset - offset_in_r0; 6976 } 6977 6978 if (pre_dec != NULL_RTX) 6979 { 6980 if (! sp_in_r0) 6981 { 6982 emit_move_insn (r0, 6983 gen_rtx_PLUS 6984 (Pmode, r0, stack_pointer_rtx)); 6985 sp_in_r0 = 1; 6986 } 6987 6988 offset -= GET_MODE_SIZE (mode); 6989 offset_in_r0 -= GET_MODE_SIZE (mode); 6990 6991 mem_rtx = pre_dec; 6992 } 6993 else if (sp_in_r0) 6994 mem_rtx = gen_frame_mem (mode, r0); 6995 else 6996 mem_rtx = gen_frame_mem (mode, 6997 gen_rtx_PLUS (Pmode, 6998 stack_pointer_rtx, 6999 r0)); 7000 7001 /* We must not use an r0-based address for target-branch 7002 registers or for special registers without pre-dec 7003 memory addresses, since we store their values in r0 7004 first. 
*/ 7005 gcc_assert (!TARGET_REGISTER_P (reg) 7006 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg)) 7007 || mem_rtx == pre_dec)); 7008 7009 addr_ok: 7010 orig_reg_rtx = reg_rtx; 7011 if (TARGET_REGISTER_P (reg) 7012 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg)) 7013 && mem_rtx != pre_dec)) 7014 { 7015 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt); 7016 7017 emit_move_insn (tmp_reg, reg_rtx); 7018 7019 if (REGNO (tmp_reg) == R0_REG) 7020 { 7021 offset_in_r0 = -1; 7022 sp_in_r0 = 0; 7023 gcc_assert (!refers_to_regno_p 7024 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0)); 7025 } 7026 7027 if (*++tmp_pnt <= 0) 7028 tmp_pnt = schedule.temps; 7029 7030 reg_rtx = tmp_reg; 7031 } 7032 { 7033 rtx insn; 7034 7035 /* Mark as interesting for dwarf cfi generator */ 7036 insn = emit_move_insn (mem_rtx, reg_rtx); 7037 RTX_FRAME_RELATED_P (insn) = 1; 7038 /* If we use an intermediate register for the save, we can't 7039 describe this exactly in cfi as a copy of the to-be-saved 7040 register into the temporary register and then the temporary 7041 register on the stack, because the temporary register can 7042 have a different natural size than the to-be-saved register. 7043 Thus, we gloss over the intermediate copy and pretend we do 7044 a direct save from the to-be-saved register. */ 7045 if (REGNO (reg_rtx) != reg) 7046 { 7047 rtx set; 7048 7049 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx); 7050 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); 7051 } 7052 7053 if (TARGET_SHCOMPACT && (offset_in_r0 != -1)) 7054 { 7055 rtx reg_rtx = gen_rtx_REG (mode, reg); 7056 rtx set; 7057 rtx mem_rtx = gen_frame_mem (mode, 7058 gen_rtx_PLUS (Pmode, 7059 stack_pointer_rtx, 7060 GEN_INT (offset))); 7061 7062 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx); 7063 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); 7064 } 7065 } 7066 } 7067 7068 gcc_assert (entry->offset == d_rounding); 7069 } 7070 else 7071 push_regs (&live_regs_mask, current_function_interrupt); 7072 7073 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) 7074 emit_insn (gen_GOTaddr2picreg ()); 7075 7076 if (SHMEDIA_REGS_STACK_ADJUST ()) 7077 { 7078 /* This must NOT go through the PLT, otherwise mach and macl 7079 may be clobbered. */ 7080 function_symbol (gen_rtx_REG (Pmode, R0_REG), 7081 (TARGET_FPU_ANY 7082 ? "__GCC_push_shmedia_regs" 7083 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT); 7084 emit_insn (gen_shmedia_save_restore_regs_compact 7085 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ()))); 7086 } 7087 7088 if (target_flags != save_flags && ! current_function_interrupt) 7089 emit_insn (gen_toggle_sz ()); 7090 7091 target_flags = save_flags; 7092 7093 output_stack_adjust (-rounded_frame_size (d) + d_rounding, 7094 stack_pointer_rtx, 0, NULL, true); 7095 7096 if (frame_pointer_needed) 7097 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx)); 7098 7099 if (TARGET_SHCOMPACT 7100 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1))) 7101 { 7102 /* This must NOT go through the PLT, otherwise mach and macl 7103 may be clobbered. */ 7104 function_symbol (gen_rtx_REG (Pmode, R0_REG), 7105 "__GCC_shcompact_incoming_args", SFUNC_GOT); 7106 emit_insn (gen_shcompact_incoming_args ()); 7107 } 7108 } 7109 7110 void 7111 sh_expand_epilogue (bool sibcall_p) 7112 { 7113 HARD_REG_SET live_regs_mask; 7114 int d, i; 7115 int d_rounding = 0; 7116 7117 int save_flags = target_flags; 7118 int frame_size, save_size; 7119 int fpscr_deferred = 0; 7120 int e = sibcall_p ? 
-1 : 1; 7121 7122 d = calc_live_regs (&live_regs_mask); 7123 7124 save_size = d; 7125 frame_size = rounded_frame_size (d); 7126 7127 if (TARGET_SH5) 7128 { 7129 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask); 7130 int total_size; 7131 if (d % (STACK_BOUNDARY / BITS_PER_UNIT)) 7132 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT) 7133 - d % (STACK_BOUNDARY / BITS_PER_UNIT)); 7134 7135 total_size = d + tregs_space; 7136 total_size += rounded_frame_size (total_size); 7137 save_size = total_size - frame_size; 7138 7139 /* If adjusting the stack in a single step costs nothing extra, do so. 7140 I.e. either if a single addi is enough, or we need a movi anyway, 7141 and we don't exceed the maximum offset range (the test for the 7142 latter is conservative for simplicity). */ 7143 if (TARGET_SHMEDIA 7144 && ! frame_pointer_needed 7145 && (CONST_OK_FOR_I10 (total_size) 7146 || (! CONST_OK_FOR_I10 (save_size + d_rounding) 7147 && total_size <= 2044))) 7148 d_rounding = frame_size; 7149 7150 frame_size -= d_rounding; 7151 } 7152 7153 if (frame_pointer_needed) 7154 { 7155 /* We must avoid scheduling the epilogue with previous basic blocks. 7156 See PR/18032 and PR/40313. */ 7157 emit_insn (gen_blockage ()); 7158 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e, 7159 &live_regs_mask, false); 7160 7161 /* We must avoid moving the stack pointer adjustment past code 7162 which reads from the local frame, else an interrupt could 7163 occur after the SP adjustment and clobber data in the local 7164 frame. */ 7165 emit_insn (gen_blockage ()); 7166 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx)); 7167 } 7168 else if (frame_size) 7169 { 7170 /* We must avoid moving the stack pointer adjustment past code 7171 which reads from the local frame, else an interrupt could 7172 occur after the SP adjustment and clobber data in the local 7173 frame. */ 7174 emit_insn (gen_blockage ()); 7175 output_stack_adjust (frame_size, stack_pointer_rtx, e, 7176 &live_regs_mask, false); 7177 } 7178 7179 if (SHMEDIA_REGS_STACK_ADJUST ()) 7180 { 7181 function_symbol (gen_rtx_REG (Pmode, R0_REG), 7182 (TARGET_FPU_ANY 7183 ? "__GCC_pop_shmedia_regs" 7184 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT); 7185 /* This must NOT go through the PLT, otherwise mach and macl 7186 may be clobbered. */ 7187 emit_insn (gen_shmedia_save_restore_regs_compact 7188 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ()))); 7189 } 7190 7191 /* Pop all the registers. */ 7192 7193 if (target_flags != save_flags && ! 
current_function_interrupt) 7194 emit_insn (gen_toggle_sz ()); 7195 if (TARGET_SH5) 7196 { 7197 int offset_base, offset; 7198 int offset_in_r0 = -1; 7199 int sp_in_r0 = 0; 7200 rtx r0 = gen_rtx_REG (Pmode, R0_REG); 7201 save_schedule schedule; 7202 save_entry *entry; 7203 int *tmp_pnt; 7204 7205 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding); 7206 offset_base = -entry[1].offset + d_rounding; 7207 tmp_pnt = schedule.temps; 7208 for (; entry->mode != VOIDmode; entry--) 7209 { 7210 enum machine_mode mode = (enum machine_mode) entry->mode; 7211 int reg = entry->reg; 7212 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn; 7213 7214 offset = offset_base + entry->offset; 7215 reg_rtx = gen_rtx_REG (mode, reg); 7216 7217 mem_rtx = gen_frame_mem (mode, 7218 gen_rtx_PLUS (Pmode, 7219 stack_pointer_rtx, 7220 GEN_INT (offset))); 7221 7222 if (!memory_address_p (mode, XEXP (mem_rtx, 0))) 7223 mem_rtx = NULL_RTX; 7224 7225 if (HAVE_POST_INCREMENT 7226 && (offset == offset_in_r0 7227 || (offset + GET_MODE_SIZE (mode) != d + d_rounding 7228 && mem_rtx == NULL_RTX) 7229 || reg == PR_REG || SPECIAL_REGISTER_P (reg))) 7230 { 7231 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0)); 7232 7233 if (!memory_address_p (mode, XEXP (post_inc, 0))) 7234 post_inc = NULL_RTX; 7235 else 7236 mem_rtx = NULL_RTX; 7237 } 7238 7239 if (mem_rtx != NULL_RTX) 7240 goto addr_ok; 7241 7242 if (offset_in_r0 == -1) 7243 { 7244 emit_move_insn (r0, GEN_INT (offset)); 7245 offset_in_r0 = offset; 7246 } 7247 else if (offset != offset_in_r0) 7248 { 7249 emit_move_insn (r0, 7250 gen_rtx_PLUS 7251 (Pmode, r0, 7252 GEN_INT (offset - offset_in_r0))); 7253 offset_in_r0 += offset - offset_in_r0; 7254 } 7255 7256 if (post_inc != NULL_RTX) 7257 { 7258 if (! sp_in_r0) 7259 { 7260 emit_move_insn (r0, 7261 gen_rtx_PLUS 7262 (Pmode, r0, stack_pointer_rtx)); 7263 sp_in_r0 = 1; 7264 } 7265 7266 mem_rtx = post_inc; 7267 7268 offset_in_r0 += GET_MODE_SIZE (mode); 7269 } 7270 else if (sp_in_r0) 7271 mem_rtx = gen_frame_mem (mode, r0); 7272 else 7273 mem_rtx = gen_frame_mem (mode, 7274 gen_rtx_PLUS (Pmode, 7275 stack_pointer_rtx, 7276 r0)); 7277 7278 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg)) 7279 || mem_rtx == post_inc); 7280 7281 addr_ok: 7282 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg)) 7283 && mem_rtx != post_inc) 7284 { 7285 insn = emit_move_insn (r0, mem_rtx); 7286 mem_rtx = r0; 7287 } 7288 else if (TARGET_REGISTER_P (reg)) 7289 { 7290 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt); 7291 7292 /* Give the scheduler a bit of freedom by using up to 7293 MAX_TEMPS registers in a round-robin fashion. */ 7294 insn = emit_move_insn (tmp_reg, mem_rtx); 7295 mem_rtx = tmp_reg; 7296 if (*++tmp_pnt < 0) 7297 tmp_pnt = schedule.temps; 7298 } 7299 7300 insn = emit_move_insn (reg_rtx, mem_rtx); 7301 } 7302 7303 gcc_assert (entry->offset + offset_base == d + d_rounding); 7304 } 7305 else /* ! TARGET_SH5 */ 7306 { 7307 int last_reg; 7308 7309 save_size = 0; 7310 /* For an ISR with RESBANK attribute assigned, don't pop PR 7311 register. */ 7312 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG) 7313 && !sh_cfun_resbank_handler_p ()) 7314 { 7315 if (!frame_pointer_needed) 7316 emit_insn (gen_blockage ()); 7317 pop (PR_REG); 7318 } 7319 7320 /* Banked registers are popped first to avoid being scheduled in the 7321 delay slot. RTE switches banks before the ds instruction. 
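A banked-register pop placed in that delay slot would thus execute after the bank switch and restore into the wrong bank.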
*/ 7322 if (current_function_interrupt) 7323 { 7324 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--) 7325 if (TEST_HARD_REG_BIT (live_regs_mask, i)) 7326 pop (i); 7327 7328 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1; 7329 } 7330 else 7331 last_reg = FIRST_PSEUDO_REGISTER; 7332 7333 for (i = 0; i < last_reg; i++) 7334 { 7335 int j = (FIRST_PSEUDO_REGISTER - 1) - i; 7336 7337 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD 7338 && hard_reg_set_intersect_p (live_regs_mask, 7339 reg_class_contents[DF_REGS])) 7340 fpscr_deferred = 1; 7341 /* For an ISR with RESBANK attribute assigned, don't pop 7342 following registers, R0-R14, MACH, MACL and GBR. */ 7343 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j) 7344 && ! (sh_cfun_resbank_handler_p () 7345 && ((j >= FIRST_GENERAL_REG 7346 && j < LAST_GENERAL_REG) 7347 || j == MACH_REG 7348 || j == MACL_REG 7349 || j == GBR_REG))) 7350 pop (j); 7351 7352 if (j == FIRST_FP_REG && fpscr_deferred) 7353 pop (FPSCR_REG); 7354 } 7355 } 7356 if (target_flags != save_flags && ! current_function_interrupt) 7357 emit_insn (gen_toggle_sz ()); 7358 target_flags = save_flags; 7359 7360 output_stack_adjust (crtl->args.pretend_args_size 7361 + save_size + d_rounding 7362 + crtl->args.info.stack_regs * 8, 7363 stack_pointer_rtx, e, NULL, false); 7364 7365 if (crtl->calls_eh_return) 7366 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx, 7367 EH_RETURN_STACKADJ_RTX)); 7368 7369 /* Switch back to the normal stack if necessary. */ 7370 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl))) 7371 emit_insn (gen_sp_switch_2 ()); 7372 7373 /* Tell flow the insn that pops PR isn't dead. */ 7374 /* PR_REG will never be live in SHmedia mode, and we don't need to 7375 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG 7376 by the return pattern. */ 7377 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)) 7378 emit_use (gen_rtx_REG (SImode, PR_REG)); 7379 } 7380 7381 static int sh_need_epilogue_known = 0; 7382 7383 int 7384 sh_need_epilogue (void) 7385 { 7386 if (! sh_need_epilogue_known) 7387 { 7388 rtx epilogue; 7389 7390 start_sequence (); 7391 sh_expand_epilogue (0); 7392 epilogue = get_insns (); 7393 end_sequence (); 7394 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1); 7395 } 7396 return sh_need_epilogue_known > 0; 7397 } 7398 7399 /* Emit code to change the current function's return address to RA. 7400 TEMP is available as a scratch register, if needed. */ 7401 7402 void 7403 sh_set_return_address (rtx ra, rtx tmp) 7404 { 7405 HARD_REG_SET live_regs_mask; 7406 int d; 7407 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG; 7408 int pr_offset; 7409 7410 d = calc_live_regs (&live_regs_mask); 7411 7412 /* If pr_reg isn't life, we can set it (or the register given in 7413 sh_media_register_for_return) directly. */ 7414 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg)) 7415 { 7416 rtx rr; 7417 7418 if (TARGET_SHMEDIA) 7419 { 7420 int rr_regno = sh_media_register_for_return (); 7421 7422 if (rr_regno < 0) 7423 rr_regno = pr_reg; 7424 7425 rr = gen_rtx_REG (DImode, rr_regno); 7426 } 7427 else 7428 rr = gen_rtx_REG (SImode, pr_reg); 7429 7430 emit_insn (GEN_MOV (rr, ra)); 7431 /* Tell flow the register for return isn't dead. 
*/ 7432 emit_use (rr); 7433 return; 7434 } 7435 7436 if (TARGET_SH5) 7437 { 7438 int offset; 7439 save_schedule schedule; 7440 save_entry *entry; 7441 7442 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0); 7443 offset = entry[1].offset; 7444 for (; entry->mode != VOIDmode; entry--) 7445 if (entry->reg == pr_reg) 7446 goto found; 7447 7448 /* We can't find pr register. */ 7449 gcc_unreachable (); 7450 7451 found: 7452 offset = entry->offset - offset; 7453 pr_offset = (rounded_frame_size (d) + offset 7454 + SHMEDIA_REGS_STACK_ADJUST ()); 7455 } 7456 else 7457 pr_offset = rounded_frame_size (d); 7458 7459 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset))); 7460 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx)); 7461 7462 tmp = gen_frame_mem (Pmode, tmp); 7463 emit_insn (GEN_MOV (tmp, ra)); 7464 /* Tell this store isn't dead. */ 7465 emit_use (tmp); 7466 } 7467 7468 /* Clear variables at function end. */ 7469 7470 static void 7471 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, 7472 HOST_WIDE_INT size ATTRIBUTE_UNUSED) 7473 { 7474 sh_need_epilogue_known = 0; 7475 } 7476 7477 static rtx 7478 sh_builtin_saveregs (void) 7479 { 7480 /* First unnamed integer register. */ 7481 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT]; 7482 /* Number of integer registers we need to save. */ 7483 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg); 7484 /* First unnamed SFmode float reg */ 7485 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT]; 7486 /* Number of SFmode float regs to save. */ 7487 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg); 7488 rtx regbuf, fpregs; 7489 int bufsize, regno; 7490 alias_set_type alias_set; 7491 7492 if (TARGET_SH5) 7493 { 7494 if (n_intregs) 7495 { 7496 int pushregs = n_intregs; 7497 7498 while (pushregs < NPARM_REGS (SImode) - 1 7499 && (CALL_COOKIE_INT_REG_GET 7500 (crtl->args.info.call_cookie, 7501 NPARM_REGS (SImode) - pushregs) 7502 == 1)) 7503 { 7504 crtl->args.info.call_cookie 7505 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode) 7506 - pushregs, 1); 7507 pushregs++; 7508 } 7509 7510 if (pushregs == NPARM_REGS (SImode)) 7511 crtl->args.info.call_cookie 7512 |= (CALL_COOKIE_INT_REG (0, 1) 7513 | CALL_COOKIE_STACKSEQ (pushregs - 1)); 7514 else 7515 crtl->args.info.call_cookie 7516 |= CALL_COOKIE_STACKSEQ (pushregs); 7517 7518 crtl->args.pretend_args_size += 8 * n_intregs; 7519 } 7520 if (TARGET_SHCOMPACT) 7521 return const0_rtx; 7522 } 7523 7524 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5) 7525 { 7526 error ("__builtin_saveregs not supported by this subtarget"); 7527 return const0_rtx; 7528 } 7529 7530 if (TARGET_SHMEDIA) 7531 n_floatregs = 0; 7532 7533 /* Allocate block of memory for the regs. */ 7534 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte? 7535 Or can assign_stack_local accept a 0 SIZE argument? 
*/ 7536 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD); 7537 7538 if (TARGET_SHMEDIA) 7539 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM)); 7540 else if (n_floatregs & 1) 7541 { 7542 rtx addr; 7543 7544 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0); 7545 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0)); 7546 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD))); 7547 regbuf = change_address (regbuf, BLKmode, addr); 7548 } 7549 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs) 7550 { 7551 rtx addr, mask; 7552 7553 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0); 7554 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4)); 7555 mask = copy_to_mode_reg (Pmode, GEN_INT (-8)); 7556 emit_insn (gen_andsi3 (addr, addr, mask)); 7557 regbuf = change_address (regbuf, BLKmode, addr); 7558 } 7559 else 7560 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0); 7561 alias_set = get_varargs_alias_set (); 7562 set_mem_alias_set (regbuf, alias_set); 7563 7564 /* Save int args. 7565 This is optimized to only save the regs that are necessary. Explicitly 7566 named args need not be saved. */ 7567 if (n_intregs > 0) 7568 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg, 7569 adjust_address (regbuf, BLKmode, 7570 n_floatregs * UNITS_PER_WORD), 7571 n_intregs); 7572 7573 if (TARGET_SHMEDIA) 7574 /* Return the address of the regbuf. */ 7575 return XEXP (regbuf, 0); 7576 7577 /* Save float args. 7578 This is optimized to only save the regs that are necessary. Explicitly 7579 named args need not be saved. 7580 We explicitly build a pointer to the buffer because it halves the insn 7581 count when not optimizing (otherwise the pointer is built for each reg 7582 saved). 7583 We emit the moves in reverse order so that we can use predecrement. */ 7584 7585 fpregs = copy_to_mode_reg (Pmode, 7586 plus_constant (XEXP (regbuf, 0), 7587 n_floatregs * UNITS_PER_WORD)); 7588 if (TARGET_SH4 || TARGET_SH2A_DOUBLE) 7589 { 7590 rtx mem; 7591 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2) 7592 { 7593 emit_insn (gen_addsi3 (fpregs, fpregs, 7594 GEN_INT (-2 * UNITS_PER_WORD))); 7595 mem = change_address (regbuf, DFmode, fpregs); 7596 emit_move_insn (mem, 7597 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno)); 7598 } 7599 regno = first_floatreg; 7600 if (regno & 1) 7601 { 7602 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD))); 7603 mem = change_address (regbuf, SFmode, fpregs); 7604 emit_move_insn (mem, 7605 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno 7606 - (TARGET_LITTLE_ENDIAN != 0))); 7607 } 7608 } 7609 else 7610 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--) 7611 { 7612 rtx mem; 7613 7614 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD))); 7615 mem = change_address (regbuf, SFmode, fpregs); 7616 emit_move_insn (mem, 7617 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno)); 7618 } 7619 7620 /* Return the address of the regbuf. */ 7621 return XEXP (regbuf, 0); 7622 } 7623 7624 /* Define the `__builtin_va_list' type for the ABI. */ 7625 7626 static tree 7627 sh_build_builtin_va_list (void) 7628 { 7629 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; 7630 tree record; 7631 7632 if (TARGET_SH5 || (! TARGET_SH2E && ! 
TARGET_SH4) 7633 || TARGET_HITACHI || sh_cfun_attr_renesas_p ()) 7634 return ptr_type_node; 7635 7636 record = (*lang_hooks.types.make_type) (RECORD_TYPE); 7637 7638 f_next_o = build_decl (BUILTINS_LOCATION, 7639 FIELD_DECL, get_identifier ("__va_next_o"), 7640 ptr_type_node); 7641 f_next_o_limit = build_decl (BUILTINS_LOCATION, 7642 FIELD_DECL, 7643 get_identifier ("__va_next_o_limit"), 7644 ptr_type_node); 7645 f_next_fp = build_decl (BUILTINS_LOCATION, 7646 FIELD_DECL, get_identifier ("__va_next_fp"), 7647 ptr_type_node); 7648 f_next_fp_limit = build_decl (BUILTINS_LOCATION, 7649 FIELD_DECL, 7650 get_identifier ("__va_next_fp_limit"), 7651 ptr_type_node); 7652 f_next_stack = build_decl (BUILTINS_LOCATION, 7653 FIELD_DECL, get_identifier ("__va_next_stack"), 7654 ptr_type_node); 7655 7656 DECL_FIELD_CONTEXT (f_next_o) = record; 7657 DECL_FIELD_CONTEXT (f_next_o_limit) = record; 7658 DECL_FIELD_CONTEXT (f_next_fp) = record; 7659 DECL_FIELD_CONTEXT (f_next_fp_limit) = record; 7660 DECL_FIELD_CONTEXT (f_next_stack) = record; 7661 7662 TYPE_FIELDS (record) = f_next_o; 7663 TREE_CHAIN (f_next_o) = f_next_o_limit; 7664 TREE_CHAIN (f_next_o_limit) = f_next_fp; 7665 TREE_CHAIN (f_next_fp) = f_next_fp_limit; 7666 TREE_CHAIN (f_next_fp_limit) = f_next_stack; 7667 7668 layout_type (record); 7669 7670 return record; 7671 } 7672 7673 /* Implement `va_start' for varargs and stdarg. */ 7674 7675 static void 7676 sh_va_start (tree valist, rtx nextarg) 7677 { 7678 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; 7679 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack; 7680 tree t, u; 7681 int nfp, nint; 7682 7683 if (TARGET_SH5) 7684 { 7685 expand_builtin_saveregs (); 7686 std_expand_builtin_va_start (valist, nextarg); 7687 return; 7688 } 7689 7690 if ((! TARGET_SH2E && ! TARGET_SH4) 7691 || TARGET_HITACHI || sh_cfun_attr_renesas_p ()) 7692 { 7693 std_expand_builtin_va_start (valist, nextarg); 7694 return; 7695 } 7696 7697 f_next_o = TYPE_FIELDS (va_list_type_node); 7698 f_next_o_limit = TREE_CHAIN (f_next_o); 7699 f_next_fp = TREE_CHAIN (f_next_o_limit); 7700 f_next_fp_limit = TREE_CHAIN (f_next_fp); 7701 f_next_stack = TREE_CHAIN (f_next_fp_limit); 7702 7703 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o, 7704 NULL_TREE); 7705 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit), 7706 valist, f_next_o_limit, NULL_TREE); 7707 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp, 7708 NULL_TREE); 7709 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit), 7710 valist, f_next_fp_limit, NULL_TREE); 7711 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack), 7712 valist, f_next_stack, NULL_TREE); 7713 7714 /* Call __builtin_saveregs. 
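Its result is the start of the register save area: the code below points next_fp at it, places next_fp_limit just past the unnamed float registers that were saved, starts next_o at that same point, puts next_o_limit just past the unnamed integer registers, and finally sets next_stack to the first anonymous stack argument.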
*/ 7715 u = make_tree (sizetype, expand_builtin_saveregs ()); 7716 u = fold_convert (ptr_type_node, u); 7717 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u); 7718 TREE_SIDE_EFFECTS (t) = 1; 7719 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7720 7721 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT]; 7722 if (nfp < 8) 7723 nfp = 8 - nfp; 7724 else 7725 nfp = 0; 7726 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u, 7727 size_int (UNITS_PER_WORD * nfp)); 7728 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u); 7729 TREE_SIDE_EFFECTS (t) = 1; 7730 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7731 7732 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u); 7733 TREE_SIDE_EFFECTS (t) = 1; 7734 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7735 7736 nint = crtl->args.info.arg_count[SH_ARG_INT]; 7737 if (nint < 4) 7738 nint = 4 - nint; 7739 else 7740 nint = 0; 7741 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u, 7742 size_int (UNITS_PER_WORD * nint)); 7743 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u); 7744 TREE_SIDE_EFFECTS (t) = 1; 7745 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7746 7747 u = make_tree (ptr_type_node, nextarg); 7748 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u); 7749 TREE_SIDE_EFFECTS (t) = 1; 7750 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7751 } 7752 7753 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized 7754 member, return it. */ 7755 static tree 7756 find_sole_member (tree type) 7757 { 7758 tree field, member = NULL_TREE; 7759 7760 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 7761 { 7762 if (TREE_CODE (field) != FIELD_DECL) 7763 continue; 7764 if (!DECL_SIZE (field)) 7765 return NULL_TREE; 7766 if (integer_zerop (DECL_SIZE (field))) 7767 continue; 7768 if (member) 7769 return NULL_TREE; 7770 member = field; 7771 } 7772 return member; 7773 } 7774 /* Implement `va_arg'. */ 7775 7776 static tree 7777 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, 7778 gimple_seq *post_p ATTRIBUTE_UNUSED) 7779 { 7780 HOST_WIDE_INT size, rsize; 7781 tree tmp, pptr_type_node; 7782 tree addr, lab_over = NULL, result = NULL; 7783 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type); 7784 tree eff_type; 7785 7786 if (pass_by_ref) 7787 type = build_pointer_type (type); 7788 7789 size = int_size_in_bytes (type); 7790 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; 7791 pptr_type_node = build_pointer_type (ptr_type_node); 7792 7793 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4) 7794 && ! 
(TARGET_HITACHI || sh_cfun_attr_renesas_p ())) 7795 { 7796 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; 7797 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack; 7798 int pass_as_float; 7799 tree lab_false; 7800 tree member; 7801 7802 f_next_o = TYPE_FIELDS (va_list_type_node); 7803 f_next_o_limit = TREE_CHAIN (f_next_o); 7804 f_next_fp = TREE_CHAIN (f_next_o_limit); 7805 f_next_fp_limit = TREE_CHAIN (f_next_fp); 7806 f_next_stack = TREE_CHAIN (f_next_fp_limit); 7807 7808 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o, 7809 NULL_TREE); 7810 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit), 7811 valist, f_next_o_limit, NULL_TREE); 7812 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), 7813 valist, f_next_fp, NULL_TREE); 7814 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit), 7815 valist, f_next_fp_limit, NULL_TREE); 7816 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack), 7817 valist, f_next_stack, NULL_TREE); 7818 7819 /* Structures with a single member with a distinct mode are passed 7820 like their member. This is relevant if the latter has a REAL_TYPE 7821 or COMPLEX_TYPE type. */ 7822 eff_type = type; 7823 while (TREE_CODE (eff_type) == RECORD_TYPE 7824 && (member = find_sole_member (eff_type)) 7825 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE 7826 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE 7827 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE)) 7828 { 7829 tree field_type = TREE_TYPE (member); 7830 7831 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type)) 7832 eff_type = field_type; 7833 else 7834 { 7835 gcc_assert ((TYPE_ALIGN (eff_type) 7836 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type))) 7837 || (TYPE_ALIGN (eff_type) 7838 > GET_MODE_BITSIZE (TYPE_MODE (field_type)))); 7839 break; 7840 } 7841 } 7842 7843 if (TARGET_SH4 || TARGET_SH2A_DOUBLE) 7844 { 7845 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8) 7846 || (TREE_CODE (eff_type) == COMPLEX_TYPE 7847 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE 7848 && size <= 16)); 7849 } 7850 else 7851 { 7852 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4); 7853 } 7854 7855 addr = create_tmp_var (pptr_type_node, NULL); 7856 lab_false = create_artificial_label (UNKNOWN_LOCATION); 7857 lab_over = create_artificial_label (UNKNOWN_LOCATION); 7858 7859 valist = build1 (INDIRECT_REF, ptr_type_node, addr); 7860 7861 if (pass_as_float) 7862 { 7863 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL); 7864 tree cmp; 7865 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE; 7866 7867 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp)); 7868 gimplify_assign (unshare_expr (addr), tmp, pre_p); 7869 7870 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p); 7871 tmp = next_fp_limit; 7872 if (size > 4 && !is_double) 7873 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp), 7874 unshare_expr (tmp), size_int (4 - size)); 7875 tmp = build2 (GE_EXPR, boolean_type_node, 7876 unshare_expr (next_fp_tmp), unshare_expr (tmp)); 7877 cmp = build3 (COND_EXPR, void_type_node, tmp, 7878 build1 (GOTO_EXPR, void_type_node, 7879 unshare_expr (lab_false)), NULL_TREE); 7880 if (!is_double) 7881 gimplify_and_add (cmp, pre_p); 7882 7883 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD 7884 || (is_double || size == 16)) 7885 { 7886 tmp = fold_convert (sizetype, next_fp_tmp); 7887 tmp = build2 (BIT_AND_EXPR, sizetype, tmp, 7888 size_int (UNITS_PER_WORD)); 7889 tmp = build2 (POINTER_PLUS_EXPR, 
ptr_type_node, 7890 unshare_expr (next_fp_tmp), tmp); 7891 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p); 7892 } 7893 if (is_double) 7894 gimplify_and_add (cmp, pre_p); 7895 7896 #ifdef FUNCTION_ARG_SCmode_WART 7897 if (TYPE_MODE (eff_type) == SCmode 7898 && TARGET_SH4 && TARGET_LITTLE_ENDIAN) 7899 { 7900 tree subtype = TREE_TYPE (eff_type); 7901 tree real, imag; 7902 7903 imag 7904 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL); 7905 imag = get_initialized_tmp_var (imag, pre_p, NULL); 7906 7907 real 7908 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL); 7909 real = get_initialized_tmp_var (real, pre_p, NULL); 7910 7911 result = build2 (COMPLEX_EXPR, eff_type, real, imag); 7912 if (type != eff_type) 7913 result = build1 (VIEW_CONVERT_EXPR, type, result); 7914 result = get_initialized_tmp_var (result, pre_p, NULL); 7915 } 7916 #endif /* FUNCTION_ARG_SCmode_WART */ 7917 7918 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over)); 7919 gimplify_and_add (tmp, pre_p); 7920 7921 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false)); 7922 gimplify_and_add (tmp, pre_p); 7923 7924 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack)); 7925 gimplify_assign (unshare_expr (addr), tmp, pre_p); 7926 gimplify_assign (unshare_expr (next_fp_tmp), 7927 unshare_expr (valist), pre_p); 7928 7929 gimplify_assign (unshare_expr (valist), 7930 unshare_expr (next_fp_tmp), post_p); 7931 valist = next_fp_tmp; 7932 } 7933 else 7934 { 7935 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, 7936 unshare_expr (next_o), size_int (rsize)); 7937 tmp = build2 (GT_EXPR, boolean_type_node, tmp, 7938 unshare_expr (next_o_limit)); 7939 tmp = build3 (COND_EXPR, void_type_node, tmp, 7940 build1 (GOTO_EXPR, void_type_node, 7941 unshare_expr (lab_false)), 7942 NULL_TREE); 7943 gimplify_and_add (tmp, pre_p); 7944 7945 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o)); 7946 gimplify_assign (unshare_expr (addr), tmp, pre_p); 7947 7948 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over)); 7949 gimplify_and_add (tmp, pre_p); 7950 7951 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false)); 7952 gimplify_and_add (tmp, pre_p); 7953 7954 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A)) 7955 gimplify_assign (unshare_expr (next_o), 7956 unshare_expr (next_o_limit), pre_p); 7957 7958 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack)); 7959 gimplify_assign (unshare_expr (addr), tmp, pre_p); 7960 } 7961 7962 if (!result) 7963 { 7964 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over)); 7965 gimplify_and_add (tmp, pre_p); 7966 } 7967 } 7968 7969 /* ??? In va-sh.h, there had been code to make values larger than 7970 size 8 indirect. This does not match the FUNCTION_ARG macros. */ 7971 7972 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL); 7973 if (result) 7974 { 7975 gimplify_assign (result, tmp, pre_p); 7976 result = build1 (NOP_EXPR, TREE_TYPE (result), result); 7977 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over)); 7978 gimplify_and_add (tmp, pre_p); 7979 } 7980 else 7981 result = tmp; 7982 7983 if (pass_by_ref) 7984 result = build_va_arg_indirect_ref (result); 7985 7986 return result; 7987 } 7988 7989 /* 64 bit floating points memory transfers are paired single precision loads 7990 or store. So DWARF information needs fixing in little endian (unless 7991 PR=SZ=1 in FPSCR). 
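For example, for a DFmode value held in the pair FRn/FRn+1 the span built below describes the two SFmode halves in swapped order, FRn+1 followed by FRn.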
*/ 7992 rtx 7993 sh_dwarf_register_span (rtx reg) 7994 { 7995 unsigned regno = REGNO (reg); 7996 7997 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode) 7998 return NULL_RTX; 7999 8000 return 8001 gen_rtx_PARALLEL (VOIDmode, 8002 gen_rtvec (2, 8003 gen_rtx_REG (SFmode, 8004 DBX_REGISTER_NUMBER (regno+1)), 8005 gen_rtx_REG (SFmode, 8006 DBX_REGISTER_NUMBER (regno)))); 8007 } 8008 8009 static enum machine_mode 8010 sh_promote_function_mode (const_tree type, enum machine_mode mode, 8011 int *punsignedp, const_tree funtype, 8012 int for_return ATTRIBUTE_UNUSED) 8013 { 8014 if (sh_promote_prototypes (funtype)) 8015 return promote_mode (type, mode, punsignedp); 8016 else 8017 return mode; 8018 } 8019 8020 static bool 8021 sh_promote_prototypes (const_tree type) 8022 { 8023 if (TARGET_HITACHI) 8024 return 0; 8025 if (! type) 8026 return 1; 8027 return ! sh_attr_renesas_p (type); 8028 } 8029 8030 /* Whether an argument must be passed by reference. On SHcompact, we 8031 pretend arguments wider than 32-bits that would have been passed in 8032 registers are passed by reference, so that an SHmedia trampoline 8033 loads them into the full 64-bits registers. */ 8034 8035 static int 8036 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode, 8037 const_tree type, bool named) 8038 { 8039 unsigned HOST_WIDE_INT size; 8040 8041 if (type) 8042 size = int_size_in_bytes (type); 8043 else 8044 size = GET_MODE_SIZE (mode); 8045 8046 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode) 8047 && (!named 8048 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT 8049 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT 8050 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode))) 8051 && size > 4 8052 && !SHCOMPACT_FORCE_ON_STACK (mode, type) 8053 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named)) 8054 return size; 8055 else 8056 return 0; 8057 } 8058 8059 static bool 8060 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode, 8061 const_tree type, bool named) 8062 { 8063 if (targetm.calls.must_pass_in_stack (mode, type)) 8064 return true; 8065 8066 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function 8067 wants to know about pass-by-reference semantics for incoming 8068 arguments. */ 8069 if (! cum) 8070 return false; 8071 8072 if (TARGET_SHCOMPACT) 8073 { 8074 cum->byref = shcompact_byref (cum, mode, type, named); 8075 return cum->byref != 0; 8076 } 8077 8078 return false; 8079 } 8080 8081 static bool 8082 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode, 8083 const_tree type, bool named ATTRIBUTE_UNUSED) 8084 { 8085 /* ??? How can it possibly be correct to return true only on the 8086 caller side of the equation? Is there someplace else in the 8087 sh backend that's magically producing the copies? */ 8088 return (cum->outgoing 8089 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) 8090 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0)); 8091 } 8092 8093 static int 8094 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode, 8095 tree type, bool named ATTRIBUTE_UNUSED) 8096 { 8097 int words = 0; 8098 8099 if (!TARGET_SH5 8100 && PASS_IN_REG_P (*cum, mode, type) 8101 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE) 8102 && (ROUND_REG (*cum, mode) 8103 + (mode != BLKmode 8104 ? 
ROUND_ADVANCE (GET_MODE_SIZE (mode)) 8105 : ROUND_ADVANCE (int_size_in_bytes (type))) 8106 > NPARM_REGS (mode))) 8107 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode); 8108 8109 else if (!TARGET_SHCOMPACT 8110 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named)) 8111 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT]; 8112 8113 return words * UNITS_PER_WORD; 8114 } 8115 8116 8117 /* Define where to put the arguments to a function. 8118 Value is zero to push the argument on the stack, 8119 or a hard register in which to store the argument. 8120 8121 MODE is the argument's machine mode. 8122 TYPE is the data type of the argument (as a tree). 8123 This is null for libcalls where that information may 8124 not be available. 8125 CUM is a variable of type CUMULATIVE_ARGS which gives info about 8126 the preceding args and about the function being called. 8127 NAMED is nonzero if this argument is a named parameter 8128 (otherwise it is an extra parameter matching an ellipsis). 8129 8130 On SH the first args are normally in registers 8131 and the rest are pushed. Any arg that starts within the first 8132 NPARM_REGS words is at least partially passed in a register unless 8133 its data type forbids. */ 8134 8135 8136 rtx 8137 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode, 8138 tree type, int named) 8139 { 8140 if (! TARGET_SH5 && mode == VOIDmode) 8141 return GEN_INT (ca->renesas_abi ? 1 : 0); 8142 8143 if (! TARGET_SH5 8144 && PASS_IN_REG_P (*ca, mode, type) 8145 && (named || ! (TARGET_HITACHI || ca->renesas_abi))) 8146 { 8147 int regno; 8148 8149 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN 8150 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1))) 8151 { 8152 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode, 8153 gen_rtx_REG (SFmode, 8154 BASE_ARG_REG (mode) 8155 + (ROUND_REG (*ca, mode) ^ 1)), 8156 const0_rtx); 8157 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode, 8158 gen_rtx_REG (SFmode, 8159 BASE_ARG_REG (mode) 8160 + ((ROUND_REG (*ca, mode) + 1) ^ 1)), 8161 GEN_INT (4)); 8162 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2)); 8163 } 8164 8165 /* If the alignment of a DF value causes an SF register to be 8166 skipped, we will use that skipped register for the next SF 8167 value. */ 8168 if ((TARGET_HITACHI || ca->renesas_abi) 8169 && ca->free_single_fp_reg 8170 && mode == SFmode) 8171 return gen_rtx_REG (mode, ca->free_single_fp_reg); 8172 8173 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode)) 8174 ^ (mode == SFmode && TARGET_SH4 8175 && TARGET_LITTLE_ENDIAN != 0 8176 && ! TARGET_HITACHI && ! ca->renesas_abi); 8177 return gen_rtx_REG (mode, regno); 8178 8179 } 8180 8181 if (TARGET_SH5) 8182 { 8183 if (mode == VOIDmode && TARGET_SHCOMPACT) 8184 return GEN_INT (ca->call_cookie); 8185 8186 /* The following test assumes unnamed arguments are promoted to 8187 DFmode. */ 8188 if (mode == SFmode && ca->free_single_fp_reg) 8189 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg); 8190 8191 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT) 8192 && (named || ! ca->prototype_p) 8193 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode)) 8194 { 8195 if (! ca->prototype_p && TARGET_SHMEDIA) 8196 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode); 8197 8198 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, 8199 FIRST_FP_PARM_REG 8200 + ca->arg_count[(int) SH_ARG_FLOAT]); 8201 } 8202 8203 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) 8204 && (! TARGET_SHCOMPACT 8205 || (! SHCOMPACT_FORCE_ON_STACK (mode, type) 8206 && ! 
SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode, 8207 type, named)))) 8208 { 8209 return gen_rtx_REG (mode, (FIRST_PARM_REG 8210 + ca->arg_count[(int) SH_ARG_INT])); 8211 } 8212 8213 return 0; 8214 } 8215 8216 return 0; 8217 } 8218 8219 /* Update the data in CUM to advance over an argument 8220 of mode MODE and data type TYPE. 8221 (TYPE is null for libcalls where that information may not be 8222 available.) */ 8223 8224 void 8225 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode, 8226 tree type, int named) 8227 { 8228 if (ca->force_mem) 8229 ca->force_mem = 0; 8230 else if (TARGET_SH5) 8231 { 8232 tree type2 = (ca->byref && type 8233 ? TREE_TYPE (type) 8234 : type); 8235 enum machine_mode mode2 = (ca->byref && type 8236 ? TYPE_MODE (type2) 8237 : mode); 8238 int dwords = ((ca->byref 8239 ? ca->byref 8240 : mode2 == BLKmode 8241 ? int_size_in_bytes (type2) 8242 : GET_MODE_SIZE (mode2)) + 7) / 8; 8243 int numregs = MIN (dwords, NPARM_REGS (SImode) 8244 - ca->arg_count[(int) SH_ARG_INT]); 8245 8246 if (numregs) 8247 { 8248 ca->arg_count[(int) SH_ARG_INT] += numregs; 8249 if (TARGET_SHCOMPACT 8250 && SHCOMPACT_FORCE_ON_STACK (mode2, type2)) 8251 { 8252 ca->call_cookie 8253 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT] 8254 - numregs, 1); 8255 /* N.B. We want this also for outgoing. */ 8256 ca->stack_regs += numregs; 8257 } 8258 else if (ca->byref) 8259 { 8260 if (! ca->outgoing) 8261 ca->stack_regs += numregs; 8262 ca->byref_regs += numregs; 8263 ca->byref = 0; 8264 do 8265 ca->call_cookie 8266 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT] 8267 - numregs, 2); 8268 while (--numregs); 8269 ca->call_cookie 8270 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT] 8271 - 1, 1); 8272 } 8273 else if (dwords > numregs) 8274 { 8275 int pushregs = numregs; 8276 8277 if (TARGET_SHCOMPACT) 8278 ca->stack_regs += numregs; 8279 while (pushregs < NPARM_REGS (SImode) - 1 8280 && (CALL_COOKIE_INT_REG_GET 8281 (ca->call_cookie, 8282 NPARM_REGS (SImode) - pushregs) 8283 == 1)) 8284 { 8285 ca->call_cookie 8286 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode) 8287 - pushregs, 1); 8288 pushregs++; 8289 } 8290 if (numregs == NPARM_REGS (SImode)) 8291 ca->call_cookie 8292 |= CALL_COOKIE_INT_REG (0, 1) 8293 | CALL_COOKIE_STACKSEQ (numregs - 1); 8294 else 8295 ca->call_cookie 8296 |= CALL_COOKIE_STACKSEQ (numregs); 8297 } 8298 } 8299 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT 8300 && (named || ! ca->prototype_p)) 8301 { 8302 if (mode2 == SFmode && ca->free_single_fp_reg) 8303 ca->free_single_fp_reg = 0; 8304 else if (ca->arg_count[(int) SH_ARG_FLOAT] 8305 < NPARM_REGS (SFmode)) 8306 { 8307 int numfpregs 8308 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2, 8309 NPARM_REGS (SFmode) 8310 - ca->arg_count[(int) SH_ARG_FLOAT]); 8311 8312 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs; 8313 8314 if (TARGET_SHCOMPACT && ! 
ca->prototype_p) 8315 { 8316 if (ca->outgoing && numregs > 0) 8317 do 8318 { 8319 ca->call_cookie 8320 |= (CALL_COOKIE_INT_REG 8321 (ca->arg_count[(int) SH_ARG_INT] 8322 - numregs + ((numfpregs - 2) / 2), 8323 4 + (ca->arg_count[(int) SH_ARG_FLOAT] 8324 - numfpregs) / 2)); 8325 } 8326 while (numfpregs -= 2); 8327 } 8328 else if (mode2 == SFmode && (named) 8329 && (ca->arg_count[(int) SH_ARG_FLOAT] 8330 < NPARM_REGS (SFmode))) 8331 ca->free_single_fp_reg 8332 = FIRST_FP_PARM_REG - numfpregs 8333 + ca->arg_count[(int) SH_ARG_FLOAT] + 1; 8334 } 8335 } 8336 return; 8337 } 8338 8339 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE) 8340 { 8341 /* Note that we've used the skipped register. */ 8342 if (mode == SFmode && ca->free_single_fp_reg) 8343 { 8344 ca->free_single_fp_reg = 0; 8345 return; 8346 } 8347 /* When we have a DF after an SF, there's an SF register that get 8348 skipped in order to align the DF value. We note this skipped 8349 register, because the next SF value will use it, and not the 8350 SF that follows the DF. */ 8351 if (mode == DFmode 8352 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode)) 8353 { 8354 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode) 8355 + BASE_ARG_REG (mode)); 8356 } 8357 } 8358 8359 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi) 8360 || PASS_IN_REG_P (*ca, mode, type)) 8361 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)] 8362 = (ROUND_REG (*ca, mode) 8363 + (mode == BLKmode 8364 ? ROUND_ADVANCE (int_size_in_bytes (type)) 8365 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))); 8366 } 8367 8368 /* The Renesas calling convention doesn't quite fit into this scheme since 8369 the address is passed like an invisible argument, but one that is always 8370 passed in memory. */ 8371 static rtx 8372 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED) 8373 { 8374 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl)) 8375 return 0; 8376 return gen_rtx_REG (Pmode, 2); 8377 } 8378 8379 /* Worker function for TARGET_FUNCTION_VALUE. 8380 8381 For the SH, this is like LIBCALL_VALUE, except that we must change the 8382 mode like PROMOTE_MODE does. 8383 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types 8384 tested here has to be kept in sync with the one in explow.c:promote_mode. 8385 */ 8386 8387 static rtx 8388 sh_function_value (const_tree valtype, 8389 const_tree fn_decl_or_type, 8390 bool outgoing ATTRIBUTE_UNUSED) 8391 { 8392 if (fn_decl_or_type 8393 && !DECL_P (fn_decl_or_type)) 8394 fn_decl_or_type = NULL; 8395 8396 return gen_rtx_REG ( 8397 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT 8398 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4 8399 && (TREE_CODE (valtype) == INTEGER_TYPE 8400 || TREE_CODE (valtype) == ENUMERAL_TYPE 8401 || TREE_CODE (valtype) == BOOLEAN_TYPE 8402 || TREE_CODE (valtype) == REAL_TYPE 8403 || TREE_CODE (valtype) == OFFSET_TYPE)) 8404 && sh_promote_prototypes (fn_decl_or_type) 8405 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)), 8406 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype))); 8407 } 8408 8409 /* Worker function for TARGET_LIBCALL_VALUE. */ 8410 8411 static rtx 8412 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) 8413 { 8414 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode)); 8415 } 8416 8417 /* Worker function for FUNCTION_VALUE_REGNO_P. 
*/ 8419 bool 8420 sh_function_value_regno_p (const unsigned int regno) 8421 { 8422 return ((regno) == FIRST_RET_REG 8423 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG) 8424 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG)); 8425 } 8426 8427 /* Worker function for TARGET_RETURN_IN_MEMORY. */ 8428 8429 static bool 8430 sh_return_in_memory (const_tree type, const_tree fndecl) 8431 { 8432 if (TARGET_SH5) 8433 { 8434 if (TYPE_MODE (type) == BLKmode) 8435 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8; 8436 else 8437 return GET_MODE_SIZE (TYPE_MODE (type)) > 8; 8438 } 8439 else 8440 { 8441 return (TYPE_MODE (type) == BLKmode 8442 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl)) 8443 && TREE_CODE (type) == RECORD_TYPE)); 8444 } 8445 } 8446 8447 /* We actually emit the code in sh_expand_prologue. We used to use 8448 a static variable to flag that we need to emit this code, but that 8449 doesn't work when inlining, when functions are deferred and then emitted 8450 later. Fortunately, we already have two flags that are part of struct 8451 function that tell if a function uses varargs or stdarg. */ 8452 static void 8453 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca, 8454 enum machine_mode mode, 8455 tree type, 8456 int *pretend_arg_size, 8457 int second_time ATTRIBUTE_UNUSED) 8458 { 8459 gcc_assert (cfun->stdarg); 8460 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)) 8461 { 8462 int named_parm_regs, anon_parm_regs; 8463 8464 named_parm_regs = (ROUND_REG (*ca, mode) 8465 + (mode == BLKmode 8466 ? ROUND_ADVANCE (int_size_in_bytes (type)) 8467 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))); 8468 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs; 8469 if (anon_parm_regs > 0) 8470 *pretend_arg_size = anon_parm_regs * 4; 8471 } 8472 } 8473 8474 static bool 8475 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED) 8476 { 8477 return TARGET_SH5; 8478 } 8479 8480 static bool 8481 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca) 8482 { 8483 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5; 8484 } 8485 8486 8487 /* Define the offset between two registers, one to be eliminated, and 8488 the other its replacement, at the start of a routine.
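Broadly: the argument pointer is separated from the hard frame pointer and from the stack pointer by the space for the saved registers plus the frame's automatic variables (plus 8 bytes per by-reference register on SHcompact); the hard frame pointer and the stack pointer start out coincident; the soft frame pointer is the rounded frame size away from either; and for the return-address pointer the SH5 case has to look up where PR was placed in the save schedule. See the individual cases below.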
*/ 8489 8490 int 8491 initial_elimination_offset (int from, int to) 8492 { 8493 int regs_saved; 8494 int regs_saved_rounding = 0; 8495 int total_saved_regs_space; 8496 int total_auto_space; 8497 int save_flags = target_flags; 8498 int copy_flags; 8499 HARD_REG_SET live_regs_mask; 8500 8501 shmedia_space_reserved_for_target_registers = false; 8502 regs_saved = calc_live_regs (&live_regs_mask); 8503 regs_saved += SHMEDIA_REGS_STACK_ADJUST (); 8504 8505 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask)) 8506 { 8507 shmedia_space_reserved_for_target_registers = true; 8508 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask); 8509 } 8510 8511 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT)) 8512 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT) 8513 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT)); 8514 8515 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding; 8516 copy_flags = target_flags; 8517 target_flags = save_flags; 8518 8519 total_saved_regs_space = regs_saved + regs_saved_rounding; 8520 8521 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 8522 return total_saved_regs_space + total_auto_space 8523 + crtl->args.info.byref_regs * 8; 8524 8525 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 8526 return total_saved_regs_space + total_auto_space 8527 + crtl->args.info.byref_regs * 8; 8528 8529 /* Initial gap between fp and sp is 0. */ 8530 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 8531 return 0; 8532 8533 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 8534 return rounded_frame_size (0); 8535 8536 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 8537 return rounded_frame_size (0); 8538 8539 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM 8540 && (to == HARD_FRAME_POINTER_REGNUM 8541 || to == STACK_POINTER_REGNUM)); 8542 if (TARGET_SH5) 8543 { 8544 int n = total_saved_regs_space; 8545 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG; 8546 save_schedule schedule; 8547 save_entry *entry; 8548 8549 n += total_auto_space; 8550 8551 /* If it wasn't saved, there's not much we can do. */ 8552 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg)) 8553 return n; 8554 8555 target_flags = copy_flags; 8556 8557 sh5_schedule_saves (&live_regs_mask, &schedule, n); 8558 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++) 8559 if (entry->reg == pr_reg) 8560 { 8561 target_flags = save_flags; 8562 return entry->offset; 8563 } 8564 gcc_unreachable (); 8565 } 8566 else 8567 return total_auto_space; 8568 } 8569 8570 /* Parse the -mfixed-range= option string. */ 8571 void 8572 sh_fix_range (const char *const_str) 8573 { 8574 int i, first, last; 8575 char *str, *dash, *comma; 8576 8577 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and 8578 REG2 are either register names or register numbers. The effect 8579 of this option is to mark the registers in the range from REG1 to 8580 REG2 as ``fixed'' so they won't be used by the compiler. 
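For example (purely illustrative), -mfixed-range=r10-r12,r14-r14 would reserve r10, r11, r12 and r14 so that the compiler never allocates them.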
*/ 8581 8582 i = strlen (const_str); 8583 str = (char *) alloca (i + 1); 8584 memcpy (str, const_str, i + 1); 8585 8586 while (1) 8587 { 8588 dash = strchr (str, '-'); 8589 if (!dash) 8590 { 8591 warning (0, "value of -mfixed-range must have form REG1-REG2"); 8592 return; 8593 } 8594 *dash = '\0'; 8595 comma = strchr (dash + 1, ','); 8596 if (comma) 8597 *comma = '\0'; 8598 8599 first = decode_reg_name (str); 8600 if (first < 0) 8601 { 8602 warning (0, "unknown register name: %s", str); 8603 return; 8604 } 8605 8606 last = decode_reg_name (dash + 1); 8607 if (last < 0) 8608 { 8609 warning (0, "unknown register name: %s", dash + 1); 8610 return; 8611 } 8612 8613 *dash = '-'; 8614 8615 if (first > last) 8616 { 8617 warning (0, "%s-%s is an empty range", str, dash + 1); 8618 return; 8619 } 8620 8621 for (i = first; i <= last; ++i) 8622 fixed_regs[i] = call_used_regs[i] = 1; 8623 8624 if (!comma) 8625 break; 8626 8627 *comma = ','; 8628 str = comma + 1; 8629 } 8630 } 8631 8632 /* Insert any deferred function attributes from earlier pragmas. */ 8633 static void 8634 sh_insert_attributes (tree node, tree *attributes) 8635 { 8636 tree attrs; 8637 8638 if (TREE_CODE (node) != FUNCTION_DECL) 8639 return; 8640 8641 /* We are only interested in fields. */ 8642 if (!DECL_P (node)) 8643 return; 8644 8645 /* Append the attributes to the deferred attributes. */ 8646 *sh_deferred_function_attributes_tail = *attributes; 8647 attrs = sh_deferred_function_attributes; 8648 if (!attrs) 8649 return; 8650 8651 /* Some attributes imply or require the interrupt attribute. */ 8652 if (!lookup_attribute ("interrupt_handler", attrs) 8653 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node))) 8654 { 8655 /* If we have a trapa_handler, but no interrupt_handler attribute, 8656 insert an interrupt_handler attribute. */ 8657 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE) 8658 /* We can't use sh_pr_interrupt here because that's not in the 8659 java frontend. */ 8660 attrs 8661 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs); 8662 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank, 8663 if the interrupt attribute is missing, we ignore the attribute 8664 and warn. */ 8665 else if (lookup_attribute ("sp_switch", attrs) 8666 || lookup_attribute ("trap_exit", attrs) 8667 || lookup_attribute ("nosave_low_regs", attrs) 8668 || lookup_attribute ("resbank", attrs)) 8669 { 8670 tree *tail; 8671 8672 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs)) 8673 { 8674 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs)) 8675 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs)) 8676 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)) 8677 || is_attribute_p ("resbank", TREE_PURPOSE (attrs))) 8678 warning (OPT_Wattributes, 8679 "%qE attribute only applies to interrupt functions", 8680 TREE_PURPOSE (attrs)); 8681 else 8682 { 8683 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE, 8684 NULL_TREE); 8685 tail = &TREE_CHAIN (*tail); 8686 } 8687 } 8688 attrs = *attributes; 8689 } 8690 } 8691 8692 /* Install the processed list. */ 8693 *attributes = attrs; 8694 8695 /* Clear deferred attributes. */ 8696 sh_deferred_function_attributes = NULL_TREE; 8697 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes; 8698 8699 return; 8700 } 8701 8702 /* Supported attributes: 8703 8704 interrupt_handler -- specifies this function is an interrupt handler. 8705 8706 trapa_handler - like above, but don't save all registers. 
8707 8708 sp_switch -- specifies an alternate stack for an interrupt handler 8709 to run on. 8710 8711 trap_exit -- use a trapa to exit an interrupt function instead of 8712 an rte instruction. 8713 8714 nosave_low_regs - don't save r0..r7 in an interrupt handler. 8715 This is useful on the SH3 and upwards, 8716 which has a separate set of low regs for User and Supervisor modes. 8717 This should only be used for the lowest level of interrupts. Higher levels 8718 of interrupts must save the registers in case they themselves are 8719 interrupted. 8720 8721 renesas -- use Renesas calling/layout conventions (functions and 8722 structures). 8723 8724 resbank -- In case of an ISR, use a register bank to save registers 8725 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets. 8726 */ 8727 8728 /* Handle a 'resbank' attribute. */ 8729 static tree 8730 sh_handle_resbank_handler_attribute (tree * node, tree name, 8731 tree args ATTRIBUTE_UNUSED, 8732 int flags ATTRIBUTE_UNUSED, 8733 bool * no_add_attrs) 8734 { 8735 if (!TARGET_SH2A) 8736 { 8737 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A", 8738 name); 8739 *no_add_attrs = true; 8740 } 8741 if (TREE_CODE (*node) != FUNCTION_DECL) 8742 { 8743 warning (OPT_Wattributes, "%qE attribute only applies to functions", 8744 name); 8745 *no_add_attrs = true; 8746 } 8747 8748 return NULL_TREE; 8749 } 8750 8751 /* Handle an "interrupt_handler" attribute; arguments as in 8752 struct attribute_spec.handler. */ 8753 static tree 8754 sh_handle_interrupt_handler_attribute (tree *node, tree name, 8755 tree args ATTRIBUTE_UNUSED, 8756 int flags ATTRIBUTE_UNUSED, 8757 bool *no_add_attrs) 8758 { 8759 if (TREE_CODE (*node) != FUNCTION_DECL) 8760 { 8761 warning (OPT_Wattributes, "%qE attribute only applies to functions", 8762 name); 8763 *no_add_attrs = true; 8764 } 8765 else if (TARGET_SHCOMPACT) 8766 { 8767 error ("attribute interrupt_handler is not compatible with -m5-compact"); 8768 *no_add_attrs = true; 8769 } 8770 8771 return NULL_TREE; 8772 } 8773 8774 /* Handle an 'function_vector' attribute; arguments as in 8775 struct attribute_spec.handler. */ 8776 static tree 8777 sh2a_handle_function_vector_handler_attribute (tree * node, tree name, 8778 tree args ATTRIBUTE_UNUSED, 8779 int flags ATTRIBUTE_UNUSED, 8780 bool * no_add_attrs) 8781 { 8782 if (!TARGET_SH2A) 8783 { 8784 warning (OPT_Wattributes, "%qE attribute only applies to SH2A", 8785 name); 8786 *no_add_attrs = true; 8787 } 8788 else if (TREE_CODE (*node) != FUNCTION_DECL) 8789 { 8790 warning (OPT_Wattributes, "%qE attribute only applies to functions", 8791 name); 8792 *no_add_attrs = true; 8793 } 8794 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) 8795 { 8796 /* The argument must be a constant integer. */ 8797 warning (OPT_Wattributes, 8798 "%qE attribute argument not an integer constant", 8799 name); 8800 *no_add_attrs = true; 8801 } 8802 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255) 8803 { 8804 /* The argument value must be between 0 to 255. */ 8805 warning (OPT_Wattributes, 8806 "%qE attribute argument should be between 0 to 255", 8807 name); 8808 *no_add_attrs = true; 8809 } 8810 return NULL_TREE; 8811 } 8812 8813 /* Returns 1 if current function has been assigned the attribute 8814 'function_vector'. 
*/ 8815 int 8816 sh2a_is_function_vector_call (rtx x) 8817 { 8818 if (GET_CODE (x) == SYMBOL_REF 8819 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION)) 8820 { 8821 tree tr = SYMBOL_REF_DECL (x); 8822 8823 if (sh2a_function_vector_p (tr)) 8824 return 1; 8825 } 8826 8827 return 0; 8828 } 8829 8830 /* Returns the function vector number, if the the attribute 8831 'function_vector' is assigned, otherwise returns zero. */ 8832 int 8833 sh2a_get_function_vector_number (rtx x) 8834 { 8835 int num; 8836 tree list, t; 8837 8838 if ((GET_CODE (x) == SYMBOL_REF) 8839 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION)) 8840 { 8841 t = SYMBOL_REF_DECL (x); 8842 8843 if (TREE_CODE (t) != FUNCTION_DECL) 8844 return 0; 8845 8846 list = SH_ATTRIBUTES (t); 8847 while (list) 8848 { 8849 if (is_attribute_p ("function_vector", TREE_PURPOSE (list))) 8850 { 8851 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list))); 8852 return num; 8853 } 8854 8855 list = TREE_CHAIN (list); 8856 } 8857 8858 return 0; 8859 } 8860 else 8861 return 0; 8862 } 8863 8864 /* Handle an "sp_switch" attribute; arguments as in 8865 struct attribute_spec.handler. */ 8866 static tree 8867 sh_handle_sp_switch_attribute (tree *node, tree name, tree args, 8868 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 8869 { 8870 if (TREE_CODE (*node) != FUNCTION_DECL) 8871 { 8872 warning (OPT_Wattributes, "%qE attribute only applies to functions", 8873 name); 8874 *no_add_attrs = true; 8875 } 8876 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST) 8877 { 8878 /* The argument must be a constant string. */ 8879 warning (OPT_Wattributes, "%qE attribute argument not a string constant", 8880 name); 8881 *no_add_attrs = true; 8882 } 8883 8884 return NULL_TREE; 8885 } 8886 8887 /* Handle an "trap_exit" attribute; arguments as in 8888 struct attribute_spec.handler. */ 8889 static tree 8890 sh_handle_trap_exit_attribute (tree *node, tree name, tree args, 8891 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 8892 { 8893 if (TREE_CODE (*node) != FUNCTION_DECL) 8894 { 8895 warning (OPT_Wattributes, "%qE attribute only applies to functions", 8896 name); 8897 *no_add_attrs = true; 8898 } 8899 /* The argument specifies a trap number to be used in a trapa instruction 8900 at function exit (instead of an rte instruction). */ 8901 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) 8902 { 8903 /* The argument must be a constant integer. */ 8904 warning (OPT_Wattributes, "%qE attribute argument not an " 8905 "integer constant", name); 8906 *no_add_attrs = true; 8907 } 8908 8909 return NULL_TREE; 8910 } 8911 8912 static tree 8913 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED, 8914 tree name ATTRIBUTE_UNUSED, 8915 tree args ATTRIBUTE_UNUSED, 8916 int flags ATTRIBUTE_UNUSED, 8917 bool *no_add_attrs ATTRIBUTE_UNUSED) 8918 { 8919 return NULL_TREE; 8920 } 8921 8922 /* True if __attribute__((renesas)) or -mrenesas. */ 8923 int 8924 sh_attr_renesas_p (const_tree td) 8925 { 8926 if (TARGET_HITACHI) 8927 return 1; 8928 if (td == 0) 8929 return 0; 8930 if (DECL_P (td)) 8931 td = TREE_TYPE (td); 8932 if (td == error_mark_node) 8933 return 0; 8934 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) 8935 != NULL_TREE); 8936 } 8937 8938 /* True if __attribute__((renesas)) or -mrenesas, for the current 8939 function. 
*/ 8940 int 8941 sh_cfun_attr_renesas_p (void) 8942 { 8943 return sh_attr_renesas_p (current_function_decl); 8944 } 8945 8946 int 8947 sh_cfun_interrupt_handler_p (void) 8948 { 8949 return (lookup_attribute ("interrupt_handler", 8950 DECL_ATTRIBUTES (current_function_decl)) 8951 != NULL_TREE); 8952 } 8953 8954 /* Returns 1 if FUNC has been assigned the attribute 8955 "function_vector". */ 8956 int 8957 sh2a_function_vector_p (tree func) 8958 { 8959 tree list; 8960 if (TREE_CODE (func) != FUNCTION_DECL) 8961 return 0; 8962 8963 list = SH_ATTRIBUTES (func); 8964 while (list) 8965 { 8966 if (is_attribute_p ("function_vector", TREE_PURPOSE (list))) 8967 return 1; 8968 8969 list = TREE_CHAIN (list); 8970 } 8971 return 0; 8972 } 8973 8974 /* Returns TRUE if the current function has the "resbank" attribute. */ 8975 8976 int 8977 sh_cfun_resbank_handler_p (void) 8978 { 8979 return ((lookup_attribute ("resbank", 8980 DECL_ATTRIBUTES (current_function_decl)) 8981 != NULL_TREE) 8982 && (lookup_attribute ("interrupt_handler", 8983 DECL_ATTRIBUTES (current_function_decl)) 8984 != NULL_TREE) && TARGET_SH2A); 8985 } 8986 8987 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */ 8988 8989 static const char * 8990 sh_check_pch_target_flags (int old_flags) 8991 { 8992 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3 8993 | MASK_SH_E | MASK_HARD_SH4 8994 | MASK_FPU_SINGLE | MASK_SH4)) 8995 return _("created and used with different architectures / ABIs"); 8996 if ((old_flags ^ target_flags) & MASK_HITACHI) 8997 return _("created and used with different ABIs"); 8998 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN) 8999 return _("created and used with different endianness"); 9000 return NULL; 9001 } 9002 9003 /* Predicates used by the templates. */ 9004 9005 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx. 9006 Used only in general_movsrc_operand. */ 9007 9008 int 9009 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 9010 { 9011 switch (REGNO (op)) 9012 { 9013 case PR_REG: 9014 case MACL_REG: 9015 case MACH_REG: 9016 return 1; 9017 } 9018 return 0; 9019 } 9020 9021 /* Nonzero if OP is a floating point value with value 0.0. */ 9022 9023 int 9024 fp_zero_operand (rtx op) 9025 { 9026 REAL_VALUE_TYPE r; 9027 9028 if (GET_MODE (op) != SFmode) 9029 return 0; 9030 9031 REAL_VALUE_FROM_CONST_DOUBLE (r, op); 9032 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r); 9033 } 9034 9035 /* Nonzero if OP is a floating point value with value 1.0. */ 9036 9037 int 9038 fp_one_operand (rtx op) 9039 { 9040 REAL_VALUE_TYPE r; 9041 9042 if (GET_MODE (op) != SFmode) 9043 return 0; 9044 9045 REAL_VALUE_FROM_CONST_DOUBLE (r, op); 9046 return REAL_VALUES_EQUAL (r, dconst1); 9047 } 9048 9049 /* In general mode switching is used. If we are 9050 compiling without -mfmovd, movsf_ie isn't taken into account for 9051 mode switching. We could check in machine_dependent_reorg for 9052 cases where we know we are in single precision mode, but there is 9053 no interface to find that out during reload, so we must avoid 9054 choosing an fldi alternative during reload and thus failing to 9055 allocate a scratch register for the constant loading. */ 9056 int 9057 fldi_ok (void) 9058 { 9059 return 1; 9060 } 9061 9062 int 9063 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 9064 { 9065 enum rtx_code code = GET_CODE (op); 9066 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE); 9067 } 9068 9069 /* Return the TLS model for TLS symbols, 0 otherwise.
*/ 9070 enum tls_model 9071 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 9072 { 9073 if (GET_CODE (op) != SYMBOL_REF) 9074 return TLS_MODEL_NONE; 9075 return SYMBOL_REF_TLS_MODEL (op); 9076 } 9077 9078 /* Return the destination address of a branch. */ 9079 9080 static int 9081 branch_dest (rtx branch) 9082 { 9083 rtx dest = SET_SRC (PATTERN (branch)); 9084 int dest_uid; 9085 9086 if (GET_CODE (dest) == IF_THEN_ELSE) 9087 dest = XEXP (dest, 1); 9088 dest = XEXP (dest, 0); 9089 dest_uid = INSN_UID (dest); 9090 return INSN_ADDRESSES (dest_uid); 9091 } 9092 9093 /* Return nonzero if REG is not used after INSN. 9094 We assume REG is a reload reg, and therefore does 9095 not live past labels. It may live past calls or jumps though. */ 9096 int 9097 reg_unused_after (rtx reg, rtx insn) 9098 { 9099 enum rtx_code code; 9100 rtx set; 9101 9102 /* If the reg is set by this instruction, then it is safe for our 9103 case. Disregard the case where this is a store to memory, since 9104 we are checking a register used in the store address. */ 9105 set = single_set (insn); 9106 if (set && !MEM_P (SET_DEST (set)) 9107 && reg_overlap_mentioned_p (reg, SET_DEST (set))) 9108 return 1; 9109 9110 while ((insn = NEXT_INSN (insn))) 9111 { 9112 rtx set; 9113 if (!INSN_P (insn)) 9114 continue; 9115 9116 code = GET_CODE (insn); 9117 9118 #if 0 9119 /* If this is a label that existed before reload, then the register 9120 if dead here. However, if this is a label added by reorg, then 9121 the register may still be live here. We can't tell the difference, 9122 so we just ignore labels completely. */ 9123 if (code == CODE_LABEL) 9124 return 1; 9125 /* else */ 9126 #endif 9127 9128 if (code == JUMP_INSN) 9129 return 0; 9130 9131 /* If this is a sequence, we must handle them all at once. 9132 We could have for instance a call that sets the target register, 9133 and an insn in a delay slot that uses the register. In this case, 9134 we must return 0. */ 9135 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) 9136 { 9137 int i; 9138 int retval = 0; 9139 9140 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) 9141 { 9142 rtx this_insn = XVECEXP (PATTERN (insn), 0, i); 9143 rtx set = single_set (this_insn); 9144 9145 if (CALL_P (this_insn)) 9146 code = CALL_INSN; 9147 else if (JUMP_P (this_insn)) 9148 { 9149 if (INSN_ANNULLED_BRANCH_P (this_insn)) 9150 return 0; 9151 code = JUMP_INSN; 9152 } 9153 9154 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) 9155 return 0; 9156 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) 9157 { 9158 if (!MEM_P (SET_DEST (set))) 9159 retval = 1; 9160 else 9161 return 0; 9162 } 9163 if (set == 0 9164 && reg_overlap_mentioned_p (reg, PATTERN (this_insn))) 9165 return 0; 9166 } 9167 if (retval == 1) 9168 return 1; 9169 else if (code == JUMP_INSN) 9170 return 0; 9171 } 9172 9173 set = single_set (insn); 9174 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) 9175 return 0; 9176 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) 9177 return !MEM_P (SET_DEST (set)); 9178 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn))) 9179 return 0; 9180 9181 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)]) 9182 return 1; 9183 } 9184 return 1; 9185 } 9186 9187 #include "ggc.h" 9188 9189 static GTY(()) rtx fpscr_rtx; 9190 rtx 9191 get_fpscr_rtx (void) 9192 { 9193 if (! fpscr_rtx) 9194 { 9195 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG); 9196 REG_USERVAR_P (fpscr_rtx) = 1; 9197 mark_user_reg (fpscr_rtx); 9198 } 9199 if (! 
reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG) 9200 mark_user_reg (fpscr_rtx); 9201 return fpscr_rtx; 9202 } 9203 9204 static GTY(()) tree fpscr_values; 9205 9206 static void 9207 emit_fpu_switch (rtx scratch, int index) 9208 { 9209 rtx dst, src; 9210 9211 if (fpscr_values == NULL) 9212 { 9213 tree t; 9214 9215 t = build_index_type (integer_one_node); 9216 t = build_array_type (integer_type_node, t); 9217 t = build_decl (BUILTINS_LOCATION, 9218 VAR_DECL, get_identifier ("__fpscr_values"), t); 9219 DECL_ARTIFICIAL (t) = 1; 9220 DECL_IGNORED_P (t) = 1; 9221 DECL_EXTERNAL (t) = 1; 9222 TREE_STATIC (t) = 1; 9223 TREE_PUBLIC (t) = 1; 9224 TREE_USED (t) = 1; 9225 9226 fpscr_values = t; 9227 } 9228 9229 src = DECL_RTL (fpscr_values); 9230 if (!can_create_pseudo_p ()) 9231 { 9232 emit_move_insn (scratch, XEXP (src, 0)); 9233 if (index != 0) 9234 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4))); 9235 src = adjust_automodify_address (src, PSImode, scratch, index * 4); 9236 } 9237 else 9238 src = adjust_address (src, PSImode, index * 4); 9239 9240 dst = get_fpscr_rtx (); 9241 emit_move_insn (dst, src); 9242 } 9243 9244 void 9245 emit_sf_insn (rtx pat) 9246 { 9247 emit_insn (pat); 9248 } 9249 9250 void 9251 emit_df_insn (rtx pat) 9252 { 9253 emit_insn (pat); 9254 } 9255 9256 void 9257 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands) 9258 { 9259 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ())); 9260 } 9261 9262 void 9263 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands) 9264 { 9265 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2], 9266 get_fpscr_rtx ())); 9267 } 9268 9269 void 9270 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands) 9271 { 9272 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ())); 9273 } 9274 9275 void 9276 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands) 9277 { 9278 emit_df_insn ((*fun) (operands[0], operands[1], operands[2], 9279 get_fpscr_rtx ())); 9280 } 9281 9282 static rtx get_free_reg (HARD_REG_SET); 9283 9284 /* This function returns a register to use to load the address to load 9285 the fpscr from. Currently it always returns r1 or r7, but when we are 9286 able to use pseudo registers after combine, or have a better mechanism 9287 for choosing a register, it should be done here. */ 9288 /* REGS_LIVE is the liveness information for the point for which we 9289 need this allocation. In some bare-bones exit blocks, r1 is live at the 9290 start. We can even have all of r0..r3 being live: 9291 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; } 9292 The insn before which the new insns are placed will clobber the register 9293 we return. If a basic block consists only of setting the return value 9294 register to a pseudo and using that register, the return value is not 9295 live before or after this block, yet we'll insert our insns right in 9296 the middle. */ 9297 9298 static rtx 9299 get_free_reg (HARD_REG_SET regs_live) 9300 { 9301 if (! TEST_HARD_REG_BIT (regs_live, 1)) 9302 return gen_rtx_REG (Pmode, 1); 9303 9304 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target, 9305 there shouldn't be anything but a jump before the function end. */ 9306 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7)); 9307 return gen_rtx_REG (Pmode, 7); 9308 } 9309 9310 /* This function will set the fpscr from memory. 9311 MODE is the mode we are setting it to.
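When new pseudos cannot be created (during and after reload), a free hard register chosen from REGS_LIVE by get_free_reg above is used to address the __fpscr_values table; otherwise no scratch register is needed.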
*/ 9312 void 9313 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live) 9314 { 9315 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode; 9316 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE); 9317 rtx addr_reg; 9318 9319 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX; 9320 emit_fpu_switch (addr_reg, fp_mode == norm_mode); 9321 } 9322 9323 /* Is the given character a logical line separator for the assembler? */ 9324 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR 9325 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';') 9326 #endif 9327 9328 int 9329 sh_insn_length_adjustment (rtx insn) 9330 { 9331 /* Instructions with unfilled delay slots take up an extra two bytes for 9332 the nop in the delay slot. */ 9333 if (((NONJUMP_INSN_P (insn) 9334 && GET_CODE (PATTERN (insn)) != USE 9335 && GET_CODE (PATTERN (insn)) != CLOBBER) 9336 || CALL_P (insn) 9337 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn))) 9338 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE 9339 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES) 9340 return 2; 9341 9342 /* SH2e has a bug that prevents the use of annulled branches, so if 9343 the delay slot is not filled, we'll have to put a NOP in it. */ 9344 if (sh_cpu_attr == CPU_SH2E 9345 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn) 9346 && get_attr_type (insn) == TYPE_CBRANCH 9347 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE) 9348 return 2; 9349 9350 /* sh-dsp parallel processing insns take four bytes instead of two. */ 9351 9352 if (NONJUMP_INSN_P (insn)) 9353 { 9354 int sum = 0; 9355 rtx body = PATTERN (insn); 9356 const char *templ; 9357 char c; 9358 int maybe_label = 1; 9359 9360 if (GET_CODE (body) == ASM_INPUT) 9361 templ = XSTR (body, 0); 9362 else if (asm_noperands (body) >= 0) 9363 templ 9364 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL); 9365 else 9366 return 0; 9367 do 9368 { 9369 int ppi_adjust = 0; 9370 9371 do 9372 c = *templ++; 9373 while (c == ' ' || c == '\t'); 9374 /* All sh-dsp parallel-processing insns start with p. 9375 The only non-ppi sh insn starting with p is pref. 9376 The only ppi starting with pr is prnd. */ 9377 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2)) 9378 ppi_adjust = 2; 9379 /* The repeat pseudo-insn expands to three insns, a total of 9380 six bytes in size. */ 9381 else if ((c == 'r' || c == 'R') 9382 && ! strncasecmp ("epeat", templ, 5)) 9383 ppi_adjust = 4; 9384 while (c && c != '\n' 9385 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ)) 9386 { 9387 /* If this is a label, it is obviously not a ppi insn. */ 9388 if (c == ':' && maybe_label) 9389 { 9390 ppi_adjust = 0; 9391 break; 9392 } 9393 else if (c == '\'' || c == '"') 9394 maybe_label = 0; 9395 c = *templ++; 9396 } 9397 sum += ppi_adjust; 9398 maybe_label = c != ':'; 9399 } 9400 while (c); 9401 return sum; 9402 } 9403 return 0; 9404 } 9405 9406 /* Return TRUE for a valid displacement for the REG+disp addressing 9407 mode with MODE. */ 9408 9409 /* ??? The SH2e does not have the REG+disp addressing mode when loading values 9410 into the FRx registers. We implement this by setting the maximum offset 9411 to zero when the value is SFmode. This also restricts loading of SFmode 9412 values into the integer registers, but that can't be helped. */ 9413 9414 /* The SH allows a displacement in a QI or HI amode, but only when the 9415 other operand is R0. GCC doesn't handle this very well, so we forgo 9416 all of that. 
9417 9418 A legitimate index for a QI or HI is 0, SI can be any number 0..63, 9419 DI can be any number 0..60. */ 9420 9421 bool 9422 sh_legitimate_index_p (enum machine_mode mode, rtx op) 9423 { 9424 if (CONST_INT_P (op)) 9425 { 9426 if (TARGET_SHMEDIA) 9427 { 9428 int size; 9429 9430 /* Check if this the address of an unaligned load / store. */ 9431 if (mode == VOIDmode) 9432 return CONST_OK_FOR_I06 (INTVAL (op)); 9433 9434 size = GET_MODE_SIZE (mode); 9435 return (!(INTVAL (op) & (size - 1)) 9436 && INTVAL (op) >= -512 * size 9437 && INTVAL (op) < 512 * size); 9438 } 9439 9440 if (TARGET_SH2A) 9441 { 9442 if (GET_MODE_SIZE (mode) == 1 9443 && (unsigned) INTVAL (op) < 4096) 9444 return true; 9445 } 9446 9447 if ((GET_MODE_SIZE (mode) == 4 9448 && (unsigned) INTVAL (op) < 64 9449 && !(INTVAL (op) & 3) 9450 && !(TARGET_SH2E && mode == SFmode)) 9451 || (GET_MODE_SIZE (mode) == 4 9452 && (unsigned) INTVAL (op) < 16383 9453 && !(INTVAL (op) & 3) && TARGET_SH2A)) 9454 return true; 9455 9456 if ((GET_MODE_SIZE (mode) == 8 9457 && (unsigned) INTVAL (op) < 60 9458 && !(INTVAL (op) & 3) 9459 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode)) 9460 || ((GET_MODE_SIZE (mode)==8) 9461 && (unsigned) INTVAL (op) < 8192 9462 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3)) 9463 && (TARGET_SH2A && mode == DFmode))) 9464 return true; 9465 } 9466 9467 return false; 9468 } 9469 9470 /* Recognize an RTL expression that is a valid memory address for 9471 an instruction. 9472 The MODE argument is the machine mode for the MEM expression 9473 that wants to use this address. 9474 Allow REG 9475 REG+disp 9476 REG+r0 9477 REG++ 9478 --REG */ 9479 9480 static bool 9481 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) 9482 { 9483 if (MAYBE_BASE_REGISTER_RTX_P (x, strict)) 9484 return true; 9485 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC) 9486 && ! TARGET_SHMEDIA 9487 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict)) 9488 return true; 9489 else if (GET_CODE (x) == PLUS 9490 && (mode != PSImode || reload_completed)) 9491 { 9492 rtx xop0 = XEXP (x, 0); 9493 rtx xop1 = XEXP (x, 1); 9494 9495 if (GET_MODE_SIZE (mode) <= 8 9496 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict) 9497 && sh_legitimate_index_p (mode, xop1)) 9498 return true; 9499 9500 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode 9501 || ((xop0 == stack_pointer_rtx 9502 || xop0 == hard_frame_pointer_rtx) 9503 && REG_P (xop1) && REGNO (xop1) == R0_REG) 9504 || ((xop1 == stack_pointer_rtx 9505 || xop1 == hard_frame_pointer_rtx) 9506 && REG_P (xop0) && REGNO (xop0) == R0_REG)) 9507 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4) 9508 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8) 9509 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE) 9510 && TARGET_FMOVD && mode == DFmode))) 9511 { 9512 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict) 9513 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict)) 9514 return true; 9515 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict) 9516 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)) 9517 return true; 9518 } 9519 } 9520 9521 return false; 9522 } 9523 9524 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol 9525 isn't protected by a PIC unspec. */ 9526 int 9527 nonpic_symbol_mentioned_p (rtx x) 9528 { 9529 register const char *fmt; 9530 register int i; 9531 9532 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF 9533 || GET_CODE (x) == PC) 9534 return 1; 9535 9536 /* We don't want to look into the possible MEM location of a 9537 CONST_DOUBLE, since we're not going to use it, in general. 
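Likewise, a symbol wrapped in one of the PIC-related UNSPECs handled just below is already protected, so it is not counted as a bare mention. 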
*/ 9538 if (GET_CODE (x) == CONST_DOUBLE) 9539 return 0; 9540 9541 if (GET_CODE (x) == UNSPEC 9542 && (XINT (x, 1) == UNSPEC_PIC 9543 || XINT (x, 1) == UNSPEC_GOT 9544 || XINT (x, 1) == UNSPEC_GOTOFF 9545 || XINT (x, 1) == UNSPEC_GOTPLT 9546 || XINT (x, 1) == UNSPEC_GOTTPOFF 9547 || XINT (x, 1) == UNSPEC_DTPOFF 9548 || XINT (x, 1) == UNSPEC_TPOFF 9549 || XINT (x, 1) == UNSPEC_PLT 9550 || XINT (x, 1) == UNSPEC_SYMOFF 9551 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF)) 9552 return 0; 9553 9554 fmt = GET_RTX_FORMAT (GET_CODE (x)); 9555 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 9556 { 9557 if (fmt[i] == 'E') 9558 { 9559 register int j; 9560 9561 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 9562 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j))) 9563 return 1; 9564 } 9565 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i))) 9566 return 1; 9567 } 9568 9569 return 0; 9570 } 9571 9572 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or 9573 @GOTOFF in `reg'. */ 9574 rtx 9575 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED, 9576 rtx reg) 9577 { 9578 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE) 9579 return orig; 9580 9581 if (GET_CODE (orig) == LABEL_REF 9582 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig))) 9583 { 9584 if (reg == 0) 9585 reg = gen_reg_rtx (Pmode); 9586 9587 emit_insn (gen_symGOTOFF2reg (reg, orig)); 9588 return reg; 9589 } 9590 else if (GET_CODE (orig) == SYMBOL_REF) 9591 { 9592 if (reg == 0) 9593 reg = gen_reg_rtx (Pmode); 9594 9595 emit_insn (gen_symGOT2reg (reg, orig)); 9596 return reg; 9597 } 9598 return orig; 9599 } 9600 9601 /* Try machine-dependent ways of modifying an illegitimate address 9602 to be legitimate. If we find one, return the new, valid address. 9603 Otherwise, return X. 9604 9605 For the SH, if X is almost suitable for indexing, but the offset is 9606 out of range, convert it into a normal form so that CSE has a chance 9607 of reducing the number of address registers used. */ 9608 9609 static rtx 9610 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode) 9611 { 9612 if (flag_pic) 9613 x = legitimize_pic_address (oldx, mode, NULL_RTX); 9614 9615 if (GET_CODE (x) == PLUS 9616 && (GET_MODE_SIZE (mode) == 4 9617 || GET_MODE_SIZE (mode) == 8) 9618 && CONST_INT_P (XEXP (x, 1)) 9619 && BASE_REGISTER_RTX_P (XEXP (x, 0)) 9620 && ! TARGET_SHMEDIA 9621 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode) 9622 && ! (TARGET_SH2E && mode == SFmode)) 9623 { 9624 rtx index_rtx = XEXP (x, 1); 9625 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base; 9626 rtx sum; 9627 9628 /* On rare occasions, we might get an unaligned pointer 9629 that is indexed in a way to give an aligned address. 9630 Therefore, keep the lower two bits in offset_base. */ 9631 /* Instead of offset_base 128..131 use 124..127, so that 9632 simple add suffices. */ 9633 if (offset > 127) 9634 offset_base = ((offset + 4) & ~60) - 4; 9635 else 9636 offset_base = offset & ~60; 9637 9638 /* Sometimes the normal form does not suit DImode. We 9639 could avoid that by using smaller ranges, but that 9640 would give less optimized code when SImode is 9641 prevalent. 
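For instance, an SImode access at base + 132 is rewritten below into (base + 124) + 8: the 124 still fits a simple immediate add, and the remaining displacement of 8 is a valid index for the access. 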
*/ 9642 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64) 9643 { 9644 sum = expand_binop (Pmode, add_optab, XEXP (x, 0), 9645 GEN_INT (offset_base), NULL_RTX, 0, 9646 OPTAB_LIB_WIDEN); 9647 9648 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base)); 9649 } 9650 } 9651 9652 return x; 9653 } 9654 9655 /* Mark the use of a constant in the literal table. If the constant 9656 has multiple labels, make it unique. */ 9657 static rtx 9658 mark_constant_pool_use (rtx x) 9659 { 9660 rtx insn, lab, pattern; 9661 9662 if (x == NULL) 9663 return x; 9664 9665 switch (GET_CODE (x)) 9666 { 9667 case LABEL_REF: 9668 x = XEXP (x, 0); 9669 case CODE_LABEL: 9670 break; 9671 default: 9672 return x; 9673 } 9674 9675 /* Get the first label in the list of labels for the same constant 9676 and delete another labels in the list. */ 9677 lab = x; 9678 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn)) 9679 { 9680 if (!LABEL_P (insn) 9681 || LABEL_REFS (insn) != NEXT_INSN (insn)) 9682 break; 9683 lab = insn; 9684 } 9685 9686 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn)) 9687 INSN_DELETED_P (insn) = 1; 9688 9689 /* Mark constants in a window. */ 9690 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn)) 9691 { 9692 if (!NONJUMP_INSN_P (insn)) 9693 continue; 9694 9695 pattern = PATTERN (insn); 9696 if (GET_CODE (pattern) != UNSPEC_VOLATILE) 9697 continue; 9698 9699 switch (XINT (pattern, 1)) 9700 { 9701 case UNSPECV_CONST2: 9702 case UNSPECV_CONST4: 9703 case UNSPECV_CONST8: 9704 XVECEXP (pattern, 0, 1) = const1_rtx; 9705 break; 9706 case UNSPECV_WINDOW_END: 9707 if (XVECEXP (pattern, 0, 0) == x) 9708 return lab; 9709 break; 9710 case UNSPECV_CONST_END: 9711 return lab; 9712 default: 9713 break; 9714 } 9715 } 9716 9717 return lab; 9718 } 9719 9720 /* Return true if it's possible to redirect BRANCH1 to the destination 9721 of an unconditional jump BRANCH2. We only want to do this if the 9722 resulting branch will have a short displacement. */ 9723 int 9724 sh_can_redirect_branch (rtx branch1, rtx branch2) 9725 { 9726 if (flag_expensive_optimizations && simplejump_p (branch2)) 9727 { 9728 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0); 9729 rtx insn; 9730 int distance; 9731 9732 for (distance = 0, insn = NEXT_INSN (branch1); 9733 insn && distance < 256; 9734 insn = PREV_INSN (insn)) 9735 { 9736 if (insn == dest) 9737 return 1; 9738 else 9739 distance += get_attr_length (insn); 9740 } 9741 for (distance = 0, insn = NEXT_INSN (branch1); 9742 insn && distance < 256; 9743 insn = NEXT_INSN (insn)) 9744 { 9745 if (insn == dest) 9746 return 1; 9747 else 9748 distance += get_attr_length (insn); 9749 } 9750 } 9751 return 0; 9752 } 9753 9754 /* Return nonzero if register old_reg can be renamed to register new_reg. */ 9755 int 9756 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED, 9757 unsigned int new_reg) 9758 { 9759 /* Interrupt functions can only use registers that have already been 9760 saved by the prologue, even if they would normally be 9761 call-clobbered. */ 9762 9763 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg)) 9764 return 0; 9765 9766 return 1; 9767 } 9768 9769 /* Function to update the integer COST 9770 based on the relationship between INSN that is dependent on 9771 DEP_INSN through the dependence LINK. The default is to make no 9772 adjustment to COST. This can be used for example to specify to 9773 the scheduler that an output- or anti-dependence does not incur 9774 the same cost as a data-dependence. 
The return value should be 9775 the new value for COST. */ 9776 static int 9777 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost) 9778 { 9779 rtx reg, use_pat; 9780 9781 if (TARGET_SHMEDIA) 9782 { 9783 /* On SHmedia, if the dependence is an anti-dependence or 9784 output-dependence, there is no cost. */ 9785 if (REG_NOTE_KIND (link) != 0) 9786 { 9787 /* However, dependencies between target register loads and 9788 uses of the register in a subsequent block that are separated 9789 by a conditional branch are not modelled - we have to do with 9790 the anti-dependency between the target register load and the 9791 conditional branch that ends the current block. */ 9792 if (REG_NOTE_KIND (link) == REG_DEP_ANTI 9793 && GET_CODE (PATTERN (dep_insn)) == SET 9794 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA 9795 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA) 9796 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA) 9797 { 9798 int orig_cost = cost; 9799 rtx note = find_reg_note (insn, REG_BR_PROB, 0); 9800 rtx target = ((! note 9801 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE) 9802 ? insn : JUMP_LABEL (insn)); 9803 /* On the likely path, the branch costs 1, on the unlikely path, 9804 it costs 3. */ 9805 cost--; 9806 do 9807 target = next_active_insn (target); 9808 while (target && ! flow_dependent_p (target, dep_insn) 9809 && --cost > 0); 9810 /* If two branches are executed in immediate succession, with the 9811 first branch properly predicted, this causes a stall at the 9812 second branch, hence we won't need the target for the 9813 second branch for two cycles after the launch of the first 9814 branch. */ 9815 if (cost > orig_cost - 2) 9816 cost = orig_cost - 2; 9817 } 9818 else 9819 cost = 0; 9820 } 9821 9822 else if (get_attr_is_mac_media (insn) 9823 && get_attr_is_mac_media (dep_insn)) 9824 cost = 1; 9825 9826 else if (! reload_completed 9827 && GET_CODE (PATTERN (insn)) == SET 9828 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT 9829 && GET_CODE (PATTERN (dep_insn)) == SET 9830 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode) 9831 && cost < 4) 9832 cost = 4; 9833 /* Schedule the ptabs for a casesi_jump_media in preference to stuff 9834 that is needed at the target. */ 9835 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA 9836 && ! flow_dependent_p (insn, dep_insn)) 9837 cost--; 9838 } 9839 else if (REG_NOTE_KIND (link) == 0) 9840 { 9841 enum attr_type type; 9842 rtx dep_set; 9843 9844 if (recog_memoized (insn) < 0 9845 || recog_memoized (dep_insn) < 0) 9846 return cost; 9847 9848 dep_set = single_set (dep_insn); 9849 9850 /* The latency that we specify in the scheduling description refers 9851 to the actual output, not to an auto-increment register; for that, 9852 the latency is one. */ 9853 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1) 9854 { 9855 rtx set = single_set (insn); 9856 9857 if (set 9858 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set)) 9859 && (!MEM_P (SET_DEST (set)) 9860 || !reg_mentioned_p (SET_DEST (dep_set), 9861 XEXP (SET_DEST (set), 0)))) 9862 cost = 1; 9863 } 9864 /* The only input for a call that is timing-critical is the 9865 function's address. */ 9866 if (CALL_P (insn)) 9867 { 9868 rtx call = PATTERN (insn); 9869 9870 if (GET_CODE (call) == PARALLEL) 9871 call = XVECEXP (call, 0 ,0); 9872 if (GET_CODE (call) == SET) 9873 call = SET_SRC (call); 9874 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0)) 9875 /* sibcalli_thunk uses a symbol_ref in an unspec. 
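Accept that form here as well; it is not a register, so it cannot be set by DEP_INSN. 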
*/ 9876 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC 9877 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))) 9878 cost -= TARGET_SH4_300 ? 3 : 6; 9879 } 9880 /* Likewise, the most timing critical input for an sfuncs call 9881 is the function address. However, sfuncs typically start 9882 using their arguments pretty quickly. 9883 Assume a four cycle delay for SH4 before they are needed. 9884 Cached ST40-300 calls are quicker, so assume only a one 9885 cycle delay there. 9886 ??? Maybe we should encode the delays till input registers 9887 are needed by sfuncs into the sfunc call insn. */ 9888 /* All sfunc calls are parallels with at least four components. 9889 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */ 9890 else if (GET_CODE (PATTERN (insn)) == PARALLEL 9891 && XVECLEN (PATTERN (insn), 0) >= 4 9892 && (reg = sfunc_uses_reg (insn))) 9893 { 9894 if (! reg_set_p (reg, dep_insn)) 9895 cost -= TARGET_SH4_300 ? 1 : 4; 9896 } 9897 if (TARGET_HARD_SH4 && !TARGET_SH4_300) 9898 { 9899 enum attr_type dep_type = get_attr_type (dep_insn); 9900 9901 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD) 9902 cost--; 9903 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI) 9904 && (type = get_attr_type (insn)) != TYPE_CALL 9905 && type != TYPE_SFUNC) 9906 cost--; 9907 /* When the preceding instruction loads the shift amount of 9908 the following SHAD/SHLD, the latency of the load is increased 9909 by 1 cycle. */ 9910 if (get_attr_type (insn) == TYPE_DYN_SHIFT 9911 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES 9912 && reg_overlap_mentioned_p (SET_DEST (dep_set), 9913 XEXP (SET_SRC (single_set (insn)), 9914 1))) 9915 cost++; 9916 /* When an LS group instruction with a latency of less than 9917 3 cycles is followed by a double-precision floating-point 9918 instruction, FIPR, or FTRV, the latency of the first 9919 instruction is increased to 3 cycles. */ 9920 else if (cost < 3 9921 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP 9922 && get_attr_dfp_comp (insn) == DFP_COMP_YES) 9923 cost = 3; 9924 /* The lsw register of a double-precision computation is ready one 9925 cycle earlier. */ 9926 else if (reload_completed 9927 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES 9928 && (use_pat = single_set (insn)) 9929 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))), 9930 SET_SRC (use_pat))) 9931 cost -= 1; 9932 9933 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES 9934 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES) 9935 cost -= 1; 9936 } 9937 else if (TARGET_SH4_300) 9938 { 9939 /* Stores need their input register two cycles later. */ 9940 if (dep_set && cost >= 1 9941 && ((type = get_attr_type (insn)) == TYPE_STORE 9942 || type == TYPE_PSTORE 9943 || type == TYPE_FSTORE || type == TYPE_MAC_MEM)) 9944 { 9945 rtx set = single_set (insn); 9946 9947 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0)) 9948 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set))) 9949 { 9950 cost -= 2; 9951 /* But don't reduce the cost below 1 if the address depends 9952 on a side effect of dep_insn. */ 9953 if (cost < 1 9954 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn)) 9955 cost = 1; 9956 } 9957 } 9958 } 9959 } 9960 /* An anti-dependence penalty of two applies if the first insn is a double 9961 precision fadd / fsub / fmul. 
*/ 9962 else if (!TARGET_SH4_300 9963 && REG_NOTE_KIND (link) == REG_DEP_ANTI 9964 && recog_memoized (dep_insn) >= 0 9965 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH 9966 || get_attr_type (dep_insn) == TYPE_DFP_MUL) 9967 /* A lot of alleged anti-flow dependences are fake, 9968 so check this one is real. */ 9969 && flow_dependent_p (dep_insn, insn)) 9970 cost = 2; 9971 9972 return cost; 9973 } 9974 9975 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check 9976 if DEP_INSN is anti-flow dependent on INSN. */ 9977 static int 9978 flow_dependent_p (rtx insn, rtx dep_insn) 9979 { 9980 rtx tmp = PATTERN (insn); 9981 9982 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp); 9983 return tmp == NULL_RTX; 9984 } 9985 9986 /* A helper function for flow_dependent_p called through note_stores. */ 9987 static void 9988 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data) 9989 { 9990 rtx * pinsn = (rtx *) data; 9991 9992 if (*pinsn && reg_referenced_p (x, *pinsn)) 9993 *pinsn = NULL_RTX; 9994 } 9995 9996 /* For use by sh_allocate_initial_value. Note that sh.md contains some 9997 'special function' patterns (type sfunc) that clobber pr, but that 9998 do not look like function calls to leaf_function_p. Hence we must 9999 do this extra check. */ 10000 static int 10001 sh_pr_n_sets (void) 10002 { 10003 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG); 10004 } 10005 10006 /* Return where to allocate pseudo for a given hard register initial 10007 value. */ 10008 static rtx 10009 sh_allocate_initial_value (rtx hard_reg) 10010 { 10011 rtx x; 10012 10013 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)) 10014 { 10015 if (current_function_is_leaf 10016 && ! sh_pr_n_sets () 10017 && ! (TARGET_SHCOMPACT 10018 && ((crtl->args.info.call_cookie 10019 & ~ CALL_COOKIE_RET_TRAMP (1)) 10020 || crtl->saves_all_registers))) 10021 x = hard_reg; 10022 else 10023 x = gen_frame_mem (Pmode, return_address_pointer_rtx); 10024 } 10025 else 10026 x = NULL_RTX; 10027 10028 return x; 10029 } 10030 10031 /* This function returns "2" to indicate dual issue for the SH4 10032 processor. To be used by the DFA pipeline description. */ 10033 static int 10034 sh_issue_rate (void) 10035 { 10036 if (TARGET_SUPERSCALAR) 10037 return 2; 10038 else 10039 return 1; 10040 } 10041 10042 /* Functions for ready queue reordering for sched1. */ 10043 10044 /* Get weight for mode for a set x. */ 10045 static short 10046 find_set_regmode_weight (rtx x, enum machine_mode mode) 10047 { 10048 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode)) 10049 return 1; 10050 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode)) 10051 { 10052 if (REG_P (SET_DEST (x))) 10053 { 10054 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x))) 10055 return 1; 10056 else 10057 return 0; 10058 } 10059 return 1; 10060 } 10061 return 0; 10062 } 10063 10064 /* Get regmode weight for insn. */ 10065 static short 10066 find_insn_regmode_weight (rtx insn, enum machine_mode mode) 10067 { 10068 short reg_weight = 0; 10069 rtx x; 10070 10071 /* Increment weight for each register born here. */ 10072 x = PATTERN (insn); 10073 reg_weight += find_set_regmode_weight (x, mode); 10074 if (GET_CODE (x) == PARALLEL) 10075 { 10076 int j; 10077 for (j = XVECLEN (x, 0) - 1; j >= 0; j--) 10078 { 10079 x = XVECEXP (PATTERN (insn), 0, j); 10080 reg_weight += find_set_regmode_weight (x, mode); 10081 } 10082 } 10083 /* Decrement weight for each register that dies here. 
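A death is indicated by a REG_DEAD or REG_UNUSED note, which the loop below scans for. 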
*/ 10084 for (x = REG_NOTES (insn); x; x = XEXP (x, 1)) 10085 { 10086 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED) 10087 { 10088 rtx note = XEXP (x, 0); 10089 if (REG_P (note) && GET_MODE (note) == mode) 10090 reg_weight--; 10091 } 10092 } 10093 return reg_weight; 10094 } 10095 10096 /* Calculate regmode weights for all insns of a basic block. */ 10097 static void 10098 find_regmode_weight (basic_block b, enum machine_mode mode) 10099 { 10100 rtx insn, next_tail, head, tail; 10101 10102 get_ebb_head_tail (b, b, &head, &tail); 10103 next_tail = NEXT_INSN (tail); 10104 10105 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) 10106 { 10107 /* Handle register life information. */ 10108 if (!INSN_P (insn)) 10109 continue; 10110 10111 if (mode == SFmode) 10112 INSN_REGMODE_WEIGHT (insn, mode) = 10113 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode); 10114 else if (mode == SImode) 10115 INSN_REGMODE_WEIGHT (insn, mode) = 10116 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode); 10117 } 10118 } 10119 10120 /* Comparison function for ready queue sorting. */ 10121 static int 10122 rank_for_reorder (const void *x, const void *y) 10123 { 10124 rtx tmp = *(const rtx *) y; 10125 rtx tmp2 = *(const rtx *) x; 10126 10127 /* The insn in a schedule group should be issued the first. */ 10128 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2)) 10129 return SCHED_GROUP_P (tmp2) ? 1 : -1; 10130 10131 /* If insns are equally good, sort by INSN_LUID (original insn order), This 10132 minimizes instruction movement, thus minimizing sched's effect on 10133 register pressure. */ 10134 return INSN_LUID (tmp) - INSN_LUID (tmp2); 10135 } 10136 10137 /* Resort the array A in which only element at index N may be out of order. */ 10138 static void 10139 swap_reorder (rtx *a, int n) 10140 { 10141 rtx insn = a[n - 1]; 10142 int i = n - 2; 10143 10144 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0) 10145 { 10146 a[i + 1] = a[i]; 10147 i -= 1; 10148 } 10149 a[i + 1] = insn; 10150 } 10151 10152 #define SCHED_REORDER(READY, N_READY) \ 10153 do \ 10154 { \ 10155 if ((N_READY) == 2) \ 10156 swap_reorder (READY, N_READY); \ 10157 else if ((N_READY) > 2) \ 10158 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \ 10159 } \ 10160 while (0) 10161 10162 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER 10163 macro. */ 10164 static void 10165 ready_reorder (rtx *ready, int nready) 10166 { 10167 SCHED_REORDER (ready, nready); 10168 } 10169 10170 /* Count life regions of r0 for a block. 
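The return value is the number of places where r0 becomes live (counting the block entry if it is live there) minus the number of insns with a REG_DEAD note for r0. 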
*/ 10171 static int 10172 find_r0_life_regions (basic_block b) 10173 { 10174 rtx end, insn; 10175 rtx pset; 10176 rtx r0_reg; 10177 int live; 10178 int set; 10179 int death = 0; 10180 10181 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG)) 10182 { 10183 set = 1; 10184 live = 1; 10185 } 10186 else 10187 { 10188 set = 0; 10189 live = 0; 10190 } 10191 10192 insn = BB_HEAD (b); 10193 end = BB_END (b); 10194 r0_reg = gen_rtx_REG (SImode, R0_REG); 10195 while (1) 10196 { 10197 if (INSN_P (insn)) 10198 { 10199 if (find_regno_note (insn, REG_DEAD, R0_REG)) 10200 { 10201 death++; 10202 live = 0; 10203 } 10204 if (!live 10205 && (pset = single_set (insn)) 10206 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset)) 10207 && !find_regno_note (insn, REG_UNUSED, R0_REG)) 10208 { 10209 set++; 10210 live = 1; 10211 } 10212 } 10213 if (insn == end) 10214 break; 10215 insn = NEXT_INSN (insn); 10216 } 10217 return set - death; 10218 } 10219 10220 /* Calculate regmode weights for all insns of all basic block. */ 10221 static void 10222 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED, 10223 int verbose ATTRIBUTE_UNUSED, 10224 int old_max_uid) 10225 { 10226 basic_block b; 10227 10228 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short)); 10229 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short)); 10230 r0_life_regions = 0; 10231 10232 FOR_EACH_BB_REVERSE (b) 10233 { 10234 find_regmode_weight (b, SImode); 10235 find_regmode_weight (b, SFmode); 10236 if (!reload_completed) 10237 r0_life_regions += find_r0_life_regions (b); 10238 } 10239 10240 CURR_REGMODE_PRESSURE (SImode) = 0; 10241 CURR_REGMODE_PRESSURE (SFmode) = 0; 10242 10243 } 10244 10245 /* Cleanup. */ 10246 static void 10247 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED, 10248 int verbose ATTRIBUTE_UNUSED) 10249 { 10250 if (regmode_weight[0]) 10251 { 10252 free (regmode_weight[0]); 10253 regmode_weight[0] = NULL; 10254 } 10255 if (regmode_weight[1]) 10256 { 10257 free (regmode_weight[1]); 10258 regmode_weight[1] = NULL; 10259 } 10260 } 10261 10262 /* The scalar modes supported differs from the default version in TImode 10263 for 32-bit SHMEDIA. */ 10264 static bool 10265 sh_scalar_mode_supported_p (enum machine_mode mode) 10266 { 10267 if (TARGET_SHMEDIA32 && mode == TImode) 10268 return false; 10269 10270 return default_scalar_mode_supported_p (mode); 10271 } 10272 10273 /* Cache the can_issue_more so that we can return it from reorder2. Also, 10274 keep count of register pressures on SImode and SFmode. */ 10275 static int 10276 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED, 10277 int sched_verbose ATTRIBUTE_UNUSED, 10278 rtx insn, 10279 int can_issue_more) 10280 { 10281 if (GET_CODE (PATTERN (insn)) != USE 10282 && GET_CODE (PATTERN (insn)) != CLOBBER) 10283 cached_can_issue_more = can_issue_more - 1; 10284 else 10285 cached_can_issue_more = can_issue_more; 10286 10287 if (reload_completed) 10288 return cached_can_issue_more; 10289 10290 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode); 10291 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode); 10292 10293 return cached_can_issue_more; 10294 } 10295 10296 static void 10297 sh_md_init (FILE *dump ATTRIBUTE_UNUSED, 10298 int verbose ATTRIBUTE_UNUSED, 10299 int veclen ATTRIBUTE_UNUSED) 10300 { 10301 CURR_REGMODE_PRESSURE (SImode) = 0; 10302 CURR_REGMODE_PRESSURE (SFmode) = 0; 10303 } 10304 10305 /* Some magic numbers. */ 10306 /* Pressure on register r0 can lead to spill failures. 
so avoid sched1 for 10307 functions that already have high pressure on r0. */ 10308 #define R0_MAX_LIFE_REGIONS 2 10309 /* Register Pressure thresholds for SImode and SFmode registers. */ 10310 #define SIMODE_MAX_WEIGHT 5 10311 #define SFMODE_MAX_WEIGHT 10 10312 10313 /* Return true if the pressure is high for MODE. */ 10314 static short 10315 high_pressure (enum machine_mode mode) 10316 { 10317 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for 10318 functions that already have high pressure on r0. */ 10319 if (r0_life_regions >= R0_MAX_LIFE_REGIONS) 10320 return 1; 10321 10322 if (mode == SFmode) 10323 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT); 10324 else 10325 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT); 10326 } 10327 10328 /* Reorder ready queue if register pressure is high. */ 10329 static int 10330 sh_reorder (FILE *dump ATTRIBUTE_UNUSED, 10331 int sched_verbose ATTRIBUTE_UNUSED, 10332 rtx *ready, 10333 int *n_readyp, 10334 int clock_var ATTRIBUTE_UNUSED) 10335 { 10336 if (reload_completed) 10337 return sh_issue_rate (); 10338 10339 if (high_pressure (SFmode) || high_pressure (SImode)) 10340 { 10341 ready_reorder (ready, *n_readyp); 10342 } 10343 10344 return sh_issue_rate (); 10345 } 10346 10347 /* Skip cycles if the current register pressure is high. */ 10348 static int 10349 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED, 10350 int sched_verbose ATTRIBUTE_UNUSED, 10351 rtx *ready ATTRIBUTE_UNUSED, 10352 int *n_readyp ATTRIBUTE_UNUSED, 10353 int clock_var ATTRIBUTE_UNUSED) 10354 { 10355 if (reload_completed) 10356 return cached_can_issue_more; 10357 10358 if (high_pressure(SFmode) || high_pressure (SImode)) 10359 skip_cycles = 1; 10360 10361 return cached_can_issue_more; 10362 } 10363 10364 /* Skip cycles without sorting the ready queue. This will move insn from 10365 Q->R. If this is the last cycle we are skipping; allow sorting of ready 10366 queue by sh_reorder. */ 10367 10368 /* Generally, skipping these many cycles are sufficient for all insns to move 10369 from Q -> R. */ 10370 #define MAX_SKIPS 8 10371 10372 static int 10373 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED, 10374 int sched_verbose ATTRIBUTE_UNUSED, 10375 rtx insn ATTRIBUTE_UNUSED, 10376 int last_clock_var, 10377 int clock_var, 10378 int *sort_p) 10379 { 10380 if (reload_completed) 10381 return 0; 10382 10383 if (skip_cycles) 10384 { 10385 if ((clock_var - last_clock_var) < MAX_SKIPS) 10386 { 10387 *sort_p = 0; 10388 return 1; 10389 } 10390 /* If this is the last cycle we are skipping, allow reordering of R. */ 10391 if ((clock_var - last_clock_var) == MAX_SKIPS) 10392 { 10393 *sort_p = 1; 10394 return 1; 10395 } 10396 } 10397 10398 skip_cycles = 0; 10399 10400 return 0; 10401 } 10402 10403 /* SHmedia requires registers for branches, so we can't generate new 10404 branches past reload. */ 10405 static bool 10406 sh_cannot_modify_jumps_p (void) 10407 { 10408 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed)); 10409 } 10410 10411 static enum reg_class 10412 sh_target_reg_class (void) 10413 { 10414 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS; 10415 } 10416 10417 static bool 10418 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen) 10419 { 10420 HARD_REG_SET dummy; 10421 #if 0 10422 rtx insn; 10423 #endif 10424 10425 if (! shmedia_space_reserved_for_target_registers) 10426 return 0; 10427 if (after_prologue_epilogue_gen && ! 
TARGET_SAVE_ALL_TARGET_REGS) 10428 return 0; 10429 if (calc_live_regs (&dummy) >= 6 * 8) 10430 return 1; 10431 return 0; 10432 } 10433 10434 static bool 10435 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED) 10436 { 10437 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type)); 10438 } 10439 10440 /* 10441 On the SH1..SH4, the trampoline looks like 10442 2 0002 D202 mov.l l2,r2 10443 1 0000 D301 mov.l l1,r3 10444 3 0004 422B jmp @r2 10445 4 0006 0009 nop 10446 5 0008 00000000 l1: .long area 10447 6 000c 00000000 l2: .long function 10448 10449 SH5 (compact) uses r1 instead of r3 for the static chain. */ 10450 10451 10452 /* Emit RTL insns to initialize the variable parts of a trampoline. 10453 FNADDR is an RTX for the address of the function's pure code. 10454 CXT is an RTX for the static chain value for the function. */ 10455 10456 static void 10457 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt) 10458 { 10459 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 10460 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0)); 10461 10462 if (TARGET_SHMEDIA64) 10463 { 10464 rtx tramp_templ; 10465 int fixed_len; 10466 10467 rtx movi1 = GEN_INT (0xcc000010); 10468 rtx shori1 = GEN_INT (0xc8000010); 10469 rtx src, dst; 10470 10471 /* The following trampoline works within a +- 128 KB range for cxt: 10472 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0; 10473 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0 10474 gettr tr1,r1; blink tr0,r63 */ 10475 /* Address rounding makes it hard to compute the exact bounds of the 10476 offset for this trampoline, but we have a rather generous offset 10477 range, so frame_offset should do fine as an upper bound. */ 10478 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000) 10479 { 10480 /* ??? could optimize this trampoline initialization 10481 by writing DImode words with two insns each. 
*/ 10482 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00)); 10483 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp); 10484 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2)); 10485 insn = gen_rtx_AND (DImode, insn, mask); 10486 /* Or in ptb/u .,tr1 pattern */ 10487 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode)); 10488 insn = force_operand (insn, NULL_RTX); 10489 insn = gen_lowpart (SImode, insn); 10490 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn); 10491 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38)); 10492 insn = gen_rtx_AND (DImode, insn, mask); 10493 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX); 10494 insn = gen_lowpart (SImode, insn); 10495 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn); 10496 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22)); 10497 insn = gen_rtx_AND (DImode, insn, mask); 10498 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX); 10499 insn = gen_lowpart (SImode, insn); 10500 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn); 10501 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6)); 10502 insn = gen_rtx_AND (DImode, insn, mask); 10503 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX); 10504 insn = gen_lowpart (SImode, insn); 10505 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn); 10506 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10)); 10507 insn = gen_rtx_AND (DImode, insn, mask); 10508 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX); 10509 insn = gen_lowpart (SImode, insn); 10510 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn); 10511 emit_move_insn (adjust_address (tramp_mem, SImode, 20), 10512 GEN_INT (0x6bf10600)); 10513 emit_move_insn (adjust_address (tramp_mem, SImode, 24), 10514 GEN_INT (0x4415fc10)); 10515 emit_move_insn (adjust_address (tramp_mem, SImode, 28), 10516 GEN_INT (0x4401fff0)); 10517 emit_insn (gen_ic_invalidate_line (tramp)); 10518 return; 10519 } 10520 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline"); 10521 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode); 10522 10523 tramp_templ = gen_datalabel_ref (tramp_templ); 10524 dst = tramp_mem; 10525 src = gen_const_mem (BLKmode, tramp_templ); 10526 set_mem_align (dst, 256); 10527 set_mem_align (src, 64); 10528 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL); 10529 10530 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr); 10531 emit_move_insn (adjust_address (tramp_mem, Pmode, 10532 fixed_len + GET_MODE_SIZE (Pmode)), 10533 cxt); 10534 emit_insn (gen_ic_invalidate_line (tramp)); 10535 return; 10536 } 10537 else if (TARGET_SHMEDIA) 10538 { 10539 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0 10540 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */ 10541 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode); 10542 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode); 10543 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated, 10544 rotated 10 right, and higher 16 bit of every 32 selected. 
*/ 10545 rtx movishori 10546 = force_reg (V2HImode, (simplify_gen_subreg 10547 (V2HImode, GEN_INT (0x4330432), SImode, 0))); 10548 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600)); 10549 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0)); 10550 10551 fnaddr = force_reg (SImode, fnaddr); 10552 cxt = force_reg (SImode, cxt); 10553 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0), 10554 gen_rtx_SUBREG (V2HImode, fnaddr, 0), 10555 movishori)); 10556 emit_insn (gen_rotrdi3_mextr (quad0, quad0, 10557 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56))); 10558 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx)); 10559 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0); 10560 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0), 10561 gen_rtx_SUBREG (V2HImode, cxt, 0), 10562 movishori)); 10563 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload, 10564 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56))); 10565 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx)); 10566 if (TARGET_LITTLE_ENDIAN) 10567 { 10568 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload)); 10569 emit_insn (gen_mextr4 (quad2, cxtload, blink)); 10570 } 10571 else 10572 { 10573 emit_insn (gen_mextr4 (quad1, cxtload, ptabs)); 10574 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload)); 10575 } 10576 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1); 10577 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2); 10578 emit_insn (gen_ic_invalidate_line (tramp)); 10579 return; 10580 } 10581 else if (TARGET_SHCOMPACT) 10582 { 10583 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr)); 10584 return; 10585 } 10586 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), 10587 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301, 10588 SImode)); 10589 emit_move_insn (adjust_address (tramp_mem, SImode, 4), 10590 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009, 10591 SImode)); 10592 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt); 10593 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr); 10594 if (TARGET_HARVARD) 10595 { 10596 if (!TARGET_INLINE_IC_INVALIDATE 10597 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE)) 10598 emit_library_call (function_symbol (NULL, "__ic_invalidate", 10599 FUNCTION_ORDINARY), 10600 LCT_NORMAL, VOIDmode, 1, tramp, SImode); 10601 else 10602 emit_insn (gen_ic_invalidate_line (tramp)); 10603 } 10604 } 10605 10606 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */ 10607 10608 static rtx 10609 sh_trampoline_adjust_address (rtx tramp) 10610 { 10611 if (TARGET_SHMEDIA) 10612 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx, 10613 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN); 10614 return tramp; 10615 } 10616 10617 /* FIXME: This is overly conservative. A SHcompact function that 10618 receives arguments ``by reference'' will have them stored in its 10619 own stack frame, so it must not pass pointers or references to 10620 these arguments to other functions by means of sibling calls. */ 10621 /* If PIC, we cannot make sibling calls to global functions 10622 because the PLT requires r12 to be live. */ 10623 static bool 10624 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 10625 { 10626 return (1 10627 && (! TARGET_SHCOMPACT 10628 || crtl->args.info.stack_regs == 0) 10629 && ! sh_cfun_interrupt_handler_p () 10630 && (! flag_pic 10631 || (decl && ! 
TREE_PUBLIC (decl)) 10632 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT))); 10633 } 10634 10635 /* Machine specific built-in functions. */ 10636 10637 struct builtin_description 10638 { 10639 const enum insn_code icode; 10640 const char *const name; 10641 int signature; 10642 tree fndecl; 10643 }; 10644 10645 /* describe number and signedness of arguments; arg[0] == result 10646 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */ 10647 /* 9: 64-bit pointer, 10: 32-bit pointer */ 10648 static const char signature_args[][4] = 10649 { 10650 #define SH_BLTIN_V2SI2 0 10651 { 4, 4 }, 10652 #define SH_BLTIN_V4HI2 1 10653 { 4, 4 }, 10654 #define SH_BLTIN_V2SI3 2 10655 { 4, 4, 4 }, 10656 #define SH_BLTIN_V4HI3 3 10657 { 4, 4, 4 }, 10658 #define SH_BLTIN_V8QI3 4 10659 { 4, 4, 4 }, 10660 #define SH_BLTIN_MAC_HISI 5 10661 { 1, 4, 4, 1 }, 10662 #define SH_BLTIN_SH_HI 6 10663 { 4, 4, 1 }, 10664 #define SH_BLTIN_SH_SI 7 10665 { 4, 4, 1 }, 10666 #define SH_BLTIN_V4HI2V2SI 8 10667 { 4, 4, 4 }, 10668 #define SH_BLTIN_V4HI2V8QI 9 10669 { 4, 4, 4 }, 10670 #define SH_BLTIN_SISF 10 10671 { 4, 2 }, 10672 #define SH_BLTIN_LDUA_L 11 10673 { 2, 10 }, 10674 #define SH_BLTIN_LDUA_Q 12 10675 { 1, 10 }, 10676 #define SH_BLTIN_STUA_L 13 10677 { 0, 10, 2 }, 10678 #define SH_BLTIN_STUA_Q 14 10679 { 0, 10, 1 }, 10680 #define SH_BLTIN_LDUA_L64 15 10681 { 2, 9 }, 10682 #define SH_BLTIN_LDUA_Q64 16 10683 { 1, 9 }, 10684 #define SH_BLTIN_STUA_L64 17 10685 { 0, 9, 2 }, 10686 #define SH_BLTIN_STUA_Q64 18 10687 { 0, 9, 1 }, 10688 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19 10689 #define SH_BLTIN_2 19 10690 #define SH_BLTIN_SU 19 10691 { 1, 2 }, 10692 #define SH_BLTIN_3 20 10693 #define SH_BLTIN_SUS 20 10694 { 2, 2, 1 }, 10695 #define SH_BLTIN_PSSV 21 10696 { 0, 8, 2, 2 }, 10697 #define SH_BLTIN_XXUU 22 10698 #define SH_BLTIN_UUUU 22 10699 { 1, 1, 1, 1 }, 10700 #define SH_BLTIN_PV 23 10701 { 0, 8 }, 10702 }; 10703 /* mcmv: operands considered unsigned. */ 10704 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */ 10705 /* mperm: control value considered unsigned int. */ 10706 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */ 10707 /* mshards_q: returns signed short. */ 10708 /* nsb: takes long long arg, returns unsigned char. 
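In terms of the signature table above, this is SH_BLTIN_SU = { 1, 2 }: an unsigned result computed from a single signed argument. 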
*/ 10709 static struct builtin_description bdesc[] = 10710 { 10711 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 }, 10712 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 }, 10713 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 }, 10714 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 }, 10715 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 }, 10716 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 }, 10717 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 }, 10718 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 }, 10719 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 }, 10720 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 }, 10721 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 }, 10722 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 }, 10723 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 }, 10724 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 }, 10725 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 }, 10726 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 }, 10727 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 }, 10728 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 }, 10729 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 }, 10730 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 }, 10731 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 }, 10732 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 }, 10733 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 }, 10734 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 }, 10735 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 }, 10736 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 }, 10737 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 }, 10738 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 }, 10739 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 }, 10740 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 }, 10741 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 }, 10742 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 }, 10743 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 }, 10744 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 }, 10745 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 }, 10746 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 }, 10747 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 }, 10748 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 }, 10749 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 }, 10750 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 }, 10751 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 }, 10752 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 }, 10753 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 }, 10754 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 }, 10755 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 }, 10756 { 
CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 }, 10757 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 }, 10758 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 }, 10759 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 }, 10760 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 }, 10761 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 }, 10762 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 }, 10763 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 }, 10764 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 }, 10765 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 }, 10766 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 }, 10767 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 }, 10768 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 }, 10769 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 }, 10770 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 }, 10771 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 }, 10772 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 }, 10773 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 }, 10774 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 }, 10775 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 }, 10776 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 }, 10777 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 }, 10778 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 }, 10779 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 }, 10780 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 }, 10781 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 }, 10782 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 }, 10783 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 }, 10784 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 }, 10785 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 }, 10786 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 }, 10787 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 }, 10788 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 }, 10789 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 }, 10790 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 }, 10791 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 }, 10792 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 }, 10793 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 }, 10794 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 }, 10795 }; 10796 10797 static void 10798 sh_media_init_builtins (void) 10799 { 10800 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES]; 10801 struct builtin_description *d; 10802 10803 memset (shared, 0, sizeof shared); 10804 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++) 10805 { 10806 tree type, arg_type = 0; 10807 int signature = d->signature; 10808 int i; 10809 10810 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature]) 10811 type = shared[signature]; 10812 else 10813 { 10814 int has_result = signature_args[signature][0] != 0; 10815 10816 if ((signature_args[signature][1] & 8) 10817 && 
(((signature_args[signature][1] & 1) && TARGET_SHMEDIA32) 10818 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64))) 10819 continue; 10820 if (! TARGET_FPU_ANY 10821 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode)) 10822 continue; 10823 type = void_list_node; 10824 for (i = 3; ; i--) 10825 { 10826 int arg = signature_args[signature][i]; 10827 int opno = i - 1 + has_result; 10828 10829 if (arg & 8) 10830 arg_type = ptr_type_node; 10831 else if (arg) 10832 arg_type = (*lang_hooks.types.type_for_mode) 10833 (insn_data[d->icode].operand[opno].mode, 10834 (arg & 1)); 10835 else if (i) 10836 continue; 10837 else 10838 arg_type = void_type_node; 10839 if (i == 0) 10840 break; 10841 type = tree_cons (NULL_TREE, arg_type, type); 10842 } 10843 type = build_function_type (arg_type, type); 10844 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES) 10845 shared[signature] = type; 10846 } 10847 d->fndecl = 10848 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD, 10849 NULL, NULL_TREE); 10850 } 10851 } 10852 10853 /* Returns the shmedia builtin decl for CODE. */ 10854 10855 static tree 10856 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 10857 { 10858 if (code >= ARRAY_SIZE (bdesc)) 10859 return error_mark_node; 10860 10861 return bdesc[code].fndecl; 10862 } 10863 10864 /* Implements target hook vector_mode_supported_p. */ 10865 bool 10866 sh_vector_mode_supported_p (enum machine_mode mode) 10867 { 10868 if (TARGET_FPU_ANY 10869 && ((mode == V2SFmode) 10870 || (mode == V4SFmode) 10871 || (mode == V16SFmode))) 10872 return true; 10873 10874 else if (TARGET_SHMEDIA 10875 && ((mode == V8QImode) 10876 || (mode == V2HImode) 10877 || (mode == V4HImode) 10878 || (mode == V2SImode))) 10879 return true; 10880 10881 return false; 10882 } 10883 10884 /* Implements target hook dwarf_calling_convention. Return an enum 10885 of dwarf_calling_convention. */ 10886 int 10887 sh_dwarf_calling_convention (const_tree func) 10888 { 10889 if (sh_attr_renesas_p (func)) 10890 return DW_CC_GNU_renesas_sh; 10891 10892 return DW_CC_normal; 10893 } 10894 10895 static void 10896 sh_init_builtins (void) 10897 { 10898 if (TARGET_SHMEDIA) 10899 sh_media_init_builtins (); 10900 } 10901 10902 /* Returns the sh builtin decl for CODE. */ 10903 10904 static tree 10905 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 10906 { 10907 if (TARGET_SHMEDIA) 10908 return sh_media_builtin_decl (code, initialize_p); 10909 10910 return error_mark_node; 10911 } 10912 10913 /* Expand an expression EXP that calls a built-in function, 10914 with result going to TARGET if that's convenient 10915 (and in mode MODE if that's convenient). 10916 SUBTARGET may be used as the target for computing one of EXP's operands. 10917 IGNORE is nonzero if the value is to be ignored. */ 10918 10919 static rtx 10920 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, 10921 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore) 10922 { 10923 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 10924 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 10925 const struct builtin_description *d = &bdesc[fcode]; 10926 enum insn_code icode = d->icode; 10927 int signature = d->signature; 10928 enum machine_mode tmode = VOIDmode; 10929 int nop = 0, i; 10930 rtx op[4]; 10931 rtx pat = 0; 10932 10933 if (signature_args[signature][0]) 10934 { 10935 if (ignore) 10936 return 0; 10937 10938 tmode = insn_data[icode].operand[0].mode; 10939 if (! target 10940 || GET_MODE (target) != tmode 10941 || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) 10942 target = gen_reg_rtx (tmode); 10943 op[nop++] = target; 10944 } 10945 else 10946 target = 0; 10947 10948 for (i = 1; i <= 3; i++, nop++) 10949 { 10950 tree arg; 10951 enum machine_mode opmode, argmode; 10952 tree optype; 10953 10954 if (! signature_args[signature][i]) 10955 break; 10956 arg = CALL_EXPR_ARG (exp, i - 1); 10957 if (arg == error_mark_node) 10958 return const0_rtx; 10959 if (signature_args[signature][i] & 8) 10960 { 10961 opmode = ptr_mode; 10962 optype = ptr_type_node; 10963 } 10964 else 10965 { 10966 opmode = insn_data[icode].operand[nop].mode; 10967 optype = (*lang_hooks.types.type_for_mode) (opmode, 0); 10968 } 10969 argmode = TYPE_MODE (TREE_TYPE (arg)); 10970 if (argmode != opmode) 10971 arg = build1 (NOP_EXPR, optype, arg); 10972 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL); 10973 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode)) 10974 op[nop] = copy_to_mode_reg (opmode, op[nop]); 10975 } 10976 10977 switch (nop) 10978 { 10979 case 1: 10980 pat = (*insn_data[d->icode].genfun) (op[0]); 10981 break; 10982 case 2: 10983 pat = (*insn_data[d->icode].genfun) (op[0], op[1]); 10984 break; 10985 case 3: 10986 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]); 10987 break; 10988 case 4: 10989 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]); 10990 break; 10991 default: 10992 gcc_unreachable (); 10993 } 10994 if (! pat) 10995 return 0; 10996 emit_insn (pat); 10997 return target; 10998 } 10999 11000 void 11001 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1) 11002 { 11003 rtx sel0 = const0_rtx; 11004 rtx sel1 = const1_rtx; 11005 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op; 11006 rtx op = gen_rtx_fmt_e (code, SFmode, op1); 11007 11008 emit_insn ((*fn) (op0, op1, op, sel0, sel0)); 11009 emit_insn ((*fn) (op0, op1, op, sel1, sel1)); 11010 } 11011 11012 void 11013 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2) 11014 { 11015 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2); 11016 11017 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op)); 11018 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op)); 11019 } 11020 11021 /* Return true if hard register REGNO can hold a value of machine-mode MODE. 11022 We can allow any mode in any general register. The special registers 11023 only allow SImode. Don't allow any mode in the PR. 11024 11025 We cannot hold DCmode values in the XD registers because alter_reg 11026 handles subregs of them incorrectly. We could work around this by 11027 spacing the XD registers like the DR registers, but this would require 11028 additional memory in every compilation to hold larger register vectors. 11029 We could hold SFmode / SCmode values in XD registers, but that 11030 would require a tertiary reload when reloading from / to memory, 11031 and a secondary reload to reload from / to general regs; that 11032 seems to be a loosing proposition. 11033 11034 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode, 11035 it won't be ferried through GP registers first. 
*/ 11036 11037 bool 11038 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) 11039 { 11040 if (SPECIAL_REGISTER_P (regno)) 11041 return mode == SImode; 11042 11043 if (regno == FPUL_REG) 11044 return (mode == SImode || mode == SFmode); 11045 11046 if (FP_REGISTER_P (regno) && mode == SFmode) 11047 return true; 11048 11049 if (mode == V2SFmode) 11050 { 11051 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0) 11052 || GENERAL_REGISTER_P (regno))) 11053 return true; 11054 else 11055 return false; 11056 } 11057 11058 if (mode == V4SFmode) 11059 { 11060 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0) 11061 || GENERAL_REGISTER_P (regno)) 11062 return true; 11063 else 11064 return false; 11065 } 11066 11067 if (mode == V16SFmode) 11068 { 11069 if (TARGET_SHMEDIA) 11070 { 11071 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0) 11072 return true; 11073 else 11074 return false; 11075 } 11076 else 11077 return regno == FIRST_XD_REG; 11078 } 11079 11080 if (FP_REGISTER_P (regno)) 11081 { 11082 if (mode == SFmode 11083 || mode == SImode 11084 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode) 11085 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode) 11086 || mode == DCmode 11087 || (TARGET_SHMEDIA 11088 && (mode == DFmode || mode == DImode 11089 || mode == V2SFmode || mode == TImode))) 11090 && ((regno - FIRST_FP_REG) & 1) == 0) 11091 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode 11092 && ((regno - FIRST_FP_REG) & 3) == 0)) 11093 return true; 11094 else 11095 return false; 11096 } 11097 11098 if (XD_REGISTER_P (regno)) 11099 return mode == DFmode; 11100 11101 if (TARGET_REGISTER_P (regno)) 11102 return (mode == DImode || mode == SImode || mode == PDImode); 11103 11104 if (regno == PR_REG) 11105 return mode == SImode; 11106 11107 if (regno == FPSCR_REG) 11108 return mode == PSImode; 11109 11110 /* FIXME. This works around PR target/37633 for -O0. */ 11111 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4) 11112 { 11113 unsigned int n = GET_MODE_SIZE (mode) / 8; 11114 11115 if (regno >= FIRST_GENERAL_REG + 10 - n + 1 11116 && regno <= FIRST_GENERAL_REG + 14) 11117 return false; 11118 } 11119 11120 return true; 11121 } 11122 11123 /* Return the class of registers for which a mode change from FROM to TO 11124 is invalid. */ 11125 bool 11126 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, 11127 enum reg_class rclass) 11128 { 11129 /* We want to enable the use of SUBREGs as a means to 11130 VEC_SELECT a single element of a vector. */ 11131 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode) 11132 return (reg_classes_intersect_p (GENERAL_REGS, rclass)); 11133 11134 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to)) 11135 { 11136 if (TARGET_LITTLE_ENDIAN) 11137 { 11138 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8) 11139 return reg_classes_intersect_p (DF_REGS, rclass); 11140 } 11141 else 11142 { 11143 if (GET_MODE_SIZE (from) < 8) 11144 return reg_classes_intersect_p (DF_HI_REGS, rclass); 11145 } 11146 } 11147 return 0; 11148 } 11149 11150 11151 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times 11152 that label is used. */ 11153 11154 void 11155 sh_mark_label (rtx address, int nuses) 11156 { 11157 if (GOTOFF_P (address)) 11158 { 11159 /* Extract the label or symbol. 
*/ 11160 address = XEXP (address, 0); 11161 if (GET_CODE (address) == PLUS) 11162 address = XEXP (address, 0); 11163 address = XVECEXP (address, 0, 0); 11164 } 11165 if (GET_CODE (address) == LABEL_REF 11166 && LABEL_P (XEXP (address, 0))) 11167 LABEL_NUSES (XEXP (address, 0)) += nuses; 11168 } 11169 11170 /* Compute extra cost of moving data between one register class 11171 and another. */ 11172 11173 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass 11174 uses this information. Hence, the general register <-> floating point 11175 register information here is not used for SFmode. */ 11176 11177 int 11178 sh_register_move_cost (enum machine_mode mode, 11179 enum reg_class srcclass, enum reg_class dstclass) 11180 { 11181 if (dstclass == T_REGS || dstclass == PR_REGS) 11182 return 10; 11183 11184 if (dstclass == MAC_REGS && srcclass == MAC_REGS) 11185 return 4; 11186 11187 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD 11188 && REGCLASS_HAS_FP_REG (srcclass) 11189 && REGCLASS_HAS_FP_REG (dstclass)) 11190 return 4; 11191 11192 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS) 11193 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7); 11194 11195 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS) 11196 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass))) 11197 return 9; 11198 11199 if ((REGCLASS_HAS_FP_REG (dstclass) 11200 && REGCLASS_HAS_GENERAL_REG (srcclass)) 11201 || (REGCLASS_HAS_GENERAL_REG (dstclass) 11202 && REGCLASS_HAS_FP_REG (srcclass))) 11203 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) 11204 * ((GET_MODE_SIZE (mode) + 7) / 8U)); 11205 11206 if ((dstclass == FPUL_REGS 11207 && REGCLASS_HAS_GENERAL_REG (srcclass)) 11208 || (srcclass == FPUL_REGS 11209 && REGCLASS_HAS_GENERAL_REG (dstclass))) 11210 return 5; 11211 11212 if ((dstclass == FPUL_REGS 11213 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS)) 11214 || (srcclass == FPUL_REGS 11215 && (dstclass == PR_REGS || dstclass == MAC_REGS))) 11216 return 7; 11217 11218 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass)) 11219 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass))) 11220 return 20; 11221 11222 /* ??? ptabs faults on (value & 0x3) == 0x3 */ 11223 if (TARGET_SHMEDIA 11224 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS)) 11225 { 11226 if (sh_gettrcost >= 0) 11227 return sh_gettrcost; 11228 else if (!TARGET_PT_FIXED) 11229 return 100; 11230 } 11231 11232 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass)) 11233 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass))) 11234 return 4; 11235 11236 if (TARGET_SHMEDIA 11237 || (TARGET_FMOVD 11238 && ! REGCLASS_HAS_GENERAL_REG (srcclass) 11239 && ! 
REGCLASS_HAS_GENERAL_REG (dstclass))) 11240 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U); 11241 11242 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U); 11243 } 11244 11245 static rtx emit_load_ptr (rtx, rtx); 11246 11247 static rtx 11248 emit_load_ptr (rtx reg, rtx addr) 11249 { 11250 rtx mem = gen_const_mem (ptr_mode, addr); 11251 11252 if (Pmode != ptr_mode) 11253 mem = gen_rtx_SIGN_EXTEND (Pmode, mem); 11254 return emit_move_insn (reg, mem); 11255 } 11256 11257 static void 11258 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, 11259 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 11260 tree function) 11261 { 11262 CUMULATIVE_ARGS cum; 11263 int structure_value_byref = 0; 11264 rtx this_rtx, this_value, sibcall, insns, funexp; 11265 tree funtype = TREE_TYPE (function); 11266 int simple_add = CONST_OK_FOR_ADD (delta); 11267 int did_load = 0; 11268 rtx scratch0, scratch1, scratch2; 11269 unsigned i; 11270 11271 reload_completed = 1; 11272 epilogue_completed = 1; 11273 current_function_uses_only_leaf_regs = 1; 11274 11275 emit_note (NOTE_INSN_PROLOGUE_END); 11276 11277 /* Find the "this" pointer. We have such a wide range of ABIs for the 11278 SH that it's best to do this completely machine independently. 11279 "this" is passed as first argument, unless a structure return pointer 11280 comes first, in which case "this" comes second. */ 11281 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1); 11282 #ifndef PCC_STATIC_STRUCT_RETURN 11283 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 11284 structure_value_byref = 1; 11285 #endif /* not PCC_STATIC_STRUCT_RETURN */ 11286 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0) 11287 { 11288 tree ptype = build_pointer_type (TREE_TYPE (funtype)); 11289 11290 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1); 11291 } 11292 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1); 11293 11294 /* For SHcompact, we only have r0 for a scratch register: r1 is the 11295 static chain pointer (even if you can't have nested virtual functions 11296 right now, someone might implement them sometime), and the rest of the 11297 registers are used for argument passing, are callee-saved, or reserved. */ 11298 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg / 11299 -ffixed-reg has been used. */ 11300 if (! call_used_regs[0] || fixed_regs[0]) 11301 error ("r0 needs to be available as a call-clobbered register"); 11302 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0); 11303 if (! TARGET_SH5) 11304 { 11305 if (call_used_regs[1] && ! fixed_regs[1]) 11306 scratch1 = gen_rtx_REG (ptr_mode, 1); 11307 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer 11308 pointing where to return struct values. */ 11309 if (call_used_regs[3] && ! fixed_regs[3]) 11310 scratch2 = gen_rtx_REG (Pmode, 3); 11311 } 11312 else if (TARGET_SHMEDIA) 11313 { 11314 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++) 11315 if (i != REGNO (scratch0) && 11316 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i)) 11317 { 11318 scratch1 = gen_rtx_REG (ptr_mode, i); 11319 break; 11320 } 11321 if (scratch1 == scratch0) 11322 error ("Need a second call-clobbered general purpose register"); 11323 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++) 11324 if (call_used_regs[i] && ! 
fixed_regs[i]) 11325 { 11326 scratch2 = gen_rtx_REG (Pmode, i); 11327 break; 11328 } 11329 if (scratch2 == scratch0) 11330 error ("Need a call-clobbered target register"); 11331 } 11332 11333 this_value = plus_constant (this_rtx, delta); 11334 if (vcall_offset 11335 && (simple_add || scratch0 != scratch1) 11336 && strict_memory_address_p (ptr_mode, this_value)) 11337 { 11338 emit_load_ptr (scratch0, this_value); 11339 did_load = 1; 11340 } 11341 11342 if (!delta) 11343 ; /* Do nothing. */ 11344 else if (simple_add) 11345 emit_move_insn (this_rtx, this_value); 11346 else 11347 { 11348 emit_move_insn (scratch1, GEN_INT (delta)); 11349 emit_insn (gen_add2_insn (this_rtx, scratch1)); 11350 } 11351 11352 if (vcall_offset) 11353 { 11354 rtx offset_addr; 11355 11356 if (!did_load) 11357 emit_load_ptr (scratch0, this_rtx); 11358 11359 offset_addr = plus_constant (scratch0, vcall_offset); 11360 if (strict_memory_address_p (ptr_mode, offset_addr)) 11361 ; /* Do nothing. */ 11362 else if (! TARGET_SH5 && scratch0 != scratch1) 11363 { 11364 /* scratch0 != scratch1, and we have indexed loads. Get better 11365 schedule by loading the offset into r1 and using an indexed 11366 load - then the load of r1 can issue before the load from 11367 (this_rtx + delta) finishes. */ 11368 emit_move_insn (scratch1, GEN_INT (vcall_offset)); 11369 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1); 11370 } 11371 else if (CONST_OK_FOR_ADD (vcall_offset)) 11372 { 11373 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset))); 11374 offset_addr = scratch0; 11375 } 11376 else if (scratch0 != scratch1) 11377 { 11378 emit_move_insn (scratch1, GEN_INT (vcall_offset)); 11379 emit_insn (gen_add2_insn (scratch0, scratch1)); 11380 offset_addr = scratch0; 11381 } 11382 else 11383 gcc_unreachable (); /* FIXME */ 11384 emit_load_ptr (scratch0, offset_addr); 11385 11386 if (Pmode != ptr_mode) 11387 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0); 11388 emit_insn (gen_add2_insn (this_rtx, scratch0)); 11389 } 11390 11391 /* Generate a tail call to the target function. */ 11392 if (! TREE_USED (function)) 11393 { 11394 assemble_external (function); 11395 TREE_USED (function) = 1; 11396 } 11397 funexp = XEXP (DECL_RTL (function), 0); 11398 /* If the function is overridden, so is the thunk, hence we don't 11399 need GOT addressing even if this is a public symbol. */ 11400 #if 0 11401 if (TARGET_SH1 && ! flag_weak) 11402 sibcall = gen_sibcalli_thunk (funexp, const0_rtx); 11403 else 11404 #endif 11405 if (TARGET_SH2 && flag_pic) 11406 { 11407 sibcall = gen_sibcall_pcrel (funexp, const0_rtx); 11408 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2; 11409 } 11410 else 11411 { 11412 if (TARGET_SHMEDIA && flag_pic) 11413 { 11414 funexp = gen_sym2PIC (funexp); 11415 PUT_MODE (funexp, Pmode); 11416 } 11417 emit_move_insn (scratch2, funexp); 11418 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2); 11419 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX); 11420 } 11421 sibcall = emit_call_insn (sibcall); 11422 SIBLING_CALL_P (sibcall) = 1; 11423 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx); 11424 emit_barrier (); 11425 11426 /* Run just enough of rest_of_compilation to do scheduling and get 11427 the insns emitted. Note that use_thunk calls 11428 assemble_start_function and assemble_end_function. */ 11429 11430 insn_locators_alloc (); 11431 insns = get_insns (); 11432 11433 if (optimize > 0) 11434 { 11435 if (! 
cfun->cfg) 11436 init_flow (cfun); 11437 split_all_insns_noflow (); 11438 } 11439 11440 sh_reorg (); 11441 shorten_branches (insns); 11442 final_start_function (insns, file, 1); 11443 final (insns, file, 1); 11444 final_end_function (); 11445 11446 reload_completed = 0; 11447 epilogue_completed = 0; 11448 } 11449 11450 rtx 11451 function_symbol (rtx target, const char *name, enum sh_function_kind kind) 11452 { 11453 rtx sym; 11454 11455 /* If this is not an ordinary function, the name usually comes from a 11456 string literal or an sprintf buffer. Make sure we use the same 11457 string consistently, so that cse will be able to unify address loads. */ 11458 if (kind != FUNCTION_ORDINARY) 11459 name = IDENTIFIER_POINTER (get_identifier (name)); 11460 sym = gen_rtx_SYMBOL_REF (Pmode, name); 11461 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION; 11462 if (flag_pic) 11463 switch (kind) 11464 { 11465 case FUNCTION_ORDINARY: 11466 break; 11467 case SFUNC_GOT: 11468 { 11469 rtx reg = target ? target : gen_reg_rtx (Pmode); 11470 11471 emit_insn (gen_symGOT2reg (reg, sym)); 11472 sym = reg; 11473 break; 11474 } 11475 case SFUNC_STATIC: 11476 { 11477 /* ??? To allow cse to work, we use GOTOFF relocations. 11478 we could add combiner patterns to transform this into 11479 straight pc-relative calls with sym2PIC / bsrf when 11480 label load and function call are still 1:1 and in the 11481 same basic block during combine. */ 11482 rtx reg = target ? target : gen_reg_rtx (Pmode); 11483 11484 emit_insn (gen_symGOTOFF2reg (reg, sym)); 11485 sym = reg; 11486 break; 11487 } 11488 } 11489 if (target && sym != target) 11490 { 11491 emit_move_insn (target, sym); 11492 return target; 11493 } 11494 return sym; 11495 } 11496 11497 /* Find the number of a general purpose register in S. */ 11498 static int 11499 scavenge_reg (HARD_REG_SET *s) 11500 { 11501 int r; 11502 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++) 11503 if (TEST_HARD_REG_BIT (*s, r)) 11504 return r; 11505 return -1; 11506 } 11507 11508 rtx 11509 sh_get_pr_initial_val (void) 11510 { 11511 rtx val; 11512 11513 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the 11514 PR register on SHcompact, because it might be clobbered by the prologue. 11515 We check first if that is known to be the case. */ 11516 if (TARGET_SHCOMPACT 11517 && ((crtl->args.info.call_cookie 11518 & ~ CALL_COOKIE_RET_TRAMP (1)) 11519 || crtl->saves_all_registers)) 11520 return gen_frame_mem (SImode, return_address_pointer_rtx); 11521 11522 /* If we haven't finished rtl generation, there might be a nonlocal label 11523 that we haven't seen yet. 11524 ??? get_hard_reg_initial_val fails if it is called after register 11525 allocation has started, unless it has been called before for the 11526 same register. And even then, we end in trouble if we didn't use 11527 the register in the same basic block before. So call 11528 get_hard_reg_initial_val now and wrap it in an unspec if we might 11529 need to replace it. */ 11530 /* ??? We also must do this for TARGET_SH1 in general, because otherwise 11531 combine can put the pseudo returned by get_hard_reg_initial_val into 11532 instructions that need a general purpose registers, which will fail to 11533 be recognized when the pseudo becomes allocated to PR. */ 11534 val 11535 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? 
PR_MEDIA_REG : PR_REG); 11536 if (TARGET_SH1) 11537 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA); 11538 return val; 11539 } 11540 11541 int 11542 sh_expand_t_scc (rtx operands[]) 11543 { 11544 enum rtx_code code = GET_CODE (operands[1]); 11545 rtx target = operands[0]; 11546 rtx op0 = operands[2]; 11547 rtx op1 = operands[3]; 11548 rtx result = target; 11549 HOST_WIDE_INT val; 11550 11551 if (!REG_P (op0) || REGNO (op0) != T_REG 11552 || !CONST_INT_P (op1)) 11553 return 0; 11554 if (!REG_P (result)) 11555 result = gen_reg_rtx (SImode); 11556 val = INTVAL (op1); 11557 if ((code == EQ && val == 1) || (code == NE && val == 0)) 11558 emit_insn (gen_movt (result)); 11559 else if (TARGET_SH2A && ((code == EQ && val == 0) 11560 || (code == NE && val == 1))) 11561 emit_insn (gen_xorsi3_movrt (result)); 11562 else if ((code == EQ && val == 0) || (code == NE && val == 1)) 11563 { 11564 emit_clobber (result); 11565 emit_insn (gen_subc (result, result, result)); 11566 emit_insn (gen_addsi3 (result, result, const1_rtx)); 11567 } 11568 else if (code == EQ || code == NE) 11569 emit_insn (gen_move_insn (result, GEN_INT (code == NE))); 11570 else 11571 return 0; 11572 if (result != target) 11573 emit_move_insn (target, result); 11574 return 1; 11575 } 11576 11577 /* INSN is an sfunc; return the rtx that describes the address used. */ 11578 static rtx 11579 extract_sfunc_addr (rtx insn) 11580 { 11581 rtx pattern, part = NULL_RTX; 11582 int len, i; 11583 11584 pattern = PATTERN (insn); 11585 len = XVECLEN (pattern, 0); 11586 for (i = 0; i < len; i++) 11587 { 11588 part = XVECEXP (pattern, 0, i); 11589 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode 11590 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0)))) 11591 return XEXP (part, 0); 11592 } 11593 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE); 11594 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1); 11595 } 11596 11597 /* Verify that the register in use_sfunc_addr still agrees with the address 11598 used in the sfunc. This prevents fill_slots_from_thread from changing 11599 use_sfunc_addr. 11600 INSN is the use_sfunc_addr instruction, and REG is the register it 11601 guards. */ 11602 int 11603 check_use_sfunc_addr (rtx insn, rtx reg) 11604 { 11605 /* Search for the sfunc. It should really come right after INSN. */ 11606 while ((insn = NEXT_INSN (insn))) 11607 { 11608 if (LABEL_P (insn) || JUMP_P (insn)) 11609 break; 11610 if (! INSN_P (insn)) 11611 continue; 11612 11613 if (GET_CODE (PATTERN (insn)) == SEQUENCE) 11614 insn = XVECEXP (PATTERN (insn), 0, 0); 11615 if (GET_CODE (PATTERN (insn)) != PARALLEL 11616 || get_attr_type (insn) != TYPE_SFUNC) 11617 continue; 11618 return rtx_equal_p (extract_sfunc_addr (insn), reg); 11619 } 11620 gcc_unreachable (); 11621 } 11622 11623 /* This function returns a constant rtx that represents 2**15 / pi in 11624 SFmode. It's used to scale SFmode angles, in radians, to a 11625 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi 11626 maps to 0x10000). */ 11627 11628 static GTY(()) rtx sh_fsca_sf2int_rtx; 11629 11630 rtx 11631 sh_fsca_sf2int (void) 11632 { 11633 if (! sh_fsca_sf2int_rtx) 11634 { 11635 REAL_VALUE_TYPE rv; 11636 11637 real_from_string (&rv, "10430.378350470453"); 11638 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode); 11639 } 11640 11641 return sh_fsca_sf2int_rtx; 11642 } 11643 11644 /* This function returns a constant rtx that represents 2**15 / pi in 11645 DFmode. 
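For example, multiplying an angle of pi/2 radians by this constant yields 2**14 = 0x4000, i.e., a quarter of a full circle. 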
It's used to scale DFmode angles, in radians, to a 11646 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi 11647 maps to 0x10000). */ 11648 11649 static GTY(()) rtx sh_fsca_df2int_rtx; 11650 11651 rtx 11652 sh_fsca_df2int (void) 11653 { 11654 if (! sh_fsca_df2int_rtx) 11655 { 11656 REAL_VALUE_TYPE rv; 11657 11658 real_from_string (&rv, "10430.378350470453"); 11659 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode); 11660 } 11661 11662 return sh_fsca_df2int_rtx; 11663 } 11664 11665 /* This function returns a constant rtx that represents pi / 2**15 in 11666 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction 11667 of a full circle back to an SFmode value (i.e., 0x10000 maps to 11668 2*pi). */ 11669 11670 static GTY(()) rtx sh_fsca_int2sf_rtx; 11671 11672 rtx 11673 sh_fsca_int2sf (void) 11674 { 11675 if (! sh_fsca_int2sf_rtx) 11676 { 11677 REAL_VALUE_TYPE rv; 11678 11679 real_from_string (&rv, "9.587379924285257e-5"); 11680 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode); 11681 } 11682 11683 return sh_fsca_int2sf_rtx; 11684 } 11685 11686 /* Initialize the CUMULATIVE_ARGS structure. */ 11687 11688 void 11689 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum, 11690 tree fntype, 11691 rtx libname ATTRIBUTE_UNUSED, 11692 tree fndecl, 11693 signed int n_named_args, 11694 enum machine_mode mode) 11695 { 11696 pcum->arg_count [(int) SH_ARG_FLOAT] = 0; 11697 pcum->free_single_fp_reg = 0; 11698 pcum->stack_regs = 0; 11699 pcum->byref_regs = 0; 11700 pcum->byref = 0; 11701 pcum->outgoing = (n_named_args == -1) ? 0 : 1; 11702 11703 /* XXX - Should we check TARGET_HITACHI here ??? */ 11704 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0; 11705 11706 if (fntype) 11707 { 11708 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi) 11709 && aggregate_value_p (TREE_TYPE (fntype), fndecl)); 11710 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE; 11711 pcum->arg_count [(int) SH_ARG_INT] 11712 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl); 11713 11714 pcum->call_cookie 11715 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT 11716 && pcum->arg_count [(int) SH_ARG_INT] == 0 11717 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode 11718 ? int_size_in_bytes (TREE_TYPE (fntype)) 11719 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4 11720 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype))) 11721 == FIRST_RET_REG)); 11722 } 11723 else 11724 { 11725 pcum->arg_count [(int) SH_ARG_INT] = 0; 11726 pcum->prototype_p = FALSE; 11727 if (mode != VOIDmode) 11728 { 11729 pcum->call_cookie = 11730 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT 11731 && GET_MODE_SIZE (mode) > 4 11732 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG); 11733 11734 /* If the default ABI is the Renesas ABI then all library 11735 calls must assume that the library will be using the 11736 Renesas ABI. So if the function would return its result 11737 in memory then we must force the address of this memory 11738 block onto the stack. Ideally we would like to call 11739 targetm.calls.return_in_memory() here but we do not have 11740 the TYPE or the FNDECL available so we synthesize the 11741 contents of that function as best we can. 
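Concretely: any BLKmode value, or any value wider than 4 bytes other than a DFmode value when double-precision FPU arithmetic is available, is assumed to be returned in memory. 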
*/ 11742 pcum->force_mem = 11743 (TARGET_DEFAULT & MASK_HITACHI) 11744 && (mode == BLKmode 11745 || (GET_MODE_SIZE (mode) > 4 11746 && !(mode == DFmode 11747 && TARGET_FPU_DOUBLE))); 11748 } 11749 else 11750 { 11751 pcum->call_cookie = 0; 11752 pcum->force_mem = FALSE; 11753 } 11754 } 11755 } 11756 11757 /* Replace any occurrence of FROM(n) in X with TO(n). The function does 11758 not enter into CONST_DOUBLE for the replace. 11759 11760 Note that copying is not done so X must not be shared unless all copies 11761 are to be modified. 11762 11763 This is like replace_rtx, except that we operate on N_REPLACEMENTS 11764 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is 11765 replacements[n*2+1] - and that we take mode changes into account. 11766 11767 If a replacement is ambiguous, return NULL_RTX. 11768 11769 If MODIFY is zero, don't modify any rtl in place, 11770 just return zero or nonzero for failure / success. */ 11771 11772 rtx 11773 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify) 11774 { 11775 int i, j; 11776 const char *fmt; 11777 11778 /* The following prevents loops occurrence when we change MEM in 11779 CONST_DOUBLE onto the same CONST_DOUBLE. */ 11780 if (x != 0 && GET_CODE (x) == CONST_DOUBLE) 11781 return x; 11782 11783 for (i = n_replacements - 1; i >= 0 ; i--) 11784 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1])) 11785 return replacements[i*2+1]; 11786 11787 /* Allow this function to make replacements in EXPR_LISTs. */ 11788 if (x == 0) 11789 return 0; 11790 11791 if (GET_CODE (x) == SUBREG) 11792 { 11793 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements, 11794 n_replacements, modify); 11795 11796 if (CONST_INT_P (new_rtx)) 11797 { 11798 x = simplify_subreg (GET_MODE (x), new_rtx, 11799 GET_MODE (SUBREG_REG (x)), 11800 SUBREG_BYTE (x)); 11801 if (! x) 11802 abort (); 11803 } 11804 else if (modify) 11805 SUBREG_REG (x) = new_rtx; 11806 11807 return x; 11808 } 11809 else if (REG_P (x)) 11810 { 11811 unsigned regno = REGNO (x); 11812 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER 11813 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1); 11814 rtx result = NULL_RTX; 11815 11816 for (i = n_replacements - 1; i >= 0; i--) 11817 { 11818 rtx from = replacements[i*2]; 11819 rtx to = replacements[i*2+1]; 11820 unsigned from_regno, from_nregs, to_regno, new_regno; 11821 11822 if (!REG_P (from)) 11823 continue; 11824 from_regno = REGNO (from); 11825 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER 11826 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1); 11827 if (regno < from_regno + from_nregs && regno + nregs > from_regno) 11828 { 11829 if (regno < from_regno 11830 || regno + nregs > from_regno + nregs 11831 || !REG_P (to) 11832 || result) 11833 return NULL_RTX; 11834 to_regno = REGNO (to); 11835 if (to_regno < FIRST_PSEUDO_REGISTER) 11836 { 11837 new_regno = regno + to_regno - from_regno; 11838 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x)) 11839 != nregs) 11840 return NULL_RTX; 11841 result = gen_rtx_REG (GET_MODE (x), new_regno); 11842 } 11843 else if (GET_MODE (x) <= GET_MODE (to)) 11844 result = gen_lowpart_common (GET_MODE (x), to); 11845 else 11846 result = gen_lowpart_SUBREG (GET_MODE (x), to); 11847 } 11848 } 11849 return result ? 
result : x; 11850 } 11851 else if (GET_CODE (x) == ZERO_EXTEND) 11852 { 11853 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements, 11854 n_replacements, modify); 11855 11856 if (CONST_INT_P (new_rtx)) 11857 { 11858 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x), 11859 new_rtx, GET_MODE (XEXP (x, 0))); 11860 if (! x) 11861 abort (); 11862 } 11863 else if (modify) 11864 XEXP (x, 0) = new_rtx; 11865 11866 return x; 11867 } 11868 11869 fmt = GET_RTX_FORMAT (GET_CODE (x)); 11870 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 11871 { 11872 rtx new_rtx; 11873 11874 if (fmt[i] == 'e') 11875 { 11876 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements, 11877 n_replacements, modify); 11878 if (!new_rtx) 11879 return NULL_RTX; 11880 if (modify) 11881 XEXP (x, i) = new_rtx; 11882 } 11883 else if (fmt[i] == 'E') 11884 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 11885 { 11886 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements, 11887 n_replacements, modify); 11888 if (!new_rtx) 11889 return NULL_RTX; 11890 if (modify) 11891 XVECEXP (x, i, j) = new_rtx; 11892 } 11893 } 11894 11895 return x; 11896 } 11897 11898 rtx 11899 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext) 11900 { 11901 enum rtx_code code = TRUNCATE; 11902 11903 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND) 11904 { 11905 rtx inner = XEXP (x, 0); 11906 enum machine_mode inner_mode = GET_MODE (inner); 11907 11908 if (inner_mode == mode) 11909 return inner; 11910 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode)) 11911 x = inner; 11912 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode) 11913 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND)) 11914 { 11915 code = GET_CODE (x); 11916 x = inner; 11917 } 11918 } 11919 return gen_rtx_fmt_e (code, mode, x); 11920 } 11921 11922 /* called via for_each_rtx after reload, to clean up truncates of 11923 registers that span multiple actual hard registers. */ 11924 int 11925 shmedia_cleanup_truncate (rtx *p, void *n_changes) 11926 { 11927 rtx x = *p, reg; 11928 11929 if (GET_CODE (x) != TRUNCATE) 11930 return 0; 11931 reg = XEXP (x, 0); 11932 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg)) 11933 { 11934 enum machine_mode reg_mode = GET_MODE (reg); 11935 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, 11936 subreg_lowpart_offset (DImode, reg_mode)); 11937 *(int*) n_changes += 1; 11938 return -1; 11939 } 11940 return 0; 11941 } 11942 11943 /* Load and store depend on the highpart of the address. However, 11944 set_attr_alternative does not give well-defined results before reload, 11945 so we must look at the rtl ourselves to see if any of the feeding 11946 registers is used in a memref. */ 11947 11948 /* Called by sh_contains_memref_p via for_each_rtx. */ 11949 static int 11950 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED) 11951 { 11952 return (MEM_P (*loc)); 11953 } 11954 11955 /* Return nonzero iff INSN contains a MEM. */ 11956 int 11957 sh_contains_memref_p (rtx insn) 11958 { 11959 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL); 11960 } 11961 11962 /* Return nonzero iff INSN loads a banked register. */ 11963 int 11964 sh_loads_bankedreg_p (rtx insn) 11965 { 11966 if (GET_CODE (PATTERN (insn)) == SET) 11967 { 11968 rtx op = SET_DEST (PATTERN(insn)); 11969 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op))) 11970 return 1; 11971 } 11972 11973 return 0; 11974 } 11975 11976 /* FNADDR is the MEM expression from a call expander. 
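Under PIC, a non-local symbol is first loaded through the GOTPLT, or through the plain GOT for sibling calls since the PIC register must be restored before the PLT code runs; an address that might not be a valid code pointer is additionally moved into a PDImode target register via ptabs so that a potential trap is visible to the compiler. 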
Return an address 11977 to use in an SHmedia insn pattern. */ 11978 rtx 11979 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall) 11980 { 11981 int is_sym; 11982 11983 fnaddr = XEXP (fnaddr, 0); 11984 is_sym = GET_CODE (fnaddr) == SYMBOL_REF; 11985 if (flag_pic && is_sym) 11986 { 11987 if (! SYMBOL_REF_LOCAL_P (fnaddr)) 11988 { 11989 rtx reg = gen_reg_rtx (Pmode); 11990 11991 /* We must not use GOTPLT for sibcalls, because PIC_REG 11992 must be restored before the PLT code gets to run. */ 11993 if (is_sibcall) 11994 emit_insn (gen_symGOT2reg (reg, fnaddr)); 11995 else 11996 emit_insn (gen_symGOTPLT2reg (reg, fnaddr)); 11997 fnaddr = reg; 11998 } 11999 else 12000 { 12001 fnaddr = gen_sym2PIC (fnaddr); 12002 PUT_MODE (fnaddr, Pmode); 12003 } 12004 } 12005 /* If ptabs might trap, make this visible to the rest of the compiler. 12006 We generally assume that symbols pertain to valid locations, but 12007 it is possible to generate invalid symbols with asm or linker tricks. 12008 In a list of functions where each returns its successor, an invalid 12009 symbol might denote an empty list. */ 12010 if (!TARGET_PT_FIXED 12011 && (!is_sym || TARGET_INVALID_SYMBOLS) 12012 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr)))) 12013 { 12014 rtx tr = gen_reg_rtx (PDImode); 12015 12016 emit_insn (gen_ptabs (tr, fnaddr)); 12017 fnaddr = tr; 12018 } 12019 else if (! target_reg_operand (fnaddr, Pmode)) 12020 fnaddr = copy_to_mode_reg (Pmode, fnaddr); 12021 return fnaddr; 12022 } 12023 12024 enum reg_class 12025 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass, 12026 enum machine_mode mode, secondary_reload_info *sri) 12027 { 12028 if (in_p) 12029 { 12030 if (REGCLASS_HAS_FP_REG (rclass) 12031 && ! TARGET_SHMEDIA 12032 && immediate_operand ((x), mode) 12033 && ! ((fp_zero_operand (x) || fp_one_operand (x)) 12034 && mode == SFmode && fldi_ok ())) 12035 switch (mode) 12036 { 12037 case SFmode: 12038 sri->icode = CODE_FOR_reload_insf__frn; 12039 return NO_REGS; 12040 case DFmode: 12041 sri->icode = CODE_FOR_reload_indf__frn; 12042 return NO_REGS; 12043 case SImode: 12044 /* ??? If we knew that we are in the appropriate mode - 12045 single precision - we could use a reload pattern directly. */ 12046 return FPUL_REGS; 12047 default: 12048 abort (); 12049 } 12050 if (rclass == FPUL_REGS 12051 && ((REG_P (x) 12052 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG 12053 || REGNO (x) == T_REG)) 12054 || GET_CODE (x) == PLUS)) 12055 return GENERAL_REGS; 12056 if (rclass == FPUL_REGS && immediate_operand (x, mode)) 12057 { 12058 if (satisfies_constraint_I08 (x) || fp_zero_operand (x)) 12059 return GENERAL_REGS; 12060 else if (mode == SFmode) 12061 return FP_REGS; 12062 sri->icode = CODE_FOR_reload_insi__i_fpul; 12063 return NO_REGS; 12064 } 12065 if (rclass == FPSCR_REGS 12066 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER) 12067 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS))) 12068 return GENERAL_REGS; 12069 if (REGCLASS_HAS_FP_REG (rclass) 12070 && TARGET_SHMEDIA 12071 && immediate_operand (x, mode) 12072 && x != CONST0_RTX (GET_MODE (x)) 12073 && GET_MODE (x) != V4SFmode) 12074 return GENERAL_REGS; 12075 if ((mode == QImode || mode == HImode) 12076 && TARGET_SHMEDIA && inqhi_operand (x, mode)) 12077 { 12078 sri->icode = ((mode == QImode) 12079 ? 
CODE_FOR_reload_inqi : CODE_FOR_reload_inhi); 12080 return NO_REGS; 12081 } 12082 if (TARGET_SHMEDIA && rclass == GENERAL_REGS 12083 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x))) 12084 return TARGET_REGS; 12085 } /* end of input-only processing. */ 12086 12087 if (((REGCLASS_HAS_FP_REG (rclass) 12088 && (REG_P (x) 12089 && (GENERAL_OR_AP_REGISTER_P (REGNO (x)) 12090 || (FP_REGISTER_P (REGNO (x)) && mode == SImode 12091 && TARGET_FMOVD)))) 12092 || (REGCLASS_HAS_GENERAL_REG (rclass) 12093 && REG_P (x) 12094 && FP_REGISTER_P (REGNO (x)))) 12095 && ! TARGET_SHMEDIA 12096 && (mode == SFmode || mode == SImode)) 12097 return FPUL_REGS; 12098 if ((rclass == FPUL_REGS 12099 || (REGCLASS_HAS_FP_REG (rclass) 12100 && ! TARGET_SHMEDIA && mode == SImode)) 12101 && (MEM_P (x) 12102 || (REG_P (x) 12103 && (REGNO (x) >= FIRST_PSEUDO_REGISTER 12104 || REGNO (x) == T_REG 12105 || system_reg_operand (x, VOIDmode))))) 12106 { 12107 if (rclass == FPUL_REGS) 12108 return GENERAL_REGS; 12109 return FPUL_REGS; 12110 } 12111 if ((rclass == TARGET_REGS 12112 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS)) 12113 && !satisfies_constraint_Csy (x) 12114 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x)))) 12115 return GENERAL_REGS; 12116 if ((rclass == MAC_REGS || rclass == PR_REGS) 12117 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x)) 12118 && rclass != REGNO_REG_CLASS (REGNO (x))) 12119 return GENERAL_REGS; 12120 if (rclass != GENERAL_REGS && REG_P (x) 12121 && TARGET_REGISTER_P (REGNO (x))) 12122 return GENERAL_REGS; 12123 return NO_REGS; 12124 } 12125 12126 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT; 12127 12128 #include "gt-sh.h" 12129