1 /* Output routines for GCC for ARM. 2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 4 Free Software Foundation, Inc. 5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) 6 and Martin Simmons (@harleqn.co.uk). 7 More major hacks by Richard Earnshaw (rearnsha@arm.com). 8 9 This file is part of GCC. 10 11 GCC is free software; you can redistribute it and/or modify it 12 under the terms of the GNU General Public License as published 13 by the Free Software Foundation; either version 3, or (at your 14 option) any later version. 15 16 GCC is distributed in the hope that it will be useful, but WITHOUT 17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 19 License for more details. 20 21 You should have received a copy of the GNU General Public License 22 along with GCC; see the file COPYING3. If not see 23 <http://www.gnu.org/licenses/>. */ 24 25 #include "config.h" 26 #include "system.h" 27 #include "coretypes.h" 28 #include "tm.h" 29 #include "rtl.h" 30 #include "tree.h" 31 #include "obstack.h" 32 #include "regs.h" 33 #include "hard-reg-set.h" 34 #include "real.h" 35 #include "insn-config.h" 36 #include "conditions.h" 37 #include "output.h" 38 #include "insn-attr.h" 39 #include "flags.h" 40 #include "reload.h" 41 #include "function.h" 42 #include "expr.h" 43 #include "optabs.h" 44 #include "toplev.h" 45 #include "recog.h" 46 #include "cgraph.h" 47 #include "ggc.h" 48 #include "except.h" 49 #include "c-pragma.h" 50 #include "integrate.h" 51 #include "tm_p.h" 52 #include "target.h" 53 #include "target-def.h" 54 #include "debug.h" 55 #include "langhooks.h" 56 #include "df.h" 57 #include "intl.h" 58 #include "libfuncs.h" 59 60 /* Forward definitions of types. */ 61 typedef struct minipool_node Mnode; 62 typedef struct minipool_fixup Mfix; 63 64 void (*arm_lang_output_object_attributes_hook)(void); 65 66 /* Forward function declarations. 
*/ 67 static int arm_compute_static_chain_stack_bytes (void); 68 static arm_stack_offsets *arm_get_frame_offsets (void); 69 static void arm_add_gc_roots (void); 70 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx, 71 HOST_WIDE_INT, rtx, rtx, int, int); 72 static unsigned bit_count (unsigned long); 73 static int arm_address_register_rtx_p (rtx, int); 74 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int); 75 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int); 76 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int); 77 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode); 78 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode); 79 inline static int thumb1_index_register_rtx_p (rtx, int); 80 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); 81 static int thumb_far_jump_used_p (void); 82 static bool thumb_force_lr_save (void); 83 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); 84 static rtx emit_sfm (int, int); 85 static unsigned arm_size_return_regs (void); 86 static bool arm_assemble_integer (rtx, unsigned int, int); 87 static const char *fp_const_from_val (REAL_VALUE_TYPE *); 88 static arm_cc get_arm_condition_code (rtx); 89 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT); 90 static rtx is_jump_table (rtx); 91 static const char *output_multi_immediate (rtx *, const char *, const char *, 92 int, HOST_WIDE_INT); 93 static const char *shift_op (rtx, HOST_WIDE_INT *); 94 static struct machine_function *arm_init_machine_status (void); 95 static void thumb_exit (FILE *, int); 96 static rtx is_jump_table (rtx); 97 static HOST_WIDE_INT get_jump_table_size (rtx); 98 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT); 99 static Mnode *add_minipool_forward_ref (Mfix *); 100 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT); 101 static Mnode *add_minipool_backward_ref (Mfix *); 102 static void assign_minipool_offsets (Mfix *); 103 static void arm_print_value (FILE *, rtx); 104 static void dump_minipool (rtx); 105 static int arm_barrier_cost (rtx); 106 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT); 107 static void push_minipool_barrier (rtx, HOST_WIDE_INT); 108 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode, 109 rtx); 110 static void arm_reorg (void); 111 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int); 112 static unsigned long arm_compute_save_reg0_reg12_mask (void); 113 static unsigned long arm_compute_save_reg_mask (void); 114 static unsigned long arm_isr_value (tree); 115 static unsigned long arm_compute_func_type (void); 116 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *); 117 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *); 118 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *); 119 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES 120 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *); 121 #endif 122 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT); 123 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT); 124 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT); 125 static int arm_comp_type_attributes (const_tree, const_tree); 126 static void arm_set_default_type_attributes (tree); 127 static int arm_adjust_cost (rtx, rtx, rtx, int); 128 static int count_insns_for_constant (HOST_WIDE_INT, int); 129 static int arm_get_strip_length (int); 130 
static bool arm_function_ok_for_sibcall (tree, tree); 131 static enum machine_mode arm_promote_function_mode (const_tree, 132 enum machine_mode, int *, 133 const_tree, int); 134 static bool arm_return_in_memory (const_tree, const_tree); 135 static rtx arm_function_value (const_tree, const_tree, bool); 136 static rtx arm_libcall_value (enum machine_mode, const_rtx); 137 138 static void arm_internal_label (FILE *, const char *, unsigned long); 139 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, 140 tree); 141 static bool arm_have_conditional_execution (void); 142 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); 143 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); 144 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); 145 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); 146 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); 147 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); 148 static bool arm_rtx_costs (rtx, int, int, int *, bool); 149 static int arm_address_cost (rtx, bool); 150 static bool arm_memory_load_p (rtx); 151 static bool arm_cirrus_insn_p (rtx); 152 static void cirrus_reorg (rtx); 153 static void arm_init_builtins (void); 154 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); 155 static void arm_init_iwmmxt_builtins (void); 156 static rtx safe_vector_operand (rtx, enum machine_mode); 157 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx); 158 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int); 159 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); 160 static void emit_constant_insn (rtx cond, rtx pattern); 161 static rtx emit_set_insn (rtx, rtx); 162 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, 163 tree, bool); 164 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree, 165 const_tree); 166 static int aapcs_select_return_coproc (const_tree, const_tree); 167 168 #ifdef OBJECT_FORMAT_ELF 169 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; 170 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; 171 #endif 172 #ifndef ARM_PE 173 static void arm_encode_section_info (tree, rtx, int); 174 #endif 175 176 static void arm_file_end (void); 177 static void arm_file_start (void); 178 179 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, 180 tree, int *, int); 181 static bool arm_pass_by_reference (CUMULATIVE_ARGS *, 182 enum machine_mode, const_tree, bool); 183 static bool arm_promote_prototypes (const_tree); 184 static bool arm_default_short_enums (void); 185 static bool arm_align_anon_bitfield (void); 186 static bool arm_return_in_msb (const_tree); 187 static bool arm_must_pass_in_stack (enum machine_mode, const_tree); 188 static bool arm_return_in_memory (const_tree, const_tree); 189 #ifdef TARGET_UNWIND_INFO 190 static void arm_unwind_emit (FILE *, rtx); 191 static bool arm_output_ttype (rtx); 192 #endif 193 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int); 194 static rtx arm_dwarf_register_span (rtx); 195 196 static tree arm_cxx_guard_type (void); 197 static bool arm_cxx_guard_mask_bit (void); 198 static tree arm_get_cookie_size (tree); 199 static bool arm_cookie_has_size (void); 200 static bool arm_cxx_cdtor_returns_this (void); 201 static bool arm_cxx_key_method_may_be_inline (void); 202 static void 
arm_cxx_determine_class_data_visibility (tree); 203 static bool arm_cxx_class_data_always_comdat (void); 204 static bool arm_cxx_use_aeabi_atexit (void); 205 static void arm_init_libfuncs (void); 206 static tree arm_build_builtin_va_list (void); 207 static void arm_expand_builtin_va_start (tree, rtx); 208 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); 209 static bool arm_handle_option (size_t, const char *, int); 210 static void arm_target_help (void); 211 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode); 212 static bool arm_cannot_copy_insn_p (rtx); 213 static bool arm_tls_symbol_p (rtx x); 214 static int arm_issue_rate (void); 215 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; 216 static bool arm_allocate_stack_slots_for_args (void); 217 static const char *arm_invalid_parameter_type (const_tree t); 218 static const char *arm_invalid_return_type (const_tree t); 219 static tree arm_promoted_type (const_tree t); 220 static tree arm_convert_to_type (tree type, tree expr); 221 static bool arm_scalar_mode_supported_p (enum machine_mode); 222 static bool arm_frame_pointer_required (void); 223 static bool arm_can_eliminate (const int, const int); 224 static void arm_asm_trampoline_template (FILE *); 225 static void arm_trampoline_init (rtx, tree, rtx); 226 static rtx arm_trampoline_adjust_address (rtx); 227 228 229 /* Table of machine attributes. */ 230 static const struct attribute_spec arm_attribute_table[] = 231 { 232 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ 233 /* Function calls made to this symbol must be done indirectly, because 234 it may lie outside of the 26 bit addressing range of a normal function 235 call. */ 236 { "long_call", 0, 0, false, true, true, NULL }, 237 /* Whereas these functions are always known to reside within the 26 bit 238 addressing range. */ 239 { "short_call", 0, 0, false, true, true, NULL }, 240 /* Specify the procedure call conventions for a function. */ 241 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute }, 242 /* Interrupt Service Routines have special prologue and epilogue requirements. */ 243 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute }, 244 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute }, 245 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute }, 246 #ifdef ARM_PE 247 /* ARM/PE has three new attributes: 248 interfacearm - ? 249 dllexport - for exporting a function/variable that will live in a dll 250 dllimport - for importing a function/variable from a dll 251 252 Microsoft allows multiple declspecs in one __declspec, separating 253 them with spaces. We do NOT support this. Instead, use __declspec 254 multiple times. 255 */ 256 { "dllimport", 0, 0, true, false, false, NULL }, 257 { "dllexport", 0, 0, true, false, false, NULL }, 258 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute }, 259 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES 260 { "dllimport", 0, 0, false, false, false, handle_dll_attribute }, 261 { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, 262 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute }, 263 #endif 264 { NULL, 0, 0, false, false, false, NULL } 265 }; 266 267 /* Initialize the GCC target structure. 
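   (For reference, the function attributes registered in arm_attribute_table
   above are applied in user code roughly as in this hypothetical sketch,
   which is illustrative only and not part of this file:

       extern int far_helper (int) __attribute__ ((long_call));
       void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
       double vadd (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   "long_call" forces an indirect call sequence for calls that may lie
   outside the 26-bit range, "isr"/"interrupt" select the interrupt
   prologue and epilogue handled by arm_handle_isr_attribute, and "pcs"
   selects the procedure call standard recorded by
   arm_handle_pcs_attribute.)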
*/ 268 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES 269 #undef TARGET_MERGE_DECL_ATTRIBUTES 270 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 271 #endif 272 273 #undef TARGET_LEGITIMIZE_ADDRESS 274 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address 275 276 #undef TARGET_ATTRIBUTE_TABLE 277 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table 278 279 #undef TARGET_ASM_FILE_START 280 #define TARGET_ASM_FILE_START arm_file_start 281 #undef TARGET_ASM_FILE_END 282 #define TARGET_ASM_FILE_END arm_file_end 283 284 #undef TARGET_ASM_ALIGNED_SI_OP 285 #define TARGET_ASM_ALIGNED_SI_OP NULL 286 #undef TARGET_ASM_INTEGER 287 #define TARGET_ASM_INTEGER arm_assemble_integer 288 289 #undef TARGET_ASM_FUNCTION_PROLOGUE 290 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue 291 292 #undef TARGET_ASM_FUNCTION_EPILOGUE 293 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue 294 295 #undef TARGET_DEFAULT_TARGET_FLAGS 296 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG) 297 #undef TARGET_HANDLE_OPTION 298 #define TARGET_HANDLE_OPTION arm_handle_option 299 #undef TARGET_HELP 300 #define TARGET_HELP arm_target_help 301 302 #undef TARGET_COMP_TYPE_ATTRIBUTES 303 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes 304 305 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES 306 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes 307 308 #undef TARGET_SCHED_ADJUST_COST 309 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost 310 311 #undef TARGET_ENCODE_SECTION_INFO 312 #ifdef ARM_PE 313 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info 314 #else 315 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info 316 #endif 317 318 #undef TARGET_STRIP_NAME_ENCODING 319 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding 320 321 #undef TARGET_ASM_INTERNAL_LABEL 322 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label 323 324 #undef TARGET_FUNCTION_OK_FOR_SIBCALL 325 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall 326 327 #undef TARGET_FUNCTION_VALUE 328 #define TARGET_FUNCTION_VALUE arm_function_value 329 330 #undef TARGET_LIBCALL_VALUE 331 #define TARGET_LIBCALL_VALUE arm_libcall_value 332 333 #undef TARGET_ASM_OUTPUT_MI_THUNK 334 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk 335 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 336 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall 337 338 #undef TARGET_RTX_COSTS 339 #define TARGET_RTX_COSTS arm_rtx_costs 340 #undef TARGET_ADDRESS_COST 341 #define TARGET_ADDRESS_COST arm_address_cost 342 343 #undef TARGET_SHIFT_TRUNCATION_MASK 344 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask 345 #undef TARGET_VECTOR_MODE_SUPPORTED_P 346 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p 347 348 #undef TARGET_MACHINE_DEPENDENT_REORG 349 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg 350 351 #undef TARGET_INIT_BUILTINS 352 #define TARGET_INIT_BUILTINS arm_init_builtins 353 #undef TARGET_EXPAND_BUILTIN 354 #define TARGET_EXPAND_BUILTIN arm_expand_builtin 355 356 #undef TARGET_INIT_LIBFUNCS 357 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs 358 359 #undef TARGET_PROMOTE_FUNCTION_MODE 360 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode 361 #undef TARGET_PROMOTE_PROTOTYPES 362 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes 363 #undef TARGET_PASS_BY_REFERENCE 364 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference 365 #undef TARGET_ARG_PARTIAL_BYTES 366 #define 
TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes 367 368 #undef TARGET_SETUP_INCOMING_VARARGS 369 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs 370 371 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS 372 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args 373 374 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE 375 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template 376 #undef TARGET_TRAMPOLINE_INIT 377 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init 378 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS 379 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address 380 381 #undef TARGET_DEFAULT_SHORT_ENUMS 382 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums 383 384 #undef TARGET_ALIGN_ANON_BITFIELD 385 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield 386 387 #undef TARGET_NARROW_VOLATILE_BITFIELD 388 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false 389 390 #undef TARGET_CXX_GUARD_TYPE 391 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type 392 393 #undef TARGET_CXX_GUARD_MASK_BIT 394 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit 395 396 #undef TARGET_CXX_GET_COOKIE_SIZE 397 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size 398 399 #undef TARGET_CXX_COOKIE_HAS_SIZE 400 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size 401 402 #undef TARGET_CXX_CDTOR_RETURNS_THIS 403 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this 404 405 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE 406 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline 407 408 #undef TARGET_CXX_USE_AEABI_ATEXIT 409 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit 410 411 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY 412 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \ 413 arm_cxx_determine_class_data_visibility 414 415 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT 416 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat 417 418 #undef TARGET_RETURN_IN_MSB 419 #define TARGET_RETURN_IN_MSB arm_return_in_msb 420 421 #undef TARGET_RETURN_IN_MEMORY 422 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory 423 424 #undef TARGET_MUST_PASS_IN_STACK 425 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack 426 427 #ifdef TARGET_UNWIND_INFO 428 #undef TARGET_UNWIND_EMIT 429 #define TARGET_UNWIND_EMIT arm_unwind_emit 430 431 /* EABI unwinding tables use a different format for the typeinfo tables. 
*/
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char * minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden by commandline.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.
*/ 535 const struct arm_fpu_desc *arm_fpu_desc; 536 537 /* Whether to use floating point hardware. */ 538 enum float_abi_type arm_float_abi; 539 540 /* Which __fp16 format to use. */ 541 enum arm_fp16_format_type arm_fp16_format; 542 543 /* Which ABI to use. */ 544 enum arm_abi_type arm_abi; 545 546 /* Which thread pointer model to use. */ 547 enum arm_tp_type target_thread_pointer = TP_AUTO; 548 549 /* Used to parse -mstructure_size_boundary command line option. */ 550 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY; 551 552 /* Used for Thumb call_via trampolines. */ 553 rtx thumb_call_via_label[14]; 554 static int thumb_call_reg_needed; 555 556 /* Bit values used to identify processor capabilities. */ 557 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */ 558 #define FL_ARCH3M (1 << 1) /* Extended multiply */ 559 #define FL_MODE26 (1 << 2) /* 26-bit mode support */ 560 #define FL_MODE32 (1 << 3) /* 32-bit mode support */ 561 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */ 562 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */ 563 #define FL_THUMB (1 << 6) /* Thumb aware */ 564 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */ 565 #define FL_STRONG (1 << 8) /* StrongARM */ 566 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */ 567 #define FL_XSCALE (1 << 10) /* XScale */ 568 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */ 569 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds 570 media instructions. */ 571 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */ 572 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops. 573 Note: ARM6 & 7 derivatives only. */ 574 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */ 575 #define FL_THUMB2 (1 << 16) /* Thumb-2. */ 576 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M' 577 profile. */ 578 #define FL_DIV (1 << 18) /* Hardware divide. */ 579 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ 580 #define FL_NEON (1 << 20) /* Neon instructions. */ 581 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M 582 architecture. */ 583 584 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ 585 586 #define FL_FOR_ARCH2 FL_NOTM 587 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32) 588 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M) 589 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4) 590 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB) 591 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5) 592 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB) 593 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E) 594 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB) 595 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE 596 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6) 597 #define FL_FOR_ARCH6J FL_FOR_ARCH6 598 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K) 599 #define FL_FOR_ARCH6Z FL_FOR_ARCH6 600 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K 601 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) 602 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) 603 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM) 604 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) 605 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) 606 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) 607 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) 608 609 /* The bits in this mask specify which 610 instructions we are allowed to generate. */ 611 static unsigned long insn_flags = 0; 612 613 /* The bits in this mask specify which instruction scheduling options should 614 be used. 
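   (Illustrative note: both insn_flags and tune_flags are built from the
   FL_* capability bits defined above.  As a worked example of how the
   FL_FOR_ARCH* masks compose: FL_FOR_ARCH7 is FL_FOR_ARCH6T2 with
   FL_NOTM cleared, and FL_FOR_ARCH7A adds FL_NOTM and FL_ARCH6K back in,
   so the "armv7-a" entry in all_architectures below resolves to
   FL_CO_PROC plus the v6T2 feature set, the non-M-profile instructions
   and the v6K extensions.)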
*/ 615 static unsigned long tune_flags = 0; 616 617 /* The following are used in the arm.md file as equivalents to bits 618 in the above two flag variables. */ 619 620 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */ 621 int arm_arch3m = 0; 622 623 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */ 624 int arm_arch4 = 0; 625 626 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */ 627 int arm_arch4t = 0; 628 629 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */ 630 int arm_arch5 = 0; 631 632 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */ 633 int arm_arch5e = 0; 634 635 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */ 636 int arm_arch6 = 0; 637 638 /* Nonzero if this chip supports the ARM 6K extensions. */ 639 int arm_arch6k = 0; 640 641 /* Nonzero if instructions not present in the 'M' profile can be used. */ 642 int arm_arch_notm = 0; 643 644 /* Nonzero if instructions present in ARMv7E-M can be used. */ 645 int arm_arch7em = 0; 646 647 /* Nonzero if this chip can benefit from load scheduling. */ 648 int arm_ld_sched = 0; 649 650 /* Nonzero if this chip is a StrongARM. */ 651 int arm_tune_strongarm = 0; 652 653 /* Nonzero if this chip is a Cirrus variant. */ 654 int arm_arch_cirrus = 0; 655 656 /* Nonzero if this chip supports Intel Wireless MMX technology. */ 657 int arm_arch_iwmmxt = 0; 658 659 /* Nonzero if this chip is an XScale. */ 660 int arm_arch_xscale = 0; 661 662 /* Nonzero if tuning for XScale */ 663 int arm_tune_xscale = 0; 664 665 /* Nonzero if we want to tune for stores that access the write-buffer. 666 This typically means an ARM6 or ARM7 with MMU or MPU. */ 667 int arm_tune_wbuf = 0; 668 669 /* Nonzero if tuning for Cortex-A9. */ 670 int arm_tune_cortex_a9 = 0; 671 672 /* Nonzero if generating Thumb instructions. */ 673 int thumb_code = 0; 674 675 /* Nonzero if we should define __THUMB_INTERWORK__ in the 676 preprocessor. 677 XXX This is a bit of a hack, it's intended to help work around 678 problems in GLD which doesn't understand that armv5t code is 679 interworking clean. */ 680 int arm_cpp_interwork = 0; 681 682 /* Nonzero if chip supports Thumb 2. */ 683 int arm_arch_thumb2; 684 685 /* Nonzero if chip supports integer division instruction. */ 686 int arm_arch_hwdiv; 687 688 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we 689 must report the mode of the memory reference from PRINT_OPERAND to 690 PRINT_OPERAND_ADDRESS. */ 691 enum machine_mode output_memory_reference_mode; 692 693 /* The register number to be used for the PIC offset register. */ 694 unsigned arm_pic_register = INVALID_REGNUM; 695 696 /* Set to 1 after arm_reorg has started. Reset to start at the start of 697 the next function. */ 698 static int after_arm_reorg = 0; 699 700 /* The maximum number of insns to be used when loading a constant. */ 701 static int arm_constant_limit = 3; 702 703 static enum arm_pcs arm_pcs_default; 704 705 /* For an explanation of these variables, see final_prescan_insn below. */ 706 int arm_ccfsm_state; 707 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */ 708 enum arm_cond_code arm_current_cc; 709 rtx arm_target_insn; 710 int arm_target_label; 711 /* The number of conditionally executed insns, including the current insn. */ 712 int arm_condexec_count = 0; 713 /* A bitmask specifying the patterns for the IT block. 714 Zero means do not output an IT block before this insn. 
*/ 715 int arm_condexec_mask = 0; 716 /* The number of bits used in arm_condexec_mask. */ 717 int arm_condexec_masklen = 0; 718 719 /* The condition codes of the ARM, and the inverse function. */ 720 static const char * const arm_condition_codes[] = 721 { 722 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", 723 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" 724 }; 725 726 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl") 727 #define streq(string1, string2) (strcmp (string1, string2) == 0) 728 729 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \ 730 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \ 731 | (1 << PIC_OFFSET_TABLE_REGNUM))) 732 733 /* Initialization code. */ 734 735 struct processors 736 { 737 const char *const name; 738 enum processor_type core; 739 const char *arch; 740 const unsigned long flags; 741 bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool); 742 }; 743 744 /* Not all of these give usefully different compilation alternatives, 745 but there is no simple way of generalizing them. */ 746 static const struct processors all_cores[] = 747 { 748 /* ARM Cores */ 749 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \ 750 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs}, 751 #include "arm-cores.def" 752 #undef ARM_CORE 753 {NULL, arm_none, NULL, 0, NULL} 754 }; 755 756 static const struct processors all_architectures[] = 757 { 758 /* ARM Architectures */ 759 /* We don't specify rtx_costs here as it will be figured out 760 from the core. */ 761 762 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL}, 763 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL}, 764 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL}, 765 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL}, 766 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL}, 767 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no 768 implementations that support it, so we will leave it out for now. 
*/ 769 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL}, 770 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL}, 771 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL}, 772 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL}, 773 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL}, 774 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL}, 775 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL}, 776 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL}, 777 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL}, 778 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL}, 779 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL}, 780 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL}, 781 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL}, 782 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL}, 783 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL}, 784 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL}, 785 {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL}, 786 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL}, 787 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, 788 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, 789 {NULL, arm_none, NULL, 0 , NULL} 790 }; 791 792 struct arm_cpu_select 793 { 794 const char * string; 795 const char * name; 796 const struct processors * processors; 797 }; 798 799 /* This is a magic structure. The 'string' field is magically filled in 800 with a pointer to the value specified by the user on the command line 801 assuming that the user has specified such a value. */ 802 803 static struct arm_cpu_select arm_select[] = 804 { 805 /* string name processors */ 806 { NULL, "-mcpu=", all_cores }, 807 { NULL, "-march=", all_architectures }, 808 { NULL, "-mtune=", all_cores } 809 }; 810 811 /* Defines representing the indexes into the above table. */ 812 #define ARM_OPT_SET_CPU 0 813 #define ARM_OPT_SET_ARCH 1 814 #define ARM_OPT_SET_TUNE 2 815 816 /* The name of the preprocessor macro to define for this architecture. */ 817 818 char arm_arch_name[] = "__ARM_ARCH_0UNK__"; 819 820 /* Available values for -mfpu=. */ 821 822 static const struct arm_fpu_desc all_fpus[] = 823 { 824 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false}, 825 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false}, 826 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false}, 827 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false}, 828 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false}, 829 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, 830 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true}, 831 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false}, 832 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true}, 833 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false}, 834 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true}, 835 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false}, 836 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true }, 837 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true}, 838 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true}, 839 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true}, 840 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true}, 841 /* Compatibility aliases. 
*/ 842 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, 843 }; 844 845 846 struct float_abi 847 { 848 const char * name; 849 enum float_abi_type abi_type; 850 }; 851 852 853 /* Available values for -mfloat-abi=. */ 854 855 static const struct float_abi all_float_abis[] = 856 { 857 {"soft", ARM_FLOAT_ABI_SOFT}, 858 {"softfp", ARM_FLOAT_ABI_SOFTFP}, 859 {"hard", ARM_FLOAT_ABI_HARD} 860 }; 861 862 863 struct fp16_format 864 { 865 const char *name; 866 enum arm_fp16_format_type fp16_format_type; 867 }; 868 869 870 /* Available values for -mfp16-format=. */ 871 872 static const struct fp16_format all_fp16_formats[] = 873 { 874 {"none", ARM_FP16_FORMAT_NONE}, 875 {"ieee", ARM_FP16_FORMAT_IEEE}, 876 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE} 877 }; 878 879 880 struct abi_name 881 { 882 const char *name; 883 enum arm_abi_type abi_type; 884 }; 885 886 887 /* Available values for -mabi=. */ 888 889 static const struct abi_name arm_all_abis[] = 890 { 891 {"apcs-gnu", ARM_ABI_APCS}, 892 {"atpcs", ARM_ABI_ATPCS}, 893 {"aapcs", ARM_ABI_AAPCS}, 894 {"iwmmxt", ARM_ABI_IWMMXT}, 895 {"aapcs-linux", ARM_ABI_AAPCS_LINUX} 896 }; 897 898 /* Supported TLS relocations. */ 899 900 enum tls_reloc { 901 TLS_GD32, 902 TLS_LDM32, 903 TLS_LDO32, 904 TLS_IE32, 905 TLS_LE32 906 }; 907 908 /* Emit an insn that's a simple single-set. Both the operands must be known 909 to be valid. */ 910 inline static rtx 911 emit_set_insn (rtx x, rtx y) 912 { 913 return emit_insn (gen_rtx_SET (VOIDmode, x, y)); 914 } 915 916 /* Return the number of bits set in VALUE. */ 917 static unsigned 918 bit_count (unsigned long value) 919 { 920 unsigned long count = 0; 921 922 while (value) 923 { 924 count++; 925 value &= value - 1; /* Clear the least-significant set bit. */ 926 } 927 928 return count; 929 } 930 931 /* Set up library functions unique to ARM. */ 932 933 static void 934 arm_init_libfuncs (void) 935 { 936 /* There are no special library functions unless we are using the 937 ARM BPABI. */ 938 if (!TARGET_BPABI) 939 return; 940 941 /* The functions below are described in Section 4 of the "Run-Time 942 ABI for the ARM architecture", Version 1.0. */ 943 944 /* Double-precision floating-point arithmetic. Table 2. */ 945 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd"); 946 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv"); 947 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul"); 948 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg"); 949 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub"); 950 951 /* Double-precision comparisons. Table 3. */ 952 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq"); 953 set_optab_libfunc (ne_optab, DFmode, NULL); 954 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt"); 955 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple"); 956 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge"); 957 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt"); 958 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun"); 959 960 /* Single-precision floating-point arithmetic. Table 4. */ 961 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd"); 962 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv"); 963 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul"); 964 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg"); 965 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub"); 966 967 /* Single-precision comparisons. Table 5. 
*/
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.
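   For illustration (derived from the code above, not normative): on an
   AAPCS target, where __builtin_va_list is the single-field record
   created by arm_build_builtin_va_list, a user-level "va_list ap;"
   behaves like "struct __va_list { void *__ap; } ap;", and this
   function returns the COMPONENT_REF "ap.__ap"; on non-AAPCS targets
   the va_list object is already the pointer and is returned unchanged.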
*/ 1135 static tree 1136 arm_extract_valist_ptr (tree valist) 1137 { 1138 if (TREE_TYPE (valist) == error_mark_node) 1139 return error_mark_node; 1140 1141 /* On an AAPCS target, the pointer is stored within "struct 1142 va_list". */ 1143 if (TARGET_AAPCS_BASED) 1144 { 1145 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist)); 1146 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field), 1147 valist, ap_field, NULL_TREE); 1148 } 1149 1150 return valist; 1151 } 1152 1153 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */ 1154 static void 1155 arm_expand_builtin_va_start (tree valist, rtx nextarg) 1156 { 1157 valist = arm_extract_valist_ptr (valist); 1158 std_expand_builtin_va_start (valist, nextarg); 1159 } 1160 1161 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ 1162 static tree 1163 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, 1164 gimple_seq *post_p) 1165 { 1166 valist = arm_extract_valist_ptr (valist); 1167 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 1168 } 1169 1170 /* Implement TARGET_HANDLE_OPTION. */ 1171 1172 static bool 1173 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) 1174 { 1175 switch (code) 1176 { 1177 case OPT_march_: 1178 arm_select[1].string = arg; 1179 return true; 1180 1181 case OPT_mcpu_: 1182 arm_select[0].string = arg; 1183 return true; 1184 1185 case OPT_mhard_float: 1186 target_float_abi_name = "hard"; 1187 return true; 1188 1189 case OPT_msoft_float: 1190 target_float_abi_name = "soft"; 1191 return true; 1192 1193 case OPT_mtune_: 1194 arm_select[2].string = arg; 1195 return true; 1196 1197 default: 1198 return true; 1199 } 1200 } 1201 1202 static void 1203 arm_target_help (void) 1204 { 1205 int i; 1206 static int columns = 0; 1207 int remaining; 1208 1209 /* If we have not done so already, obtain the desired maximum width of 1210 the output. Note - this is a duplication of the code at the start of 1211 gcc/opts.c:print_specific_help() - the two copies should probably be 1212 replaced by a single function. */ 1213 if (columns == 0) 1214 { 1215 const char *p; 1216 1217 GET_ENVIRONMENT (p, "COLUMNS"); 1218 if (p != NULL) 1219 { 1220 int value = atoi (p); 1221 1222 if (value > 0) 1223 columns = value; 1224 } 1225 1226 if (columns == 0) 1227 /* Use a reasonable default. */ 1228 columns = 80; 1229 } 1230 1231 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n"); 1232 1233 /* The - 2 is because we know that the last entry in the array is NULL. 
*/ 1234 i = ARRAY_SIZE (all_cores) - 2; 1235 gcc_assert (i > 0); 1236 printf (" %s", all_cores[i].name); 1237 remaining = columns - (strlen (all_cores[i].name) + 4); 1238 gcc_assert (remaining >= 0); 1239 1240 while (i--) 1241 { 1242 int len = strlen (all_cores[i].name); 1243 1244 if (remaining > len + 2) 1245 { 1246 printf (", %s", all_cores[i].name); 1247 remaining -= len + 2; 1248 } 1249 else 1250 { 1251 if (remaining > 0) 1252 printf (","); 1253 printf ("\n %s", all_cores[i].name); 1254 remaining = columns - (len + 4); 1255 } 1256 } 1257 1258 printf ("\n\n Known ARM architectures (for use with the -march= option):\n"); 1259 1260 i = ARRAY_SIZE (all_architectures) - 2; 1261 gcc_assert (i > 0); 1262 1263 printf (" %s", all_architectures[i].name); 1264 remaining = columns - (strlen (all_architectures[i].name) + 4); 1265 gcc_assert (remaining >= 0); 1266 1267 while (i--) 1268 { 1269 int len = strlen (all_architectures[i].name); 1270 1271 if (remaining > len + 2) 1272 { 1273 printf (", %s", all_architectures[i].name); 1274 remaining -= len + 2; 1275 } 1276 else 1277 { 1278 if (remaining > 0) 1279 printf (","); 1280 printf ("\n %s", all_architectures[i].name); 1281 remaining = columns - (len + 4); 1282 } 1283 } 1284 printf ("\n"); 1285 1286 } 1287 1288 /* Fix up any incompatible options that the user has specified. 1289 This has now turned into a maze. */ 1290 void 1291 arm_override_options (void) 1292 { 1293 unsigned i; 1294 enum processor_type target_arch_cpu = arm_none; 1295 enum processor_type selected_cpu = arm_none; 1296 1297 /* Set up the flags based on the cpu/architecture selected by the user. */ 1298 for (i = ARRAY_SIZE (arm_select); i--;) 1299 { 1300 struct arm_cpu_select * ptr = arm_select + i; 1301 1302 if (ptr->string != NULL && ptr->string[0] != '\0') 1303 { 1304 const struct processors * sel; 1305 1306 for (sel = ptr->processors; sel->name != NULL; sel++) 1307 if (streq (ptr->string, sel->name)) 1308 { 1309 /* Set the architecture define. */ 1310 if (i != ARM_OPT_SET_TUNE) 1311 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch); 1312 1313 /* Determine the processor core for which we should 1314 tune code-generation. */ 1315 if (/* -mcpu= is a sensible default. */ 1316 i == ARM_OPT_SET_CPU 1317 /* -mtune= overrides -mcpu= and -march=. */ 1318 || i == ARM_OPT_SET_TUNE) 1319 arm_tune = (enum processor_type) (sel - ptr->processors); 1320 1321 /* Remember the CPU associated with this architecture. 1322 If no other option is used to set the CPU type, 1323 we'll use this to guess the most suitable tuning 1324 options. */ 1325 if (i == ARM_OPT_SET_ARCH) 1326 target_arch_cpu = sel->core; 1327 1328 if (i == ARM_OPT_SET_CPU) 1329 selected_cpu = (enum processor_type) (sel - ptr->processors); 1330 1331 if (i != ARM_OPT_SET_TUNE) 1332 { 1333 /* If we have been given an architecture and a processor 1334 make sure that they are compatible. We only generate 1335 a warning though, and we prefer the CPU over the 1336 architecture. */ 1337 if (insn_flags != 0 && (insn_flags ^ sel->flags)) 1338 warning (0, "switch -mcpu=%s conflicts with -march= switch", 1339 ptr->string); 1340 1341 insn_flags = sel->flags; 1342 } 1343 1344 break; 1345 } 1346 1347 if (sel->name == NULL) 1348 error ("bad value (%s) for %s switch", ptr->string, ptr->name); 1349 } 1350 } 1351 1352 /* Guess the tuning options from the architecture if necessary. */ 1353 if (arm_tune == arm_none) 1354 arm_tune = target_arch_cpu; 1355 1356 /* If the user did not specify a processor, choose one for them. 
*/ 1357 if (insn_flags == 0) 1358 { 1359 const struct processors * sel; 1360 unsigned int sought; 1361 1362 selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT; 1363 if (selected_cpu == arm_none) 1364 { 1365 #ifdef SUBTARGET_CPU_DEFAULT 1366 /* Use the subtarget default CPU if none was specified by 1367 configure. */ 1368 selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT; 1369 #endif 1370 /* Default to ARM6. */ 1371 if (selected_cpu == arm_none) 1372 selected_cpu = arm6; 1373 } 1374 sel = &all_cores[selected_cpu]; 1375 1376 insn_flags = sel->flags; 1377 1378 /* Now check to see if the user has specified some command line 1379 switch that require certain abilities from the cpu. */ 1380 sought = 0; 1381 1382 if (TARGET_INTERWORK || TARGET_THUMB) 1383 { 1384 sought |= (FL_THUMB | FL_MODE32); 1385 1386 /* There are no ARM processors that support both APCS-26 and 1387 interworking. Therefore we force FL_MODE26 to be removed 1388 from insn_flags here (if it was set), so that the search 1389 below will always be able to find a compatible processor. */ 1390 insn_flags &= ~FL_MODE26; 1391 } 1392 1393 if (sought != 0 && ((sought & insn_flags) != sought)) 1394 { 1395 /* Try to locate a CPU type that supports all of the abilities 1396 of the default CPU, plus the extra abilities requested by 1397 the user. */ 1398 for (sel = all_cores; sel->name != NULL; sel++) 1399 if ((sel->flags & sought) == (sought | insn_flags)) 1400 break; 1401 1402 if (sel->name == NULL) 1403 { 1404 unsigned current_bit_count = 0; 1405 const struct processors * best_fit = NULL; 1406 1407 /* Ideally we would like to issue an error message here 1408 saying that it was not possible to find a CPU compatible 1409 with the default CPU, but which also supports the command 1410 line options specified by the programmer, and so they 1411 ought to use the -mcpu=<name> command line option to 1412 override the default CPU type. 1413 1414 If we cannot find a cpu that has both the 1415 characteristics of the default cpu and the given 1416 command line options we scan the array again looking 1417 for a best match. */ 1418 for (sel = all_cores; sel->name != NULL; sel++) 1419 if ((sel->flags & sought) == sought) 1420 { 1421 unsigned count; 1422 1423 count = bit_count (sel->flags & insn_flags); 1424 1425 if (count >= current_bit_count) 1426 { 1427 best_fit = sel; 1428 current_bit_count = count; 1429 } 1430 } 1431 1432 gcc_assert (best_fit); 1433 sel = best_fit; 1434 } 1435 1436 insn_flags = sel->flags; 1437 } 1438 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch); 1439 arm_default_cpu = (enum processor_type) (sel - all_cores); 1440 if (arm_tune == arm_none) 1441 arm_tune = arm_default_cpu; 1442 } 1443 1444 /* The processor for which we should tune should now have been 1445 chosen. 
*/ 1446 gcc_assert (arm_tune != arm_none); 1447 1448 tune_flags = all_cores[(int)arm_tune].flags; 1449 1450 if (target_fp16_format_name) 1451 { 1452 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) 1453 { 1454 if (streq (all_fp16_formats[i].name, target_fp16_format_name)) 1455 { 1456 arm_fp16_format = all_fp16_formats[i].fp16_format_type; 1457 break; 1458 } 1459 } 1460 if (i == ARRAY_SIZE (all_fp16_formats)) 1461 error ("invalid __fp16 format option: -mfp16-format=%s", 1462 target_fp16_format_name); 1463 } 1464 else 1465 arm_fp16_format = ARM_FP16_FORMAT_NONE; 1466 1467 if (target_abi_name) 1468 { 1469 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++) 1470 { 1471 if (streq (arm_all_abis[i].name, target_abi_name)) 1472 { 1473 arm_abi = arm_all_abis[i].abi_type; 1474 break; 1475 } 1476 } 1477 if (i == ARRAY_SIZE (arm_all_abis)) 1478 error ("invalid ABI option: -mabi=%s", target_abi_name); 1479 } 1480 else 1481 arm_abi = ARM_DEFAULT_ABI; 1482 1483 /* Make sure that the processor choice does not conflict with any of the 1484 other command line choices. */ 1485 if (TARGET_ARM && !(insn_flags & FL_NOTM)) 1486 error ("target CPU does not support ARM mode"); 1487 1488 /* BPABI targets use linker tricks to allow interworking on cores 1489 without thumb support. */ 1490 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI)) 1491 { 1492 warning (0, "target CPU does not support interworking" ); 1493 target_flags &= ~MASK_INTERWORK; 1494 } 1495 1496 if (TARGET_THUMB && !(insn_flags & FL_THUMB)) 1497 { 1498 warning (0, "target CPU does not support THUMB instructions"); 1499 target_flags &= ~MASK_THUMB; 1500 } 1501 1502 if (TARGET_APCS_FRAME && TARGET_THUMB) 1503 { 1504 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */ 1505 target_flags &= ~MASK_APCS_FRAME; 1506 } 1507 1508 /* Callee super interworking implies thumb interworking. Adding 1509 this to the flags here simplifies the logic elsewhere. */ 1510 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING) 1511 target_flags |= MASK_INTERWORK; 1512 1513 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done 1514 from here where no function is being compiled currently. */ 1515 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM) 1516 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); 1517 1518 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING) 1519 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); 1520 1521 if (TARGET_ARM && TARGET_CALLER_INTERWORKING) 1522 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb"); 1523 1524 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME) 1525 { 1526 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame"); 1527 target_flags |= MASK_APCS_FRAME; 1528 } 1529 1530 if (TARGET_POKE_FUNCTION_NAME) 1531 target_flags |= MASK_APCS_FRAME; 1532 1533 if (TARGET_APCS_REENT && flag_pic) 1534 error ("-fpic and -mapcs-reent are incompatible"); 1535 1536 if (TARGET_APCS_REENT) 1537 warning (0, "APCS reentrant code not supported. Ignored"); 1538 1539 /* If this target is normally configured to use APCS frames, warn if they 1540 are turned off and debugging is turned on. 
*/ 1541 if (TARGET_ARM 1542 && write_symbols != NO_DEBUG 1543 && !TARGET_APCS_FRAME 1544 && (TARGET_DEFAULT & MASK_APCS_FRAME)) 1545 warning (0, "-g with -mno-apcs-frame may not give sensible debugging"); 1546 1547 if (TARGET_APCS_FLOAT) 1548 warning (0, "passing floating point arguments in fp regs not yet supported"); 1549 1550 /* Initialize boolean versions of the flags, for use in the arm.md file. */ 1551 arm_arch3m = (insn_flags & FL_ARCH3M) != 0; 1552 arm_arch4 = (insn_flags & FL_ARCH4) != 0; 1553 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0); 1554 arm_arch5 = (insn_flags & FL_ARCH5) != 0; 1555 arm_arch5e = (insn_flags & FL_ARCH5E) != 0; 1556 arm_arch6 = (insn_flags & FL_ARCH6) != 0; 1557 arm_arch6k = (insn_flags & FL_ARCH6K) != 0; 1558 arm_arch_notm = (insn_flags & FL_NOTM) != 0; 1559 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; 1560 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; 1561 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; 1562 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0; 1563 1564 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; 1565 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; 1566 thumb_code = (TARGET_ARM == 0); 1567 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; 1568 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; 1569 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; 1570 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; 1571 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; 1572 1573 /* If we are not using the default (ARM mode) section anchor offset 1574 ranges, then set the correct ranges now. */ 1575 if (TARGET_THUMB1) 1576 { 1577 /* Thumb-1 LDR instructions cannot have negative offsets. 1578 Permissible positive offset ranges are 5-bit (for byte loads), 1579 6-bit (for halfword loads), or 7-bit (for word loads). 1580 Empirical results suggest a 7-bit anchor range gives the best 1581 overall code size. */ 1582 targetm.min_anchor_offset = 0; 1583 targetm.max_anchor_offset = 127; 1584 } 1585 else if (TARGET_THUMB2) 1586 { 1587 /* The minimum is set such that the total size of the block 1588 for a particular anchor is 248 + 1 + 4095 bytes, which is 1589 divisible by eight, ensuring natural spacing of anchors. */ 1590 targetm.min_anchor_offset = -248; 1591 targetm.max_anchor_offset = 4095; 1592 } 1593 1594 /* V5 code we generate is completely interworking capable, so we turn off 1595 TARGET_INTERWORK here to avoid many tests later on. */ 1596 1597 /* XXX However, we must pass the right pre-processor defines to CPP 1598 or GLD can get confused. This is a hack. 
*/ 1599 if (TARGET_INTERWORK) 1600 arm_cpp_interwork = 1; 1601 1602 if (arm_arch5) 1603 target_flags &= ~MASK_INTERWORK; 1604 1605 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN) 1606 error ("iwmmxt requires an AAPCS compatible ABI for proper operation"); 1607 1608 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT) 1609 error ("iwmmxt abi requires an iwmmxt capable cpu"); 1610 1611 if (target_fpu_name == NULL && target_fpe_name != NULL) 1612 { 1613 if (streq (target_fpe_name, "2")) 1614 target_fpu_name = "fpe2"; 1615 else if (streq (target_fpe_name, "3")) 1616 target_fpu_name = "fpe3"; 1617 else 1618 error ("invalid floating point emulation option: -mfpe=%s", 1619 target_fpe_name); 1620 } 1621 1622 if (target_fpu_name == NULL) 1623 { 1624 #ifdef FPUTYPE_DEFAULT 1625 target_fpu_name = FPUTYPE_DEFAULT; 1626 #else 1627 if (arm_arch_cirrus) 1628 target_fpu_name = "maverick"; 1629 else 1630 target_fpu_name = "fpe2"; 1631 #endif 1632 } 1633 1634 arm_fpu_desc = NULL; 1635 for (i = 0; i < ARRAY_SIZE (all_fpus); i++) 1636 { 1637 if (streq (all_fpus[i].name, target_fpu_name)) 1638 { 1639 arm_fpu_desc = &all_fpus[i]; 1640 break; 1641 } 1642 } 1643 1644 if (!arm_fpu_desc) 1645 { 1646 error ("invalid floating point option: -mfpu=%s", target_fpu_name); 1647 return; 1648 } 1649 1650 switch (arm_fpu_desc->model) 1651 { 1652 case ARM_FP_MODEL_FPA: 1653 if (arm_fpu_desc->rev == 2) 1654 arm_fpu_attr = FPU_FPE2; 1655 else if (arm_fpu_desc->rev == 3) 1656 arm_fpu_attr = FPU_FPE3; 1657 else 1658 arm_fpu_attr = FPU_FPA; 1659 break; 1660 1661 case ARM_FP_MODEL_MAVERICK: 1662 arm_fpu_attr = FPU_MAVERICK; 1663 break; 1664 1665 case ARM_FP_MODEL_VFP: 1666 arm_fpu_attr = FPU_VFP; 1667 break; 1668 1669 default: 1670 gcc_unreachable(); 1671 } 1672 1673 if (target_float_abi_name != NULL) 1674 { 1675 /* The user specified a FP ABI. */ 1676 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++) 1677 { 1678 if (streq (all_float_abis[i].name, target_float_abi_name)) 1679 { 1680 arm_float_abi = all_float_abis[i].abi_type; 1681 break; 1682 } 1683 } 1684 if (i == ARRAY_SIZE (all_float_abis)) 1685 error ("invalid floating point abi: -mfloat-abi=%s", 1686 target_float_abi_name); 1687 } 1688 else 1689 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI; 1690 1691 if (TARGET_AAPCS_BASED 1692 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA)) 1693 error ("FPA is unsupported in the AAPCS"); 1694 1695 if (TARGET_AAPCS_BASED) 1696 { 1697 if (TARGET_CALLER_INTERWORKING) 1698 error ("AAPCS does not support -mcaller-super-interworking"); 1699 else 1700 if (TARGET_CALLEE_INTERWORKING) 1701 error ("AAPCS does not support -mcallee-super-interworking"); 1702 } 1703 1704 /* FPA and iWMMXt are incompatible because the insn encodings overlap. 1705 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon 1706 will ever exist. GCC makes no attempt to support this combination. */ 1707 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT) 1708 sorry ("iWMMXt and hardware floating point"); 1709 1710 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */ 1711 if (TARGET_THUMB2 && TARGET_IWMMXT) 1712 sorry ("Thumb-2 iWMMXt"); 1713 1714 /* __fp16 support currently assumes the core has ldrh. */ 1715 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) 1716 sorry ("__fp16 and no ldrh"); 1717 1718 /* If soft-float is specified then don't use FPU. 
*/ 1719 if (TARGET_SOFT_FLOAT) 1720 arm_fpu_attr = FPU_NONE; 1721 1722 if (TARGET_AAPCS_BASED) 1723 { 1724 if (arm_abi == ARM_ABI_IWMMXT) 1725 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT; 1726 else if (arm_float_abi == ARM_FLOAT_ABI_HARD 1727 && TARGET_HARD_FLOAT 1728 && TARGET_VFP) 1729 arm_pcs_default = ARM_PCS_AAPCS_VFP; 1730 else 1731 arm_pcs_default = ARM_PCS_AAPCS; 1732 } 1733 else 1734 { 1735 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) 1736 sorry ("-mfloat-abi=hard and VFP"); 1737 1738 if (arm_abi == ARM_ABI_APCS) 1739 arm_pcs_default = ARM_PCS_APCS; 1740 else 1741 arm_pcs_default = ARM_PCS_ATPCS; 1742 } 1743 1744 /* For arm2/3 there is no need to do any scheduling if there is only 1745 a floating point emulator, or we are doing software floating-point. */ 1746 if ((TARGET_SOFT_FLOAT 1747 || (TARGET_FPA && arm_fpu_desc->rev)) 1748 && (tune_flags & FL_MODE32) == 0) 1749 flag_schedule_insns = flag_schedule_insns_after_reload = 0; 1750 1751 if (target_thread_switch) 1752 { 1753 if (strcmp (target_thread_switch, "soft") == 0) 1754 target_thread_pointer = TP_SOFT; 1755 else if (strcmp (target_thread_switch, "auto") == 0) 1756 target_thread_pointer = TP_AUTO; 1757 else if (strcmp (target_thread_switch, "cp15") == 0) 1758 target_thread_pointer = TP_CP15; 1759 else 1760 error ("invalid thread pointer option: -mtp=%s", target_thread_switch); 1761 } 1762 1763 /* Use the cp15 method if it is available. */ 1764 if (target_thread_pointer == TP_AUTO) 1765 { 1766 if (arm_arch6k && !TARGET_THUMB1) 1767 target_thread_pointer = TP_CP15; 1768 else 1769 target_thread_pointer = TP_SOFT; 1770 } 1771 1772 if (TARGET_HARD_TP && TARGET_THUMB1) 1773 error ("can not use -mtp=cp15 with 16-bit Thumb"); 1774 1775 /* Override the default structure alignment for AAPCS ABI. */ 1776 if (TARGET_AAPCS_BASED) 1777 arm_structure_size_boundary = 8; 1778 1779 if (structure_size_string != NULL) 1780 { 1781 int size = strtol (structure_size_string, NULL, 0); 1782 1783 if (size == 8 || size == 32 1784 || (ARM_DOUBLEWORD_ALIGN && size == 64)) 1785 arm_structure_size_boundary = size; 1786 else 1787 warning (0, "structure size boundary can only be set to %s", 1788 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32"); 1789 } 1790 1791 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic) 1792 { 1793 error ("RTP PIC is incompatible with Thumb"); 1794 flag_pic = 0; 1795 } 1796 1797 /* If stack checking is disabled, we can use r10 as the PIC register, 1798 which keeps r9 available. The EABI specifies r9 as the PIC register. */ 1799 if (flag_pic && TARGET_SINGLE_PIC_BASE) 1800 { 1801 if (TARGET_VXWORKS_RTP) 1802 warning (0, "RTP PIC is incompatible with -msingle-pic-base"); 1803 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10; 1804 } 1805 1806 if (flag_pic && TARGET_VXWORKS_RTP) 1807 arm_pic_register = 9; 1808 1809 if (arm_pic_register_string != NULL) 1810 { 1811 int pic_register = decode_reg_name (arm_pic_register_string); 1812 1813 if (!flag_pic) 1814 warning (0, "-mpic-register= is useless without -fpic"); 1815 1816 /* Prevent the user from choosing an obviously stupid PIC register. 
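For instance, naming the stack pointer, the program counter, the frame pointer, or any call-clobbered register with -mpic-register= trips the checks below, as does choosing anything other than the register VxWorks RTP requires.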
*/ 1817 else if (pic_register < 0 || call_used_regs[pic_register] 1818 || pic_register == HARD_FRAME_POINTER_REGNUM 1819 || pic_register == STACK_POINTER_REGNUM 1820 || pic_register >= PC_REGNUM 1821 || (TARGET_VXWORKS_RTP 1822 && (unsigned int) pic_register != arm_pic_register)) 1823 error ("unable to use '%s' for PIC register", arm_pic_register_string); 1824 else 1825 arm_pic_register = pic_register; 1826 } 1827 1828 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */ 1829 if (fix_cm3_ldrd == 2) 1830 { 1831 if (selected_cpu == cortexm3) 1832 fix_cm3_ldrd = 1; 1833 else 1834 fix_cm3_ldrd = 0; 1835 } 1836 1837 if (TARGET_THUMB1 && flag_schedule_insns) 1838 { 1839 /* Don't warn since it's on by default in -O2. */ 1840 flag_schedule_insns = 0; 1841 } 1842 1843 if (optimize_size) 1844 { 1845 arm_constant_limit = 1; 1846 1847 /* If optimizing for size, bump the number of instructions that we 1848 are prepared to conditionally execute (even on a StrongARM). */ 1849 max_insns_skipped = 6; 1850 } 1851 else 1852 { 1853 /* For processors with load scheduling, it never costs more than 1854 2 cycles to load a constant, and the load scheduler may well 1855 reduce that to 1. */ 1856 if (arm_ld_sched) 1857 arm_constant_limit = 1; 1858 1859 /* On XScale the longer latency of a load makes it more difficult 1860 to achieve a good schedule, so it's faster to synthesize 1861 constants that can be done in two insns. */ 1862 if (arm_tune_xscale) 1863 arm_constant_limit = 2; 1864 1865 /* StrongARM has early execution of branches, so a sequence 1866 that is worth skipping is shorter. */ 1867 if (arm_tune_strongarm) 1868 max_insns_skipped = 3; 1869 } 1870 1871 /* Hot/Cold partitioning is not currently supported, since we can't 1872 handle literal pool placement in that case. */ 1873 if (flag_reorder_blocks_and_partition) 1874 { 1875 inform (input_location, 1876 "-freorder-blocks-and-partition not supported on this architecture"); 1877 flag_reorder_blocks_and_partition = 0; 1878 flag_reorder_blocks = 1; 1879 } 1880 1881 /* Register global variables with the garbage collector. */ 1882 arm_add_gc_roots (); 1883 } 1884 1885 static void 1886 arm_add_gc_roots (void) 1887 { 1888 gcc_obstack_init(&minipool_obstack); 1889 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0); 1890 } 1891 1892 /* A table of known ARM exception types. 1893 For use with the interrupt function attribute. */ 1894 1895 typedef struct 1896 { 1897 const char *const arg; 1898 const unsigned long return_value; 1899 } 1900 isr_attribute_arg; 1901 1902 static const isr_attribute_arg isr_attribute_args [] = 1903 { 1904 { "IRQ", ARM_FT_ISR }, 1905 { "irq", ARM_FT_ISR }, 1906 { "FIQ", ARM_FT_FIQ }, 1907 { "fiq", ARM_FT_FIQ }, 1908 { "ABORT", ARM_FT_ISR }, 1909 { "abort", ARM_FT_ISR }, 1910 { "ABORT", ARM_FT_ISR }, 1911 { "abort", ARM_FT_ISR }, 1912 { "UNDEF", ARM_FT_EXCEPTION }, 1913 { "undef", ARM_FT_EXCEPTION }, 1914 { "SWI", ARM_FT_EXCEPTION }, 1915 { "swi", ARM_FT_EXCEPTION }, 1916 { NULL, ARM_FT_NORMAL } 1917 }; 1918 1919 /* Returns the (interrupt) function type of the current 1920 function, or ARM_FT_UNKNOWN if the type cannot be determined. */ 1921 1922 static unsigned long 1923 arm_isr_value (tree argument) 1924 { 1925 const isr_attribute_arg * ptr; 1926 const char * arg; 1927 1928 if (!arm_arch_notm) 1929 return ARM_FT_NORMAL | ARM_FT_STACKALIGN; 1930 1931 /* No argument - default to IRQ. */ 1932 if (argument == NULL_TREE) 1933 return ARM_FT_ISR; 1934 1935 /* Get the value of the argument. 
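The string examined here comes from an attribute written in the source; a purely illustrative example would be: void handler (void) __attribute__ ((interrupt ("IRQ")));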
*/ 1936 if (TREE_VALUE (argument) == NULL_TREE 1937 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST) 1938 return ARM_FT_UNKNOWN; 1939 1940 arg = TREE_STRING_POINTER (TREE_VALUE (argument)); 1941 1942 /* Check it against the list of known arguments. */ 1943 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++) 1944 if (streq (arg, ptr->arg)) 1945 return ptr->return_value; 1946 1947 /* An unrecognized interrupt type. */ 1948 return ARM_FT_UNKNOWN; 1949 } 1950 1951 /* Computes the type of the current function. */ 1952 1953 static unsigned long 1954 arm_compute_func_type (void) 1955 { 1956 unsigned long type = ARM_FT_UNKNOWN; 1957 tree a; 1958 tree attr; 1959 1960 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL); 1961 1962 /* Decide if the current function is volatile. Such functions 1963 never return, and many memory cycles can be saved by not storing 1964 register values that will never be needed again. This optimization 1965 was added to speed up context switching in a kernel application. */ 1966 if (optimize > 0 1967 && (TREE_NOTHROW (current_function_decl) 1968 || !(flag_unwind_tables 1969 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))) 1970 && TREE_THIS_VOLATILE (current_function_decl)) 1971 type |= ARM_FT_VOLATILE; 1972 1973 if (cfun->static_chain_decl != NULL) 1974 type |= ARM_FT_NESTED; 1975 1976 attr = DECL_ATTRIBUTES (current_function_decl); 1977 1978 a = lookup_attribute ("naked", attr); 1979 if (a != NULL_TREE) 1980 type |= ARM_FT_NAKED; 1981 1982 a = lookup_attribute ("isr", attr); 1983 if (a == NULL_TREE) 1984 a = lookup_attribute ("interrupt", attr); 1985 1986 if (a == NULL_TREE) 1987 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL; 1988 else 1989 type |= arm_isr_value (TREE_VALUE (a)); 1990 1991 return type; 1992 } 1993 1994 /* Returns the type of the current function. */ 1995 1996 unsigned long 1997 arm_current_func_type (void) 1998 { 1999 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN) 2000 cfun->machine->func_type = arm_compute_func_type (); 2001 2002 return cfun->machine->func_type; 2003 } 2004 2005 bool 2006 arm_allocate_stack_slots_for_args (void) 2007 { 2008 /* Naked functions should not allocate stack slots for arguments. */ 2009 return !IS_NAKED (arm_current_func_type ()); 2010 } 2011 2012 2013 /* Output assembler code for a block containing the constant parts 2014 of a trampoline, leaving space for the variable parts. 2015 2016 On the ARM, (if r8 is the static chain regnum, and remembering that 2017 referencing pc adds an offset of 8) the trampoline looks like: 2018 ldr r8, [pc, #0] 2019 ldr pc, [pc] 2020 .word static chain value 2021 .word function's address 2022 XXX FIXME: When the trampoline returns, r8 will be clobbered. */ 2023 2024 static void 2025 arm_asm_trampoline_template (FILE *f) 2026 { 2027 if (TARGET_ARM) 2028 { 2029 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM); 2030 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM); 2031 } 2032 else if (TARGET_THUMB2) 2033 { 2034 /* The Thumb-2 trampoline is similar to the arm implementation. 2035 Unlike 16-bit Thumb, we enter the stub in thumb mode. 
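A sketch of why the #4 offsets below work: in Thumb state the pc reads as the address of the current instruction plus 4, so the first ldr.w (at offset 0) picks up the static chain word at offset 8, and the second (at offset 4) picks up the function address at offset 12, mirroring the ARM layout shown above.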
*/ 2036 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", 2037 STATIC_CHAIN_REGNUM, PC_REGNUM); 2038 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM); 2039 } 2040 else 2041 { 2042 ASM_OUTPUT_ALIGN (f, 2); 2043 fprintf (f, "\t.code\t16\n"); 2044 fprintf (f, ".Ltrampoline_start:\n"); 2045 asm_fprintf (f, "\tpush\t{r0, r1}\n"); 2046 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); 2047 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM); 2048 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); 2049 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM); 2050 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM); 2051 } 2052 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); 2053 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); 2054 } 2055 2056 /* Emit RTL insns to initialize the variable parts of a trampoline. */ 2057 2058 static void 2059 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) 2060 { 2061 rtx fnaddr, mem, a_tramp; 2062 2063 emit_block_move (m_tramp, assemble_trampoline_template (), 2064 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); 2065 2066 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12); 2067 emit_move_insn (mem, chain_value); 2068 2069 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16); 2070 fnaddr = XEXP (DECL_RTL (fndecl), 0); 2071 emit_move_insn (mem, fnaddr); 2072 2073 a_tramp = XEXP (m_tramp, 0); 2074 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), 2075 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode, 2076 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode); 2077 } 2078 2079 /* Thumb trampolines should be entered in thumb mode, so set 2080 the bottom bit of the address. */ 2081 2082 static rtx 2083 arm_trampoline_adjust_address (rtx addr) 2084 { 2085 if (TARGET_THUMB) 2086 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx, 2087 NULL, 0, OPTAB_LIB_WIDEN); 2088 return addr; 2089 } 2090 2091 /* Return 1 if it is possible to return using a single instruction. 2092 If SIBLING is non-null, this is a test for a return before a sibling 2093 call. SIBLING is the call insn, so we can examine its register usage. */ 2094 2095 int 2096 use_return_insn (int iscond, rtx sibling) 2097 { 2098 int regno; 2099 unsigned int func_type; 2100 unsigned long saved_int_regs; 2101 unsigned HOST_WIDE_INT stack_adjust; 2102 arm_stack_offsets *offsets; 2103 2104 /* Never use a return instruction before reload has run. */ 2105 if (!reload_completed) 2106 return 0; 2107 2108 func_type = arm_current_func_type (); 2109 2110 /* Naked, volatile and stack alignment functions need special 2111 consideration. */ 2112 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN)) 2113 return 0; 2114 2115 /* So do interrupt functions that use the frame pointer and Thumb 2116 interrupt functions. */ 2117 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB)) 2118 return 0; 2119 2120 offsets = arm_get_frame_offsets (); 2121 stack_adjust = offsets->outgoing_args - offsets->saved_regs; 2122 2123 /* As do variadic functions. */ 2124 if (crtl->args.pretend_args_size 2125 || cfun->machine->uses_anonymous_args 2126 /* Or if the function calls __builtin_eh_return () */ 2127 || crtl->calls_eh_return 2128 /* Or if the function calls alloca */ 2129 || cfun->calls_alloca 2130 /* Or if there is a stack adjustment. However, if the stack pointer 2131 is saved on the stack, we can use a pre-incrementing stack load. 
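That is the stack_adjust == 4 case admitted just below, which only arises when an APCS frame with a frame pointer is in use.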
*/ 2132 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed 2133 && stack_adjust == 4))) 2134 return 0; 2135 2136 saved_int_regs = offsets->saved_regs_mask; 2137 2138 /* Unfortunately, the insn 2139 2140 ldmib sp, {..., sp, ...} 2141 2142 triggers a bug on most SA-110 based devices, such that the stack 2143 pointer won't be correctly restored if the instruction takes a 2144 page fault. We work around this problem by popping r3 along with 2145 the other registers, since that is never slower than executing 2146 another instruction. 2147 2148 We test for !arm_arch5 here, because code for any architecture 2149 less than this could potentially be run on one of the buggy 2150 chips. */ 2151 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM) 2152 { 2153 /* Validate that r3 is a call-clobbered register (always true in 2154 the default abi) ... */ 2155 if (!call_used_regs[3]) 2156 return 0; 2157 2158 /* ... that it isn't being used for a return value ... */ 2159 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD)) 2160 return 0; 2161 2162 /* ... or for a tail-call argument ... */ 2163 if (sibling) 2164 { 2165 gcc_assert (GET_CODE (sibling) == CALL_INSN); 2166 2167 if (find_regno_fusage (sibling, USE, 3)) 2168 return 0; 2169 } 2170 2171 /* ... and that there are no call-saved registers in r0-r2 2172 (always true in the default ABI). */ 2173 if (saved_int_regs & 0x7) 2174 return 0; 2175 } 2176 2177 /* Can't be done if interworking with Thumb, and any registers have been 2178 stacked. */ 2179 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type)) 2180 return 0; 2181 2182 /* On StrongARM, conditional returns are expensive if they aren't 2183 taken and multiple registers have been stacked. */ 2184 if (iscond && arm_tune_strongarm) 2185 { 2186 /* Conditional return when just the LR is stored is a simple 2187 conditional-load instruction, that's not expensive. */ 2188 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM)) 2189 return 0; 2190 2191 if (flag_pic 2192 && arm_pic_register != INVALID_REGNUM 2193 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) 2194 return 0; 2195 } 2196 2197 /* If there are saved registers but the LR isn't saved, then we need 2198 two instructions for the return. */ 2199 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM))) 2200 return 0; 2201 2202 /* Can't be done if any of the FPA regs are pushed, 2203 since this also requires an insn. */ 2204 if (TARGET_HARD_FLOAT && TARGET_FPA) 2205 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++) 2206 if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) 2207 return 0; 2208 2209 /* Likewise VFP regs. */ 2210 if (TARGET_HARD_FLOAT && TARGET_VFP) 2211 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++) 2212 if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) 2213 return 0; 2214 2215 if (TARGET_REALLY_IWMMXT) 2216 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++) 2217 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) 2218 return 0; 2219 2220 return 1; 2221 } 2222 2223 /* Return TRUE if int I is a valid immediate ARM constant. */ 2224 2225 int 2226 const_ok_for_arm (HOST_WIDE_INT i) 2227 { 2228 int lowbit; 2229 2230 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must 2231 be all zero, or all one. 
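As an illustration of what the rest of this routine accepts in ARM state: 0xff000000 and 0x000003fc are valid immediates (an 8-bit value rotated right by an even amount), whereas 0x00000101 is not, because its set bits cannot be covered by any single 8-bit field.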
*/ 2232 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0 2233 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) 2234 != ((~(unsigned HOST_WIDE_INT) 0) 2235 & ~(unsigned HOST_WIDE_INT) 0xffffffff))) 2236 return FALSE; 2237 2238 i &= (unsigned HOST_WIDE_INT) 0xffffffff; 2239 2240 /* Fast return for 0 and small values. We must do this for zero, since 2241 the code below can't handle that one case. */ 2242 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0) 2243 return TRUE; 2244 2245 /* Get the number of trailing zeros. */ 2246 lowbit = ffs((int) i) - 1; 2247 2248 /* Only even shifts are allowed in ARM mode so round down to the 2249 nearest even number. */ 2250 if (TARGET_ARM) 2251 lowbit &= ~1; 2252 2253 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0) 2254 return TRUE; 2255 2256 if (TARGET_ARM) 2257 { 2258 /* Allow rotated constants in ARM mode. */ 2259 if (lowbit <= 4 2260 && ((i & ~0xc000003f) == 0 2261 || (i & ~0xf000000f) == 0 2262 || (i & ~0xfc000003) == 0)) 2263 return TRUE; 2264 } 2265 else 2266 { 2267 HOST_WIDE_INT v; 2268 2269 /* Allow repeated pattern. */ 2270 v = i & 0xff; 2271 v |= v << 16; 2272 if (i == v || i == (v | (v << 8))) 2273 return TRUE; 2274 } 2275 2276 return FALSE; 2277 } 2278 2279 /* Return true if I is a valid constant for the operation CODE. */ 2280 static int 2281 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) 2282 { 2283 if (const_ok_for_arm (i)) 2284 return 1; 2285 2286 switch (code) 2287 { 2288 case PLUS: 2289 case COMPARE: 2290 case EQ: 2291 case NE: 2292 case GT: 2293 case LE: 2294 case LT: 2295 case GE: 2296 case GEU: 2297 case LTU: 2298 case GTU: 2299 case LEU: 2300 case UNORDERED: 2301 case ORDERED: 2302 case UNEQ: 2303 case UNGE: 2304 case UNLT: 2305 case UNGT: 2306 case UNLE: 2307 return const_ok_for_arm (ARM_SIGN_EXTEND (-i)); 2308 2309 case MINUS: /* Should only occur with (MINUS I reg) => rsb */ 2310 case XOR: 2311 return 0; 2312 2313 case IOR: 2314 if (TARGET_THUMB2) 2315 return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); 2316 return 0; 2317 2318 case AND: 2319 return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); 2320 2321 default: 2322 gcc_unreachable (); 2323 } 2324 } 2325 2326 /* Emit a sequence of insns to handle a large constant. 2327 CODE is the code of the operation required, it can be any of SET, PLUS, 2328 IOR, AND, XOR, MINUS; 2329 MODE is the mode in which the operation is being performed; 2330 VAL is the integer to operate on; 2331 SOURCE is the other operand (a register, or a null-pointer for SET); 2332 SUBTARGETS means it is safe to create scratch registers if that will 2333 either produce a simpler sequence, or we will want to cse the values. 2334 Return value is the number of insns emitted. */ 2335 2336 /* ??? Tweak this for thumb2. */ 2337 int 2338 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn, 2339 HOST_WIDE_INT val, rtx target, rtx source, int subtargets) 2340 { 2341 rtx cond; 2342 2343 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC) 2344 cond = COND_EXEC_TEST (PATTERN (insn)); 2345 else 2346 cond = NULL_RTX; 2347 2348 if (subtargets || code == SET 2349 || (GET_CODE (target) == REG && GET_CODE (source) == REG 2350 && REGNO (target) != REGNO (source))) 2351 { 2352 /* After arm_reorg has been called, we can't fix up expensive 2353 constants by pushing them into memory so we must synthesize 2354 them in-line, regardless of the cost. 
This is only likely to 2355 be more costly on chips that have load delay slots and we are 2356 compiling without running the scheduler (so no splitting 2357 occurred before the final instruction emission). 2358 2359 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c 2360 */ 2361 if (!after_arm_reorg 2362 && !cond 2363 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source, 2364 1, 0) 2365 > arm_constant_limit + (code != SET))) 2366 { 2367 if (code == SET) 2368 { 2369 /* Currently SET is the only monadic value for CODE, all 2370 the rest are diadic. */ 2371 if (TARGET_USE_MOVT) 2372 arm_emit_movpair (target, GEN_INT (val)); 2373 else 2374 emit_set_insn (target, GEN_INT (val)); 2375 2376 return 1; 2377 } 2378 else 2379 { 2380 rtx temp = subtargets ? gen_reg_rtx (mode) : target; 2381 2382 if (TARGET_USE_MOVT) 2383 arm_emit_movpair (temp, GEN_INT (val)); 2384 else 2385 emit_set_insn (temp, GEN_INT (val)); 2386 2387 /* For MINUS, the value is subtracted from, since we never 2388 have subtraction of a constant. */ 2389 if (code == MINUS) 2390 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source)); 2391 else 2392 emit_set_insn (target, 2393 gen_rtx_fmt_ee (code, mode, source, temp)); 2394 return 2; 2395 } 2396 } 2397 } 2398 2399 return arm_gen_constant (code, mode, cond, val, target, source, subtargets, 2400 1); 2401 } 2402 2403 /* Return the number of instructions required to synthesize the given 2404 constant, if we start emitting them from bit-position I. */ 2405 static int 2406 count_insns_for_constant (HOST_WIDE_INT remainder, int i) 2407 { 2408 HOST_WIDE_INT temp1; 2409 int step_size = TARGET_ARM ? 2 : 1; 2410 int num_insns = 0; 2411 2412 gcc_assert (TARGET_ARM || i == 0); 2413 2414 do 2415 { 2416 int end; 2417 2418 if (i <= 0) 2419 i += 32; 2420 if (remainder & (((1 << step_size) - 1) << (i - step_size))) 2421 { 2422 end = i - 8; 2423 if (end < 0) 2424 end += 32; 2425 temp1 = remainder & ((0x0ff << end) 2426 | ((i < end) ? (0xff >> (32 - end)) : 0)); 2427 remainder &= ~temp1; 2428 num_insns++; 2429 i -= 8 - step_size; 2430 } 2431 i -= step_size; 2432 } while (remainder); 2433 return num_insns; 2434 } 2435 2436 static int 2437 find_best_start (unsigned HOST_WIDE_INT remainder) 2438 { 2439 int best_consecutive_zeros = 0; 2440 int i; 2441 int best_start = 0; 2442 2443 /* If we aren't targetting ARM, the best place to start is always at 2444 the bottom. */ 2445 if (! TARGET_ARM) 2446 return 0; 2447 2448 for (i = 0; i < 32; i += 2) 2449 { 2450 int consecutive_zeros = 0; 2451 2452 if (!(remainder & (3 << i))) 2453 { 2454 while ((i < 32) && !(remainder & (3 << i))) 2455 { 2456 consecutive_zeros += 2; 2457 i += 2; 2458 } 2459 if (consecutive_zeros > best_consecutive_zeros) 2460 { 2461 best_consecutive_zeros = consecutive_zeros; 2462 best_start = i - consecutive_zeros; 2463 } 2464 i -= 2; 2465 } 2466 } 2467 2468 /* So long as it won't require any more insns to do so, it's 2469 desirable to emit a small constant (in bits 0...9) in the last 2470 insn. This way there is more chance that it can be combined with 2471 a later addressing insn to form a pre-indexed load or store 2472 operation. Consider: 2473 2474 *((volatile int *)0xe0000100) = 1; 2475 *((volatile int *)0xe0000110) = 2; 2476 2477 We want this to wind up as: 2478 2479 mov rA, #0xe0000000 2480 mov rB, #1 2481 str rB, [rA, #0x100] 2482 mov rB, #2 2483 str rB, [rA, #0x110] 2484 2485 rather than having to synthesize both large constants from scratch. 
2486 2487 Therefore, we calculate how many insns would be required to emit 2488 the constant starting from `best_start', and also starting from 2489 zero (i.e. with bit 31 first to be output). If `best_start' doesn't 2490 yield a shorter sequence, we may as well use zero. */ 2491 if (best_start != 0 2492 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder) 2493 && (count_insns_for_constant (remainder, 0) <= 2494 count_insns_for_constant (remainder, best_start))) 2495 best_start = 0; 2496 2497 return best_start; 2498 } 2499 2500 /* Emit an instruction with the indicated PATTERN. If COND is 2501 non-NULL, conditionalize the execution of the instruction on COND 2502 being true. */ 2503 2504 static void 2505 emit_constant_insn (rtx cond, rtx pattern) 2506 { 2507 if (cond) 2508 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern); 2509 emit_insn (pattern); 2510 } 2511 2512 /* As above, but extra parameter GENERATE which, if clear, suppresses 2513 RTL generation. */ 2514 /* ??? This needs more work for thumb2. */ 2515 2516 static int 2517 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, 2518 HOST_WIDE_INT val, rtx target, rtx source, int subtargets, 2519 int generate) 2520 { 2521 int can_invert = 0; 2522 int can_negate = 0; 2523 int final_invert = 0; 2524 int can_negate_initial = 0; 2525 int can_shift = 0; 2526 int i; 2527 int num_bits_set = 0; 2528 int set_sign_bit_copies = 0; 2529 int clear_sign_bit_copies = 0; 2530 int clear_zero_bit_copies = 0; 2531 int set_zero_bit_copies = 0; 2532 int insns = 0; 2533 unsigned HOST_WIDE_INT temp1, temp2; 2534 unsigned HOST_WIDE_INT remainder = val & 0xffffffff; 2535 int step_size = TARGET_ARM ? 2 : 1; 2536 2537 /* Find out which operations are safe for a given CODE. Also do a quick 2538 check for degenerate cases; these can occur when DImode operations 2539 are split. 
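A typical degenerate case: splitting a DImode IOR or AND against a constant leaves one SImode half whose constant is 0 or 0xffffffff, which the early returns below handle in at most one instruction.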
*/ 2540 switch (code) 2541 { 2542 case SET: 2543 can_invert = 1; 2544 can_shift = 1; 2545 can_negate = 1; 2546 break; 2547 2548 case PLUS: 2549 can_negate = 1; 2550 can_negate_initial = 1; 2551 break; 2552 2553 case IOR: 2554 if (remainder == 0xffffffff) 2555 { 2556 if (generate) 2557 emit_constant_insn (cond, 2558 gen_rtx_SET (VOIDmode, target, 2559 GEN_INT (ARM_SIGN_EXTEND (val)))); 2560 return 1; 2561 } 2562 2563 if (remainder == 0) 2564 { 2565 if (reload_completed && rtx_equal_p (target, source)) 2566 return 0; 2567 2568 if (generate) 2569 emit_constant_insn (cond, 2570 gen_rtx_SET (VOIDmode, target, source)); 2571 return 1; 2572 } 2573 2574 if (TARGET_THUMB2) 2575 can_invert = 1; 2576 break; 2577 2578 case AND: 2579 if (remainder == 0) 2580 { 2581 if (generate) 2582 emit_constant_insn (cond, 2583 gen_rtx_SET (VOIDmode, target, const0_rtx)); 2584 return 1; 2585 } 2586 if (remainder == 0xffffffff) 2587 { 2588 if (reload_completed && rtx_equal_p (target, source)) 2589 return 0; 2590 if (generate) 2591 emit_constant_insn (cond, 2592 gen_rtx_SET (VOIDmode, target, source)); 2593 return 1; 2594 } 2595 can_invert = 1; 2596 break; 2597 2598 case XOR: 2599 if (remainder == 0) 2600 { 2601 if (reload_completed && rtx_equal_p (target, source)) 2602 return 0; 2603 if (generate) 2604 emit_constant_insn (cond, 2605 gen_rtx_SET (VOIDmode, target, source)); 2606 return 1; 2607 } 2608 2609 if (remainder == 0xffffffff) 2610 { 2611 if (generate) 2612 emit_constant_insn (cond, 2613 gen_rtx_SET (VOIDmode, target, 2614 gen_rtx_NOT (mode, source))); 2615 return 1; 2616 } 2617 break; 2618 2619 case MINUS: 2620 /* We treat MINUS as (val - source), since (source - val) is always 2621 passed as (source + (-val)). */ 2622 if (remainder == 0) 2623 { 2624 if (generate) 2625 emit_constant_insn (cond, 2626 gen_rtx_SET (VOIDmode, target, 2627 gen_rtx_NEG (mode, source))); 2628 return 1; 2629 } 2630 if (const_ok_for_arm (val)) 2631 { 2632 if (generate) 2633 emit_constant_insn (cond, 2634 gen_rtx_SET (VOIDmode, target, 2635 gen_rtx_MINUS (mode, GEN_INT (val), 2636 source))); 2637 return 1; 2638 } 2639 can_negate = 1; 2640 2641 break; 2642 2643 default: 2644 gcc_unreachable (); 2645 } 2646 2647 /* If we can do it in one insn get out quickly. */ 2648 if (const_ok_for_arm (val) 2649 || (can_negate_initial && const_ok_for_arm (-val)) 2650 || (can_invert && const_ok_for_arm (~val))) 2651 { 2652 if (generate) 2653 emit_constant_insn (cond, 2654 gen_rtx_SET (VOIDmode, target, 2655 (source 2656 ? gen_rtx_fmt_ee (code, mode, source, 2657 GEN_INT (val)) 2658 : GEN_INT (val)))); 2659 return 1; 2660 } 2661 2662 /* Calculate a few attributes that may be useful for specific 2663 optimizations. */ 2664 /* Count number of leading zeros. */ 2665 for (i = 31; i >= 0; i--) 2666 { 2667 if ((remainder & (1 << i)) == 0) 2668 clear_sign_bit_copies++; 2669 else 2670 break; 2671 } 2672 2673 /* Count number of leading 1's. */ 2674 for (i = 31; i >= 0; i--) 2675 { 2676 if ((remainder & (1 << i)) != 0) 2677 set_sign_bit_copies++; 2678 else 2679 break; 2680 } 2681 2682 /* Count number of trailing zero's. */ 2683 for (i = 0; i <= 31; i++) 2684 { 2685 if ((remainder & (1 << i)) == 0) 2686 clear_zero_bit_copies++; 2687 else 2688 break; 2689 } 2690 2691 /* Count number of trailing 1's. */ 2692 for (i = 0; i <= 31; i++) 2693 { 2694 if ((remainder & (1 << i)) != 0) 2695 set_zero_bit_copies++; 2696 else 2697 break; 2698 } 2699 2700 switch (code) 2701 { 2702 case SET: 2703 /* See if we can use movw. 
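movw (available from ARMv6T2/Thumb-2 onwards) can materialize any 16-bit immediate in a single instruction, which is why a value whose upper halfword is clear is accepted directly here.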
*/ 2704 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0) 2705 { 2706 if (generate) 2707 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target, 2708 GEN_INT (val))); 2709 return 1; 2710 } 2711 2712 /* See if we can do this by sign_extending a constant that is known 2713 to be negative. This is a good, way of doing it, since the shift 2714 may well merge into a subsequent insn. */ 2715 if (set_sign_bit_copies > 1) 2716 { 2717 if (const_ok_for_arm 2718 (temp1 = ARM_SIGN_EXTEND (remainder 2719 << (set_sign_bit_copies - 1)))) 2720 { 2721 if (generate) 2722 { 2723 rtx new_src = subtargets ? gen_reg_rtx (mode) : target; 2724 emit_constant_insn (cond, 2725 gen_rtx_SET (VOIDmode, new_src, 2726 GEN_INT (temp1))); 2727 emit_constant_insn (cond, 2728 gen_ashrsi3 (target, new_src, 2729 GEN_INT (set_sign_bit_copies - 1))); 2730 } 2731 return 2; 2732 } 2733 /* For an inverted constant, we will need to set the low bits, 2734 these will be shifted out of harm's way. */ 2735 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1; 2736 if (const_ok_for_arm (~temp1)) 2737 { 2738 if (generate) 2739 { 2740 rtx new_src = subtargets ? gen_reg_rtx (mode) : target; 2741 emit_constant_insn (cond, 2742 gen_rtx_SET (VOIDmode, new_src, 2743 GEN_INT (temp1))); 2744 emit_constant_insn (cond, 2745 gen_ashrsi3 (target, new_src, 2746 GEN_INT (set_sign_bit_copies - 1))); 2747 } 2748 return 2; 2749 } 2750 } 2751 2752 /* See if we can calculate the value as the difference between two 2753 valid immediates. */ 2754 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16) 2755 { 2756 int topshift = clear_sign_bit_copies & ~1; 2757 2758 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift)) 2759 & (0xff000000 >> topshift)); 2760 2761 /* If temp1 is zero, then that means the 9 most significant 2762 bits of remainder were 1 and we've caused it to overflow. 2763 When topshift is 0 we don't need to do anything since we 2764 can borrow from 'bit 32'. */ 2765 if (temp1 == 0 && topshift != 0) 2766 temp1 = 0x80000000 >> (topshift - 1); 2767 2768 temp2 = ARM_SIGN_EXTEND (temp1 - remainder); 2769 2770 if (const_ok_for_arm (temp2)) 2771 { 2772 if (generate) 2773 { 2774 rtx new_src = subtargets ? gen_reg_rtx (mode) : target; 2775 emit_constant_insn (cond, 2776 gen_rtx_SET (VOIDmode, new_src, 2777 GEN_INT (temp1))); 2778 emit_constant_insn (cond, 2779 gen_addsi3 (target, new_src, 2780 GEN_INT (-temp2))); 2781 } 2782 2783 return 2; 2784 } 2785 } 2786 2787 /* See if we can generate this by setting the bottom (or the top) 2788 16 bits, and then shifting these into the other half of the 2789 word. We only look for the simplest cases, to do more would cost 2790 too much. Be careful, however, not to generate this when the 2791 alternative would take fewer insns. */ 2792 if (val & 0xffff0000) 2793 { 2794 temp1 = remainder & 0xffff0000; 2795 temp2 = remainder & 0x0000ffff; 2796 2797 /* Overlaps outside this range are best done using other methods. */ 2798 for (i = 9; i < 24; i++) 2799 { 2800 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder) 2801 && !const_ok_for_arm (temp2)) 2802 { 2803 rtx new_src = (subtargets 2804 ? (generate ? 
gen_reg_rtx (mode) : NULL_RTX) 2805 : target); 2806 insns = arm_gen_constant (code, mode, cond, temp2, new_src, 2807 source, subtargets, generate); 2808 source = new_src; 2809 if (generate) 2810 emit_constant_insn 2811 (cond, 2812 gen_rtx_SET 2813 (VOIDmode, target, 2814 gen_rtx_IOR (mode, 2815 gen_rtx_ASHIFT (mode, source, 2816 GEN_INT (i)), 2817 source))); 2818 return insns + 1; 2819 } 2820 } 2821 2822 /* Don't duplicate cases already considered. */ 2823 for (i = 17; i < 24; i++) 2824 { 2825 if (((temp1 | (temp1 >> i)) == remainder) 2826 && !const_ok_for_arm (temp1)) 2827 { 2828 rtx new_src = (subtargets 2829 ? (generate ? gen_reg_rtx (mode) : NULL_RTX) 2830 : target); 2831 insns = arm_gen_constant (code, mode, cond, temp1, new_src, 2832 source, subtargets, generate); 2833 source = new_src; 2834 if (generate) 2835 emit_constant_insn 2836 (cond, 2837 gen_rtx_SET (VOIDmode, target, 2838 gen_rtx_IOR 2839 (mode, 2840 gen_rtx_LSHIFTRT (mode, source, 2841 GEN_INT (i)), 2842 source))); 2843 return insns + 1; 2844 } 2845 } 2846 } 2847 break; 2848 2849 case IOR: 2850 case XOR: 2851 /* If we have IOR or XOR, and the constant can be loaded in a 2852 single instruction, and we can find a temporary to put it in, 2853 then this can be done in two instructions instead of 3-4. */ 2854 if (subtargets 2855 /* TARGET can't be NULL if SUBTARGETS is 0 */ 2856 || (reload_completed && !reg_mentioned_p (target, source))) 2857 { 2858 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val))) 2859 { 2860 if (generate) 2861 { 2862 rtx sub = subtargets ? gen_reg_rtx (mode) : target; 2863 2864 emit_constant_insn (cond, 2865 gen_rtx_SET (VOIDmode, sub, 2866 GEN_INT (val))); 2867 emit_constant_insn (cond, 2868 gen_rtx_SET (VOIDmode, target, 2869 gen_rtx_fmt_ee (code, mode, 2870 source, sub))); 2871 } 2872 return 2; 2873 } 2874 } 2875 2876 if (code == XOR) 2877 break; 2878 2879 /* Convert. 2880 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s 2881 and the remainder 0s for e.g. 0xfff00000) 2882 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies) 2883 2884 This can be done in 2 instructions by using shifts with mov or mvn. 2885 e.g. for 2886 x = x | 0xfff00000; 2887 we generate. 2888 mvn r0, r0, asl #12 2889 mvn r0, r0, lsr #12 */ 2890 if (set_sign_bit_copies > 8 2891 && (val & (-1 << (32 - set_sign_bit_copies))) == val) 2892 { 2893 if (generate) 2894 { 2895 rtx sub = subtargets ? gen_reg_rtx (mode) : target; 2896 rtx shift = GEN_INT (set_sign_bit_copies); 2897 2898 emit_constant_insn 2899 (cond, 2900 gen_rtx_SET (VOIDmode, sub, 2901 gen_rtx_NOT (mode, 2902 gen_rtx_ASHIFT (mode, 2903 source, 2904 shift)))); 2905 emit_constant_insn 2906 (cond, 2907 gen_rtx_SET (VOIDmode, target, 2908 gen_rtx_NOT (mode, 2909 gen_rtx_LSHIFTRT (mode, sub, 2910 shift)))); 2911 } 2912 return 2; 2913 } 2914 2915 /* Convert 2916 x = y | constant (which has set_zero_bit_copies number of trailing ones). 2917 to 2918 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies). 2919 2920 For eg. r0 = r0 | 0xfff 2921 mvn r0, r0, lsr #12 2922 mvn r0, r0, asl #12 2923 2924 */ 2925 if (set_zero_bit_copies > 8 2926 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder) 2927 { 2928 if (generate) 2929 { 2930 rtx sub = subtargets ? 
gen_reg_rtx (mode) : target; 2931 rtx shift = GEN_INT (set_zero_bit_copies); 2932 2933 emit_constant_insn 2934 (cond, 2935 gen_rtx_SET (VOIDmode, sub, 2936 gen_rtx_NOT (mode, 2937 gen_rtx_LSHIFTRT (mode, 2938 source, 2939 shift)))); 2940 emit_constant_insn 2941 (cond, 2942 gen_rtx_SET (VOIDmode, target, 2943 gen_rtx_NOT (mode, 2944 gen_rtx_ASHIFT (mode, sub, 2945 shift)))); 2946 } 2947 return 2; 2948 } 2949 2950 /* This will never be reached for Thumb2 because orn is a valid 2951 instruction. This is for Thumb1 and the ARM 32 bit cases. 2952 2953 x = y | constant (such that ~constant is a valid constant) 2954 Transform this to 2955 x = ~(~y & ~constant). 2956 */ 2957 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val))) 2958 { 2959 if (generate) 2960 { 2961 rtx sub = subtargets ? gen_reg_rtx (mode) : target; 2962 emit_constant_insn (cond, 2963 gen_rtx_SET (VOIDmode, sub, 2964 gen_rtx_NOT (mode, source))); 2965 source = sub; 2966 if (subtargets) 2967 sub = gen_reg_rtx (mode); 2968 emit_constant_insn (cond, 2969 gen_rtx_SET (VOIDmode, sub, 2970 gen_rtx_AND (mode, source, 2971 GEN_INT (temp1)))); 2972 emit_constant_insn (cond, 2973 gen_rtx_SET (VOIDmode, target, 2974 gen_rtx_NOT (mode, sub))); 2975 } 2976 return 3; 2977 } 2978 break; 2979 2980 case AND: 2981 /* See if two shifts will do 2 or more insn's worth of work. */ 2982 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24) 2983 { 2984 HOST_WIDE_INT shift_mask = ((0xffffffff 2985 << (32 - clear_sign_bit_copies)) 2986 & 0xffffffff); 2987 2988 if ((remainder | shift_mask) != 0xffffffff) 2989 { 2990 if (generate) 2991 { 2992 rtx new_src = subtargets ? gen_reg_rtx (mode) : target; 2993 insns = arm_gen_constant (AND, mode, cond, 2994 remainder | shift_mask, 2995 new_src, source, subtargets, 1); 2996 source = new_src; 2997 } 2998 else 2999 { 3000 rtx targ = subtargets ? NULL_RTX : target; 3001 insns = arm_gen_constant (AND, mode, cond, 3002 remainder | shift_mask, 3003 targ, source, subtargets, 0); 3004 } 3005 } 3006 3007 if (generate) 3008 { 3009 rtx new_src = subtargets ? gen_reg_rtx (mode) : target; 3010 rtx shift = GEN_INT (clear_sign_bit_copies); 3011 3012 emit_insn (gen_ashlsi3 (new_src, source, shift)); 3013 emit_insn (gen_lshrsi3 (target, new_src, shift)); 3014 } 3015 3016 return insns + 2; 3017 } 3018 3019 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24) 3020 { 3021 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1; 3022 3023 if ((remainder | shift_mask) != 0xffffffff) 3024 { 3025 if (generate) 3026 { 3027 rtx new_src = subtargets ? gen_reg_rtx (mode) : target; 3028 3029 insns = arm_gen_constant (AND, mode, cond, 3030 remainder | shift_mask, 3031 new_src, source, subtargets, 1); 3032 source = new_src; 3033 } 3034 else 3035 { 3036 rtx targ = subtargets ? NULL_RTX : target; 3037 3038 insns = arm_gen_constant (AND, mode, cond, 3039 remainder | shift_mask, 3040 targ, source, subtargets, 0); 3041 } 3042 } 3043 3044 if (generate) 3045 { 3046 rtx new_src = subtargets ? 
gen_reg_rtx (mode) : target; 3047 rtx shift = GEN_INT (clear_zero_bit_copies); 3048 3049 emit_insn (gen_lshrsi3 (new_src, source, shift)); 3050 emit_insn (gen_ashlsi3 (target, new_src, shift)); 3051 } 3052 3053 return insns + 2; 3054 } 3055 3056 break; 3057 3058 default: 3059 break; 3060 } 3061 3062 for (i = 0; i < 32; i++) 3063 if (remainder & (1 << i)) 3064 num_bits_set++; 3065 3066 if ((code == AND) 3067 || (code != IOR && can_invert && num_bits_set > 16)) 3068 remainder ^= 0xffffffff; 3069 else if (code == PLUS && num_bits_set > 16) 3070 remainder = (-remainder) & 0xffffffff; 3071 3072 /* For XOR, if more than half the bits are set and there's a sequence 3073 of more than 8 consecutive ones in the pattern then we can XOR by the 3074 inverted constant and then invert the final result; this may save an 3075 instruction and might also lead to the final mvn being merged with 3076 some other operation. */ 3077 else if (code == XOR && num_bits_set > 16 3078 && (count_insns_for_constant (remainder ^ 0xffffffff, 3079 find_best_start 3080 (remainder ^ 0xffffffff)) 3081 < count_insns_for_constant (remainder, 3082 find_best_start (remainder)))) 3083 { 3084 remainder ^= 0xffffffff; 3085 final_invert = 1; 3086 } 3087 else 3088 { 3089 can_invert = 0; 3090 can_negate = 0; 3091 } 3092 3093 /* Now try and find a way of doing the job in either two or three 3094 instructions. 3095 We start by looking for the largest block of zeros that are aligned on 3096 a 2-bit boundary, we then fill up the temps, wrapping around to the 3097 top of the word when we drop off the bottom. 3098 In the worst case this code should produce no more than four insns. 3099 Thumb-2 constants are shifted, not rotated, so the MSB is always the 3100 best place to start. */ 3101 3102 /* ??? Use thumb2 replicated constants when the high and low halfwords are 3103 the same. */ 3104 { 3105 /* Now start emitting the insns. */ 3106 i = find_best_start (remainder); 3107 do 3108 { 3109 int end; 3110 3111 if (i <= 0) 3112 i += 32; 3113 if (remainder & (3 << (i - 2))) 3114 { 3115 end = i - 8; 3116 if (end < 0) 3117 end += 32; 3118 temp1 = remainder & ((0x0ff << end) 3119 | ((i < end) ? (0xff >> (32 - end)) : 0)); 3120 remainder &= ~temp1; 3121 3122 if (generate) 3123 { 3124 rtx new_src, temp1_rtx; 3125 3126 if (code == SET || code == MINUS) 3127 { 3128 new_src = (subtargets ? gen_reg_rtx (mode) : target); 3129 if (can_invert && code != MINUS) 3130 temp1 = ~temp1; 3131 } 3132 else 3133 { 3134 if ((final_invert || remainder) && subtargets) 3135 new_src = gen_reg_rtx (mode); 3136 else 3137 new_src = target; 3138 if (can_invert) 3139 temp1 = ~temp1; 3140 else if (can_negate) 3141 temp1 = -temp1; 3142 } 3143 3144 temp1 = trunc_int_for_mode (temp1, mode); 3145 temp1_rtx = GEN_INT (temp1); 3146 3147 if (code == SET) 3148 ; 3149 else if (code == MINUS) 3150 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source); 3151 else 3152 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx); 3153 3154 emit_constant_insn (cond, 3155 gen_rtx_SET (VOIDmode, new_src, 3156 temp1_rtx)); 3157 source = new_src; 3158 } 3159 3160 if (code == SET) 3161 { 3162 can_invert = 0; 3163 code = PLUS; 3164 } 3165 else if (code == MINUS) 3166 code = PLUS; 3167 3168 insns++; 3169 i -= 8 - step_size; 3170 } 3171 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary 3172 shifts. 
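As an illustration of the chunking above, a worst-case constant such as 0x12345678 comes out as four byte-sized pieces, e.g. mov rT, #0x12000000 followed by add rT, rT, #0x00340000; add rT, rT, #0x00005600; add rT, rT, #0x00000078 (rT standing for whichever target register was chosen).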
*/ 3173 i -= step_size; 3174 } 3175 while (remainder); 3176 } 3177 3178 if (final_invert) 3179 { 3180 if (generate) 3181 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target, 3182 gen_rtx_NOT (mode, source))); 3183 insns++; 3184 } 3185 3186 return insns; 3187 } 3188 3189 /* Canonicalize a comparison so that we are more likely to recognize it. 3190 This can be done for a few constant compares, where we can make the 3191 immediate value easier to load. */ 3192 3193 enum rtx_code 3194 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode, 3195 rtx * op1) 3196 { 3197 unsigned HOST_WIDE_INT i = INTVAL (*op1); 3198 unsigned HOST_WIDE_INT maxval; 3199 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1; 3200 3201 switch (code) 3202 { 3203 case EQ: 3204 case NE: 3205 return code; 3206 3207 case GT: 3208 case LE: 3209 if (i != maxval 3210 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) 3211 { 3212 *op1 = GEN_INT (i + 1); 3213 return code == GT ? GE : LT; 3214 } 3215 break; 3216 3217 case GE: 3218 case LT: 3219 if (i != ~maxval 3220 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) 3221 { 3222 *op1 = GEN_INT (i - 1); 3223 return code == GE ? GT : LE; 3224 } 3225 break; 3226 3227 case GTU: 3228 case LEU: 3229 if (i != ~((unsigned HOST_WIDE_INT) 0) 3230 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) 3231 { 3232 *op1 = GEN_INT (i + 1); 3233 return code == GTU ? GEU : LTU; 3234 } 3235 break; 3236 3237 case GEU: 3238 case LTU: 3239 if (i != 0 3240 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) 3241 { 3242 *op1 = GEN_INT (i - 1); 3243 return code == GEU ? GTU : LEU; 3244 } 3245 break; 3246 3247 default: 3248 gcc_unreachable (); 3249 } 3250 3251 return code; 3252 } 3253 3254 3255 /* Define how to find the value returned by a function. */ 3256 3257 static rtx 3258 arm_function_value(const_tree type, const_tree func, 3259 bool outgoing ATTRIBUTE_UNUSED) 3260 { 3261 enum machine_mode mode; 3262 int unsignedp ATTRIBUTE_UNUSED; 3263 rtx r ATTRIBUTE_UNUSED; 3264 3265 mode = TYPE_MODE (type); 3266 3267 if (TARGET_AAPCS_BASED) 3268 return aapcs_allocate_return_reg (mode, type, func); 3269 3270 /* Promote integer types. */ 3271 if (INTEGRAL_TYPE_P (type)) 3272 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1); 3273 3274 /* Promotes small structs returned in a register to full-word size 3275 for big-endian AAPCS. 
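Illustratively, a 3-byte struct is rounded up to 4 bytes and returned in SImode, so that on a big-endian AAPCS target the data occupies the most significant bytes of r0 and the padding the least significant end.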
*/ 3276 if (arm_return_in_msb (type)) 3277 { 3278 HOST_WIDE_INT size = int_size_in_bytes (type); 3279 if (size % UNITS_PER_WORD != 0) 3280 { 3281 size += UNITS_PER_WORD - size % UNITS_PER_WORD; 3282 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); 3283 } 3284 } 3285 3286 return LIBCALL_VALUE (mode); 3287 } 3288 3289 static int 3290 libcall_eq (const void *p1, const void *p2) 3291 { 3292 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2); 3293 } 3294 3295 static hashval_t 3296 libcall_hash (const void *p1) 3297 { 3298 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE); 3299 } 3300 3301 static void 3302 add_libcall (htab_t htab, rtx libcall) 3303 { 3304 *htab_find_slot (htab, libcall, INSERT) = libcall; 3305 } 3306 3307 static bool 3308 arm_libcall_uses_aapcs_base (const_rtx libcall) 3309 { 3310 static bool init_done = false; 3311 static htab_t libcall_htab; 3312 3313 if (!init_done) 3314 { 3315 init_done = true; 3316 3317 libcall_htab = htab_create (31, libcall_hash, libcall_eq, 3318 NULL); 3319 add_libcall (libcall_htab, 3320 convert_optab_libfunc (sfloat_optab, SFmode, SImode)); 3321 add_libcall (libcall_htab, 3322 convert_optab_libfunc (sfloat_optab, DFmode, SImode)); 3323 add_libcall (libcall_htab, 3324 convert_optab_libfunc (sfloat_optab, SFmode, DImode)); 3325 add_libcall (libcall_htab, 3326 convert_optab_libfunc (sfloat_optab, DFmode, DImode)); 3327 3328 add_libcall (libcall_htab, 3329 convert_optab_libfunc (ufloat_optab, SFmode, SImode)); 3330 add_libcall (libcall_htab, 3331 convert_optab_libfunc (ufloat_optab, DFmode, SImode)); 3332 add_libcall (libcall_htab, 3333 convert_optab_libfunc (ufloat_optab, SFmode, DImode)); 3334 add_libcall (libcall_htab, 3335 convert_optab_libfunc (ufloat_optab, DFmode, DImode)); 3336 3337 add_libcall (libcall_htab, 3338 convert_optab_libfunc (sext_optab, SFmode, HFmode)); 3339 add_libcall (libcall_htab, 3340 convert_optab_libfunc (trunc_optab, HFmode, SFmode)); 3341 add_libcall (libcall_htab, 3342 convert_optab_libfunc (sfix_optab, SImode, DFmode)); 3343 add_libcall (libcall_htab, 3344 convert_optab_libfunc (ufix_optab, SImode, DFmode)); 3345 add_libcall (libcall_htab, 3346 convert_optab_libfunc (sfix_optab, DImode, DFmode)); 3347 add_libcall (libcall_htab, 3348 convert_optab_libfunc (ufix_optab, DImode, DFmode)); 3349 add_libcall (libcall_htab, 3350 convert_optab_libfunc (sfix_optab, DImode, SFmode)); 3351 add_libcall (libcall_htab, 3352 convert_optab_libfunc (ufix_optab, DImode, SFmode)); 3353 3354 /* Values from double-precision helper functions are returned in core 3355 registers if the selected core only supports single-precision 3356 arithmetic, even if we are using the hard-float ABI. The same is 3357 true for single-precision helpers, but we will never be using the 3358 hard-float ABI on a CPU which doesn't support single-precision 3359 operations in hardware. 
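For instance, on an EABI target built for a single-precision-only FPU with -mfloat-abi=hard, a call to the __aeabi_dadd helper registered below still returns its result in r0/r1 rather than in a VFP register.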
*/ 3360 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode)); 3361 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode)); 3362 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode)); 3363 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode)); 3364 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode)); 3365 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode)); 3366 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode)); 3367 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode)); 3368 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode)); 3369 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode)); 3370 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode)); 3371 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode, 3372 SFmode)); 3373 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode, 3374 DFmode)); 3375 } 3376 3377 return libcall && htab_find (libcall_htab, libcall) != NULL; 3378 } 3379 3380 rtx 3381 arm_libcall_value (enum machine_mode mode, const_rtx libcall) 3382 { 3383 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS 3384 && GET_MODE_CLASS (mode) == MODE_FLOAT) 3385 { 3386 /* The following libcalls return their result in integer registers, 3387 even though they return a floating point value. */ 3388 if (arm_libcall_uses_aapcs_base (libcall)) 3389 return gen_rtx_REG (mode, ARG_REGISTER(1)); 3390 3391 } 3392 3393 return LIBCALL_VALUE (mode); 3394 } 3395 3396 /* Determine the amount of memory needed to store the possible return 3397 registers of an untyped call. */ 3398 int 3399 arm_apply_result_size (void) 3400 { 3401 int size = 16; 3402 3403 if (TARGET_32BIT) 3404 { 3405 if (TARGET_HARD_FLOAT_ABI) 3406 { 3407 if (TARGET_VFP) 3408 size += 32; 3409 if (TARGET_FPA) 3410 size += 12; 3411 if (TARGET_MAVERICK) 3412 size += 8; 3413 } 3414 if (TARGET_IWMMXT_ABI) 3415 size += 8; 3416 } 3417 3418 return size; 3419 } 3420 3421 /* Decide whether TYPE should be returned in memory (true) 3422 or in a register (false). FNTYPE is the type of the function making 3423 the call. */ 3424 static bool 3425 arm_return_in_memory (const_tree type, const_tree fntype) 3426 { 3427 HOST_WIDE_INT size; 3428 3429 size = int_size_in_bytes (type); /* Negative if not fixed size. */ 3430 3431 if (TARGET_AAPCS_BASED) 3432 { 3433 /* Simple, non-aggregate types (ie not including vectors and 3434 complex) are always returned in a register (or registers). 3435 We don't care about which register here, so we can short-cut 3436 some of the detail. */ 3437 if (!AGGREGATE_TYPE_P (type) 3438 && TREE_CODE (type) != VECTOR_TYPE 3439 && TREE_CODE (type) != COMPLEX_TYPE) 3440 return false; 3441 3442 /* Any return value that is no larger than one word can be 3443 returned in r0. */ 3444 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD) 3445 return false; 3446 3447 /* Check any available co-processors to see if they accept the 3448 type as a register candidate (VFP, for example, can return 3449 some aggregates in consecutive registers). These aren't 3450 available if the call is variadic. */ 3451 if (aapcs_select_return_coproc (type, fntype) >= 0) 3452 return false; 3453 3454 /* Vector values should be returned using ARM registers, not 3455 memory (unless they're over 16 bytes, which will break since 3456 we only have four call-clobbered registers to play with). */ 3457 if (TREE_CODE (type) == VECTOR_TYPE) 3458 return (size < 0 || size > (4 * UNITS_PER_WORD)); 3459 3460 /* The rest go in memory. 
*/ 3461 return true; 3462 } 3463 3464 if (TREE_CODE (type) == VECTOR_TYPE) 3465 return (size < 0 || size > (4 * UNITS_PER_WORD)); 3466 3467 if (!AGGREGATE_TYPE_P (type) && 3468 (TREE_CODE (type) != VECTOR_TYPE)) 3469 /* All simple types are returned in registers. */ 3470 return false; 3471 3472 if (arm_abi != ARM_ABI_APCS) 3473 { 3474 /* ATPCS and later return aggregate types in memory only if they are 3475 larger than a word (or are variable size). */ 3476 return (size < 0 || size > UNITS_PER_WORD); 3477 } 3478 3479 /* For the arm-wince targets we choose to be compatible with Microsoft's 3480 ARM and Thumb compilers, which always return aggregates in memory. */ 3481 #ifndef ARM_WINCE 3482 /* All structures/unions bigger than one word are returned in memory. 3483 Also catch the case where int_size_in_bytes returns -1. In this case 3484 the aggregate is either huge or of variable size, and in either case 3485 we will want to return it via memory and not in a register. */ 3486 if (size < 0 || size > UNITS_PER_WORD) 3487 return true; 3488 3489 if (TREE_CODE (type) == RECORD_TYPE) 3490 { 3491 tree field; 3492 3493 /* For a struct the APCS says that we only return in a register 3494 if the type is 'integer like' and every addressable element 3495 has an offset of zero. For practical purposes this means 3496 that the structure can have at most one non bit-field element 3497 and that this element must be the first one in the structure. */ 3498 3499 /* Find the first field, ignoring non FIELD_DECL things which will 3500 have been created by C++. */ 3501 for (field = TYPE_FIELDS (type); 3502 field && TREE_CODE (field) != FIELD_DECL; 3503 field = TREE_CHAIN (field)) 3504 continue; 3505 3506 if (field == NULL) 3507 return false; /* An empty structure. Allowed by an extension to ANSI C. */ 3508 3509 /* Check that the first field is valid for returning in a register. */ 3510 3511 /* ... Floats are not allowed */ 3512 if (FLOAT_TYPE_P (TREE_TYPE (field))) 3513 return true; 3514 3515 /* ... Aggregates that are not themselves valid for returning in 3516 a register are not allowed. */ 3517 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) 3518 return true; 3519 3520 /* Now check the remaining fields, if any. Only bitfields are allowed, 3521 since they are not addressable. */ 3522 for (field = TREE_CHAIN (field); 3523 field; 3524 field = TREE_CHAIN (field)) 3525 { 3526 if (TREE_CODE (field) != FIELD_DECL) 3527 continue; 3528 3529 if (!DECL_BIT_FIELD_TYPE (field)) 3530 return true; 3531 } 3532 3533 return false; 3534 } 3535 3536 if (TREE_CODE (type) == UNION_TYPE) 3537 { 3538 tree field; 3539 3540 /* Unions can be returned in registers if every element is 3541 integral, or can be returned in an integer register. */ 3542 for (field = TYPE_FIELDS (type); 3543 field; 3544 field = TREE_CHAIN (field)) 3545 { 3546 if (TREE_CODE (field) != FIELD_DECL) 3547 continue; 3548 3549 if (FLOAT_TYPE_P (TREE_TYPE (field))) 3550 return true; 3551 3552 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) 3553 return true; 3554 } 3555 3556 return false; 3557 } 3558 #endif /* not ARM_WINCE */ 3559 3560 /* Return all other types in memory. */ 3561 return true; 3562 } 3563 3564 /* Indicate whether or not words of a double are in big-endian order. */ 3565 3566 int 3567 arm_float_words_big_endian (void) 3568 { 3569 if (TARGET_MAVERICK) 3570 return 0; 3571 3572 /* For FPA, float words are always big-endian. For VFP, floats words 3573 follow the memory system mode. 
*/

  if (TARGET_FPA)
    {
      return 1;
    }

  if (TARGET_VFP)
    return (TARGET_BIG_END ? 1 : 0);

  return 1;
}

const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
       to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS variant.  */
  return ARM_PCS_UNKNOWN;
}

/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = (TYPE_ARG_TYPES (type) != 0
			 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
			     != void_type_node));

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("Non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("Variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}


static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.
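     For illustration (assuming NUM_VFP_ARG_REGS is 16, i.e. s0-s15):

       pcum->aapcs_vfp_regs_free == 0xffff;  // all of s0..s15 available
       // allocating a double later claims two adjacent bits, which
       // aapcs_vfp_advance then clears from the free mask.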
*/ 3692 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1; 3693 pcum->aapcs_vfp_reg_alloc = 0; 3694 } 3695 3696 /* Walk down the type tree of TYPE counting consecutive base elements. 3697 If *MODEP is VOIDmode, then set it to the first valid floating point 3698 type. If a non-floating point type is found, or if a floating point 3699 type that doesn't match a non-VOIDmode *MODEP is found, then return -1, 3700 otherwise return the count in the sub-tree. */ 3701 static int 3702 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) 3703 { 3704 enum machine_mode mode; 3705 HOST_WIDE_INT size; 3706 3707 switch (TREE_CODE (type)) 3708 { 3709 case REAL_TYPE: 3710 mode = TYPE_MODE (type); 3711 if (mode != DFmode && mode != SFmode) 3712 return -1; 3713 3714 if (*modep == VOIDmode) 3715 *modep = mode; 3716 3717 if (*modep == mode) 3718 return 1; 3719 3720 break; 3721 3722 case COMPLEX_TYPE: 3723 mode = TYPE_MODE (TREE_TYPE (type)); 3724 if (mode != DFmode && mode != SFmode) 3725 return -1; 3726 3727 if (*modep == VOIDmode) 3728 *modep = mode; 3729 3730 if (*modep == mode) 3731 return 2; 3732 3733 break; 3734 3735 case VECTOR_TYPE: 3736 /* Use V2SImode and V4SImode as representatives of all 64-bit 3737 and 128-bit vector types, whether or not those modes are 3738 supported with the present options. */ 3739 size = int_size_in_bytes (type); 3740 switch (size) 3741 { 3742 case 8: 3743 mode = V2SImode; 3744 break; 3745 case 16: 3746 mode = V4SImode; 3747 break; 3748 default: 3749 return -1; 3750 } 3751 3752 if (*modep == VOIDmode) 3753 *modep = mode; 3754 3755 /* Vector modes are considered to be opaque: two vectors are 3756 equivalent for the purposes of being homogeneous aggregates 3757 if they are the same size. */ 3758 if (*modep == mode) 3759 return 1; 3760 3761 break; 3762 3763 case ARRAY_TYPE: 3764 { 3765 int count; 3766 tree index = TYPE_DOMAIN (type); 3767 3768 /* Can't handle incomplete types. */ 3769 if (!COMPLETE_TYPE_P(type)) 3770 return -1; 3771 3772 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); 3773 if (count == -1 3774 || !index 3775 || !TYPE_MAX_VALUE (index) 3776 || !host_integerp (TYPE_MAX_VALUE (index), 1) 3777 || !TYPE_MIN_VALUE (index) 3778 || !host_integerp (TYPE_MIN_VALUE (index), 1) 3779 || count < 0) 3780 return -1; 3781 3782 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1) 3783 - tree_low_cst (TYPE_MIN_VALUE (index), 1)); 3784 3785 /* There must be no padding. */ 3786 if (!host_integerp (TYPE_SIZE (type), 1) 3787 || (tree_low_cst (TYPE_SIZE (type), 1) 3788 != count * GET_MODE_BITSIZE (*modep))) 3789 return -1; 3790 3791 return count; 3792 } 3793 3794 case RECORD_TYPE: 3795 { 3796 int count = 0; 3797 int sub_count; 3798 tree field; 3799 3800 /* Can't handle incomplete types. */ 3801 if (!COMPLETE_TYPE_P(type)) 3802 return -1; 3803 3804 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 3805 { 3806 if (TREE_CODE (field) != FIELD_DECL) 3807 continue; 3808 3809 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); 3810 if (sub_count < 0) 3811 return -1; 3812 count += sub_count; 3813 } 3814 3815 /* There must be no padding. */ 3816 if (!host_integerp (TYPE_SIZE (type), 1) 3817 || (tree_low_cst (TYPE_SIZE (type), 1) 3818 != count * GET_MODE_BITSIZE (*modep))) 3819 return -1; 3820 3821 return count; 3822 } 3823 3824 case UNION_TYPE: 3825 case QUAL_UNION_TYPE: 3826 { 3827 /* These aren't very interesting except in a degenerate case. 
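	 For example (hypothetical types, not from this file):

	   union u1 { float a; float b[1]; };   // one SFmode element: accepted
	   union u2 { float a; double b; };     // mixed element modes: rejected

	 i.e. a union only qualifies when every member reduces to the same
	 element mode and the union is exactly that size.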
*/ 3828 int count = 0; 3829 int sub_count; 3830 tree field; 3831 3832 /* Can't handle incomplete types. */ 3833 if (!COMPLETE_TYPE_P(type)) 3834 return -1; 3835 3836 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 3837 { 3838 if (TREE_CODE (field) != FIELD_DECL) 3839 continue; 3840 3841 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); 3842 if (sub_count < 0) 3843 return -1; 3844 count = count > sub_count ? count : sub_count; 3845 } 3846 3847 /* There must be no padding. */ 3848 if (!host_integerp (TYPE_SIZE (type), 1) 3849 || (tree_low_cst (TYPE_SIZE (type), 1) 3850 != count * GET_MODE_BITSIZE (*modep))) 3851 return -1; 3852 3853 return count; 3854 } 3855 3856 default: 3857 break; 3858 } 3859 3860 return -1; 3861 } 3862 3863 /* Return true if PCS_VARIANT should use VFP registers. */ 3864 static bool 3865 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) 3866 { 3867 if (pcs_variant == ARM_PCS_AAPCS_VFP) 3868 return true; 3869 3870 if (pcs_variant != ARM_PCS_AAPCS_LOCAL) 3871 return false; 3872 3873 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT && 3874 (TARGET_VFP_DOUBLE || !is_double)); 3875 } 3876 3877 static bool 3878 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant, 3879 enum machine_mode mode, const_tree type, 3880 enum machine_mode *base_mode, int *count) 3881 { 3882 enum machine_mode new_mode = VOIDmode; 3883 3884 if (GET_MODE_CLASS (mode) == MODE_FLOAT 3885 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT 3886 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) 3887 { 3888 *count = 1; 3889 new_mode = mode; 3890 } 3891 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 3892 { 3893 *count = 2; 3894 new_mode = (mode == DCmode ? DFmode : SFmode); 3895 } 3896 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE)) 3897 { 3898 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); 3899 3900 if (ag_count > 0 && ag_count <= 4) 3901 *count = ag_count; 3902 else 3903 return false; 3904 } 3905 else 3906 return false; 3907 3908 3909 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1)) 3910 return false; 3911 3912 *base_mode = new_mode; 3913 return true; 3914 } 3915 3916 static bool 3917 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant, 3918 enum machine_mode mode, const_tree type) 3919 { 3920 int count ATTRIBUTE_UNUSED; 3921 enum machine_mode ag_mode ATTRIBUTE_UNUSED; 3922 3923 if (!use_vfp_abi (pcs_variant, false)) 3924 return false; 3925 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, 3926 &ag_mode, &count); 3927 } 3928 3929 static bool 3930 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 3931 const_tree type) 3932 { 3933 if (!use_vfp_abi (pcum->pcs_variant, false)) 3934 return false; 3935 3936 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type, 3937 &pcum->aapcs_vfp_rmode, 3938 &pcum->aapcs_vfp_rcount); 3939 } 3940 3941 static bool 3942 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 3943 const_tree type ATTRIBUTE_UNUSED) 3944 { 3945 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode); 3946 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1; 3947 int regno; 3948 3949 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift) 3950 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask) 3951 { 3952 pcum->aapcs_vfp_reg_alloc = mask << regno; 3953 if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) 3954 { 3955 int i; 3956 int rcount = pcum->aapcs_vfp_rcount; 3957 int rshift = 
shift;
	      enum machine_mode rmode = pcum->aapcs_vfp_rmode;
	      rtx par;

	      if (!TARGET_NEON)
		{
		  /* Avoid using unsupported vector modes.  */
		  if (rmode == V2SImode)
		    rmode = DImode;
		  else if (rmode == V4SImode)
		    {
		      rmode = DImode;
		      rcount *= 2;
		      rshift /= 2;
		    }
		}
	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	      for (i = 0; i < rcount; i++)
		{
		  rtx tmp = gen_rtx_REG (rmode,
					 FIRST_VFP_REGNUM + regno + i * rshift);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}

	      pcum->aapcs_reg = par;
	    }
	  else
	    pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	  return true;
	}
  return false;
}

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
			       enum machine_mode mode,
			       const_tree type)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL_RTX;

  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
    {
      int count;
      enum machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}

static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
		   enum machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
  return;
}

#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.
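     For the single VFP entry below this is aapcs_vfp_is_call_candidate.
     Hypothetical examples (not from this file):

       struct hfa { float x, y, z; };   // three SFmode elements: a candidate
       struct mix { float x; int y; };  // mixed members: not a candidate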
*/ 4072 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); 4073 4074 /* Return true if the argument does get a co-processor register; it 4075 should set aapcs_reg to an RTX of the register allocated as is 4076 required for a return from FUNCTION_ARG. */ 4077 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); 4078 4079 /* Return true if a result of mode MODE (or type TYPE if MODE is 4080 BLKmode) is can be returned in this co-processor's registers. */ 4081 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree); 4082 4083 /* Allocate and return an RTX element to hold the return type of a 4084 call, this routine must not fail and will only be called if 4085 is_return_candidate returned true with the same parameters. */ 4086 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree); 4087 4088 /* Finish processing this argument and prepare to start processing 4089 the next one. */ 4090 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); 4091 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] = 4092 { 4093 AAPCS_CP(vfp) 4094 }; 4095 4096 #undef AAPCS_CP 4097 4098 static int 4099 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 4100 tree type) 4101 { 4102 int i; 4103 4104 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) 4105 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) 4106 return i; 4107 4108 return -1; 4109 } 4110 4111 static int 4112 aapcs_select_return_coproc (const_tree type, const_tree fntype) 4113 { 4114 /* We aren't passed a decl, so we can't check that a call is local. 4115 However, it isn't clear that that would be a win anyway, since it 4116 might limit some tail-calling opportunities. */ 4117 enum arm_pcs pcs_variant; 4118 4119 if (fntype) 4120 { 4121 const_tree fndecl = NULL_TREE; 4122 4123 if (TREE_CODE (fntype) == FUNCTION_DECL) 4124 { 4125 fndecl = fntype; 4126 fntype = TREE_TYPE (fntype); 4127 } 4128 4129 pcs_variant = arm_get_pcs_model (fntype, fndecl); 4130 } 4131 else 4132 pcs_variant = arm_pcs_default; 4133 4134 if (pcs_variant != ARM_PCS_AAPCS) 4135 { 4136 int i; 4137 4138 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) 4139 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, 4140 TYPE_MODE (type), 4141 type)) 4142 return i; 4143 } 4144 return -1; 4145 } 4146 4147 static rtx 4148 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type, 4149 const_tree fntype) 4150 { 4151 /* We aren't passed a decl, so we can't check that a call is local. 4152 However, it isn't clear that that would be a win anyway, since it 4153 might limit some tail-calling opportunities. */ 4154 enum arm_pcs pcs_variant; 4155 int unsignedp ATTRIBUTE_UNUSED; 4156 4157 if (fntype) 4158 { 4159 const_tree fndecl = NULL_TREE; 4160 4161 if (TREE_CODE (fntype) == FUNCTION_DECL) 4162 { 4163 fndecl = fntype; 4164 fntype = TREE_TYPE (fntype); 4165 } 4166 4167 pcs_variant = arm_get_pcs_model (fntype, fndecl); 4168 } 4169 else 4170 pcs_variant = arm_pcs_default; 4171 4172 /* Promote integer types. 
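     For example (hypothetical):

       short f (void);   // HImode result promoted to SImode here, so the
                         // caller reads a full word back from r0.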
*/ 4173 if (type && INTEGRAL_TYPE_P (type)) 4174 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1); 4175 4176 if (pcs_variant != ARM_PCS_AAPCS) 4177 { 4178 int i; 4179 4180 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) 4181 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode, 4182 type)) 4183 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant, 4184 mode, type); 4185 } 4186 4187 /* Promotes small structs returned in a register to full-word size 4188 for big-endian AAPCS. */ 4189 if (type && arm_return_in_msb (type)) 4190 { 4191 HOST_WIDE_INT size = int_size_in_bytes (type); 4192 if (size % UNITS_PER_WORD != 0) 4193 { 4194 size += UNITS_PER_WORD - size % UNITS_PER_WORD; 4195 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); 4196 } 4197 } 4198 4199 return gen_rtx_REG (mode, R0_REGNUM); 4200 } 4201 4202 rtx 4203 aapcs_libcall_value (enum machine_mode mode) 4204 { 4205 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE); 4206 } 4207 4208 /* Lay out a function argument using the AAPCS rules. The rule 4209 numbers referred to here are those in the AAPCS. */ 4210 static void 4211 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 4212 tree type, int named) 4213 { 4214 int nregs, nregs2; 4215 int ncrn; 4216 4217 /* We only need to do this once per argument. */ 4218 if (pcum->aapcs_arg_processed) 4219 return; 4220 4221 pcum->aapcs_arg_processed = true; 4222 4223 /* Special case: if named is false then we are handling an incoming 4224 anonymous argument which is on the stack. */ 4225 if (!named) 4226 return; 4227 4228 /* Is this a potential co-processor register candidate? */ 4229 if (pcum->pcs_variant != ARM_PCS_AAPCS) 4230 { 4231 int slot = aapcs_select_call_coproc (pcum, mode, type); 4232 pcum->aapcs_cprc_slot = slot; 4233 4234 /* We don't have to apply any of the rules from part B of the 4235 preparation phase, these are handled elsewhere in the 4236 compiler. */ 4237 4238 if (slot >= 0) 4239 { 4240 /* A Co-processor register candidate goes either in its own 4241 class of registers or on the stack. */ 4242 if (!pcum->aapcs_cprc_failed[slot]) 4243 { 4244 /* C1.cp - Try to allocate the argument to co-processor 4245 registers. */ 4246 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type)) 4247 return; 4248 4249 /* C2.cp - Put the argument on the stack and note that we 4250 can't assign any more candidates in this slot. We also 4251 need to note that we have allocated stack space, so that 4252 we won't later try to split a non-cprc candidate between 4253 core registers and the stack. */ 4254 pcum->aapcs_cprc_failed[slot] = true; 4255 pcum->can_split = false; 4256 } 4257 4258 /* We didn't get a register, so this argument goes on the 4259 stack. */ 4260 gcc_assert (pcum->can_split == false); 4261 return; 4262 } 4263 } 4264 4265 /* C3 - For double-word aligned arguments, round the NCRN up to the 4266 next even number. */ 4267 ncrn = pcum->aapcs_ncrn; 4268 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type)) 4269 ncrn++; 4270 4271 nregs = ARM_NUM_REGS2(mode, type); 4272 4273 /* Sigh, this test should really assert that nregs > 0, but a GCC 4274 extension allows empty structs and then gives them empty size; it 4275 then allows such a structure to be passed by value. For some of 4276 the code below we have to pretend that such an argument has 4277 non-zero size so that we 'locate' it correctly either in 4278 registers or on the stack. */ 4279 gcc_assert (nregs >= 0); 4280 4281 nregs2 = nregs ? 
nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl)
{
  /* Work out which PCS variant this call uses.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs.  */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}


/* Return true if mode/type need doubleword alignment.  */
bool
arm_needs_doubleword_align (enum machine_mode mode, tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
	  || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}


/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
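   A hypothetical example of the base AAPCS layout produced by the rules
   above (illustration only):

     void f (int a, long long b, int c);
     // a -> r0; b -> r2/r3 (rule C3 skips r1 for doubleword alignment);
     // c -> no core registers remain, so it is passed on the stack.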
*/ 4396 4397 rtx 4398 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 4399 tree type, int named) 4400 { 4401 int nregs; 4402 4403 /* Handle the special case quickly. Pick an arbitrary value for op2 of 4404 a call insn (op3 of a call_value insn). */ 4405 if (mode == VOIDmode) 4406 return const0_rtx; 4407 4408 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) 4409 { 4410 aapcs_layout_arg (pcum, mode, type, named); 4411 return pcum->aapcs_reg; 4412 } 4413 4414 /* Varargs vectors are treated the same as long long. 4415 named_count avoids having to change the way arm handles 'named' */ 4416 if (TARGET_IWMMXT_ABI 4417 && arm_vector_mode_supported_p (mode) 4418 && pcum->named_count > pcum->nargs + 1) 4419 { 4420 if (pcum->iwmmxt_nregs <= 9) 4421 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM); 4422 else 4423 { 4424 pcum->can_split = false; 4425 return NULL_RTX; 4426 } 4427 } 4428 4429 /* Put doubleword aligned quantities in even register pairs. */ 4430 if (pcum->nregs & 1 4431 && ARM_DOUBLEWORD_ALIGN 4432 && arm_needs_doubleword_align (mode, type)) 4433 pcum->nregs++; 4434 4435 if (mode == VOIDmode) 4436 /* Pick an arbitrary value for operand 2 of the call insn. */ 4437 return const0_rtx; 4438 4439 /* Only allow splitting an arg between regs and memory if all preceding 4440 args were allocated to regs. For args passed by reference we only count 4441 the reference pointer. */ 4442 if (pcum->can_split) 4443 nregs = 1; 4444 else 4445 nregs = ARM_NUM_REGS2 (mode, type); 4446 4447 if (!named || pcum->nregs + nregs > NUM_ARG_REGS) 4448 return NULL_RTX; 4449 4450 return gen_rtx_REG (mode, pcum->nregs); 4451 } 4452 4453 static int 4454 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 4455 tree type, bool named) 4456 { 4457 int nregs = pcum->nregs; 4458 4459 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) 4460 { 4461 aapcs_layout_arg (pcum, mode, type, named); 4462 return pcum->aapcs_partial; 4463 } 4464 4465 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode)) 4466 return 0; 4467 4468 if (NUM_ARG_REGS > nregs 4469 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type)) 4470 && pcum->can_split) 4471 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; 4472 4473 return 0; 4474 } 4475 4476 void 4477 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 4478 tree type, bool named) 4479 { 4480 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) 4481 { 4482 aapcs_layout_arg (pcum, mode, type, named); 4483 4484 if (pcum->aapcs_cprc_slot >= 0) 4485 { 4486 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode, 4487 type); 4488 pcum->aapcs_cprc_slot = -1; 4489 } 4490 4491 /* Generic stuff. */ 4492 pcum->aapcs_arg_processed = false; 4493 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn; 4494 pcum->aapcs_reg = NULL_RTX; 4495 pcum->aapcs_partial = 0; 4496 } 4497 else 4498 { 4499 pcum->nargs += 1; 4500 if (arm_vector_mode_supported_p (mode) 4501 && pcum->named_count > pcum->nargs 4502 && TARGET_IWMMXT_ABI) 4503 pcum->iwmmxt_nregs += 1; 4504 else 4505 pcum->nregs += ARM_NUM_REGS2 (mode, type); 4506 } 4507 } 4508 4509 /* Variable sized types are passed by reference. This is a GCC 4510 extension to the ARM ABI. */ 4511 4512 static bool 4513 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, 4514 enum machine_mode mode ATTRIBUTE_UNUSED, 4515 const_tree type, bool named ATTRIBUTE_UNUSED) 4516 { 4517 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; 4518 } 4519 4520 /* Encode the current state of the #pragma [no_]long_calls. 
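   In user code these pragmas look like (illustration only):

     #pragma long_calls
     void far_func (void);    // gets an implicit long_call attribute
     #pragma no_long_calls
     void near_func (void);   // gets an implicit short_call attribute
     #pragma long_calls_off   // back to the default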
*/ 4521 typedef enum 4522 { 4523 OFF, /* No #pragma [no_]long_calls is in effect. */ 4524 LONG, /* #pragma long_calls is in effect. */ 4525 SHORT /* #pragma no_long_calls is in effect. */ 4526 } arm_pragma_enum; 4527 4528 static arm_pragma_enum arm_pragma_long_calls = OFF; 4529 4530 void 4531 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) 4532 { 4533 arm_pragma_long_calls = LONG; 4534 } 4535 4536 void 4537 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) 4538 { 4539 arm_pragma_long_calls = SHORT; 4540 } 4541 4542 void 4543 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED) 4544 { 4545 arm_pragma_long_calls = OFF; 4546 } 4547 4548 /* Handle an attribute requiring a FUNCTION_DECL; 4549 arguments as in struct attribute_spec.handler. */ 4550 static tree 4551 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED, 4552 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 4553 { 4554 if (TREE_CODE (*node) != FUNCTION_DECL) 4555 { 4556 warning (OPT_Wattributes, "%qE attribute only applies to functions", 4557 name); 4558 *no_add_attrs = true; 4559 } 4560 4561 return NULL_TREE; 4562 } 4563 4564 /* Handle an "interrupt" or "isr" attribute; 4565 arguments as in struct attribute_spec.handler. */ 4566 static tree 4567 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags, 4568 bool *no_add_attrs) 4569 { 4570 if (DECL_P (*node)) 4571 { 4572 if (TREE_CODE (*node) != FUNCTION_DECL) 4573 { 4574 warning (OPT_Wattributes, "%qE attribute only applies to functions", 4575 name); 4576 *no_add_attrs = true; 4577 } 4578 /* FIXME: the argument if any is checked for type attributes; 4579 should it be checked for decl ones? */ 4580 } 4581 else 4582 { 4583 if (TREE_CODE (*node) == FUNCTION_TYPE 4584 || TREE_CODE (*node) == METHOD_TYPE) 4585 { 4586 if (arm_isr_value (args) == ARM_FT_UNKNOWN) 4587 { 4588 warning (OPT_Wattributes, "%qE attribute ignored", 4589 name); 4590 *no_add_attrs = true; 4591 } 4592 } 4593 else if (TREE_CODE (*node) == POINTER_TYPE 4594 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE 4595 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE) 4596 && arm_isr_value (args) != ARM_FT_UNKNOWN) 4597 { 4598 *node = build_variant_type_copy (*node); 4599 TREE_TYPE (*node) = build_type_attribute_variant 4600 (TREE_TYPE (*node), 4601 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node)))); 4602 *no_add_attrs = true; 4603 } 4604 else 4605 { 4606 /* Possibly pass this attribute on from the type to a decl. */ 4607 if (flags & ((int) ATTR_FLAG_DECL_NEXT 4608 | (int) ATTR_FLAG_FUNCTION_NEXT 4609 | (int) ATTR_FLAG_ARRAY_NEXT)) 4610 { 4611 *no_add_attrs = true; 4612 return tree_cons (name, args, NULL_TREE); 4613 } 4614 else 4615 { 4616 warning (OPT_Wattributes, "%qE attribute ignored", 4617 name); 4618 } 4619 } 4620 } 4621 4622 return NULL_TREE; 4623 } 4624 4625 /* Handle a "pcs" attribute; arguments as in struct 4626 attribute_spec.handler. */ 4627 static tree 4628 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, 4629 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 4630 { 4631 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN) 4632 { 4633 warning (OPT_Wattributes, "%qE attribute ignored", name); 4634 *no_add_attrs = true; 4635 } 4636 return NULL_TREE; 4637 } 4638 4639 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES 4640 /* Handle the "notshared" attribute. This attribute is another way of 4641 requesting hidden visibility. 
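   A hypothetical use (illustration only):

     class __attribute__ ((notshared)) lock_guard { void lock (); };

   which gives the type's name hidden visibility, so that it is not
   exported from (or shared between) DLLs.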
ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}

/* Assign default attributes to a newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}

/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))
    return false;

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.
*/ 4745 if (!targetm.binds_local_p (decl)) 4746 return false; 4747 4748 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */ 4749 if (!DECL_SECTION_NAME (decl)) 4750 { 4751 /* Make sure that we will not create a unique section for DECL. */ 4752 if (flag_function_sections || DECL_ONE_ONLY (decl)) 4753 return false; 4754 } 4755 4756 return function_section (decl) == section; 4757 } 4758 4759 /* Return nonzero if a 32-bit "long_call" should be generated for 4760 a call from the current function to DECL. We generate a long_call 4761 if the function: 4762 4763 a. has an __attribute__((long call)) 4764 or b. is within the scope of a #pragma long_calls 4765 or c. the -mlong-calls command line switch has been specified 4766 4767 However we do not generate a long call if the function: 4768 4769 d. has an __attribute__ ((short_call)) 4770 or e. is inside the scope of a #pragma no_long_calls 4771 or f. is defined in the same section as the current function. */ 4772 4773 bool 4774 arm_is_long_call_p (tree decl) 4775 { 4776 tree attrs; 4777 4778 if (!decl) 4779 return TARGET_LONG_CALLS; 4780 4781 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); 4782 if (lookup_attribute ("short_call", attrs)) 4783 return false; 4784 4785 /* For "f", be conservative, and only cater for cases in which the 4786 whole of the current function is placed in the same section. */ 4787 if (!flag_reorder_blocks_and_partition 4788 && TREE_CODE (decl) == FUNCTION_DECL 4789 && arm_function_in_section_p (decl, current_function_section ())) 4790 return false; 4791 4792 if (lookup_attribute ("long_call", attrs)) 4793 return true; 4794 4795 return TARGET_LONG_CALLS; 4796 } 4797 4798 /* Return nonzero if it is ok to make a tail-call to DECL. */ 4799 static bool 4800 arm_function_ok_for_sibcall (tree decl, tree exp) 4801 { 4802 unsigned long func_type; 4803 4804 if (cfun->machine->sibcall_blocked) 4805 return false; 4806 4807 /* Never tailcall something for which we have no decl, or if we 4808 are in Thumb mode. */ 4809 if (decl == NULL || TARGET_THUMB) 4810 return false; 4811 4812 /* The PIC register is live on entry to VxWorks PLT entries, so we 4813 must make the call before restoring the PIC register. */ 4814 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) 4815 return false; 4816 4817 /* Cannot tail-call to long calls, since these are out of range of 4818 a branch instruction. */ 4819 if (arm_is_long_call_p (decl)) 4820 return false; 4821 4822 /* If we are interworking and the function is not declared static 4823 then we can't tail-call it unless we know that it exists in this 4824 compilation unit (since it might be a Thumb routine). */ 4825 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl)) 4826 return false; 4827 4828 func_type = arm_current_func_type (); 4829 /* Never tailcall from an ISR routine - it needs a special exit sequence. */ 4830 if (IS_INTERRUPT (func_type)) 4831 return false; 4832 4833 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) 4834 { 4835 /* Check that the return value locations are the same. For 4836 example that we aren't returning a value from the sibling in 4837 a VFP register but then need to transfer it to a core 4838 register. */ 4839 rtx a, b; 4840 4841 a = arm_function_value (TREE_TYPE (exp), decl, false); 4842 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), 4843 cfun->decl, false); 4844 if (!rtx_equal_p (a, b)) 4845 return false; 4846 } 4847 4848 /* Never tailcall if function may be called with a misaligned SP. 
*/ 4849 if (IS_STACKALIGN (func_type)) 4850 return false; 4851 4852 /* Everything else is ok. */ 4853 return true; 4854 } 4855 4856 4857 /* Addressing mode support functions. */ 4858 4859 /* Return nonzero if X is a legitimate immediate operand when compiling 4860 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */ 4861 int 4862 legitimate_pic_operand_p (rtx x) 4863 { 4864 if (GET_CODE (x) == SYMBOL_REF 4865 || (GET_CODE (x) == CONST 4866 && GET_CODE (XEXP (x, 0)) == PLUS 4867 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) 4868 return 0; 4869 4870 return 1; 4871 } 4872 4873 /* Record that the current function needs a PIC register. Initialize 4874 cfun->machine->pic_reg if we have not already done so. */ 4875 4876 static void 4877 require_pic_register (void) 4878 { 4879 /* A lot of the logic here is made obscure by the fact that this 4880 routine gets called as part of the rtx cost estimation process. 4881 We don't want those calls to affect any assumptions about the real 4882 function; and further, we can't call entry_of_function() until we 4883 start the real expansion process. */ 4884 if (!crtl->uses_pic_offset_table) 4885 { 4886 gcc_assert (can_create_pseudo_p ()); 4887 if (arm_pic_register != INVALID_REGNUM) 4888 { 4889 if (!cfun->machine->pic_reg) 4890 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register); 4891 4892 /* Play games to avoid marking the function as needing pic 4893 if we are being called as part of the cost-estimation 4894 process. */ 4895 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) 4896 crtl->uses_pic_offset_table = 1; 4897 } 4898 else 4899 { 4900 rtx seq; 4901 4902 if (!cfun->machine->pic_reg) 4903 cfun->machine->pic_reg = gen_reg_rtx (Pmode); 4904 4905 /* Play games to avoid marking the function as needing pic 4906 if we are being called as part of the cost-estimation 4907 process. */ 4908 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) 4909 { 4910 crtl->uses_pic_offset_table = 1; 4911 start_sequence (); 4912 4913 arm_load_pic_register (0UL); 4914 4915 seq = get_insns (); 4916 end_sequence (); 4917 /* We can be called during expansion of PHI nodes, where 4918 we can't yet emit instructions directly in the final 4919 insn stream. Queue the insns on the entry edge, they will 4920 be committed after everything else is expanded. */ 4921 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); 4922 } 4923 } 4924 } 4925 } 4926 4927 rtx 4928 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) 4929 { 4930 if (GET_CODE (orig) == SYMBOL_REF 4931 || GET_CODE (orig) == LABEL_REF) 4932 { 4933 rtx pic_ref, address; 4934 rtx insn; 4935 int subregs = 0; 4936 4937 /* If this function doesn't have a pic register, create one now. */ 4938 require_pic_register (); 4939 4940 if (reg == 0) 4941 { 4942 gcc_assert (can_create_pseudo_p ()); 4943 reg = gen_reg_rtx (Pmode); 4944 4945 subregs = 1; 4946 } 4947 4948 if (subregs) 4949 address = gen_reg_rtx (Pmode); 4950 else 4951 address = reg; 4952 4953 if (TARGET_32BIT) 4954 emit_insn (gen_pic_load_addr_32bit (address, orig)); 4955 else /* TARGET_THUMB1 */ 4956 emit_insn (gen_pic_load_addr_thumb1 (address, orig)); 4957 4958 /* VxWorks does not impose a fixed gap between segments; the run-time 4959 gap can be different from the object-file gap. We therefore can't 4960 use GOTOFF unless we are absolutely sure that the symbol is in the 4961 same segment as the GOT. 
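     Schematically (illustration only), the cheap form is the GOTOFF-style
     address
       (plus pic_reg <pc-relative offset of sym>)
     while the general case loads the address through the GOT:
       (mem (plus pic_reg <offset of sym's GOT slot>)).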
Unfortunately, the flexibility of linker 4962 scripts means that we can't be sure of that in general, so assume 4963 that GOTOFF is never valid on VxWorks. */ 4964 if ((GET_CODE (orig) == LABEL_REF 4965 || (GET_CODE (orig) == SYMBOL_REF && 4966 SYMBOL_REF_LOCAL_P (orig))) 4967 && NEED_GOT_RELOC 4968 && !TARGET_VXWORKS_RTP) 4969 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address); 4970 else 4971 { 4972 pic_ref = gen_const_mem (Pmode, 4973 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, 4974 address)); 4975 } 4976 4977 insn = emit_move_insn (reg, pic_ref); 4978 4979 /* Put a REG_EQUAL note on this insn, so that it can be optimized 4980 by loop. */ 4981 set_unique_reg_note (insn, REG_EQUAL, orig); 4982 4983 return reg; 4984 } 4985 else if (GET_CODE (orig) == CONST) 4986 { 4987 rtx base, offset; 4988 4989 if (GET_CODE (XEXP (orig, 0)) == PLUS 4990 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg) 4991 return orig; 4992 4993 /* Handle the case where we have: const (UNSPEC_TLS). */ 4994 if (GET_CODE (XEXP (orig, 0)) == UNSPEC 4995 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS) 4996 return orig; 4997 4998 /* Handle the case where we have: 4999 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a 5000 CONST_INT. */ 5001 if (GET_CODE (XEXP (orig, 0)) == PLUS 5002 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC 5003 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS) 5004 { 5005 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT); 5006 return orig; 5007 } 5008 5009 if (reg == 0) 5010 { 5011 gcc_assert (can_create_pseudo_p ()); 5012 reg = gen_reg_rtx (Pmode); 5013 } 5014 5015 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); 5016 5017 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); 5018 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, 5019 base == reg ? 0 : reg); 5020 5021 if (GET_CODE (offset) == CONST_INT) 5022 { 5023 /* The base register doesn't really matter, we only want to 5024 test the index for the appropriate mode. */ 5025 if (!arm_legitimate_index_p (mode, offset, SET, 0)) 5026 { 5027 gcc_assert (can_create_pseudo_p ()); 5028 offset = force_reg (Pmode, offset); 5029 } 5030 5031 if (GET_CODE (offset) == CONST_INT) 5032 return plus_constant (base, INTVAL (offset)); 5033 } 5034 5035 if (GET_MODE_SIZE (mode) > 4 5036 && (GET_MODE_CLASS (mode) == MODE_INT 5037 || TARGET_SOFT_FLOAT)) 5038 { 5039 emit_insn (gen_addsi3 (reg, base, offset)); 5040 return reg; 5041 } 5042 5043 return gen_rtx_PLUS (Pmode, base, offset); 5044 } 5045 5046 return orig; 5047 } 5048 5049 5050 /* Find a spare register to use during the prolog of a function. */ 5051 5052 static int 5053 thumb_find_work_register (unsigned long pushed_regs_mask) 5054 { 5055 int reg; 5056 5057 /* Check the argument registers first as these are call-used. The 5058 register allocation order means that sometimes r3 might be used 5059 but earlier argument registers might not, so check them all. */ 5060 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --) 5061 if (!df_regs_ever_live_p (reg)) 5062 return reg; 5063 5064 /* Before going on to check the call-saved registers we can try a couple 5065 more ways of deducing that r3 is available. The first is when we are 5066 pushing anonymous arguments onto the stack and we have less than 4 5067 registers worth of fixed arguments(*). In this case r3 will be part of 5068 the variable argument list and so we can be sure that it will be 5069 pushed right at the start of the function. Hence it will be available 5070 for the rest of the prologue. 
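   For example, with a (hypothetical) declaration such as
     void f (int a, ...);
   only r0 carries a fixed argument; r1-r3 are pushed as pretend
   arguments on entry, so r3 can safely be reused as a work register.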
5071 (*): ie crtl->args.pretend_args_size is greater than 0. */ 5072 if (cfun->machine->uses_anonymous_args 5073 && crtl->args.pretend_args_size > 0) 5074 return LAST_ARG_REGNUM; 5075 5076 /* The other case is when we have fixed arguments but less than 4 registers 5077 worth. In this case r3 might be used in the body of the function, but 5078 it is not being used to convey an argument into the function. In theory 5079 we could just check crtl->args.size to see how many bytes are 5080 being passed in argument registers, but it seems that it is unreliable. 5081 Sometimes it will have the value 0 when in fact arguments are being 5082 passed. (See testcase execute/20021111-1.c for an example). So we also 5083 check the args_info.nregs field as well. The problem with this field is 5084 that it makes no allowances for arguments that are passed to the 5085 function but which are not used. Hence we could miss an opportunity 5086 when a function has an unused argument in r3. But it is better to be 5087 safe than to be sorry. */ 5088 if (! cfun->machine->uses_anonymous_args 5089 && crtl->args.size >= 0 5090 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD) 5091 && crtl->args.info.nregs < 4) 5092 return LAST_ARG_REGNUM; 5093 5094 /* Otherwise look for a call-saved register that is going to be pushed. */ 5095 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --) 5096 if (pushed_regs_mask & (1 << reg)) 5097 return reg; 5098 5099 if (TARGET_THUMB2) 5100 { 5101 /* Thumb-2 can use high regs. */ 5102 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++) 5103 if (pushed_regs_mask & (1 << reg)) 5104 return reg; 5105 } 5106 /* Something went wrong - thumb_compute_save_reg_mask() 5107 should have arranged for a suitable register to be pushed. */ 5108 gcc_unreachable (); 5109 } 5110 5111 static GTY(()) int pic_labelno; 5112 5113 /* Generate code to load the PIC register. In thumb mode SCRATCH is a 5114 low register. */ 5115 5116 void 5117 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED) 5118 { 5119 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg; 5120 5121 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE) 5122 return; 5123 5124 gcc_assert (flag_pic); 5125 5126 pic_reg = cfun->machine->pic_reg; 5127 if (TARGET_VXWORKS_RTP) 5128 { 5129 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); 5130 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); 5131 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); 5132 5133 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg))); 5134 5135 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); 5136 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp)); 5137 } 5138 else 5139 { 5140 /* We use an UNSPEC rather than a LABEL_REF because this label 5141 never appears in the code stream. */ 5142 5143 labelno = GEN_INT (pic_labelno++); 5144 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); 5145 l1 = gen_rtx_CONST (VOIDmode, l1); 5146 5147 /* On the ARM the PC register contains 'dot + 8' at the time of the 5148 addition, on the Thumb it is 'dot + 4'. */ 5149 pic_rtx = plus_constant (l1, TARGET_ARM ? 
8 : 4); 5150 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx), 5151 UNSPEC_GOTSYM_OFF); 5152 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); 5153 5154 if (TARGET_32BIT) 5155 { 5156 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); 5157 if (TARGET_ARM) 5158 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno)); 5159 else 5160 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno)); 5161 } 5162 else /* TARGET_THUMB1 */ 5163 { 5164 if (arm_pic_register != INVALID_REGNUM 5165 && REGNO (pic_reg) > LAST_LO_REGNUM) 5166 { 5167 /* We will have pushed the pic register, so we should always be 5168 able to find a work register. */ 5169 pic_tmp = gen_rtx_REG (SImode, 5170 thumb_find_work_register (saved_regs)); 5171 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx)); 5172 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp)); 5173 } 5174 else 5175 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx)); 5176 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno)); 5177 } 5178 } 5179 5180 /* Need to emit this whether or not we obey regdecls, 5181 since setjmp/longjmp can cause life info to screw up. */ 5182 emit_use (pic_reg); 5183 } 5184 5185 5186 /* Return nonzero if X is valid as an ARM state addressing register. */ 5187 static int 5188 arm_address_register_rtx_p (rtx x, int strict_p) 5189 { 5190 int regno; 5191 5192 if (GET_CODE (x) != REG) 5193 return 0; 5194 5195 regno = REGNO (x); 5196 5197 if (strict_p) 5198 return ARM_REGNO_OK_FOR_BASE_P (regno); 5199 5200 return (regno <= LAST_ARM_REGNUM 5201 || regno >= FIRST_PSEUDO_REGISTER 5202 || regno == FRAME_POINTER_REGNUM 5203 || regno == ARG_POINTER_REGNUM); 5204 } 5205 5206 /* Return TRUE if this rtx is the difference of a symbol and a label, 5207 and will reduce to a PC-relative relocation in the object file. 5208 Expressions like this can be left alone when generating PIC, rather 5209 than forced through the GOT. */ 5210 static int 5211 pcrel_constant_p (rtx x) 5212 { 5213 if (GET_CODE (x) == MINUS) 5214 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1)); 5215 5216 return FALSE; 5217 } 5218 5219 /* Return nonzero if X is a valid ARM state address operand. */ 5220 int 5221 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, 5222 int strict_p) 5223 { 5224 bool use_ldrd; 5225 enum rtx_code code = GET_CODE (x); 5226 5227 if (arm_address_register_rtx_p (x, strict_p)) 5228 return 1; 5229 5230 use_ldrd = (TARGET_LDRD 5231 && (mode == DImode 5232 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); 5233 5234 if (code == POST_INC || code == PRE_DEC 5235 || ((code == PRE_INC || code == POST_DEC) 5236 && (use_ldrd || GET_MODE_SIZE (mode) <= 4))) 5237 return arm_address_register_rtx_p (XEXP (x, 0), strict_p); 5238 5239 else if ((code == POST_MODIFY || code == PRE_MODIFY) 5240 && arm_address_register_rtx_p (XEXP (x, 0), strict_p) 5241 && GET_CODE (XEXP (x, 1)) == PLUS 5242 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) 5243 { 5244 rtx addend = XEXP (XEXP (x, 1), 1); 5245 5246 /* Don't allow ldrd post increment by register because it's hard 5247 to fixup invalid register choices. */ 5248 if (use_ldrd 5249 && GET_CODE (x) == POST_MODIFY 5250 && GET_CODE (addend) == REG) 5251 return 0; 5252 5253 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4) 5254 && arm_legitimate_index_p (mode, addend, outer, strict_p)); 5255 } 5256 5257 /* After reload constants split into minipools will have addresses 5258 from a LABEL_REF. 
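     e.g. (schematically) an address of the form
       (const (plus (label_ref Lpool) (const_int 8)))
     which the test below accepts once reload_completed is set.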
*/ 5259 else if (reload_completed 5260 && (code == LABEL_REF 5261 || (code == CONST 5262 && GET_CODE (XEXP (x, 0)) == PLUS 5263 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF 5264 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))) 5265 return 1; 5266 5267 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))) 5268 return 0; 5269 5270 else if (code == PLUS) 5271 { 5272 rtx xop0 = XEXP (x, 0); 5273 rtx xop1 = XEXP (x, 1); 5274 5275 return ((arm_address_register_rtx_p (xop0, strict_p) 5276 && GET_CODE(xop1) == CONST_INT 5277 && arm_legitimate_index_p (mode, xop1, outer, strict_p)) 5278 || (arm_address_register_rtx_p (xop1, strict_p) 5279 && arm_legitimate_index_p (mode, xop0, outer, strict_p))); 5280 } 5281 5282 #if 0 5283 /* Reload currently can't handle MINUS, so disable this for now */ 5284 else if (GET_CODE (x) == MINUS) 5285 { 5286 rtx xop0 = XEXP (x, 0); 5287 rtx xop1 = XEXP (x, 1); 5288 5289 return (arm_address_register_rtx_p (xop0, strict_p) 5290 && arm_legitimate_index_p (mode, xop1, outer, strict_p)); 5291 } 5292 #endif 5293 5294 else if (GET_MODE_CLASS (mode) != MODE_FLOAT 5295 && code == SYMBOL_REF 5296 && CONSTANT_POOL_ADDRESS_P (x) 5297 && ! (flag_pic 5298 && symbol_mentioned_p (get_pool_constant (x)) 5299 && ! pcrel_constant_p (get_pool_constant (x)))) 5300 return 1; 5301 5302 return 0; 5303 } 5304 5305 /* Return nonzero if X is a valid Thumb-2 address operand. */ 5306 static int 5307 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) 5308 { 5309 bool use_ldrd; 5310 enum rtx_code code = GET_CODE (x); 5311 5312 if (arm_address_register_rtx_p (x, strict_p)) 5313 return 1; 5314 5315 use_ldrd = (TARGET_LDRD 5316 && (mode == DImode 5317 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); 5318 5319 if (code == POST_INC || code == PRE_DEC 5320 || ((code == PRE_INC || code == POST_DEC) 5321 && (use_ldrd || GET_MODE_SIZE (mode) <= 4))) 5322 return arm_address_register_rtx_p (XEXP (x, 0), strict_p); 5323 5324 else if ((code == POST_MODIFY || code == PRE_MODIFY) 5325 && arm_address_register_rtx_p (XEXP (x, 0), strict_p) 5326 && GET_CODE (XEXP (x, 1)) == PLUS 5327 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) 5328 { 5329 /* Thumb-2 only has autoincrement by constant. */ 5330 rtx addend = XEXP (XEXP (x, 1), 1); 5331 HOST_WIDE_INT offset; 5332 5333 if (GET_CODE (addend) != CONST_INT) 5334 return 0; 5335 5336 offset = INTVAL(addend); 5337 if (GET_MODE_SIZE (mode) <= 4) 5338 return (offset > -256 && offset < 256); 5339 5340 return (use_ldrd && offset > -1024 && offset < 1024 5341 && (offset & 3) == 0); 5342 } 5343 5344 /* After reload constants split into minipools will have addresses 5345 from a LABEL_REF. */ 5346 else if (reload_completed 5347 && (code == LABEL_REF 5348 || (code == CONST 5349 && GET_CODE (XEXP (x, 0)) == PLUS 5350 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF 5351 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))) 5352 return 1; 5353 5354 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))) 5355 return 0; 5356 5357 else if (code == PLUS) 5358 { 5359 rtx xop0 = XEXP (x, 0); 5360 rtx xop1 = XEXP (x, 1); 5361 5362 return ((arm_address_register_rtx_p (xop0, strict_p) 5363 && thumb2_legitimate_index_p (mode, xop1, strict_p)) 5364 || (arm_address_register_rtx_p (xop1, strict_p) 5365 && thumb2_legitimate_index_p (mode, xop0, strict_p))); 5366 } 5367 5368 else if (GET_MODE_CLASS (mode) != MODE_FLOAT 5369 && code == SYMBOL_REF 5370 && CONSTANT_POOL_ADDRESS_P (x) 5371 && ! 
(flag_pic 5372 && symbol_mentioned_p (get_pool_constant (x)) 5373 && ! pcrel_constant_p (get_pool_constant (x)))) 5374 return 1; 5375 5376 return 0; 5377 } 5378 5379 /* Return nonzero if INDEX is valid for an address index operand in 5380 ARM state. */ 5381 static int 5382 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer, 5383 int strict_p) 5384 { 5385 HOST_WIDE_INT range; 5386 enum rtx_code code = GET_CODE (index); 5387 5388 /* Standard coprocessor addressing modes. */ 5389 if (TARGET_HARD_FLOAT 5390 && (TARGET_FPA || TARGET_MAVERICK) 5391 && (GET_MODE_CLASS (mode) == MODE_FLOAT 5392 || (TARGET_MAVERICK && mode == DImode))) 5393 return (code == CONST_INT && INTVAL (index) < 1024 5394 && INTVAL (index) > -1024 5395 && (INTVAL (index) & 3) == 0); 5396 5397 if (TARGET_NEON 5398 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))) 5399 return (code == CONST_INT 5400 && INTVAL (index) < 1016 5401 && INTVAL (index) > -1024 5402 && (INTVAL (index) & 3) == 0); 5403 5404 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) 5405 return (code == CONST_INT 5406 && INTVAL (index) < 1024 5407 && INTVAL (index) > -1024 5408 && (INTVAL (index) & 3) == 0); 5409 5410 if (arm_address_register_rtx_p (index, strict_p) 5411 && (GET_MODE_SIZE (mode) <= 4)) 5412 return 1; 5413 5414 if (mode == DImode || mode == DFmode) 5415 { 5416 if (code == CONST_INT) 5417 { 5418 HOST_WIDE_INT val = INTVAL (index); 5419 5420 if (TARGET_LDRD) 5421 return val > -256 && val < 256; 5422 else 5423 return val > -4096 && val < 4092; 5424 } 5425 5426 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p); 5427 } 5428 5429 if (GET_MODE_SIZE (mode) <= 4 5430 && ! (arm_arch4 5431 && (mode == HImode 5432 || mode == HFmode 5433 || (mode == QImode && outer == SIGN_EXTEND)))) 5434 { 5435 if (code == MULT) 5436 { 5437 rtx xiop0 = XEXP (index, 0); 5438 rtx xiop1 = XEXP (index, 1); 5439 5440 return ((arm_address_register_rtx_p (xiop0, strict_p) 5441 && power_of_two_operand (xiop1, SImode)) 5442 || (arm_address_register_rtx_p (xiop1, strict_p) 5443 && power_of_two_operand (xiop0, SImode))); 5444 } 5445 else if (code == LSHIFTRT || code == ASHIFTRT 5446 || code == ASHIFT || code == ROTATERT) 5447 { 5448 rtx op = XEXP (index, 1); 5449 5450 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p) 5451 && GET_CODE (op) == CONST_INT 5452 && INTVAL (op) > 0 5453 && INTVAL (op) <= 31); 5454 } 5455 } 5456 5457 /* For ARM v4 we may be doing a sign-extend operation during the 5458 load. */ 5459 if (arm_arch4) 5460 { 5461 if (mode == HImode 5462 || mode == HFmode 5463 || (outer == SIGN_EXTEND && mode == QImode)) 5464 range = 256; 5465 else 5466 range = 4096; 5467 } 5468 else 5469 range = (mode == HImode || mode == HFmode) ? 4095 : 4096; 5470 5471 return (code == CONST_INT 5472 && INTVAL (index) < range 5473 && INTVAL (index) > -range); 5474 } 5475 5476 /* Return true if OP is a valid index scaling factor for Thumb-2 address 5477 index operand. i.e. 1, 2, 4 or 8. */ 5478 static bool 5479 thumb2_index_mul_operand (rtx op) 5480 { 5481 HOST_WIDE_INT val; 5482 5483 if (GET_CODE(op) != CONST_INT) 5484 return false; 5485 5486 val = INTVAL(op); 5487 return (val == 1 || val == 2 || val == 4 || val == 8); 5488 } 5489 5490 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */ 5491 static int 5492 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p) 5493 { 5494 enum rtx_code code = GET_CODE (index); 5495 5496 /* ??? Combine arm and thumb2 coprocessor addressing modes. 
*/ 5497 /* Standard coprocessor addressing modes. */ 5498 if (TARGET_HARD_FLOAT 5499 && (TARGET_FPA || TARGET_MAVERICK) 5500 && (GET_MODE_CLASS (mode) == MODE_FLOAT 5501 || (TARGET_MAVERICK && mode == DImode))) 5502 return (code == CONST_INT && INTVAL (index) < 1024 5503 && INTVAL (index) > -1024 5504 && (INTVAL (index) & 3) == 0); 5505 5506 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) 5507 { 5508 /* For DImode assume values will usually live in core regs 5509 and only allow LDRD addressing modes. */ 5510 if (!TARGET_LDRD || mode != DImode) 5511 return (code == CONST_INT 5512 && INTVAL (index) < 1024 5513 && INTVAL (index) > -1024 5514 && (INTVAL (index) & 3) == 0); 5515 } 5516 5517 if (TARGET_NEON 5518 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))) 5519 return (code == CONST_INT 5520 && INTVAL (index) < 1016 5521 && INTVAL (index) > -1024 5522 && (INTVAL (index) & 3) == 0); 5523 5524 if (arm_address_register_rtx_p (index, strict_p) 5525 && (GET_MODE_SIZE (mode) <= 4)) 5526 return 1; 5527 5528 if (mode == DImode || mode == DFmode) 5529 { 5530 if (code == CONST_INT) 5531 { 5532 HOST_WIDE_INT val = INTVAL (index); 5533 /* ??? Can we assume ldrd for thumb2? */ 5534 /* Thumb-2 ldrd only has reg+const addressing modes. */ 5535 /* ldrd supports offsets of +-1020. 5536 However the ldr fallback does not. */ 5537 return val > -256 && val < 256 && (val & 3) == 0; 5538 } 5539 else 5540 return 0; 5541 } 5542 5543 if (code == MULT) 5544 { 5545 rtx xiop0 = XEXP (index, 0); 5546 rtx xiop1 = XEXP (index, 1); 5547 5548 return ((arm_address_register_rtx_p (xiop0, strict_p) 5549 && thumb2_index_mul_operand (xiop1)) 5550 || (arm_address_register_rtx_p (xiop1, strict_p) 5551 && thumb2_index_mul_operand (xiop0))); 5552 } 5553 else if (code == ASHIFT) 5554 { 5555 rtx op = XEXP (index, 1); 5556 5557 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p) 5558 && GET_CODE (op) == CONST_INT 5559 && INTVAL (op) > 0 5560 && INTVAL (op) <= 3); 5561 } 5562 5563 return (code == CONST_INT 5564 && INTVAL (index) < 4096 5565 && INTVAL (index) > -256); 5566 } 5567 5568 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */ 5569 static int 5570 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p) 5571 { 5572 int regno; 5573 5574 if (GET_CODE (x) != REG) 5575 return 0; 5576 5577 regno = REGNO (x); 5578 5579 if (strict_p) 5580 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode); 5581 5582 return (regno <= LAST_LO_REGNUM 5583 || regno > LAST_VIRTUAL_REGISTER 5584 || regno == FRAME_POINTER_REGNUM 5585 || (GET_MODE_SIZE (mode) >= 4 5586 && (regno == STACK_POINTER_REGNUM 5587 || regno >= FIRST_PSEUDO_REGISTER 5588 || x == hard_frame_pointer_rtx 5589 || x == arg_pointer_rtx))); 5590 } 5591 5592 /* Return nonzero if x is a legitimate index register. This is the case 5593 for any base register that can access a QImode object. */ 5594 inline static int 5595 thumb1_index_register_rtx_p (rtx x, int strict_p) 5596 { 5597 return thumb1_base_register_rtx_p (x, QImode, strict_p); 5598 } 5599 5600 /* Return nonzero if x is a legitimate 16-bit Thumb-state address. 5601 5602 The AP may be eliminated to either the SP or the FP, so we use the 5603 least common denominator, e.g. SImode, and offsets from 0 to 64. 5604 5605 ??? Verify whether the above is the right approach. 5606 5607 ??? Also, the FP may be eliminated to the SP, so perhaps that 5608 needs special handling also. 5609 5610 ??? Look at how the mips16 port solves this problem. 
It probably uses 5611 better ways to solve some of these problems. 5612 5613 Although it is not incorrect, we don't accept QImode and HImode 5614 addresses based on the frame pointer or arg pointer until the 5615 reload pass starts. This is so that eliminating such addresses 5616 into stack based ones won't produce impossible code. */ 5617 static int 5618 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) 5619 { 5620 /* ??? Not clear if this is right. Experiment. */ 5621 if (GET_MODE_SIZE (mode) < 4 5622 && !(reload_in_progress || reload_completed) 5623 && (reg_mentioned_p (frame_pointer_rtx, x) 5624 || reg_mentioned_p (arg_pointer_rtx, x) 5625 || reg_mentioned_p (virtual_incoming_args_rtx, x) 5626 || reg_mentioned_p (virtual_outgoing_args_rtx, x) 5627 || reg_mentioned_p (virtual_stack_dynamic_rtx, x) 5628 || reg_mentioned_p (virtual_stack_vars_rtx, x))) 5629 return 0; 5630 5631 /* Accept any base register. SP only in SImode or larger. */ 5632 else if (thumb1_base_register_rtx_p (x, mode, strict_p)) 5633 return 1; 5634 5635 /* This is PC relative data before arm_reorg runs. */ 5636 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x) 5637 && GET_CODE (x) == SYMBOL_REF 5638 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic) 5639 return 1; 5640 5641 /* This is PC relative data after arm_reorg runs. */ 5642 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode) 5643 && reload_completed 5644 && (GET_CODE (x) == LABEL_REF 5645 || (GET_CODE (x) == CONST 5646 && GET_CODE (XEXP (x, 0)) == PLUS 5647 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF 5648 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))) 5649 return 1; 5650 5651 /* Post-inc indexing only supported for SImode and larger. */ 5652 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4 5653 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)) 5654 return 1; 5655 5656 else if (GET_CODE (x) == PLUS) 5657 { 5658 /* REG+REG address can be any two index registers. */ 5659 /* We disallow FRAME+REG addressing since we know that FRAME 5660 will be replaced with STACK, and SP relative addressing only 5661 permits SP+OFFSET. */ 5662 if (GET_MODE_SIZE (mode) <= 4 5663 && XEXP (x, 0) != frame_pointer_rtx 5664 && XEXP (x, 1) != frame_pointer_rtx 5665 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) 5666 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)) 5667 return 1; 5668 5669 /* REG+const has 5-7 bit offset for non-SP registers. */ 5670 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) 5671 || XEXP (x, 0) == arg_pointer_rtx) 5672 && GET_CODE (XEXP (x, 1)) == CONST_INT 5673 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) 5674 return 1; 5675 5676 /* REG+const has 10-bit offset for SP, but only SImode and 5677 larger is supported. */ 5678 /* ??? Should probably check for DI/DFmode overflow here 5679 just like GO_IF_LEGITIMATE_OFFSET does. 
*/ 5680 else if (GET_CODE (XEXP (x, 0)) == REG 5681 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM 5682 && GET_MODE_SIZE (mode) >= 4 5683 && GET_CODE (XEXP (x, 1)) == CONST_INT 5684 && INTVAL (XEXP (x, 1)) >= 0 5685 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024 5686 && (INTVAL (XEXP (x, 1)) & 3) == 0) 5687 return 1; 5688 5689 else if (GET_CODE (XEXP (x, 0)) == REG 5690 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM 5691 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM 5692 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER 5693 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER)) 5694 && GET_MODE_SIZE (mode) >= 4 5695 && GET_CODE (XEXP (x, 1)) == CONST_INT 5696 && (INTVAL (XEXP (x, 1)) & 3) == 0) 5697 return 1; 5698 } 5699 5700 else if (GET_MODE_CLASS (mode) != MODE_FLOAT 5701 && GET_MODE_SIZE (mode) == 4 5702 && GET_CODE (x) == SYMBOL_REF 5703 && CONSTANT_POOL_ADDRESS_P (x) 5704 && ! (flag_pic 5705 && symbol_mentioned_p (get_pool_constant (x)) 5706 && ! pcrel_constant_p (get_pool_constant (x)))) 5707 return 1; 5708 5709 return 0; 5710 } 5711 5712 /* Return nonzero if VAL can be used as an offset in a Thumb-state address 5713 instruction of mode MODE. */ 5714 int 5715 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val) 5716 { 5717 switch (GET_MODE_SIZE (mode)) 5718 { 5719 case 1: 5720 return val >= 0 && val < 32; 5721 5722 case 2: 5723 return val >= 0 && val < 64 && (val & 1) == 0; 5724 5725 default: 5726 return (val >= 0 5727 && (val + GET_MODE_SIZE (mode)) <= 128 5728 && (val & 3) == 0); 5729 } 5730 } 5731 5732 bool 5733 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p) 5734 { 5735 if (TARGET_ARM) 5736 return arm_legitimate_address_outer_p (mode, x, SET, strict_p); 5737 else if (TARGET_THUMB2) 5738 return thumb2_legitimate_address_p (mode, x, strict_p); 5739 else /* if (TARGET_THUMB1) */ 5740 return thumb1_legitimate_address_p (mode, x, strict_p); 5741 } 5742 5743 /* Build the SYMBOL_REF for __tls_get_addr. */ 5744 5745 static GTY(()) rtx tls_get_addr_libfunc; 5746 5747 static rtx 5748 get_tls_get_addr (void) 5749 { 5750 if (!tls_get_addr_libfunc) 5751 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); 5752 return tls_get_addr_libfunc; 5753 } 5754 5755 static rtx 5756 arm_load_tp (rtx target) 5757 { 5758 if (!target) 5759 target = gen_reg_rtx (SImode); 5760 5761 if (TARGET_HARD_TP) 5762 { 5763 /* Can return in any reg. */ 5764 emit_insn (gen_load_tp_hard (target)); 5765 } 5766 else 5767 { 5768 /* Always returned in r0. Immediately copy the result into a pseudo, 5769 otherwise other uses of r0 (e.g. setting up function arguments) may 5770 clobber the value. */ 5771 5772 rtx tmp; 5773 5774 emit_insn (gen_load_tp_soft ()); 5775 5776 tmp = gen_rtx_REG (SImode, 0); 5777 emit_move_insn (target, tmp); 5778 } 5779 return target; 5780 } 5781 5782 static rtx 5783 load_tls_operand (rtx x, rtx reg) 5784 { 5785 rtx tmp; 5786 5787 if (reg == NULL_RTX) 5788 reg = gen_reg_rtx (SImode); 5789 5790 tmp = gen_rtx_CONST (SImode, x); 5791 5792 emit_move_insn (reg, tmp); 5793 5794 return reg; 5795 } 5796 5797 static rtx 5798 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc) 5799 { 5800 rtx insns, label, labelno, sum; 5801 5802 start_sequence (); 5803 5804 labelno = GEN_INT (pic_labelno++); 5805 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); 5806 label = gen_rtx_CONST (VOIDmode, label); 5807 5808 sum = gen_rtx_UNSPEC (Pmode, 5809 gen_rtvec (4, x, GEN_INT (reloc), label, 5810 GEN_INT (TARGET_ARM ? 
8 : 4)), 5811 UNSPEC_TLS); 5812 reg = load_tls_operand (sum, reg); 5813 5814 if (TARGET_ARM) 5815 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno)); 5816 else if (TARGET_THUMB2) 5817 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); 5818 else /* TARGET_THUMB1 */ 5819 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); 5820 5821 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */ 5822 Pmode, 1, reg, Pmode); 5823 5824 insns = get_insns (); 5825 end_sequence (); 5826 5827 return insns; 5828 } 5829 5830 rtx 5831 legitimize_tls_address (rtx x, rtx reg) 5832 { 5833 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend; 5834 unsigned int model = SYMBOL_REF_TLS_MODEL (x); 5835 5836 switch (model) 5837 { 5838 case TLS_MODEL_GLOBAL_DYNAMIC: 5839 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32); 5840 dest = gen_reg_rtx (Pmode); 5841 emit_libcall_block (insns, dest, ret, x); 5842 return dest; 5843 5844 case TLS_MODEL_LOCAL_DYNAMIC: 5845 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32); 5846 5847 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to 5848 share the LDM result with other LD model accesses. */ 5849 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx), 5850 UNSPEC_TLS); 5851 dest = gen_reg_rtx (Pmode); 5852 emit_libcall_block (insns, dest, ret, eqv); 5853 5854 /* Load the addend. */ 5855 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)), 5856 UNSPEC_TLS); 5857 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend)); 5858 return gen_rtx_PLUS (Pmode, dest, addend); 5859 5860 case TLS_MODEL_INITIAL_EXEC: 5861 labelno = GEN_INT (pic_labelno++); 5862 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); 5863 label = gen_rtx_CONST (VOIDmode, label); 5864 sum = gen_rtx_UNSPEC (Pmode, 5865 gen_rtvec (4, x, GEN_INT (TLS_IE32), label, 5866 GEN_INT (TARGET_ARM ? 8 : 4)), 5867 UNSPEC_TLS); 5868 reg = load_tls_operand (sum, reg); 5869 5870 if (TARGET_ARM) 5871 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno)); 5872 else if (TARGET_THUMB2) 5873 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno)); 5874 else 5875 { 5876 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); 5877 emit_move_insn (reg, gen_const_mem (SImode, reg)); 5878 } 5879 5880 tp = arm_load_tp (NULL_RTX); 5881 5882 return gen_rtx_PLUS (Pmode, tp, reg); 5883 5884 case TLS_MODEL_LOCAL_EXEC: 5885 tp = arm_load_tp (NULL_RTX); 5886 5887 reg = gen_rtx_UNSPEC (Pmode, 5888 gen_rtvec (2, x, GEN_INT (TLS_LE32)), 5889 UNSPEC_TLS); 5890 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg)); 5891 5892 return gen_rtx_PLUS (Pmode, tp, reg); 5893 5894 default: 5895 abort (); 5896 } 5897 } 5898 5899 /* Try machine-dependent ways of modifying an illegitimate address 5900 to be legitimate. If we find one, return the new, valid address. */ 5901 rtx 5902 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode) 5903 { 5904 if (!TARGET_ARM) 5905 { 5906 /* TODO: legitimize_address for Thumb2. 
*/ 5907 if (TARGET_THUMB2) 5908 return x; 5909 return thumb_legitimize_address (x, orig_x, mode); 5910 } 5911 5912 if (arm_tls_symbol_p (x)) 5913 return legitimize_tls_address (x, NULL_RTX); 5914 5915 if (GET_CODE (x) == PLUS) 5916 { 5917 rtx xop0 = XEXP (x, 0); 5918 rtx xop1 = XEXP (x, 1); 5919 5920 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0)) 5921 xop0 = force_reg (SImode, xop0); 5922 5923 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1)) 5924 xop1 = force_reg (SImode, xop1); 5925 5926 if (ARM_BASE_REGISTER_RTX_P (xop0) 5927 && GET_CODE (xop1) == CONST_INT) 5928 { 5929 HOST_WIDE_INT n, low_n; 5930 rtx base_reg, val; 5931 n = INTVAL (xop1); 5932 5933 /* VFP addressing modes actually allow greater offsets, but for 5934 now we just stick with the lowest common denominator. */ 5935 if (mode == DImode 5936 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode)) 5937 { 5938 low_n = n & 0x0f; 5939 n &= ~0x0f; 5940 if (low_n > 4) 5941 { 5942 n += 16; 5943 low_n -= 16; 5944 } 5945 } 5946 else 5947 { 5948 low_n = ((mode) == TImode ? 0 5949 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff)); 5950 n -= low_n; 5951 } 5952 5953 base_reg = gen_reg_rtx (SImode); 5954 val = force_operand (plus_constant (xop0, n), NULL_RTX); 5955 emit_move_insn (base_reg, val); 5956 x = plus_constant (base_reg, low_n); 5957 } 5958 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1)) 5959 x = gen_rtx_PLUS (SImode, xop0, xop1); 5960 } 5961 5962 /* XXX We don't allow MINUS any more -- see comment in 5963 arm_legitimate_address_outer_p (). */ 5964 else if (GET_CODE (x) == MINUS) 5965 { 5966 rtx xop0 = XEXP (x, 0); 5967 rtx xop1 = XEXP (x, 1); 5968 5969 if (CONSTANT_P (xop0)) 5970 xop0 = force_reg (SImode, xop0); 5971 5972 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1)) 5973 xop1 = force_reg (SImode, xop1); 5974 5975 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1)) 5976 x = gen_rtx_MINUS (SImode, xop0, xop1); 5977 } 5978 5979 /* Make sure to take full advantage of the pre-indexed addressing mode 5980 with absolute addresses which often allows for the base register to 5981 be factorized for multiple adjacent memory references, and it might 5982 even allows for the mini pool to be avoided entirely. */ 5983 else if (GET_CODE (x) == CONST_INT && optimize > 0) 5984 { 5985 unsigned int bits; 5986 HOST_WIDE_INT mask, base, index; 5987 rtx base_reg; 5988 5989 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only 5990 use a 8-bit index. So let's use a 12-bit index for SImode only and 5991 hope that arm_gen_constant will enable ldrb to use more bits. */ 5992 bits = (mode == SImode) ? 12 : 8; 5993 mask = (1 << bits) - 1; 5994 base = INTVAL (x) & ~mask; 5995 index = INTVAL (x) & mask; 5996 if (bit_count (base & 0xffffffff) > (32 - bits)/2) 5997 { 5998 /* It'll most probably be more efficient to generate the base 5999 with more bits set and use a negative index instead. */ 6000 base |= mask; 6001 index -= mask; 6002 } 6003 base_reg = force_reg (SImode, GEN_INT (base)); 6004 x = plus_constant (base_reg, index); 6005 } 6006 6007 if (flag_pic) 6008 { 6009 /* We need to find and carefully transform any SYMBOL and LABEL 6010 references; so go back to the original address expression. */ 6011 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX); 6012 6013 if (new_x != orig_x) 6014 x = new_x; 6015 } 6016 6017 return x; 6018 } 6019 6020 6021 /* Try machine-dependent ways of modifying an illegitimate Thumb address 6022 to be legitimate. If we find one, return the new, valid address. 
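An illustrative example (register choices arbitrary, not necessarily the exact code generated): when not optimizing for size, a QImode access at r0 + 200 is out of range for the 5-bit scaled offset, so the constant is forced into a register and a reg+reg form is used instead, roughly mov r3, #200 followed by ldrb r2, [r0, r3].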
*/ 6023 rtx 6024 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode) 6025 { 6026 if (arm_tls_symbol_p (x)) 6027 return legitimize_tls_address (x, NULL_RTX); 6028 6029 if (GET_CODE (x) == PLUS 6030 && GET_CODE (XEXP (x, 1)) == CONST_INT 6031 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode) 6032 || INTVAL (XEXP (x, 1)) < 0)) 6033 { 6034 rtx xop0 = XEXP (x, 0); 6035 rtx xop1 = XEXP (x, 1); 6036 HOST_WIDE_INT offset = INTVAL (xop1); 6037 6038 /* Try and fold the offset into a biasing of the base register and 6039 then offsetting that. Don't do this when optimizing for space 6040 since it can cause too many CSEs. */ 6041 if (optimize_size && offset >= 0 6042 && offset < 256 + 31 * GET_MODE_SIZE (mode)) 6043 { 6044 HOST_WIDE_INT delta; 6045 6046 if (offset >= 256) 6047 delta = offset - (256 - GET_MODE_SIZE (mode)); 6048 else if (offset < 32 * GET_MODE_SIZE (mode) + 8) 6049 delta = 31 * GET_MODE_SIZE (mode); 6050 else 6051 delta = offset & (~31 * GET_MODE_SIZE (mode)); 6052 6053 xop0 = force_operand (plus_constant (xop0, offset - delta), 6054 NULL_RTX); 6055 x = plus_constant (xop0, delta); 6056 } 6057 else if (offset < 0 && offset > -256) 6058 /* Small negative offsets are best done with a subtract before the 6059 dereference, forcing these into a register normally takes two 6060 instructions. */ 6061 x = force_operand (x, NULL_RTX); 6062 else 6063 { 6064 /* For the remaining cases, force the constant into a register. */ 6065 xop1 = force_reg (SImode, xop1); 6066 x = gen_rtx_PLUS (SImode, xop0, xop1); 6067 } 6068 } 6069 else if (GET_CODE (x) == PLUS 6070 && s_register_operand (XEXP (x, 1), SImode) 6071 && !s_register_operand (XEXP (x, 0), SImode)) 6072 { 6073 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX); 6074 6075 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1)); 6076 } 6077 6078 if (flag_pic) 6079 { 6080 /* We need to find and carefully transform any SYMBOL and LABEL 6081 references; so go back to the original address expression. */ 6082 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX); 6083 6084 if (new_x != orig_x) 6085 x = new_x; 6086 } 6087 6088 return x; 6089 } 6090 6091 rtx 6092 thumb_legitimize_reload_address (rtx *x_p, 6093 enum machine_mode mode, 6094 int opnum, int type, 6095 int ind_levels ATTRIBUTE_UNUSED) 6096 { 6097 rtx x = *x_p; 6098 6099 if (GET_CODE (x) == PLUS 6100 && GET_MODE_SIZE (mode) < 4 6101 && REG_P (XEXP (x, 0)) 6102 && XEXP (x, 0) == stack_pointer_rtx 6103 && GET_CODE (XEXP (x, 1)) == CONST_INT 6104 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) 6105 { 6106 rtx orig_x = x; 6107 6108 x = copy_rtx (x); 6109 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), 6110 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); 6111 return x; 6112 } 6113 6114 /* If both registers are hi-regs, then it's better to reload the 6115 entire expression rather than each register individually. That 6116 only requires one reload register rather than two. */ 6117 if (GET_CODE (x) == PLUS 6118 && REG_P (XEXP (x, 0)) 6119 && REG_P (XEXP (x, 1)) 6120 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode) 6121 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode)) 6122 { 6123 rtx orig_x = x; 6124 6125 x = copy_rtx (x); 6126 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), 6127 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); 6128 return x; 6129 } 6130 6131 return NULL; 6132 } 6133 6134 /* Test for various thread-local symbols. */ 6135 6136 /* Return TRUE if X is a thread-local symbol. 
*/ 6137 6138 static bool 6139 arm_tls_symbol_p (rtx x) 6140 { 6141 if (! TARGET_HAVE_TLS) 6142 return false; 6143 6144 if (GET_CODE (x) != SYMBOL_REF) 6145 return false; 6146 6147 return SYMBOL_REF_TLS_MODEL (x) != 0; 6148 } 6149 6150 /* Helper for arm_tls_referenced_p. */ 6151 6152 static int 6153 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED) 6154 { 6155 if (GET_CODE (*x) == SYMBOL_REF) 6156 return SYMBOL_REF_TLS_MODEL (*x) != 0; 6157 6158 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are 6159 TLS offsets, not real symbol references. */ 6160 if (GET_CODE (*x) == UNSPEC 6161 && XINT (*x, 1) == UNSPEC_TLS) 6162 return -1; 6163 6164 return 0; 6165 } 6166 6167 /* Return TRUE if X contains any TLS symbol references. */ 6168 6169 bool 6170 arm_tls_referenced_p (rtx x) 6171 { 6172 if (! TARGET_HAVE_TLS) 6173 return false; 6174 6175 return for_each_rtx (&x, arm_tls_operand_p_1, NULL); 6176 } 6177 6178 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ 6179 6180 bool 6181 arm_cannot_force_const_mem (rtx x) 6182 { 6183 rtx base, offset; 6184 6185 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) 6186 { 6187 split_const (x, &base, &offset); 6188 if (GET_CODE (base) == SYMBOL_REF 6189 && !offset_within_block_p (base, INTVAL (offset))) 6190 return true; 6191 } 6192 return arm_tls_referenced_p (x); 6193 } 6194 6195 #define REG_OR_SUBREG_REG(X) \ 6196 (GET_CODE (X) == REG \ 6197 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG)) 6198 6199 #define REG_OR_SUBREG_RTX(X) \ 6200 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X)) 6201 6202 #ifndef COSTS_N_INSNS 6203 #define COSTS_N_INSNS(N) ((N) * 4 - 2) 6204 #endif 6205 static inline int 6206 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) 6207 { 6208 enum machine_mode mode = GET_MODE (x); 6209 6210 switch (code) 6211 { 6212 case ASHIFT: 6213 case ASHIFTRT: 6214 case LSHIFTRT: 6215 case ROTATERT: 6216 case PLUS: 6217 case MINUS: 6218 case COMPARE: 6219 case NEG: 6220 case NOT: 6221 return COSTS_N_INSNS (1); 6222 6223 case MULT: 6224 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 6225 { 6226 int cycles = 0; 6227 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); 6228 6229 while (i) 6230 { 6231 i >>= 2; 6232 cycles++; 6233 } 6234 return COSTS_N_INSNS (2) + cycles; 6235 } 6236 return COSTS_N_INSNS (1) + 16; 6237 6238 case SET: 6239 return (COSTS_N_INSNS (1) 6240 + 4 * ((GET_CODE (SET_SRC (x)) == MEM) 6241 + GET_CODE (SET_DEST (x)) == MEM)); 6242 6243 case CONST_INT: 6244 if (outer == SET) 6245 { 6246 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) 6247 return 0; 6248 if (thumb_shiftable_const (INTVAL (x))) 6249 return COSTS_N_INSNS (2); 6250 return COSTS_N_INSNS (3); 6251 } 6252 else if ((outer == PLUS || outer == COMPARE) 6253 && INTVAL (x) < 256 && INTVAL (x) > -256) 6254 return 0; 6255 else if ((outer == IOR || outer == XOR || outer == AND) 6256 && INTVAL (x) < 256 && INTVAL (x) >= -256) 6257 return COSTS_N_INSNS (1); 6258 else if (outer == AND) 6259 { 6260 int i; 6261 /* This duplicates the tests in the andsi3 expander. 
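The constants accepted there are masks of the form (1 << i) - 1 for i in [9, 31], e.g. 0x1ff or 0xffff, or their complements; Thumb-1 can synthesize an AND with such a mask using a pair of shifts, hence the two-insn cost returned below.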
*/ 6262 for (i = 9; i <= 31; i++) 6263 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) 6264 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) 6265 return COSTS_N_INSNS (2); 6266 } 6267 else if (outer == ASHIFT || outer == ASHIFTRT 6268 || outer == LSHIFTRT) 6269 return 0; 6270 return COSTS_N_INSNS (2); 6271 6272 case CONST: 6273 case CONST_DOUBLE: 6274 case LABEL_REF: 6275 case SYMBOL_REF: 6276 return COSTS_N_INSNS (3); 6277 6278 case UDIV: 6279 case UMOD: 6280 case DIV: 6281 case MOD: 6282 return 100; 6283 6284 case TRUNCATE: 6285 return 99; 6286 6287 case AND: 6288 case XOR: 6289 case IOR: 6290 /* XXX guess. */ 6291 return 8; 6292 6293 case MEM: 6294 /* XXX another guess. */ 6295 /* Memory costs quite a lot for the first word, but subsequent words 6296 load at the equivalent of a single insn each. */ 6297 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) 6298 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) 6299 ? 4 : 0)); 6300 6301 case IF_THEN_ELSE: 6302 /* XXX a guess. */ 6303 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) 6304 return 14; 6305 return 2; 6306 6307 case ZERO_EXTEND: 6308 /* XXX still guessing. */ 6309 switch (GET_MODE (XEXP (x, 0))) 6310 { 6311 case QImode: 6312 return (1 + (mode == DImode ? 4 : 0) 6313 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); 6314 6315 case HImode: 6316 return (4 + (mode == DImode ? 4 : 0) 6317 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); 6318 6319 case SImode: 6320 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); 6321 6322 default: 6323 return 99; 6324 } 6325 6326 default: 6327 return 99; 6328 } 6329 } 6330 6331 static inline bool 6332 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) 6333 { 6334 enum machine_mode mode = GET_MODE (x); 6335 enum rtx_code subcode; 6336 rtx operand; 6337 enum rtx_code code = GET_CODE (x); 6338 int extra_cost; 6339 *total = 0; 6340 6341 switch (code) 6342 { 6343 case MEM: 6344 /* Memory costs quite a lot for the first word, but subsequent words 6345 load at the equivalent of a single insn each. */ 6346 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); 6347 return true; 6348 6349 case DIV: 6350 case MOD: 6351 case UDIV: 6352 case UMOD: 6353 if (TARGET_HARD_FLOAT && mode == SFmode) 6354 *total = COSTS_N_INSNS (2); 6355 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE) 6356 *total = COSTS_N_INSNS (4); 6357 else 6358 *total = COSTS_N_INSNS (20); 6359 return false; 6360 6361 case ROTATE: 6362 if (GET_CODE (XEXP (x, 1)) == REG) 6363 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */ 6364 else if (GET_CODE (XEXP (x, 1)) != CONST_INT) 6365 *total = rtx_cost (XEXP (x, 1), code, speed); 6366 6367 /* Fall through */ 6368 case ROTATERT: 6369 if (mode != SImode) 6370 { 6371 *total += COSTS_N_INSNS (4); 6372 return true; 6373 } 6374 6375 /* Fall through */ 6376 case ASHIFT: case LSHIFTRT: case ASHIFTRT: 6377 *total += rtx_cost (XEXP (x, 0), code, speed); 6378 if (mode == DImode) 6379 { 6380 *total += COSTS_N_INSNS (3); 6381 return true; 6382 } 6383 6384 *total += COSTS_N_INSNS (1); 6385 /* Increase the cost of complex shifts because they aren't any faster, 6386 and reduce dual issue opportunities. 
*/ 6387 if (arm_tune_cortex_a9 6388 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT) 6389 ++*total; 6390 6391 return true; 6392 6393 case MINUS: 6394 if (TARGET_THUMB2) 6395 { 6396 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 6397 { 6398 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) 6399 *total = COSTS_N_INSNS (1); 6400 else 6401 *total = COSTS_N_INSNS (20); 6402 } 6403 else 6404 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); 6405 /* Thumb2 does not have RSB, so all arguments must be 6406 registers (subtracting a constant is canonicalized as 6407 addition of the negated constant). */ 6408 return false; 6409 } 6410 6411 if (mode == DImode) 6412 { 6413 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); 6414 if (GET_CODE (XEXP (x, 0)) == CONST_INT 6415 && const_ok_for_arm (INTVAL (XEXP (x, 0)))) 6416 { 6417 *total += rtx_cost (XEXP (x, 1), code, speed); 6418 return true; 6419 } 6420 6421 if (GET_CODE (XEXP (x, 1)) == CONST_INT 6422 && const_ok_for_arm (INTVAL (XEXP (x, 1)))) 6423 { 6424 *total += rtx_cost (XEXP (x, 0), code, speed); 6425 return true; 6426 } 6427 6428 return false; 6429 } 6430 6431 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 6432 { 6433 if (TARGET_HARD_FLOAT 6434 && (mode == SFmode 6435 || (mode == DFmode && !TARGET_VFP_SINGLE))) 6436 { 6437 *total = COSTS_N_INSNS (1); 6438 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE 6439 && arm_const_double_rtx (XEXP (x, 0))) 6440 { 6441 *total += rtx_cost (XEXP (x, 1), code, speed); 6442 return true; 6443 } 6444 6445 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE 6446 && arm_const_double_rtx (XEXP (x, 1))) 6447 { 6448 *total += rtx_cost (XEXP (x, 0), code, speed); 6449 return true; 6450 } 6451 6452 return false; 6453 } 6454 *total = COSTS_N_INSNS (20); 6455 return false; 6456 } 6457 6458 *total = COSTS_N_INSNS (1); 6459 if (GET_CODE (XEXP (x, 0)) == CONST_INT 6460 && const_ok_for_arm (INTVAL (XEXP (x, 0)))) 6461 { 6462 *total += rtx_cost (XEXP (x, 1), code, speed); 6463 return true; 6464 } 6465 6466 subcode = GET_CODE (XEXP (x, 1)); 6467 if (subcode == ASHIFT || subcode == ASHIFTRT 6468 || subcode == LSHIFTRT 6469 || subcode == ROTATE || subcode == ROTATERT) 6470 { 6471 *total += rtx_cost (XEXP (x, 0), code, speed); 6472 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed); 6473 return true; 6474 } 6475 6476 /* A shift as a part of RSB costs no more than RSB itself. 
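For example, (r2 << 2) - r1 maps onto a single rsb r0, r1, r2, lsl #2 (register numbers purely illustrative), so only the operand costs are added below.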
*/ 6477 if (GET_CODE (XEXP (x, 0)) == MULT 6478 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) 6479 { 6480 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed); 6481 *total += rtx_cost (XEXP (x, 1), code, speed); 6482 return true; 6483 } 6484 6485 if (subcode == MULT 6486 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)) 6487 { 6488 *total += rtx_cost (XEXP (x, 0), code, speed); 6489 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed); 6490 return true; 6491 } 6492 6493 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE 6494 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE) 6495 { 6496 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed); 6497 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG 6498 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM) 6499 *total += COSTS_N_INSNS (1); 6500 6501 return true; 6502 } 6503 6504 /* Fall through */ 6505 6506 case PLUS: 6507 if (code == PLUS && arm_arch6 && mode == SImode 6508 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND 6509 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) 6510 { 6511 *total = COSTS_N_INSNS (1); 6512 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)), 6513 speed); 6514 *total += rtx_cost (XEXP (x, 1), code, speed); 6515 return true; 6516 } 6517 6518 /* MLA: All arguments must be registers. We filter out 6519 multiplication by a power of two, so that we fall down into 6520 the code below. */ 6521 if (GET_CODE (XEXP (x, 0)) == MULT 6522 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) 6523 { 6524 /* The cost comes from the cost of the multiply. */ 6525 return false; 6526 } 6527 6528 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 6529 { 6530 if (TARGET_HARD_FLOAT 6531 && (mode == SFmode 6532 || (mode == DFmode && !TARGET_VFP_SINGLE))) 6533 { 6534 *total = COSTS_N_INSNS (1); 6535 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE 6536 && arm_const_double_rtx (XEXP (x, 1))) 6537 { 6538 *total += rtx_cost (XEXP (x, 0), code, speed); 6539 return true; 6540 } 6541 6542 return false; 6543 } 6544 6545 *total = COSTS_N_INSNS (20); 6546 return false; 6547 } 6548 6549 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE 6550 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) 6551 { 6552 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed); 6553 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG 6554 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM) 6555 *total += COSTS_N_INSNS (1); 6556 return true; 6557 } 6558 6559 /* Fall through */ 6560 6561 case AND: case XOR: case IOR: 6562 extra_cost = 0; 6563 6564 /* Normally the frame registers will be spilt into reg+const during 6565 reload, so it is a bad idea to combine them with other instructions, 6566 since then they might not be moved outside of loops. As a compromise 6567 we allow integration with ops that have a constant as their second 6568 operand. 
*/ 6569 if ((REG_OR_SUBREG_REG (XEXP (x, 0)) 6570 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))) 6571 && GET_CODE (XEXP (x, 1)) != CONST_INT) 6572 || (REG_OR_SUBREG_REG (XEXP (x, 0)) 6573 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))))) 6574 *total = 4; 6575 6576 if (mode == DImode) 6577 { 6578 *total += COSTS_N_INSNS (2); 6579 if (GET_CODE (XEXP (x, 1)) == CONST_INT 6580 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) 6581 { 6582 *total += rtx_cost (XEXP (x, 0), code, speed); 6583 return true; 6584 } 6585 6586 return false; 6587 } 6588 6589 *total += COSTS_N_INSNS (1); 6590 if (GET_CODE (XEXP (x, 1)) == CONST_INT 6591 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) 6592 { 6593 *total += rtx_cost (XEXP (x, 0), code, speed); 6594 return true; 6595 } 6596 subcode = GET_CODE (XEXP (x, 0)); 6597 if (subcode == ASHIFT || subcode == ASHIFTRT 6598 || subcode == LSHIFTRT 6599 || subcode == ROTATE || subcode == ROTATERT) 6600 { 6601 *total += rtx_cost (XEXP (x, 1), code, speed); 6602 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); 6603 return true; 6604 } 6605 6606 if (subcode == MULT 6607 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) 6608 { 6609 *total += rtx_cost (XEXP (x, 1), code, speed); 6610 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); 6611 return true; 6612 } 6613 6614 if (subcode == UMIN || subcode == UMAX 6615 || subcode == SMIN || subcode == SMAX) 6616 { 6617 *total = COSTS_N_INSNS (3); 6618 return true; 6619 } 6620 6621 return false; 6622 6623 case MULT: 6624 /* This should have been handled by the CPU specific routines. */ 6625 gcc_unreachable (); 6626 6627 case TRUNCATE: 6628 if (arm_arch3m && mode == SImode 6629 && GET_CODE (XEXP (x, 0)) == LSHIFTRT 6630 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 6631 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) 6632 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))) 6633 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND 6634 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND)) 6635 { 6636 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed); 6637 return true; 6638 } 6639 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */ 6640 return false; 6641 6642 case NEG: 6643 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 6644 { 6645 if (TARGET_HARD_FLOAT 6646 && (mode == SFmode 6647 || (mode == DFmode && !TARGET_VFP_SINGLE))) 6648 { 6649 *total = COSTS_N_INSNS (1); 6650 return false; 6651 } 6652 *total = COSTS_N_INSNS (2); 6653 return false; 6654 } 6655 6656 /* Fall through */ 6657 case NOT: 6658 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode)); 6659 if (mode == SImode && code == NOT) 6660 { 6661 subcode = GET_CODE (XEXP (x, 0)); 6662 if (subcode == ASHIFT || subcode == ASHIFTRT 6663 || subcode == LSHIFTRT 6664 || subcode == ROTATE || subcode == ROTATERT 6665 || (subcode == MULT 6666 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))) 6667 { 6668 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); 6669 /* Register shifts cost an extra cycle. 
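For example, mvn r0, r1, lsl r2, with the shift amount in a register, typically takes a cycle more than mvn r0, r1, lsl #3 (register numbers illustrative).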
*/ 6670 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT) 6671 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1), 6672 subcode, speed); 6673 return true; 6674 } 6675 } 6676 6677 return false; 6678 6679 case IF_THEN_ELSE: 6680 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) 6681 { 6682 *total = COSTS_N_INSNS (4); 6683 return true; 6684 } 6685 6686 operand = XEXP (x, 0); 6687 6688 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE 6689 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE) 6690 && GET_CODE (XEXP (operand, 0)) == REG 6691 && REGNO (XEXP (operand, 0)) == CC_REGNUM)) 6692 *total += COSTS_N_INSNS (1); 6693 *total += (rtx_cost (XEXP (x, 1), code, speed) 6694 + rtx_cost (XEXP (x, 2), code, speed)); 6695 return true; 6696 6697 case NE: 6698 if (mode == SImode && XEXP (x, 1) == const0_rtx) 6699 { 6700 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed); 6701 return true; 6702 } 6703 goto scc_insn; 6704 6705 case GE: 6706 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM) 6707 && mode == SImode && XEXP (x, 1) == const0_rtx) 6708 { 6709 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed); 6710 return true; 6711 } 6712 goto scc_insn; 6713 6714 case LT: 6715 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM) 6716 && mode == SImode && XEXP (x, 1) == const0_rtx) 6717 { 6718 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed); 6719 return true; 6720 } 6721 goto scc_insn; 6722 6723 case EQ: 6724 case GT: 6725 case LE: 6726 case GEU: 6727 case LTU: 6728 case GTU: 6729 case LEU: 6730 case UNORDERED: 6731 case ORDERED: 6732 case UNEQ: 6733 case UNGE: 6734 case UNLT: 6735 case UNGT: 6736 case UNLE: 6737 scc_insn: 6738 /* SCC insns. In the case where the comparison has already been 6739 performed, then they cost 2 instructions. Otherwise they need 6740 an additional comparison before them. 
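For example (illustrative), with the flags already set, r0 = (a > b) can be done as mov r0, #0 followed by movgt r0, #1; without them a cmp must be issued first.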
*/ 6741 *total = COSTS_N_INSNS (2); 6742 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM) 6743 { 6744 return true; 6745 } 6746 6747 /* Fall through */ 6748 case COMPARE: 6749 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM) 6750 { 6751 *total = 0; 6752 return true; 6753 } 6754 6755 *total += COSTS_N_INSNS (1); 6756 if (GET_CODE (XEXP (x, 1)) == CONST_INT 6757 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) 6758 { 6759 *total += rtx_cost (XEXP (x, 0), code, speed); 6760 return true; 6761 } 6762 6763 subcode = GET_CODE (XEXP (x, 0)); 6764 if (subcode == ASHIFT || subcode == ASHIFTRT 6765 || subcode == LSHIFTRT 6766 || subcode == ROTATE || subcode == ROTATERT) 6767 { 6768 *total += rtx_cost (XEXP (x, 1), code, speed); 6769 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); 6770 return true; 6771 } 6772 6773 if (subcode == MULT 6774 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) 6775 { 6776 *total += rtx_cost (XEXP (x, 1), code, speed); 6777 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); 6778 return true; 6779 } 6780 6781 return false; 6782 6783 case UMIN: 6784 case UMAX: 6785 case SMIN: 6786 case SMAX: 6787 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed); 6788 if (GET_CODE (XEXP (x, 1)) != CONST_INT 6789 || !const_ok_for_arm (INTVAL (XEXP (x, 1)))) 6790 *total += rtx_cost (XEXP (x, 1), code, speed); 6791 return true; 6792 6793 case ABS: 6794 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 6795 { 6796 if (TARGET_HARD_FLOAT 6797 && (mode == SFmode 6798 || (mode == DFmode && !TARGET_VFP_SINGLE))) 6799 { 6800 *total = COSTS_N_INSNS (1); 6801 return false; 6802 } 6803 *total = COSTS_N_INSNS (20); 6804 return false; 6805 } 6806 *total = COSTS_N_INSNS (1); 6807 if (mode == DImode) 6808 *total += COSTS_N_INSNS (3); 6809 return false; 6810 6811 case SIGN_EXTEND: 6812 if (GET_MODE_CLASS (mode) == MODE_INT) 6813 { 6814 *total = 0; 6815 if (mode == DImode) 6816 *total += COSTS_N_INSNS (1); 6817 6818 if (GET_MODE (XEXP (x, 0)) != SImode) 6819 { 6820 if (arm_arch6) 6821 { 6822 if (GET_CODE (XEXP (x, 0)) != MEM) 6823 *total += COSTS_N_INSNS (1); 6824 } 6825 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM) 6826 *total += COSTS_N_INSNS (2); 6827 } 6828 6829 return false; 6830 } 6831 6832 /* Fall through */ 6833 case ZERO_EXTEND: 6834 *total = 0; 6835 if (GET_MODE_CLASS (mode) == MODE_INT) 6836 { 6837 if (mode == DImode) 6838 *total += COSTS_N_INSNS (1); 6839 6840 if (GET_MODE (XEXP (x, 0)) != SImode) 6841 { 6842 if (arm_arch6) 6843 { 6844 if (GET_CODE (XEXP (x, 0)) != MEM) 6845 *total += COSTS_N_INSNS (1); 6846 } 6847 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM) 6848 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ? 
6849 1 : 2); 6850 } 6851 6852 return false; 6853 } 6854 6855 switch (GET_MODE (XEXP (x, 0))) 6856 { 6857 case V8QImode: 6858 case V4HImode: 6859 case V2SImode: 6860 case V4QImode: 6861 case V2HImode: 6862 *total = COSTS_N_INSNS (1); 6863 return false; 6864 6865 default: 6866 gcc_unreachable (); 6867 } 6868 gcc_unreachable (); 6869 6870 case ZERO_EXTRACT: 6871 case SIGN_EXTRACT: 6872 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed); 6873 return true; 6874 6875 case CONST_INT: 6876 if (const_ok_for_arm (INTVAL (x)) 6877 || const_ok_for_arm (~INTVAL (x))) 6878 *total = COSTS_N_INSNS (1); 6879 else 6880 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX, 6881 INTVAL (x), NULL_RTX, 6882 NULL_RTX, 0, 0)); 6883 return true; 6884 6885 case CONST: 6886 case LABEL_REF: 6887 case SYMBOL_REF: 6888 *total = COSTS_N_INSNS (3); 6889 return true; 6890 6891 case HIGH: 6892 *total = COSTS_N_INSNS (1); 6893 return true; 6894 6895 case LO_SUM: 6896 *total = COSTS_N_INSNS (1); 6897 *total += rtx_cost (XEXP (x, 0), code, speed); 6898 return true; 6899 6900 case CONST_DOUBLE: 6901 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x) 6902 && (mode == SFmode || !TARGET_VFP_SINGLE)) 6903 *total = COSTS_N_INSNS (1); 6904 else 6905 *total = COSTS_N_INSNS (4); 6906 return true; 6907 6908 default: 6909 *total = COSTS_N_INSNS (4); 6910 return false; 6911 } 6912 } 6913 6914 /* RTX costs when optimizing for size. */ 6915 static bool 6916 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, 6917 int *total) 6918 { 6919 enum machine_mode mode = GET_MODE (x); 6920 if (TARGET_THUMB1) 6921 { 6922 /* XXX TBD. For now, use the standard costs. */ 6923 *total = thumb1_rtx_costs (x, code, outer_code); 6924 return true; 6925 } 6926 6927 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */ 6928 switch (code) 6929 { 6930 case MEM: 6931 /* A memory access costs 1 insn if the mode is small, or the address is 6932 a single register, otherwise it costs one insn per word. */ 6933 if (REG_P (XEXP (x, 0))) 6934 *total = COSTS_N_INSNS (1); 6935 else 6936 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); 6937 return true; 6938 6939 case DIV: 6940 case MOD: 6941 case UDIV: 6942 case UMOD: 6943 /* Needs a libcall, so it costs about this. */ 6944 *total = COSTS_N_INSNS (2); 6945 return false; 6946 6947 case ROTATE: 6948 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG) 6949 { 6950 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false); 6951 return true; 6952 } 6953 /* Fall through */ 6954 case ROTATERT: 6955 case ASHIFT: 6956 case LSHIFTRT: 6957 case ASHIFTRT: 6958 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT) 6959 { 6960 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false); 6961 return true; 6962 } 6963 else if (mode == SImode) 6964 { 6965 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false); 6966 /* Slightly disparage register shifts, but not by much. */ 6967 if (GET_CODE (XEXP (x, 1)) != CONST_INT) 6968 *total += 1 + rtx_cost (XEXP (x, 1), code, false); 6969 return true; 6970 } 6971 6972 /* Needs a libcall. 
*/ 6973 *total = COSTS_N_INSNS (2); 6974 return false; 6975 6976 case MINUS: 6977 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT 6978 && (mode == SFmode || !TARGET_VFP_SINGLE)) 6979 { 6980 *total = COSTS_N_INSNS (1); 6981 return false; 6982 } 6983 6984 if (mode == SImode) 6985 { 6986 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0)); 6987 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1)); 6988 6989 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT 6990 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT 6991 || subcode1 == ROTATE || subcode1 == ROTATERT 6992 || subcode1 == ASHIFT || subcode1 == LSHIFTRT 6993 || subcode1 == ASHIFTRT) 6994 { 6995 /* It's just the cost of the two operands. */ 6996 *total = 0; 6997 return false; 6998 } 6999 7000 *total = COSTS_N_INSNS (1); 7001 return false; 7002 } 7003 7004 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); 7005 return false; 7006 7007 case PLUS: 7008 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT 7009 && (mode == SFmode || !TARGET_VFP_SINGLE)) 7010 { 7011 *total = COSTS_N_INSNS (1); 7012 return false; 7013 } 7014 7015 /* A shift as a part of ADD costs nothing. */ 7016 if (GET_CODE (XEXP (x, 0)) == MULT 7017 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) 7018 { 7019 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1); 7020 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false); 7021 *total += rtx_cost (XEXP (x, 1), code, false); 7022 return true; 7023 } 7024 7025 /* Fall through */ 7026 case AND: case XOR: case IOR: 7027 if (mode == SImode) 7028 { 7029 enum rtx_code subcode = GET_CODE (XEXP (x, 0)); 7030 7031 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT 7032 || subcode == LSHIFTRT || subcode == ASHIFTRT 7033 || (code == AND && subcode == NOT)) 7034 { 7035 /* It's just the cost of the two operands. */ 7036 *total = 0; 7037 return false; 7038 } 7039 } 7040 7041 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); 7042 return false; 7043 7044 case MULT: 7045 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); 7046 return false; 7047 7048 case NEG: 7049 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT 7050 && (mode == SFmode || !TARGET_VFP_SINGLE)) 7051 { 7052 *total = COSTS_N_INSNS (1); 7053 return false; 7054 } 7055 7056 /* Fall through */ 7057 case NOT: 7058 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); 7059 7060 return false; 7061 7062 case IF_THEN_ELSE: 7063 *total = 0; 7064 return false; 7065 7066 case COMPARE: 7067 if (cc_register (XEXP (x, 0), VOIDmode)) 7068 * total = 0; 7069 else 7070 *total = COSTS_N_INSNS (1); 7071 return false; 7072 7073 case ABS: 7074 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT 7075 && (mode == SFmode || !TARGET_VFP_SINGLE)) 7076 *total = COSTS_N_INSNS (1); 7077 else 7078 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); 7079 return false; 7080 7081 case SIGN_EXTEND: 7082 *total = 0; 7083 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4) 7084 { 7085 if (!(arm_arch4 && MEM_P (XEXP (x, 0)))) 7086 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2); 7087 } 7088 if (mode == DImode) 7089 *total += COSTS_N_INSNS (1); 7090 return false; 7091 7092 case ZERO_EXTEND: 7093 *total = 0; 7094 if (!(arm_arch4 && MEM_P (XEXP (x, 0)))) 7095 { 7096 switch (GET_MODE (XEXP (x, 0))) 7097 { 7098 case QImode: 7099 *total += COSTS_N_INSNS (1); 7100 break; 7101 7102 case HImode: 7103 *total += COSTS_N_INSNS (arm_arch6 ? 
1 : 2);
	      /* Fall through.  */
	    case SImode:
	      break;

	    default:
	      *total += COSTS_N_INSNS (2);
	    }
	}

      if (mode == DImode)
	*total += COSTS_N_INSNS (1);

      return false;

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x)))
	/* A multiplication by a constant requires another instruction
	   to load the constant to a register.  */
	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
				? 1 : 0);
      else if (const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
	{
	  if (outer_code == COMPARE || outer_code == PLUS
	      || outer_code == MINUS)
	    *total = 0;
	  else
	    *total = COSTS_N_INSNS (1);
	}
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
	 cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;
      return true;

    default:
      if (mode != VOIDmode)
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*total = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}

/* RTX costs entry point: use the size costs when optimizing for size,
   otherwise dispatch to the CPU-specific speed costs.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int *total,
	       bool speed)
{
  if (!speed)
    return arm_size_rtx_costs (x, (enum rtx_code) code,
			       (enum rtx_code) outer_code, total);
  else
    return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
					       (enum rtx_code) outer_code,
					       total, speed);
}

/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
	  || mode == DImode)
	{
	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;

	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 2;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	    {
	      i >>= booth_unit_size;
	      cost++;
	    }

	  *total = COSTS_N_INSNS (cost);
	  *total += rtx_cost (XEXP (x, 0), code, speed);
	  return true;
	}

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}


/* RTX cost for cores with a fast multiply unit (M variants).  */

static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  /* ???
should thumb2 use different costs? */ 7248 switch (code) 7249 { 7250 case MULT: 7251 /* There is no point basing this on the tuning, since it is always the 7252 fast variant if it exists at all. */ 7253 if (mode == DImode 7254 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) 7255 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND 7256 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) 7257 { 7258 *total = COSTS_N_INSNS(2); 7259 return false; 7260 } 7261 7262 7263 if (mode == DImode) 7264 { 7265 *total = COSTS_N_INSNS (5); 7266 return false; 7267 } 7268 7269 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 7270 { 7271 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) 7272 & (unsigned HOST_WIDE_INT) 0xffffffff); 7273 int cost, const_ok = const_ok_for_arm (i); 7274 int j, booth_unit_size; 7275 7276 /* Tune as appropriate. */ 7277 cost = const_ok ? 4 : 8; 7278 booth_unit_size = 8; 7279 for (j = 0; i && j < 32; j += booth_unit_size) 7280 { 7281 i >>= booth_unit_size; 7282 cost++; 7283 } 7284 7285 *total = COSTS_N_INSNS(cost); 7286 return false; 7287 } 7288 7289 if (mode == SImode) 7290 { 7291 *total = COSTS_N_INSNS (4); 7292 return false; 7293 } 7294 7295 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 7296 { 7297 if (TARGET_HARD_FLOAT 7298 && (mode == SFmode 7299 || (mode == DFmode && !TARGET_VFP_SINGLE))) 7300 { 7301 *total = COSTS_N_INSNS (1); 7302 return false; 7303 } 7304 } 7305 7306 /* Requires a lib call */ 7307 *total = COSTS_N_INSNS (20); 7308 return false; 7309 7310 default: 7311 return arm_rtx_costs_1 (x, outer_code, total, speed); 7312 } 7313 } 7314 7315 7316 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores, 7317 so it can be ignored. */ 7318 7319 static bool 7320 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed) 7321 { 7322 enum machine_mode mode = GET_MODE (x); 7323 7324 if (TARGET_THUMB) 7325 { 7326 *total = thumb1_rtx_costs (x, code, outer_code); 7327 return true; 7328 } 7329 7330 switch (code) 7331 { 7332 case COMPARE: 7333 if (GET_CODE (XEXP (x, 0)) != MULT) 7334 return arm_rtx_costs_1 (x, outer_code, total, speed); 7335 7336 /* A COMPARE of a MULT is slow on XScale; the muls instruction 7337 will stall until the multiplication is complete. */ 7338 *total = COSTS_N_INSNS (3); 7339 return false; 7340 7341 case MULT: 7342 /* There is no point basing this on the tuning, since it is always the 7343 fast variant if it exists at all. */ 7344 if (mode == DImode 7345 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) 7346 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND 7347 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) 7348 { 7349 *total = COSTS_N_INSNS (2); 7350 return false; 7351 } 7352 7353 7354 if (mode == DImode) 7355 { 7356 *total = COSTS_N_INSNS (5); 7357 return false; 7358 } 7359 7360 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 7361 { 7362 /* If operand 1 is a constant we can more accurately 7363 calculate the cost of the multiply. The multiplier can 7364 retire 15 bits on the first cycle and a further 12 on the 7365 second. We do, of course, have to load the constant into 7366 a register first. */ 7367 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); 7368 /* There's a general overhead of one cycle. 
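As a worked example, a multiplier of 0x12345 has bits set at or above bit 15, so one extra cycle is added below (two in total), while 0x12345678 also has bits at or above bit 27 and costs three.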
*/ 7369 int cost = 1; 7370 unsigned HOST_WIDE_INT masked_const; 7371 7372 if (i & 0x80000000) 7373 i = ~i; 7374 7375 i &= (unsigned HOST_WIDE_INT) 0xffffffff; 7376 7377 masked_const = i & 0xffff8000; 7378 if (masked_const != 0) 7379 { 7380 cost++; 7381 masked_const = i & 0xf8000000; 7382 if (masked_const != 0) 7383 cost++; 7384 } 7385 *total = COSTS_N_INSNS (cost); 7386 return false; 7387 } 7388 7389 if (mode == SImode) 7390 { 7391 *total = COSTS_N_INSNS (3); 7392 return false; 7393 } 7394 7395 /* Requires a lib call */ 7396 *total = COSTS_N_INSNS (20); 7397 return false; 7398 7399 default: 7400 return arm_rtx_costs_1 (x, outer_code, total, speed); 7401 } 7402 } 7403 7404 7405 /* RTX costs for 9e (and later) cores. */ 7406 7407 static bool 7408 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, 7409 int *total, bool speed) 7410 { 7411 enum machine_mode mode = GET_MODE (x); 7412 7413 if (TARGET_THUMB1) 7414 { 7415 switch (code) 7416 { 7417 case MULT: 7418 *total = COSTS_N_INSNS (3); 7419 return true; 7420 7421 default: 7422 *total = thumb1_rtx_costs (x, code, outer_code); 7423 return true; 7424 } 7425 } 7426 7427 switch (code) 7428 { 7429 case MULT: 7430 /* There is no point basing this on the tuning, since it is always the 7431 fast variant if it exists at all. */ 7432 if (mode == DImode 7433 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) 7434 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND 7435 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) 7436 { 7437 *total = COSTS_N_INSNS (2); 7438 return false; 7439 } 7440 7441 7442 if (mode == DImode) 7443 { 7444 *total = COSTS_N_INSNS (5); 7445 return false; 7446 } 7447 7448 if (mode == SImode) 7449 { 7450 *total = COSTS_N_INSNS (2); 7451 return false; 7452 } 7453 7454 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 7455 { 7456 if (TARGET_HARD_FLOAT 7457 && (mode == SFmode 7458 || (mode == DFmode && !TARGET_VFP_SINGLE))) 7459 { 7460 *total = COSTS_N_INSNS (1); 7461 return false; 7462 } 7463 } 7464 7465 *total = COSTS_N_INSNS (20); 7466 return false; 7467 7468 default: 7469 return arm_rtx_costs_1 (x, outer_code, total, speed); 7470 } 7471 } 7472 /* All address computations that can be done are free, but rtx cost returns 7473 the same for practically all of them. So we weight the different types 7474 of address here in the order (most pref first): 7475 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */ 7476 static inline int 7477 arm_arm_address_cost (rtx x) 7478 { 7479 enum rtx_code c = GET_CODE (x); 7480 7481 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC) 7482 return 0; 7483 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) 7484 return 10; 7485 7486 if (c == PLUS) 7487 { 7488 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 7489 return 2; 7490 7491 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1))) 7492 return 3; 7493 7494 return 4; 7495 } 7496 7497 return 6; 7498 } 7499 7500 static inline int 7501 arm_thumb_address_cost (rtx x) 7502 { 7503 enum rtx_code c = GET_CODE (x); 7504 7505 if (c == REG) 7506 return 1; 7507 if (c == PLUS 7508 && GET_CODE (XEXP (x, 0)) == REG 7509 && GET_CODE (XEXP (x, 1)) == CONST_INT) 7510 return 1; 7511 7512 return 2; 7513 } 7514 7515 static int 7516 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) 7517 { 7518 return TARGET_32BIT ? 
arm_arm_address_cost (x) : arm_thumb_address_cost (x); 7519 } 7520 7521 static int 7522 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) 7523 { 7524 rtx i_pat, d_pat; 7525 7526 /* Some true dependencies can have a higher cost depending 7527 on precisely how certain input operands are used. */ 7528 if (arm_tune_xscale 7529 && REG_NOTE_KIND (link) == 0 7530 && recog_memoized (insn) >= 0 7531 && recog_memoized (dep) >= 0) 7532 { 7533 int shift_opnum = get_attr_shift (insn); 7534 enum attr_type attr_type = get_attr_type (dep); 7535 7536 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted 7537 operand for INSN. If we have a shifted input operand and the 7538 instruction we depend on is another ALU instruction, then we may 7539 have to account for an additional stall. */ 7540 if (shift_opnum != 0 7541 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG)) 7542 { 7543 rtx shifted_operand; 7544 int opno; 7545 7546 /* Get the shifted operand. */ 7547 extract_insn (insn); 7548 shifted_operand = recog_data.operand[shift_opnum]; 7549 7550 /* Iterate over all the operands in DEP. If we write an operand 7551 that overlaps with SHIFTED_OPERAND, then we have increase the 7552 cost of this dependency. */ 7553 extract_insn (dep); 7554 preprocess_constraints (); 7555 for (opno = 0; opno < recog_data.n_operands; opno++) 7556 { 7557 /* We can ignore strict inputs. */ 7558 if (recog_data.operand_type[opno] == OP_IN) 7559 continue; 7560 7561 if (reg_overlap_mentioned_p (recog_data.operand[opno], 7562 shifted_operand)) 7563 return 2; 7564 } 7565 } 7566 } 7567 7568 /* XXX This is not strictly true for the FPA. */ 7569 if (REG_NOTE_KIND (link) == REG_DEP_ANTI 7570 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT) 7571 return 0; 7572 7573 /* Call insns don't incur a stall, even if they follow a load. */ 7574 if (REG_NOTE_KIND (link) == 0 7575 && GET_CODE (insn) == CALL_INSN) 7576 return 1; 7577 7578 if ((i_pat = single_set (insn)) != NULL 7579 && GET_CODE (SET_SRC (i_pat)) == MEM 7580 && (d_pat = single_set (dep)) != NULL 7581 && GET_CODE (SET_DEST (d_pat)) == MEM) 7582 { 7583 rtx src_mem = XEXP (SET_SRC (i_pat), 0); 7584 /* This is a load after a store, there is no conflict if the load reads 7585 from a cached area. Assume that loads from the stack, and from the 7586 constant pool are cached, and that others will miss. This is a 7587 hack. */ 7588 7589 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem)) 7590 || reg_mentioned_p (stack_pointer_rtx, src_mem) 7591 || reg_mentioned_p (frame_pointer_rtx, src_mem) 7592 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem)) 7593 return 1; 7594 } 7595 7596 return cost; 7597 } 7598 7599 static int fp_consts_inited = 0; 7600 7601 /* Only zero is valid for VFP. Other values are also valid for FPA. */ 7602 static const char * const strings_fp[8] = 7603 { 7604 "0", "1", "2", "3", 7605 "4", "5", "0.5", "10" 7606 }; 7607 7608 static REAL_VALUE_TYPE values_fp[8]; 7609 7610 static void 7611 init_fp_table (void) 7612 { 7613 int i; 7614 REAL_VALUE_TYPE r; 7615 7616 if (TARGET_VFP) 7617 fp_consts_inited = 1; 7618 else 7619 fp_consts_inited = 8; 7620 7621 for (i = 0; i < fp_consts_inited; i++) 7622 { 7623 r = REAL_VALUE_ATOF (strings_fp[i], DFmode); 7624 values_fp[i] = r; 7625 } 7626 } 7627 7628 /* Return TRUE if rtx X is a valid immediate FP constant. 
*/ 7629 int 7630 arm_const_double_rtx (rtx x) 7631 { 7632 REAL_VALUE_TYPE r; 7633 int i; 7634 7635 if (!fp_consts_inited) 7636 init_fp_table (); 7637 7638 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 7639 if (REAL_VALUE_MINUS_ZERO (r)) 7640 return 0; 7641 7642 for (i = 0; i < fp_consts_inited; i++) 7643 if (REAL_VALUES_EQUAL (r, values_fp[i])) 7644 return 1; 7645 7646 return 0; 7647 } 7648 7649 /* Return TRUE if rtx X is a valid immediate FPA constant. */ 7650 int 7651 neg_const_double_rtx_ok_for_fpa (rtx x) 7652 { 7653 REAL_VALUE_TYPE r; 7654 int i; 7655 7656 if (!fp_consts_inited) 7657 init_fp_table (); 7658 7659 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 7660 r = REAL_VALUE_NEGATE (r); 7661 if (REAL_VALUE_MINUS_ZERO (r)) 7662 return 0; 7663 7664 for (i = 0; i < 8; i++) 7665 if (REAL_VALUES_EQUAL (r, values_fp[i])) 7666 return 1; 7667 7668 return 0; 7669 } 7670 7671 7672 /* VFPv3 has a fairly wide range of representable immediates, formed from 7673 "quarter-precision" floating-point values. These can be evaluated using this 7674 formula (with ^ for exponentiation): 7675 7676 -1^s * n * 2^-r 7677 7678 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that 7679 16 <= n <= 31 and 0 <= r <= 7. 7680 7681 These values are mapped onto an 8-bit integer ABCDEFGH s.t. 7682 7683 - A (most-significant) is the sign bit. 7684 - BCD are the exponent (encoded as r XOR 3). 7685 - EFGH are the mantissa (encoded as n - 16). 7686 */ 7687 7688 /* Return an integer index for a VFPv3 immediate operand X suitable for the 7689 fconst[sd] instruction, or -1 if X isn't suitable. */ 7690 static int 7691 vfp3_const_double_index (rtx x) 7692 { 7693 REAL_VALUE_TYPE r, m; 7694 int sign, exponent; 7695 unsigned HOST_WIDE_INT mantissa, mant_hi; 7696 unsigned HOST_WIDE_INT mask; 7697 HOST_WIDE_INT m1, m2; 7698 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; 7699 7700 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE) 7701 return -1; 7702 7703 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 7704 7705 /* We can't represent these things, so detect them first. */ 7706 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r)) 7707 return -1; 7708 7709 /* Extract sign, exponent and mantissa. */ 7710 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0; 7711 r = REAL_VALUE_ABS (r); 7712 exponent = REAL_EXP (&r); 7713 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the 7714 highest (sign) bit, with a fixed binary point at bit point_pos. 7715 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 7716 bits for the mantissa, this may fail (low bits would be lost). */ 7717 real_ldexp (&m, &r, point_pos - exponent); 7718 REAL_VALUE_TO_INT (&m1, &m2, m); 7719 mantissa = m1; 7720 mant_hi = m2; 7721 7722 /* If there are bits set in the low part of the mantissa, we can't 7723 represent this value. */ 7724 if (mantissa != 0) 7725 return -1; 7726 7727 /* Now make it so that mantissa contains the most-significant bits, and move 7728 the point_pos to indicate that the least-significant bits have been 7729 discarded. */ 7730 point_pos -= HOST_BITS_PER_WIDE_INT; 7731 mantissa = mant_hi; 7732 7733 /* We can permit four significant bits of mantissa only, plus a high bit 7734 which is always 1. */ 7735 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1; 7736 if ((mantissa & mask) != 0) 7737 return -1; 7738 7739 /* Now we know the mantissa is in range, chop off the unneeded bits. */ 7740 mantissa >>= point_pos - 5; 7741 7742 /* The mantissa may be zero. Disallow that case. 
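   The encoding requires 16 <= n <= 31, so 0.0 itself is not representable.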
(It's possible to load the 7743 floating-point immediate zero with Neon using an integer-zero load, but 7744 that case is handled elsewhere.) */ 7745 if (mantissa == 0) 7746 return -1; 7747 7748 gcc_assert (mantissa >= 16 && mantissa <= 31); 7749 7750 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where 7751 normalized significands are in the range [1, 2). (Our mantissa is shifted 7752 left 4 places at this point relative to normalized IEEE754 values). GCC 7753 internally uses [0.5, 1) (see real.c), so the exponent returned from 7754 REAL_EXP must be altered. */ 7755 exponent = 5 - exponent; 7756 7757 if (exponent < 0 || exponent > 7) 7758 return -1; 7759 7760 /* Sign, mantissa and exponent are now in the correct form to plug into the 7761 formula described in the comment above. */ 7762 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16); 7763 } 7764 7765 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */ 7766 int 7767 vfp3_const_double_rtx (rtx x) 7768 { 7769 if (!TARGET_VFP3) 7770 return 0; 7771 7772 return vfp3_const_double_index (x) != -1; 7773 } 7774 7775 /* Recognize immediates which can be used in various Neon instructions. Legal 7776 immediates are described by the following table (for VMVN variants, the 7777 bitwise inverse of the constant shown is recognized. In either case, VMOV 7778 is output and the correct instruction to use for a given constant is chosen 7779 by the assembler). The constant shown is replicated across all elements of 7780 the destination vector. 7781 7782 insn elems variant constant (binary) 7783 ---- ----- ------- ----------------- 7784 vmov i32 0 00000000 00000000 00000000 abcdefgh 7785 vmov i32 1 00000000 00000000 abcdefgh 00000000 7786 vmov i32 2 00000000 abcdefgh 00000000 00000000 7787 vmov i32 3 abcdefgh 00000000 00000000 00000000 7788 vmov i16 4 00000000 abcdefgh 7789 vmov i16 5 abcdefgh 00000000 7790 vmvn i32 6 00000000 00000000 00000000 abcdefgh 7791 vmvn i32 7 00000000 00000000 abcdefgh 00000000 7792 vmvn i32 8 00000000 abcdefgh 00000000 00000000 7793 vmvn i32 9 abcdefgh 00000000 00000000 00000000 7794 vmvn i16 10 00000000 abcdefgh 7795 vmvn i16 11 abcdefgh 00000000 7796 vmov i32 12 00000000 00000000 abcdefgh 11111111 7797 vmvn i32 13 00000000 00000000 abcdefgh 11111111 7798 vmov i32 14 00000000 abcdefgh 11111111 11111111 7799 vmvn i32 15 00000000 abcdefgh 11111111 11111111 7800 vmov i8 16 abcdefgh 7801 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd 7802 eeeeeeee ffffffff gggggggg hhhhhhhh 7803 vmov f32 18 aBbbbbbc defgh000 00000000 00000000 7804 7805 For case 18, B = !b. Representable values are exactly those accepted by 7806 vfp3_const_double_index, but are output as floating-point numbers rather 7807 than indices. 7808 7809 Variants 0-5 (inclusive) may also be used as immediates for the second 7810 operand of VORR/VBIC instructions. 7811 7812 The INVERSE argument causes the bitwise inverse of the given operand to be 7813 recognized instead (used for recognizing legal immediates for the VAND/VORN 7814 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is 7815 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be 7816 output, rather than the real insns vbic/vorr). 7817 7818 INVERSE makes no difference to the recognition of float vectors. 7819 7820 The return value is the variant of immediate as shown in the above table, or 7821 -1 if the given value doesn't match any of the listed patterns. 
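   For example, a V4SImode vector replicating 0x0000ab00 matches variant 1
   with an element width of 32, while one replicating 0xffffff55 matches the
   VMVN form variant 6 (its bitwise inverse being 0x000000aa).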
7822 */ 7823 static int 7824 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse, 7825 rtx *modconst, int *elementwidth) 7826 { 7827 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \ 7828 matches = 1; \ 7829 for (i = 0; i < idx; i += (STRIDE)) \ 7830 if (!(TEST)) \ 7831 matches = 0; \ 7832 if (matches) \ 7833 { \ 7834 immtype = (CLASS); \ 7835 elsize = (ELSIZE); \ 7836 break; \ 7837 } 7838 7839 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); 7840 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); 7841 unsigned char bytes[16]; 7842 int immtype = -1, matches; 7843 unsigned int invmask = inverse ? 0xff : 0; 7844 7845 /* Vectors of float constants. */ 7846 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) 7847 { 7848 rtx el0 = CONST_VECTOR_ELT (op, 0); 7849 REAL_VALUE_TYPE r0; 7850 7851 if (!vfp3_const_double_rtx (el0)) 7852 return -1; 7853 7854 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0); 7855 7856 for (i = 1; i < n_elts; i++) 7857 { 7858 rtx elt = CONST_VECTOR_ELT (op, i); 7859 REAL_VALUE_TYPE re; 7860 7861 REAL_VALUE_FROM_CONST_DOUBLE (re, elt); 7862 7863 if (!REAL_VALUES_EQUAL (r0, re)) 7864 return -1; 7865 } 7866 7867 if (modconst) 7868 *modconst = CONST_VECTOR_ELT (op, 0); 7869 7870 if (elementwidth) 7871 *elementwidth = 0; 7872 7873 return 18; 7874 } 7875 7876 /* Splat vector constant out into a byte vector. */ 7877 for (i = 0; i < n_elts; i++) 7878 { 7879 rtx el = CONST_VECTOR_ELT (op, i); 7880 unsigned HOST_WIDE_INT elpart; 7881 unsigned int part, parts; 7882 7883 if (GET_CODE (el) == CONST_INT) 7884 { 7885 elpart = INTVAL (el); 7886 parts = 1; 7887 } 7888 else if (GET_CODE (el) == CONST_DOUBLE) 7889 { 7890 elpart = CONST_DOUBLE_LOW (el); 7891 parts = 2; 7892 } 7893 else 7894 gcc_unreachable (); 7895 7896 for (part = 0; part < parts; part++) 7897 { 7898 unsigned int byte; 7899 for (byte = 0; byte < innersize; byte++) 7900 { 7901 bytes[idx++] = (elpart & 0xff) ^ invmask; 7902 elpart >>= BITS_PER_UNIT; 7903 } 7904 if (GET_CODE (el) == CONST_DOUBLE) 7905 elpart = CONST_DOUBLE_HIGH (el); 7906 } 7907 } 7908 7909 /* Sanity check. 
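   Every byte of the vector should have been written into BYTES exactly once,
   so IDX must now equal the byte size of MODE.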
*/ 7910 gcc_assert (idx == GET_MODE_SIZE (mode)); 7911 7912 do 7913 { 7914 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 7915 && bytes[i + 2] == 0 && bytes[i + 3] == 0); 7916 7917 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] 7918 && bytes[i + 2] == 0 && bytes[i + 3] == 0); 7919 7920 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 7921 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); 7922 7923 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 7924 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]); 7925 7926 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0); 7927 7928 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]); 7929 7930 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff 7931 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); 7932 7933 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] 7934 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); 7935 7936 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff 7937 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); 7938 7939 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff 7940 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]); 7941 7942 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff); 7943 7944 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]); 7945 7946 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] 7947 && bytes[i + 2] == 0 && bytes[i + 3] == 0); 7948 7949 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] 7950 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); 7951 7952 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff 7953 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); 7954 7955 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 7956 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); 7957 7958 CHECK (1, 8, 16, bytes[i] == bytes[0]); 7959 7960 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) 7961 && bytes[i] == bytes[(i + 8) % idx]); 7962 } 7963 while (0); 7964 7965 if (immtype == -1) 7966 return -1; 7967 7968 if (elementwidth) 7969 *elementwidth = elsize; 7970 7971 if (modconst) 7972 { 7973 unsigned HOST_WIDE_INT imm = 0; 7974 7975 /* Un-invert bytes of recognized vector, if necessary. */ 7976 if (invmask != 0) 7977 for (i = 0; i < idx; i++) 7978 bytes[i] ^= invmask; 7979 7980 if (immtype == 17) 7981 { 7982 /* FIXME: Broken on 32-bit H_W_I hosts. */ 7983 gcc_assert (sizeof (HOST_WIDE_INT) == 8); 7984 7985 for (i = 0; i < 8; i++) 7986 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) 7987 << (i * BITS_PER_UNIT); 7988 7989 *modconst = GEN_INT (imm); 7990 } 7991 else 7992 { 7993 unsigned HOST_WIDE_INT imm = 0; 7994 7995 for (i = 0; i < elsize / BITS_PER_UNIT; i++) 7996 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); 7997 7998 *modconst = GEN_INT (imm); 7999 } 8000 } 8001 8002 return immtype; 8003 #undef CHECK 8004 } 8005 8006 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly, 8007 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for 8008 float elements), and a modified constant (whatever should be output for a 8009 VMOV) in *MODCONST. 
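   neon_make_constant below relies on this to decide whether a CONST_VECTOR
   can be emitted directly as a VMOV immediate or must instead be loaded from
   the constant pool.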
*/ 8010 8011 int 8012 neon_immediate_valid_for_move (rtx op, enum machine_mode mode, 8013 rtx *modconst, int *elementwidth) 8014 { 8015 rtx tmpconst; 8016 int tmpwidth; 8017 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth); 8018 8019 if (retval == -1) 8020 return 0; 8021 8022 if (modconst) 8023 *modconst = tmpconst; 8024 8025 if (elementwidth) 8026 *elementwidth = tmpwidth; 8027 8028 return 1; 8029 } 8030 8031 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If 8032 the immediate is valid, write a constant suitable for using as an operand 8033 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to 8034 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */ 8035 8036 int 8037 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse, 8038 rtx *modconst, int *elementwidth) 8039 { 8040 rtx tmpconst; 8041 int tmpwidth; 8042 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth); 8043 8044 if (retval < 0 || retval > 5) 8045 return 0; 8046 8047 if (modconst) 8048 *modconst = tmpconst; 8049 8050 if (elementwidth) 8051 *elementwidth = tmpwidth; 8052 8053 return 1; 8054 } 8055 8056 /* Return a string suitable for output of Neon immediate logic operation 8057 MNEM. */ 8058 8059 char * 8060 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode, 8061 int inverse, int quad) 8062 { 8063 int width, is_valid; 8064 static char templ[40]; 8065 8066 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width); 8067 8068 gcc_assert (is_valid != 0); 8069 8070 if (quad) 8071 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width); 8072 else 8073 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width); 8074 8075 return templ; 8076 } 8077 8078 /* Output a sequence of pairwise operations to implement a reduction. 8079 NOTE: We do "too much work" here, because pairwise operations work on two 8080 registers-worth of operands in one go. Unfortunately we can't exploit those 8081 extra calculations to do the full operation in fewer steps, I don't think. 8082 Although all vector elements of the result but the first are ignored, we 8083 actually calculate the same result in each of the elements. An alternative 8084 such as initially loading a vector with zero to use as each of the second 8085 operands would use up an additional register and take an extra instruction, 8086 for no particular gain. */ 8087 8088 void 8089 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode, 8090 rtx (*reduc) (rtx, rtx, rtx)) 8091 { 8092 enum machine_mode inner = GET_MODE_INNER (mode); 8093 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner); 8094 rtx tmpsum = op1; 8095 8096 for (i = parts / 2; i >= 1; i /= 2) 8097 { 8098 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode); 8099 emit_insn (reduc (dest, tmpsum, tmpsum)); 8100 tmpsum = dest; 8101 } 8102 } 8103 8104 /* If VALS is a vector constant that can be loaded into a register 8105 using VDUP, generate instructions to do so and return an RTX to 8106 assign to the register. Otherwise return NULL_RTX. 
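   For example, a V8QImode vector whose elements are all 42 is handled by
   moving 42 into a core register and duplicating it with VDUP.8; vectors
   whose elements differ, or whose elements are wider than four bytes, yield
   NULL_RTX.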
*/ 8107 8108 static rtx 8109 neon_vdup_constant (rtx vals) 8110 { 8111 enum machine_mode mode = GET_MODE (vals); 8112 enum machine_mode inner_mode = GET_MODE_INNER (mode); 8113 int n_elts = GET_MODE_NUNITS (mode); 8114 bool all_same = true; 8115 rtx x; 8116 int i; 8117 8118 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4) 8119 return NULL_RTX; 8120 8121 for (i = 0; i < n_elts; ++i) 8122 { 8123 x = XVECEXP (vals, 0, i); 8124 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 8125 all_same = false; 8126 } 8127 8128 if (!all_same) 8129 /* The elements are not all the same. We could handle repeating 8130 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t 8131 {0, C, 0, C, 0, C, 0, C} which can be loaded using 8132 vdup.i16). */ 8133 return NULL_RTX; 8134 8135 /* We can load this constant by using VDUP and a constant in a 8136 single ARM register. This will be cheaper than a vector 8137 load. */ 8138 8139 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); 8140 return gen_rtx_UNSPEC (mode, gen_rtvec (1, x), 8141 UNSPEC_VDUP_N); 8142 } 8143 8144 /* Generate code to load VALS, which is a PARALLEL containing only 8145 constants (for vec_init) or CONST_VECTOR, efficiently into a 8146 register. Returns an RTX to copy into the register, or NULL_RTX 8147 for a PARALLEL that can not be converted into a CONST_VECTOR. */ 8148 8149 rtx 8150 neon_make_constant (rtx vals) 8151 { 8152 enum machine_mode mode = GET_MODE (vals); 8153 rtx target; 8154 rtx const_vec = NULL_RTX; 8155 int n_elts = GET_MODE_NUNITS (mode); 8156 int n_const = 0; 8157 int i; 8158 8159 if (GET_CODE (vals) == CONST_VECTOR) 8160 const_vec = vals; 8161 else if (GET_CODE (vals) == PARALLEL) 8162 { 8163 /* A CONST_VECTOR must contain only CONST_INTs and 8164 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). 8165 Only store valid constants in a CONST_VECTOR. */ 8166 for (i = 0; i < n_elts; ++i) 8167 { 8168 rtx x = XVECEXP (vals, 0, i); 8169 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) 8170 n_const++; 8171 } 8172 if (n_const == n_elts) 8173 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); 8174 } 8175 else 8176 gcc_unreachable (); 8177 8178 if (const_vec != NULL 8179 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL)) 8180 /* Load using VMOV. On Cortex-A8 this takes one cycle. */ 8181 return const_vec; 8182 else if ((target = neon_vdup_constant (vals)) != NULL_RTX) 8183 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON 8184 pipeline cycle; creating the constant takes one or two ARM 8185 pipeline cycles. */ 8186 return target; 8187 else if (const_vec != NULL_RTX) 8188 /* Load from constant pool. On Cortex-A8 this takes two cycles 8189 (for either double or quad vectors). We can not take advantage 8190 of single-cycle VLD1 because we need a PC-relative addressing 8191 mode. */ 8192 return const_vec; 8193 else 8194 /* A PARALLEL containing something not valid inside CONST_VECTOR. 8195 We can not construct an initializer. */ 8196 return NULL_RTX; 8197 } 8198 8199 /* Initialize vector TARGET to VALS. 
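   If every element is constant we first try neon_make_constant; if all
   elements are the same non-constant value and no wider than a word we splat
   it with VDUP; if exactly one element varies we load the constant part and
   then overwrite that lane with VSET_LANE; otherwise the vector is built in
   a stack temporary one element at a time and loaded as a whole.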
*/ 8200 8201 void 8202 neon_expand_vector_init (rtx target, rtx vals) 8203 { 8204 enum machine_mode mode = GET_MODE (target); 8205 enum machine_mode inner_mode = GET_MODE_INNER (mode); 8206 int n_elts = GET_MODE_NUNITS (mode); 8207 int n_var = 0, one_var = -1; 8208 bool all_same = true; 8209 rtx x, mem; 8210 int i; 8211 8212 for (i = 0; i < n_elts; ++i) 8213 { 8214 x = XVECEXP (vals, 0, i); 8215 if (!CONSTANT_P (x)) 8216 ++n_var, one_var = i; 8217 8218 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 8219 all_same = false; 8220 } 8221 8222 if (n_var == 0) 8223 { 8224 rtx constant = neon_make_constant (vals); 8225 if (constant != NULL_RTX) 8226 { 8227 emit_move_insn (target, constant); 8228 return; 8229 } 8230 } 8231 8232 /* Splat a single non-constant element if we can. */ 8233 if (all_same && GET_MODE_SIZE (inner_mode) <= 4) 8234 { 8235 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); 8236 emit_insn (gen_rtx_SET (VOIDmode, target, 8237 gen_rtx_UNSPEC (mode, gen_rtvec (1, x), 8238 UNSPEC_VDUP_N))); 8239 return; 8240 } 8241 8242 /* One field is non-constant. Load constant then overwrite varying 8243 field. This is more efficient than using the stack. */ 8244 if (n_var == 1) 8245 { 8246 rtx copy = copy_rtx (vals); 8247 rtvec ops; 8248 8249 /* Load constant part of vector, substitute neighboring value for 8250 varying element. */ 8251 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); 8252 neon_expand_vector_init (target, copy); 8253 8254 /* Insert variable. */ 8255 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); 8256 ops = gen_rtvec (3, x, target, GEN_INT (one_var)); 8257 emit_insn (gen_rtx_SET (VOIDmode, target, 8258 gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE))); 8259 return; 8260 } 8261 8262 /* Construct the vector in memory one field at a time 8263 and load the whole vector. */ 8264 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0); 8265 for (i = 0; i < n_elts; i++) 8266 emit_move_insn (adjust_address_nv (mem, inner_mode, 8267 i * GET_MODE_SIZE (inner_mode)), 8268 XVECEXP (vals, 0, i)); 8269 emit_move_insn (target, mem); 8270 } 8271 8272 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise 8273 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so 8274 reported source locations are bogus. */ 8275 8276 static void 8277 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, 8278 const char *err) 8279 { 8280 HOST_WIDE_INT lane; 8281 8282 gcc_assert (GET_CODE (operand) == CONST_INT); 8283 8284 lane = INTVAL (operand); 8285 8286 if (lane < low || lane >= high) 8287 error (err); 8288 } 8289 8290 /* Bounds-check lanes. */ 8291 8292 void 8293 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) 8294 { 8295 bounds_check (operand, low, high, "lane out of range"); 8296 } 8297 8298 /* Bounds-check constants. */ 8299 8300 void 8301 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) 8302 { 8303 bounds_check (operand, low, high, "constant out of range"); 8304 } 8305 8306 HOST_WIDE_INT 8307 neon_element_bits (enum machine_mode mode) 8308 { 8309 if (mode == DImode) 8310 return GET_MODE_BITSIZE (mode); 8311 else 8312 return GET_MODE_BITSIZE (GET_MODE_INNER (mode)); 8313 } 8314 8315 8316 /* Predicates for `match_operand' and `match_operator'. */ 8317 8318 /* Return nonzero if OP is a valid Cirrus memory address pattern. */ 8319 int 8320 cirrus_memory_offset (rtx op) 8321 { 8322 /* Reject eliminable registers. */ 8323 if (! 
(reload_in_progress || reload_completed) 8324 && ( reg_mentioned_p (frame_pointer_rtx, op) 8325 || reg_mentioned_p (arg_pointer_rtx, op) 8326 || reg_mentioned_p (virtual_incoming_args_rtx, op) 8327 || reg_mentioned_p (virtual_outgoing_args_rtx, op) 8328 || reg_mentioned_p (virtual_stack_dynamic_rtx, op) 8329 || reg_mentioned_p (virtual_stack_vars_rtx, op))) 8330 return 0; 8331 8332 if (GET_CODE (op) == MEM) 8333 { 8334 rtx ind; 8335 8336 ind = XEXP (op, 0); 8337 8338 /* Match: (mem (reg)). */ 8339 if (GET_CODE (ind) == REG) 8340 return 1; 8341 8342 /* Match: 8343 (mem (plus (reg) 8344 (const))). */ 8345 if (GET_CODE (ind) == PLUS 8346 && GET_CODE (XEXP (ind, 0)) == REG 8347 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) 8348 && GET_CODE (XEXP (ind, 1)) == CONST_INT) 8349 return 1; 8350 } 8351 8352 return 0; 8353 } 8354 8355 /* Return TRUE if OP is a valid coprocessor memory address pattern. 8356 WB is true if full writeback address modes are allowed and is false 8357 if limited writeback address modes (POST_INC and PRE_DEC) are 8358 allowed. */ 8359 8360 int 8361 arm_coproc_mem_operand (rtx op, bool wb) 8362 { 8363 rtx ind; 8364 8365 /* Reject eliminable registers. */ 8366 if (! (reload_in_progress || reload_completed) 8367 && ( reg_mentioned_p (frame_pointer_rtx, op) 8368 || reg_mentioned_p (arg_pointer_rtx, op) 8369 || reg_mentioned_p (virtual_incoming_args_rtx, op) 8370 || reg_mentioned_p (virtual_outgoing_args_rtx, op) 8371 || reg_mentioned_p (virtual_stack_dynamic_rtx, op) 8372 || reg_mentioned_p (virtual_stack_vars_rtx, op))) 8373 return FALSE; 8374 8375 /* Constants are converted into offsets from labels. */ 8376 if (GET_CODE (op) != MEM) 8377 return FALSE; 8378 8379 ind = XEXP (op, 0); 8380 8381 if (reload_completed 8382 && (GET_CODE (ind) == LABEL_REF 8383 || (GET_CODE (ind) == CONST 8384 && GET_CODE (XEXP (ind, 0)) == PLUS 8385 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF 8386 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT))) 8387 return TRUE; 8388 8389 /* Match: (mem (reg)). */ 8390 if (GET_CODE (ind) == REG) 8391 return arm_address_register_rtx_p (ind, 0); 8392 8393 /* Autoincremment addressing modes. POST_INC and PRE_DEC are 8394 acceptable in any case (subject to verification by 8395 arm_address_register_rtx_p). We need WB to be true to accept 8396 PRE_INC and POST_DEC. */ 8397 if (GET_CODE (ind) == POST_INC 8398 || GET_CODE (ind) == PRE_DEC 8399 || (wb 8400 && (GET_CODE (ind) == PRE_INC 8401 || GET_CODE (ind) == POST_DEC))) 8402 return arm_address_register_rtx_p (XEXP (ind, 0), 0); 8403 8404 if (wb 8405 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY) 8406 && arm_address_register_rtx_p (XEXP (ind, 0), 0) 8407 && GET_CODE (XEXP (ind, 1)) == PLUS 8408 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0))) 8409 ind = XEXP (ind, 1); 8410 8411 /* Match: 8412 (plus (reg) 8413 (const)). */ 8414 if (GET_CODE (ind) == PLUS 8415 && GET_CODE (XEXP (ind, 0)) == REG 8416 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) 8417 && GET_CODE (XEXP (ind, 1)) == CONST_INT 8418 && INTVAL (XEXP (ind, 1)) > -1024 8419 && INTVAL (XEXP (ind, 1)) < 1024 8420 && (INTVAL (XEXP (ind, 1)) & 3) == 0) 8421 return TRUE; 8422 8423 return FALSE; 8424 } 8425 8426 /* Return TRUE if OP is a memory operand which we can load or store a vector 8427 to/from. 
TYPE is one of the following values: 8428 0 - Vector load/stor (vldr) 8429 1 - Core registers (ldm) 8430 2 - Element/structure loads (vld1) 8431 */ 8432 int 8433 neon_vector_mem_operand (rtx op, int type) 8434 { 8435 rtx ind; 8436 8437 /* Reject eliminable registers. */ 8438 if (! (reload_in_progress || reload_completed) 8439 && ( reg_mentioned_p (frame_pointer_rtx, op) 8440 || reg_mentioned_p (arg_pointer_rtx, op) 8441 || reg_mentioned_p (virtual_incoming_args_rtx, op) 8442 || reg_mentioned_p (virtual_outgoing_args_rtx, op) 8443 || reg_mentioned_p (virtual_stack_dynamic_rtx, op) 8444 || reg_mentioned_p (virtual_stack_vars_rtx, op))) 8445 return FALSE; 8446 8447 /* Constants are converted into offsets from labels. */ 8448 if (GET_CODE (op) != MEM) 8449 return FALSE; 8450 8451 ind = XEXP (op, 0); 8452 8453 if (reload_completed 8454 && (GET_CODE (ind) == LABEL_REF 8455 || (GET_CODE (ind) == CONST 8456 && GET_CODE (XEXP (ind, 0)) == PLUS 8457 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF 8458 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT))) 8459 return TRUE; 8460 8461 /* Match: (mem (reg)). */ 8462 if (GET_CODE (ind) == REG) 8463 return arm_address_register_rtx_p (ind, 0); 8464 8465 /* Allow post-increment with Neon registers. */ 8466 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC)) 8467 return arm_address_register_rtx_p (XEXP (ind, 0), 0); 8468 8469 /* FIXME: vld1 allows register post-modify. */ 8470 8471 /* Match: 8472 (plus (reg) 8473 (const)). */ 8474 if (type == 0 8475 && GET_CODE (ind) == PLUS 8476 && GET_CODE (XEXP (ind, 0)) == REG 8477 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) 8478 && GET_CODE (XEXP (ind, 1)) == CONST_INT 8479 && INTVAL (XEXP (ind, 1)) > -1024 8480 && INTVAL (XEXP (ind, 1)) < 1016 8481 && (INTVAL (XEXP (ind, 1)) & 3) == 0) 8482 return TRUE; 8483 8484 return FALSE; 8485 } 8486 8487 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct 8488 type. */ 8489 int 8490 neon_struct_mem_operand (rtx op) 8491 { 8492 rtx ind; 8493 8494 /* Reject eliminable registers. */ 8495 if (! (reload_in_progress || reload_completed) 8496 && ( reg_mentioned_p (frame_pointer_rtx, op) 8497 || reg_mentioned_p (arg_pointer_rtx, op) 8498 || reg_mentioned_p (virtual_incoming_args_rtx, op) 8499 || reg_mentioned_p (virtual_outgoing_args_rtx, op) 8500 || reg_mentioned_p (virtual_stack_dynamic_rtx, op) 8501 || reg_mentioned_p (virtual_stack_vars_rtx, op))) 8502 return FALSE; 8503 8504 /* Constants are converted into offsets from labels. */ 8505 if (GET_CODE (op) != MEM) 8506 return FALSE; 8507 8508 ind = XEXP (op, 0); 8509 8510 if (reload_completed 8511 && (GET_CODE (ind) == LABEL_REF 8512 || (GET_CODE (ind) == CONST 8513 && GET_CODE (XEXP (ind, 0)) == PLUS 8514 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF 8515 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT))) 8516 return TRUE; 8517 8518 /* Match: (mem (reg)). */ 8519 if (GET_CODE (ind) == REG) 8520 return arm_address_register_rtx_p (ind, 0); 8521 8522 return FALSE; 8523 } 8524 8525 /* Return true if X is a register that will be eliminated later on. */ 8526 int 8527 arm_eliminable_register (rtx x) 8528 { 8529 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM 8530 || REGNO (x) == ARG_POINTER_REGNUM 8531 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER 8532 && REGNO (x) <= LAST_VIRTUAL_REGISTER)); 8533 } 8534 8535 /* Return GENERAL_REGS if a scratch register required to reload x to/from 8536 coprocessor registers. Otherwise return NO_REGS. 
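   In effect, a scratch is needed when X is neither a suitable register
   operand nor a memory reference that the coprocessor load/store
   instructions can address directly; HFmode additionally always needs one
   when the Neon FP16 extension is unavailable.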
*/ 8537 8538 enum reg_class 8539 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) 8540 { 8541 if (mode == HFmode) 8542 { 8543 if (!TARGET_NEON_FP16) 8544 return GENERAL_REGS; 8545 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2)) 8546 return NO_REGS; 8547 return GENERAL_REGS; 8548 } 8549 8550 if (TARGET_NEON 8551 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT 8552 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) 8553 && neon_vector_mem_operand (x, 0)) 8554 return NO_REGS; 8555 8556 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode)) 8557 return NO_REGS; 8558 8559 return GENERAL_REGS; 8560 } 8561 8562 /* Values which must be returned in the most-significant end of the return 8563 register. */ 8564 8565 static bool 8566 arm_return_in_msb (const_tree valtype) 8567 { 8568 return (TARGET_AAPCS_BASED 8569 && BYTES_BIG_ENDIAN 8570 && (AGGREGATE_TYPE_P (valtype) 8571 || TREE_CODE (valtype) == COMPLEX_TYPE)); 8572 } 8573 8574 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction. 8575 Use by the Cirrus Maverick code which has to workaround 8576 a hardware bug triggered by such instructions. */ 8577 static bool 8578 arm_memory_load_p (rtx insn) 8579 { 8580 rtx body, lhs, rhs;; 8581 8582 if (insn == NULL_RTX || GET_CODE (insn) != INSN) 8583 return false; 8584 8585 body = PATTERN (insn); 8586 8587 if (GET_CODE (body) != SET) 8588 return false; 8589 8590 lhs = XEXP (body, 0); 8591 rhs = XEXP (body, 1); 8592 8593 lhs = REG_OR_SUBREG_RTX (lhs); 8594 8595 /* If the destination is not a general purpose 8596 register we do not have to worry. */ 8597 if (GET_CODE (lhs) != REG 8598 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS) 8599 return false; 8600 8601 /* As well as loads from memory we also have to react 8602 to loads of invalid constants which will be turned 8603 into loads from the minipool. */ 8604 return (GET_CODE (rhs) == MEM 8605 || GET_CODE (rhs) == SYMBOL_REF 8606 || note_invalid_constants (insn, -1, false)); 8607 } 8608 8609 /* Return TRUE if INSN is a Cirrus instruction. */ 8610 static bool 8611 arm_cirrus_insn_p (rtx insn) 8612 { 8613 enum attr_cirrus attr; 8614 8615 /* get_attr cannot accept USE or CLOBBER. */ 8616 if (!insn 8617 || GET_CODE (insn) != INSN 8618 || GET_CODE (PATTERN (insn)) == USE 8619 || GET_CODE (PATTERN (insn)) == CLOBBER) 8620 return 0; 8621 8622 attr = get_attr_cirrus (insn); 8623 8624 return attr != CIRRUS_NOT; 8625 } 8626 8627 /* Cirrus reorg for invalid instruction combinations. */ 8628 static void 8629 cirrus_reorg (rtx first) 8630 { 8631 enum attr_cirrus attr; 8632 rtx body = PATTERN (first); 8633 rtx t; 8634 int nops; 8635 8636 /* Any branch must be followed by 2 non Cirrus instructions. */ 8637 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN) 8638 { 8639 nops = 0; 8640 t = next_nonnote_insn (first); 8641 8642 if (arm_cirrus_insn_p (t)) 8643 ++ nops; 8644 8645 if (arm_cirrus_insn_p (next_nonnote_insn (t))) 8646 ++ nops; 8647 8648 while (nops --) 8649 emit_insn_after (gen_nop (), first); 8650 8651 return; 8652 } 8653 8654 /* (float (blah)) is in parallel with a clobber. */ 8655 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) 8656 body = XVECEXP (body, 0, 0); 8657 8658 if (GET_CODE (body) == SET) 8659 { 8660 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1); 8661 8662 /* cfldrd, cfldr64, cfstrd, cfstr64 must 8663 be followed by a non Cirrus insn. 
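   If the next real instruction is itself a Cirrus insn we insert a nop after
   FIRST to keep them apart.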
*/ 8664 if (get_attr_cirrus (first) == CIRRUS_DOUBLE) 8665 { 8666 if (arm_cirrus_insn_p (next_nonnote_insn (first))) 8667 emit_insn_after (gen_nop (), first); 8668 8669 return; 8670 } 8671 else if (arm_memory_load_p (first)) 8672 { 8673 unsigned int arm_regno; 8674 8675 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr, 8676 ldr/cfmv64hr combination where the Rd field is the same 8677 in both instructions must be split with a non Cirrus 8678 insn. Example: 8679 8680 ldr r0, blah 8681 nop 8682 cfmvsr mvf0, r0. */ 8683 8684 /* Get Arm register number for ldr insn. */ 8685 if (GET_CODE (lhs) == REG) 8686 arm_regno = REGNO (lhs); 8687 else 8688 { 8689 gcc_assert (GET_CODE (rhs) == REG); 8690 arm_regno = REGNO (rhs); 8691 } 8692 8693 /* Next insn. */ 8694 first = next_nonnote_insn (first); 8695 8696 if (! arm_cirrus_insn_p (first)) 8697 return; 8698 8699 body = PATTERN (first); 8700 8701 /* (float (blah)) is in parallel with a clobber. */ 8702 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0)) 8703 body = XVECEXP (body, 0, 0); 8704 8705 if (GET_CODE (body) == FLOAT) 8706 body = XEXP (body, 0); 8707 8708 if (get_attr_cirrus (first) == CIRRUS_MOVE 8709 && GET_CODE (XEXP (body, 1)) == REG 8710 && arm_regno == REGNO (XEXP (body, 1))) 8711 emit_insn_after (gen_nop (), first); 8712 8713 return; 8714 } 8715 } 8716 8717 /* get_attr cannot accept USE or CLOBBER. */ 8718 if (!first 8719 || GET_CODE (first) != INSN 8720 || GET_CODE (PATTERN (first)) == USE 8721 || GET_CODE (PATTERN (first)) == CLOBBER) 8722 return; 8723 8724 attr = get_attr_cirrus (first); 8725 8726 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...) 8727 must be followed by a non-coprocessor instruction. */ 8728 if (attr == CIRRUS_COMPARE) 8729 { 8730 nops = 0; 8731 8732 t = next_nonnote_insn (first); 8733 8734 if (arm_cirrus_insn_p (t)) 8735 ++ nops; 8736 8737 if (arm_cirrus_insn_p (next_nonnote_insn (t))) 8738 ++ nops; 8739 8740 while (nops --) 8741 emit_insn_after (gen_nop (), first); 8742 8743 return; 8744 } 8745 } 8746 8747 /* Return TRUE if X references a SYMBOL_REF. */ 8748 int 8749 symbol_mentioned_p (rtx x) 8750 { 8751 const char * fmt; 8752 int i; 8753 8754 if (GET_CODE (x) == SYMBOL_REF) 8755 return 1; 8756 8757 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they 8758 are constant offsets, not symbols. */ 8759 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) 8760 return 0; 8761 8762 fmt = GET_RTX_FORMAT (GET_CODE (x)); 8763 8764 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 8765 { 8766 if (fmt[i] == 'E') 8767 { 8768 int j; 8769 8770 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 8771 if (symbol_mentioned_p (XVECEXP (x, i, j))) 8772 return 1; 8773 } 8774 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i))) 8775 return 1; 8776 } 8777 8778 return 0; 8779 } 8780 8781 /* Return TRUE if X references a LABEL_REF. */ 8782 int 8783 label_mentioned_p (rtx x) 8784 { 8785 const char * fmt; 8786 int i; 8787 8788 if (GET_CODE (x) == LABEL_REF) 8789 return 1; 8790 8791 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing 8792 instruction, but they are constant offsets, not symbols. 
*/ 8793 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) 8794 return 0; 8795 8796 fmt = GET_RTX_FORMAT (GET_CODE (x)); 8797 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 8798 { 8799 if (fmt[i] == 'E') 8800 { 8801 int j; 8802 8803 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 8804 if (label_mentioned_p (XVECEXP (x, i, j))) 8805 return 1; 8806 } 8807 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i))) 8808 return 1; 8809 } 8810 8811 return 0; 8812 } 8813 8814 int 8815 tls_mentioned_p (rtx x) 8816 { 8817 switch (GET_CODE (x)) 8818 { 8819 case CONST: 8820 return tls_mentioned_p (XEXP (x, 0)); 8821 8822 case UNSPEC: 8823 if (XINT (x, 1) == UNSPEC_TLS) 8824 return 1; 8825 8826 default: 8827 return 0; 8828 } 8829 } 8830 8831 /* Must not copy any rtx that uses a pc-relative address. */ 8832 8833 static int 8834 arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED) 8835 { 8836 if (GET_CODE (*x) == UNSPEC 8837 && XINT (*x, 1) == UNSPEC_PIC_BASE) 8838 return 1; 8839 return 0; 8840 } 8841 8842 static bool 8843 arm_cannot_copy_insn_p (rtx insn) 8844 { 8845 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL); 8846 } 8847 8848 enum rtx_code 8849 minmax_code (rtx x) 8850 { 8851 enum rtx_code code = GET_CODE (x); 8852 8853 switch (code) 8854 { 8855 case SMAX: 8856 return GE; 8857 case SMIN: 8858 return LE; 8859 case UMIN: 8860 return LEU; 8861 case UMAX: 8862 return GEU; 8863 default: 8864 gcc_unreachable (); 8865 } 8866 } 8867 8868 /* Return 1 if memory locations are adjacent. */ 8869 int 8870 adjacent_mem_locations (rtx a, rtx b) 8871 { 8872 /* We don't guarantee to preserve the order of these memory refs. */ 8873 if (volatile_refs_p (a) || volatile_refs_p (b)) 8874 return 0; 8875 8876 if ((GET_CODE (XEXP (a, 0)) == REG 8877 || (GET_CODE (XEXP (a, 0)) == PLUS 8878 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT)) 8879 && (GET_CODE (XEXP (b, 0)) == REG 8880 || (GET_CODE (XEXP (b, 0)) == PLUS 8881 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT))) 8882 { 8883 HOST_WIDE_INT val0 = 0, val1 = 0; 8884 rtx reg0, reg1; 8885 int val_diff; 8886 8887 if (GET_CODE (XEXP (a, 0)) == PLUS) 8888 { 8889 reg0 = XEXP (XEXP (a, 0), 0); 8890 val0 = INTVAL (XEXP (XEXP (a, 0), 1)); 8891 } 8892 else 8893 reg0 = XEXP (a, 0); 8894 8895 if (GET_CODE (XEXP (b, 0)) == PLUS) 8896 { 8897 reg1 = XEXP (XEXP (b, 0), 0); 8898 val1 = INTVAL (XEXP (XEXP (b, 0), 1)); 8899 } 8900 else 8901 reg1 = XEXP (b, 0); 8902 8903 /* Don't accept any offset that will require multiple 8904 instructions to handle, since this would cause the 8905 arith_adjacentmem pattern to output an overlong sequence. */ 8906 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS)) 8907 return 0; 8908 8909 /* Don't allow an eliminable register: register elimination can make 8910 the offset too large. */ 8911 if (arm_eliminable_register (reg0)) 8912 return 0; 8913 8914 val_diff = val1 - val0; 8915 8916 if (arm_ld_sched) 8917 { 8918 /* If the target has load delay slots, then there's no benefit 8919 to using an ldm instruction unless the offset is zero and 8920 we are optimizing for size. 
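   (See the fuller discussion of ldr versus ldm timing in
   load_multiple_sequence below.)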
*/ 8921 return (optimize_size && (REGNO (reg0) == REGNO (reg1)) 8922 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4) 8923 && (val_diff == 4 || val_diff == -4)); 8924 } 8925 8926 return ((REGNO (reg0) == REGNO (reg1)) 8927 && (val_diff == 4 || val_diff == -4)); 8928 } 8929 8930 return 0; 8931 } 8932 8933 int 8934 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base, 8935 HOST_WIDE_INT *load_offset) 8936 { 8937 int unsorted_regs[4]; 8938 HOST_WIDE_INT unsorted_offsets[4]; 8939 int order[4]; 8940 int base_reg = -1; 8941 int i; 8942 8943 /* Can only handle 2, 3, or 4 insns at present, 8944 though could be easily extended if required. */ 8945 gcc_assert (nops >= 2 && nops <= 4); 8946 8947 memset (order, 0, 4 * sizeof (int)); 8948 8949 /* Loop over the operands and check that the memory references are 8950 suitable (i.e. immediate offsets from the same base register). At 8951 the same time, extract the target register, and the memory 8952 offsets. */ 8953 for (i = 0; i < nops; i++) 8954 { 8955 rtx reg; 8956 rtx offset; 8957 8958 /* Convert a subreg of a mem into the mem itself. */ 8959 if (GET_CODE (operands[nops + i]) == SUBREG) 8960 operands[nops + i] = alter_subreg (operands + (nops + i)); 8961 8962 gcc_assert (GET_CODE (operands[nops + i]) == MEM); 8963 8964 /* Don't reorder volatile memory references; it doesn't seem worth 8965 looking for the case where the order is ok anyway. */ 8966 if (MEM_VOLATILE_P (operands[nops + i])) 8967 return 0; 8968 8969 offset = const0_rtx; 8970 8971 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG 8972 || (GET_CODE (reg) == SUBREG 8973 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) 8974 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS 8975 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0)) 8976 == REG) 8977 || (GET_CODE (reg) == SUBREG 8978 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) 8979 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) 8980 == CONST_INT))) 8981 { 8982 if (i == 0) 8983 { 8984 base_reg = REGNO (reg); 8985 unsorted_regs[0] = (GET_CODE (operands[i]) == REG 8986 ? REGNO (operands[i]) 8987 : REGNO (SUBREG_REG (operands[i]))); 8988 order[0] = 0; 8989 } 8990 else 8991 { 8992 if (base_reg != (int) REGNO (reg)) 8993 /* Not addressed from the same base register. */ 8994 return 0; 8995 8996 unsorted_regs[i] = (GET_CODE (operands[i]) == REG 8997 ? REGNO (operands[i]) 8998 : REGNO (SUBREG_REG (operands[i]))); 8999 if (unsorted_regs[i] < unsorted_regs[order[0]]) 9000 order[0] = i; 9001 } 9002 9003 /* If it isn't an integer register, or if it overwrites the 9004 base register but isn't the last insn in the list, then 9005 we can't do this. */ 9006 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14 9007 || (i != nops - 1 && unsorted_regs[i] == base_reg)) 9008 return 0; 9009 9010 unsorted_offsets[i] = INTVAL (offset); 9011 } 9012 else 9013 /* Not a suitable memory address. */ 9014 return 0; 9015 } 9016 9017 /* All the useful information has now been extracted from the 9018 operands into unsorted_regs and unsorted_offsets; additionally, 9019 order[0] has been set to the lowest numbered register in the 9020 list. Sort the registers into order, and check that the memory 9021 offsets are ascending and adjacent. 
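   The return value encodes the addressing mode for emit_ldm_seq: 1 for
   ldmia, 2 for ldmib, 3 for ldmda, 4 for ldmdb, 5 when the base address must
   first be adjusted with an add or sub, and 0 when no load-multiple can be
   used.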
*/ 9022 9023 for (i = 1; i < nops; i++) 9024 { 9025 int j; 9026 9027 order[i] = order[i - 1]; 9028 for (j = 0; j < nops; j++) 9029 if (unsorted_regs[j] > unsorted_regs[order[i - 1]] 9030 && (order[i] == order[i - 1] 9031 || unsorted_regs[j] < unsorted_regs[order[i]])) 9032 order[i] = j; 9033 9034 /* Have we found a suitable register? if not, one must be used more 9035 than once. */ 9036 if (order[i] == order[i - 1]) 9037 return 0; 9038 9039 /* Is the memory address adjacent and ascending? */ 9040 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4) 9041 return 0; 9042 } 9043 9044 if (base) 9045 { 9046 *base = base_reg; 9047 9048 for (i = 0; i < nops; i++) 9049 regs[i] = unsorted_regs[order[i]]; 9050 9051 *load_offset = unsorted_offsets[order[0]]; 9052 } 9053 9054 if (unsorted_offsets[order[0]] == 0) 9055 return 1; /* ldmia */ 9056 9057 if (TARGET_ARM && unsorted_offsets[order[0]] == 4) 9058 return 2; /* ldmib */ 9059 9060 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) 9061 return 3; /* ldmda */ 9062 9063 if (unsorted_offsets[order[nops - 1]] == -4) 9064 return 4; /* ldmdb */ 9065 9066 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm 9067 if the offset isn't small enough. The reason 2 ldrs are faster 9068 is because these ARMs are able to do more than one cache access 9069 in a single cycle. The ARM9 and StrongARM have Harvard caches, 9070 whilst the ARM8 has a double bandwidth cache. This means that 9071 these cores can do both an instruction fetch and a data fetch in 9072 a single cycle, so the trick of calculating the address into a 9073 scratch register (one of the result regs) and then doing a load 9074 multiple actually becomes slower (and no smaller in code size). 9075 That is the transformation 9076 9077 ldr rd1, [rbase + offset] 9078 ldr rd2, [rbase + offset + 4] 9079 9080 to 9081 9082 add rd1, rbase, offset 9083 ldmia rd1, {rd1, rd2} 9084 9085 produces worse code -- '3 cycles + any stalls on rd2' instead of 9086 '2 cycles + any stalls on rd2'. On ARMs with only one cache 9087 access per cycle, the first sequence could never complete in less 9088 than 6 cycles, whereas the ldm sequence would only take 5 and 9089 would make better use of sequential accesses if not hitting the 9090 cache. 9091 9092 We cheat here and test 'arm_ld_sched' which we currently know to 9093 only be true for the ARM8, ARM9 and StrongARM. If this ever 9094 changes, then the test below needs to be reworked. */ 9095 if (nops == 2 && arm_ld_sched) 9096 return 0; 9097 9098 /* Can't do it without setting up the offset, only do this if it takes 9099 no more than one insn. */ 9100 return (const_ok_for_arm (unsorted_offsets[order[0]]) 9101 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 
5 : 0; 9102 } 9103 9104 const char * 9105 emit_ldm_seq (rtx *operands, int nops) 9106 { 9107 int regs[4]; 9108 int base_reg; 9109 HOST_WIDE_INT offset; 9110 char buf[100]; 9111 int i; 9112 9113 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset)) 9114 { 9115 case 1: 9116 strcpy (buf, "ldm%(ia%)\t"); 9117 break; 9118 9119 case 2: 9120 strcpy (buf, "ldm%(ib%)\t"); 9121 break; 9122 9123 case 3: 9124 strcpy (buf, "ldm%(da%)\t"); 9125 break; 9126 9127 case 4: 9128 strcpy (buf, "ldm%(db%)\t"); 9129 break; 9130 9131 case 5: 9132 if (offset >= 0) 9133 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX, 9134 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg], 9135 (long) offset); 9136 else 9137 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX, 9138 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg], 9139 (long) -offset); 9140 output_asm_insn (buf, operands); 9141 base_reg = regs[0]; 9142 strcpy (buf, "ldm%(ia%)\t"); 9143 break; 9144 9145 default: 9146 gcc_unreachable (); 9147 } 9148 9149 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX, 9150 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]); 9151 9152 for (i = 1; i < nops; i++) 9153 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX, 9154 reg_names[regs[i]]); 9155 9156 strcat (buf, "}\t%@ phole ldm"); 9157 9158 output_asm_insn (buf, operands); 9159 return ""; 9160 } 9161 9162 int 9163 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base, 9164 HOST_WIDE_INT * load_offset) 9165 { 9166 int unsorted_regs[4]; 9167 HOST_WIDE_INT unsorted_offsets[4]; 9168 int order[4]; 9169 int base_reg = -1; 9170 int i; 9171 9172 /* Can only handle 2, 3, or 4 insns at present, though could be easily 9173 extended if required. */ 9174 gcc_assert (nops >= 2 && nops <= 4); 9175 9176 memset (order, 0, 4 * sizeof (int)); 9177 9178 /* Loop over the operands and check that the memory references are 9179 suitable (i.e. immediate offsets from the same base register). At 9180 the same time, extract the target register, and the memory 9181 offsets. */ 9182 for (i = 0; i < nops; i++) 9183 { 9184 rtx reg; 9185 rtx offset; 9186 9187 /* Convert a subreg of a mem into the mem itself. */ 9188 if (GET_CODE (operands[nops + i]) == SUBREG) 9189 operands[nops + i] = alter_subreg (operands + (nops + i)); 9190 9191 gcc_assert (GET_CODE (operands[nops + i]) == MEM); 9192 9193 /* Don't reorder volatile memory references; it doesn't seem worth 9194 looking for the case where the order is ok anyway. */ 9195 if (MEM_VOLATILE_P (operands[nops + i])) 9196 return 0; 9197 9198 offset = const0_rtx; 9199 9200 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG 9201 || (GET_CODE (reg) == SUBREG 9202 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) 9203 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS 9204 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0)) 9205 == REG) 9206 || (GET_CODE (reg) == SUBREG 9207 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) 9208 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) 9209 == CONST_INT))) 9210 { 9211 if (i == 0) 9212 { 9213 base_reg = REGNO (reg); 9214 unsorted_regs[0] = (GET_CODE (operands[i]) == REG 9215 ? REGNO (operands[i]) 9216 : REGNO (SUBREG_REG (operands[i]))); 9217 order[0] = 0; 9218 } 9219 else 9220 { 9221 if (base_reg != (int) REGNO (reg)) 9222 /* Not addressed from the same base register. */ 9223 return 0; 9224 9225 unsorted_regs[i] = (GET_CODE (operands[i]) == REG 9226 ? 
REGNO (operands[i]) 9227 : REGNO (SUBREG_REG (operands[i]))); 9228 if (unsorted_regs[i] < unsorted_regs[order[0]]) 9229 order[0] = i; 9230 } 9231 9232 /* If it isn't an integer register, then we can't do this. */ 9233 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14) 9234 return 0; 9235 9236 unsorted_offsets[i] = INTVAL (offset); 9237 } 9238 else 9239 /* Not a suitable memory address. */ 9240 return 0; 9241 } 9242 9243 /* All the useful information has now been extracted from the 9244 operands into unsorted_regs and unsorted_offsets; additionally, 9245 order[0] has been set to the lowest numbered register in the 9246 list. Sort the registers into order, and check that the memory 9247 offsets are ascending and adjacent. */ 9248 9249 for (i = 1; i < nops; i++) 9250 { 9251 int j; 9252 9253 order[i] = order[i - 1]; 9254 for (j = 0; j < nops; j++) 9255 if (unsorted_regs[j] > unsorted_regs[order[i - 1]] 9256 && (order[i] == order[i - 1] 9257 || unsorted_regs[j] < unsorted_regs[order[i]])) 9258 order[i] = j; 9259 9260 /* Have we found a suitable register? if not, one must be used more 9261 than once. */ 9262 if (order[i] == order[i - 1]) 9263 return 0; 9264 9265 /* Is the memory address adjacent and ascending? */ 9266 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4) 9267 return 0; 9268 } 9269 9270 if (base) 9271 { 9272 *base = base_reg; 9273 9274 for (i = 0; i < nops; i++) 9275 regs[i] = unsorted_regs[order[i]]; 9276 9277 *load_offset = unsorted_offsets[order[0]]; 9278 } 9279 9280 if (unsorted_offsets[order[0]] == 0) 9281 return 1; /* stmia */ 9282 9283 if (unsorted_offsets[order[0]] == 4) 9284 return 2; /* stmib */ 9285 9286 if (unsorted_offsets[order[nops - 1]] == 0) 9287 return 3; /* stmda */ 9288 9289 if (unsorted_offsets[order[nops - 1]] == -4) 9290 return 4; /* stmdb */ 9291 9292 return 0; 9293 } 9294 9295 const char * 9296 emit_stm_seq (rtx *operands, int nops) 9297 { 9298 int regs[4]; 9299 int base_reg; 9300 HOST_WIDE_INT offset; 9301 char buf[100]; 9302 int i; 9303 9304 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset)) 9305 { 9306 case 1: 9307 strcpy (buf, "stm%(ia%)\t"); 9308 break; 9309 9310 case 2: 9311 strcpy (buf, "stm%(ib%)\t"); 9312 break; 9313 9314 case 3: 9315 strcpy (buf, "stm%(da%)\t"); 9316 break; 9317 9318 case 4: 9319 strcpy (buf, "stm%(db%)\t"); 9320 break; 9321 9322 default: 9323 gcc_unreachable (); 9324 } 9325 9326 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX, 9327 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]); 9328 9329 for (i = 1; i < nops; i++) 9330 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX, 9331 reg_names[regs[i]]); 9332 9333 strcat (buf, "}\t%@ phole stm"); 9334 9335 output_asm_insn (buf, operands); 9336 return ""; 9337 } 9338 9339 /* Routines for use in generating RTL. */ 9340 9341 rtx 9342 arm_gen_load_multiple (int base_regno, int count, rtx from, int up, 9343 int write_back, rtx basemem, HOST_WIDE_INT *offsetp) 9344 { 9345 HOST_WIDE_INT offset = *offsetp; 9346 int i = 0, j; 9347 rtx result; 9348 int sign = up ? 1 : -1; 9349 rtx mem, addr; 9350 9351 /* XScale has load-store double instructions, but they have stricter 9352 alignment requirements than load-store multiple, so we cannot 9353 use them. 9354 9355 For XScale ldm requires 2 + NREGS cycles to complete and blocks 9356 the pipeline until completion. 9357 9358 NREGS CYCLES 9359 1 3 9360 2 4 9361 3 5 9362 4 6 9363 9364 An ldr instruction takes 1-3 cycles, but does not block the 9365 pipeline. 
9366 9367 NREGS CYCLES 9368 1 1-3 9369 2 2-6 9370 3 3-9 9371 4 4-12 9372 9373 Best case ldr will always win. However, the more ldr instructions 9374 we issue, the less likely we are to be able to schedule them well. 9375 Using ldr instructions also increases code size. 9376 9377 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm 9378 for counts of 3 or 4 regs. */ 9379 if (arm_tune_xscale && count <= 2 && ! optimize_size) 9380 { 9381 rtx seq; 9382 9383 start_sequence (); 9384 9385 for (i = 0; i < count; i++) 9386 { 9387 addr = plus_constant (from, i * 4 * sign); 9388 mem = adjust_automodify_address (basemem, SImode, addr, offset); 9389 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem); 9390 offset += 4 * sign; 9391 } 9392 9393 if (write_back) 9394 { 9395 emit_move_insn (from, plus_constant (from, count * 4 * sign)); 9396 *offsetp = offset; 9397 } 9398 9399 seq = get_insns (); 9400 end_sequence (); 9401 9402 return seq; 9403 } 9404 9405 result = gen_rtx_PARALLEL (VOIDmode, 9406 rtvec_alloc (count + (write_back ? 1 : 0))); 9407 if (write_back) 9408 { 9409 XVECEXP (result, 0, 0) 9410 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign)); 9411 i = 1; 9412 count++; 9413 } 9414 9415 for (j = 0; i < count; i++, j++) 9416 { 9417 addr = plus_constant (from, j * 4 * sign); 9418 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset); 9419 XVECEXP (result, 0, i) 9420 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem); 9421 offset += 4 * sign; 9422 } 9423 9424 if (write_back) 9425 *offsetp = offset; 9426 9427 return result; 9428 } 9429 9430 rtx 9431 arm_gen_store_multiple (int base_regno, int count, rtx to, int up, 9432 int write_back, rtx basemem, HOST_WIDE_INT *offsetp) 9433 { 9434 HOST_WIDE_INT offset = *offsetp; 9435 int i = 0, j; 9436 rtx result; 9437 int sign = up ? 1 : -1; 9438 rtx mem, addr; 9439 9440 /* See arm_gen_load_multiple for discussion of 9441 the pros/cons of ldm/stm usage for XScale. */ 9442 if (arm_tune_xscale && count <= 2 && ! optimize_size) 9443 { 9444 rtx seq; 9445 9446 start_sequence (); 9447 9448 for (i = 0; i < count; i++) 9449 { 9450 addr = plus_constant (to, i * 4 * sign); 9451 mem = adjust_automodify_address (basemem, SImode, addr, offset); 9452 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i)); 9453 offset += 4 * sign; 9454 } 9455 9456 if (write_back) 9457 { 9458 emit_move_insn (to, plus_constant (to, count * 4 * sign)); 9459 *offsetp = offset; 9460 } 9461 9462 seq = get_insns (); 9463 end_sequence (); 9464 9465 return seq; 9466 } 9467 9468 result = gen_rtx_PARALLEL (VOIDmode, 9469 rtvec_alloc (count + (write_back ? 
1 : 0))); 9470 if (write_back) 9471 { 9472 XVECEXP (result, 0, 0) 9473 = gen_rtx_SET (VOIDmode, to, 9474 plus_constant (to, count * 4 * sign)); 9475 i = 1; 9476 count++; 9477 } 9478 9479 for (j = 0; i < count; i++, j++) 9480 { 9481 addr = plus_constant (to, j * 4 * sign); 9482 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset); 9483 XVECEXP (result, 0, i) 9484 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j)); 9485 offset += 4 * sign; 9486 } 9487 9488 if (write_back) 9489 *offsetp = offset; 9490 9491 return result; 9492 } 9493 9494 int 9495 arm_gen_movmemqi (rtx *operands) 9496 { 9497 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes; 9498 HOST_WIDE_INT srcoffset, dstoffset; 9499 int i; 9500 rtx src, dst, srcbase, dstbase; 9501 rtx part_bytes_reg = NULL; 9502 rtx mem; 9503 9504 if (GET_CODE (operands[2]) != CONST_INT 9505 || GET_CODE (operands[3]) != CONST_INT 9506 || INTVAL (operands[2]) > 64 9507 || INTVAL (operands[3]) & 3) 9508 return 0; 9509 9510 dstbase = operands[0]; 9511 srcbase = operands[1]; 9512 9513 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0)); 9514 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0)); 9515 9516 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2])); 9517 out_words_to_go = INTVAL (operands[2]) / 4; 9518 last_bytes = INTVAL (operands[2]) & 3; 9519 dstoffset = srcoffset = 0; 9520 9521 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0) 9522 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3); 9523 9524 for (i = 0; in_words_to_go >= 2; i+=4) 9525 { 9526 if (in_words_to_go > 4) 9527 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE, 9528 srcbase, &srcoffset)); 9529 else 9530 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE, 9531 FALSE, srcbase, &srcoffset)); 9532 9533 if (out_words_to_go) 9534 { 9535 if (out_words_to_go > 4) 9536 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE, 9537 dstbase, &dstoffset)); 9538 else if (out_words_to_go != 1) 9539 emit_insn (arm_gen_store_multiple (0, out_words_to_go, 9540 dst, TRUE, 9541 (last_bytes == 0 9542 ? FALSE : TRUE), 9543 dstbase, &dstoffset)); 9544 else 9545 { 9546 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset); 9547 emit_move_insn (mem, gen_rtx_REG (SImode, 0)); 9548 if (last_bytes != 0) 9549 { 9550 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4))); 9551 dstoffset += 4; 9552 } 9553 } 9554 } 9555 9556 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4; 9557 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4; 9558 } 9559 9560 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */ 9561 if (out_words_to_go) 9562 { 9563 rtx sreg; 9564 9565 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset); 9566 sreg = copy_to_reg (mem); 9567 9568 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset); 9569 emit_move_insn (mem, sreg); 9570 in_words_to_go--; 9571 9572 gcc_assert (!in_words_to_go); /* Sanity check */ 9573 } 9574 9575 if (in_words_to_go) 9576 { 9577 gcc_assert (in_words_to_go > 0); 9578 9579 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset); 9580 part_bytes_reg = copy_to_mode_reg (SImode, mem); 9581 } 9582 9583 gcc_assert (!last_bytes || part_bytes_reg); 9584 9585 if (BYTES_BIG_ENDIAN && last_bytes) 9586 { 9587 rtx tmp = gen_reg_rtx (SImode); 9588 9589 /* The bytes we want are in the top end of the word. 
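   Shift them down to the bottom of the word, then store them one at a time,
   starting with the last destination byte and working backwards, shifting
   right by eight bits after each store.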
*/ 9590 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, 9591 GEN_INT (8 * (4 - last_bytes)))); 9592 part_bytes_reg = tmp; 9593 9594 while (last_bytes) 9595 { 9596 mem = adjust_automodify_address (dstbase, QImode, 9597 plus_constant (dst, last_bytes - 1), 9598 dstoffset + last_bytes - 1); 9599 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg)); 9600 9601 if (--last_bytes) 9602 { 9603 tmp = gen_reg_rtx (SImode); 9604 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8))); 9605 part_bytes_reg = tmp; 9606 } 9607 } 9608 9609 } 9610 else 9611 { 9612 if (last_bytes > 1) 9613 { 9614 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset); 9615 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg)); 9616 last_bytes -= 2; 9617 if (last_bytes) 9618 { 9619 rtx tmp = gen_reg_rtx (SImode); 9620 emit_insn (gen_addsi3 (dst, dst, const2_rtx)); 9621 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16))); 9622 part_bytes_reg = tmp; 9623 dstoffset += 2; 9624 } 9625 } 9626 9627 if (last_bytes) 9628 { 9629 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset); 9630 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg)); 9631 } 9632 } 9633 9634 return 1; 9635 } 9636 9637 /* Select a dominance comparison mode if possible for a test of the general 9638 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms. 9639 COND_OR == DOM_CC_X_AND_Y => (X && Y) 9640 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y) 9641 COND_OR == DOM_CC_X_OR_Y => (X || Y) 9642 In all cases OP will be either EQ or NE, but we don't need to know which 9643 here. If we are unable to support a dominance comparison we return 9644 CC mode. This will then fail to match for the RTL expressions that 9645 generate this call. */ 9646 enum machine_mode 9647 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or) 9648 { 9649 enum rtx_code cond1, cond2; 9650 int swapped = 0; 9651 9652 /* Currently we will probably get the wrong result if the individual 9653 comparisons are not simple. This also ensures that it is safe to 9654 reverse a comparison if necessary. */ 9655 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1)) 9656 != CCmode) 9657 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1)) 9658 != CCmode)) 9659 return CCmode; 9660 9661 /* The if_then_else variant of this tests the second condition if the 9662 first passes, but is true if the first fails. Reverse the first 9663 condition to get a true "inclusive-or" expression. */ 9664 if (cond_or == DOM_CC_NX_OR_Y) 9665 cond1 = reverse_condition (cond1); 9666 9667 /* If the comparisons are not equal, and one doesn't dominate the other, 9668 then we can't do this. 
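     (Here "dominates" means that, for the same pair of compared values,
     the first condition being true implies that the second is also true.
     For example EQ dominates LE, which is why the EQ/LE pairing in the
     switch below can be represented by CC_DLEmode.)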
*/ 9669 if (cond1 != cond2 9670 && !comparison_dominates_p (cond1, cond2) 9671 && (swapped = 1, !comparison_dominates_p (cond2, cond1))) 9672 return CCmode; 9673 9674 if (swapped) 9675 { 9676 enum rtx_code temp = cond1; 9677 cond1 = cond2; 9678 cond2 = temp; 9679 } 9680 9681 switch (cond1) 9682 { 9683 case EQ: 9684 if (cond_or == DOM_CC_X_AND_Y) 9685 return CC_DEQmode; 9686 9687 switch (cond2) 9688 { 9689 case EQ: return CC_DEQmode; 9690 case LE: return CC_DLEmode; 9691 case LEU: return CC_DLEUmode; 9692 case GE: return CC_DGEmode; 9693 case GEU: return CC_DGEUmode; 9694 default: gcc_unreachable (); 9695 } 9696 9697 case LT: 9698 if (cond_or == DOM_CC_X_AND_Y) 9699 return CC_DLTmode; 9700 9701 switch (cond2) 9702 { 9703 case LT: 9704 return CC_DLTmode; 9705 case LE: 9706 return CC_DLEmode; 9707 case NE: 9708 return CC_DNEmode; 9709 default: 9710 gcc_unreachable (); 9711 } 9712 9713 case GT: 9714 if (cond_or == DOM_CC_X_AND_Y) 9715 return CC_DGTmode; 9716 9717 switch (cond2) 9718 { 9719 case GT: 9720 return CC_DGTmode; 9721 case GE: 9722 return CC_DGEmode; 9723 case NE: 9724 return CC_DNEmode; 9725 default: 9726 gcc_unreachable (); 9727 } 9728 9729 case LTU: 9730 if (cond_or == DOM_CC_X_AND_Y) 9731 return CC_DLTUmode; 9732 9733 switch (cond2) 9734 { 9735 case LTU: 9736 return CC_DLTUmode; 9737 case LEU: 9738 return CC_DLEUmode; 9739 case NE: 9740 return CC_DNEmode; 9741 default: 9742 gcc_unreachable (); 9743 } 9744 9745 case GTU: 9746 if (cond_or == DOM_CC_X_AND_Y) 9747 return CC_DGTUmode; 9748 9749 switch (cond2) 9750 { 9751 case GTU: 9752 return CC_DGTUmode; 9753 case GEU: 9754 return CC_DGEUmode; 9755 case NE: 9756 return CC_DNEmode; 9757 default: 9758 gcc_unreachable (); 9759 } 9760 9761 /* The remaining cases only occur when both comparisons are the 9762 same. */ 9763 case NE: 9764 gcc_assert (cond1 == cond2); 9765 return CC_DNEmode; 9766 9767 case LE: 9768 gcc_assert (cond1 == cond2); 9769 return CC_DLEmode; 9770 9771 case GE: 9772 gcc_assert (cond1 == cond2); 9773 return CC_DGEmode; 9774 9775 case LEU: 9776 gcc_assert (cond1 == cond2); 9777 return CC_DLEUmode; 9778 9779 case GEU: 9780 gcc_assert (cond1 == cond2); 9781 return CC_DGEUmode; 9782 9783 default: 9784 gcc_unreachable (); 9785 } 9786 } 9787 9788 enum machine_mode 9789 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) 9790 { 9791 /* All floating point compares return CCFP if it is an equality 9792 comparison, and CCFPE otherwise. */ 9793 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 9794 { 9795 switch (op) 9796 { 9797 case EQ: 9798 case NE: 9799 case UNORDERED: 9800 case ORDERED: 9801 case UNLT: 9802 case UNLE: 9803 case UNGT: 9804 case UNGE: 9805 case UNEQ: 9806 case LTGT: 9807 return CCFPmode; 9808 9809 case LT: 9810 case LE: 9811 case GT: 9812 case GE: 9813 if (TARGET_HARD_FLOAT && TARGET_MAVERICK) 9814 return CCFPmode; 9815 return CCFPEmode; 9816 9817 default: 9818 gcc_unreachable (); 9819 } 9820 } 9821 9822 /* A compare with a shifted operand. Because of canonicalization, the 9823 comparison will have to be swapped when we emit the assembler. */ 9824 if (GET_MODE (y) == SImode 9825 && (REG_P (y) || (GET_CODE (y) == SUBREG)) 9826 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT 9827 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE 9828 || GET_CODE (x) == ROTATERT)) 9829 return CC_SWPmode; 9830 9831 /* This operation is performed swapped, but since we only rely on the Z 9832 flag we don't need an additional mode. 
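     For example, (eq (neg x) y) holds exactly when x + y == 0, so a
     compare-by-addition (CMN-style) instruction whose result we only
     inspect through the Z flag is sufficient, whichever order the
     operands end up in.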
*/ 9833 if (GET_MODE (y) == SImode 9834 && (REG_P (y) || (GET_CODE (y) == SUBREG)) 9835 && GET_CODE (x) == NEG 9836 && (op == EQ || op == NE)) 9837 return CC_Zmode; 9838 9839 /* This is a special case that is used by combine to allow a 9840 comparison of a shifted byte load to be split into a zero-extend 9841 followed by a comparison of the shifted integer (only valid for 9842 equalities and unsigned inequalities). */ 9843 if (GET_MODE (x) == SImode 9844 && GET_CODE (x) == ASHIFT 9845 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24 9846 && GET_CODE (XEXP (x, 0)) == SUBREG 9847 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM 9848 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode 9849 && (op == EQ || op == NE 9850 || op == GEU || op == GTU || op == LTU || op == LEU) 9851 && GET_CODE (y) == CONST_INT) 9852 return CC_Zmode; 9853 9854 /* A construct for a conditional compare, if the false arm contains 9855 0, then both conditions must be true, otherwise either condition 9856 must be true. Not all conditions are possible, so CCmode is 9857 returned if it can't be done. */ 9858 if (GET_CODE (x) == IF_THEN_ELSE 9859 && (XEXP (x, 2) == const0_rtx 9860 || XEXP (x, 2) == const1_rtx) 9861 && COMPARISON_P (XEXP (x, 0)) 9862 && COMPARISON_P (XEXP (x, 1))) 9863 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), 9864 INTVAL (XEXP (x, 2))); 9865 9866 /* Alternate canonicalizations of the above. These are somewhat cleaner. */ 9867 if (GET_CODE (x) == AND 9868 && COMPARISON_P (XEXP (x, 0)) 9869 && COMPARISON_P (XEXP (x, 1))) 9870 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), 9871 DOM_CC_X_AND_Y); 9872 9873 if (GET_CODE (x) == IOR 9874 && COMPARISON_P (XEXP (x, 0)) 9875 && COMPARISON_P (XEXP (x, 1))) 9876 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), 9877 DOM_CC_X_OR_Y); 9878 9879 /* An operation (on Thumb) where we want to test for a single bit. 9880 This is done by shifting that bit up into the top bit of a 9881 scratch register; we can then branch on the sign bit. */ 9882 if (TARGET_THUMB1 9883 && GET_MODE (x) == SImode 9884 && (op == EQ || op == NE) 9885 && GET_CODE (x) == ZERO_EXTRACT 9886 && XEXP (x, 1) == const1_rtx) 9887 return CC_Nmode; 9888 9889 /* An operation that sets the condition codes as a side-effect, the 9890 V flag is not set correctly, so we can only use comparisons where 9891 this doesn't matter. (For LT and GE we can use "mi" and "pl" 9892 instead.) */ 9893 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */ 9894 if (GET_MODE (x) == SImode 9895 && y == const0_rtx 9896 && (op == EQ || op == NE || op == LT || op == GE) 9897 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS 9898 || GET_CODE (x) == AND || GET_CODE (x) == IOR 9899 || GET_CODE (x) == XOR || GET_CODE (x) == MULT 9900 || GET_CODE (x) == NOT || GET_CODE (x) == NEG 9901 || GET_CODE (x) == LSHIFTRT 9902 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT 9903 || GET_CODE (x) == ROTATERT 9904 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT))) 9905 return CC_NOOVmode; 9906 9907 if (GET_MODE (x) == QImode && (op == EQ || op == NE)) 9908 return CC_Zmode; 9909 9910 if (GET_MODE (x) == SImode && (op == LTU || op == GEU) 9911 && GET_CODE (x) == PLUS 9912 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) 9913 return CC_Cmode; 9914 9915 return CCmode; 9916 } 9917 9918 /* X and Y are two things to compare using CODE. Emit the compare insn and 9919 return the rtx for register 0 in the proper mode. 
FP means this is a 9920 floating point compare: I don't think that it is needed on the arm. */ 9921 rtx 9922 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y) 9923 { 9924 enum machine_mode mode = SELECT_CC_MODE (code, x, y); 9925 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); 9926 9927 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); 9928 9929 return cc_reg; 9930 } 9931 9932 /* Generate a sequence of insns that will generate the correct return 9933 address mask depending on the physical architecture that the program 9934 is running on. */ 9935 rtx 9936 arm_gen_return_addr_mask (void) 9937 { 9938 rtx reg = gen_reg_rtx (Pmode); 9939 9940 emit_insn (gen_return_addr_mask (reg)); 9941 return reg; 9942 } 9943 9944 void 9945 arm_reload_in_hi (rtx *operands) 9946 { 9947 rtx ref = operands[1]; 9948 rtx base, scratch; 9949 HOST_WIDE_INT offset = 0; 9950 9951 if (GET_CODE (ref) == SUBREG) 9952 { 9953 offset = SUBREG_BYTE (ref); 9954 ref = SUBREG_REG (ref); 9955 } 9956 9957 if (GET_CODE (ref) == REG) 9958 { 9959 /* We have a pseudo which has been spilt onto the stack; there 9960 are two cases here: the first where there is a simple 9961 stack-slot replacement and a second where the stack-slot is 9962 out of range, or is used as a subreg. */ 9963 if (reg_equiv_mem[REGNO (ref)]) 9964 { 9965 ref = reg_equiv_mem[REGNO (ref)]; 9966 base = find_replacement (&XEXP (ref, 0)); 9967 } 9968 else 9969 /* The slot is out of range, or was dressed up in a SUBREG. */ 9970 base = reg_equiv_address[REGNO (ref)]; 9971 } 9972 else 9973 base = find_replacement (&XEXP (ref, 0)); 9974 9975 /* Handle the case where the address is too complex to be offset by 1. */ 9976 if (GET_CODE (base) == MINUS 9977 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT)) 9978 { 9979 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); 9980 9981 emit_set_insn (base_plus, base); 9982 base = base_plus; 9983 } 9984 else if (GET_CODE (base) == PLUS) 9985 { 9986 /* The addend must be CONST_INT, or we would have dealt with it above. */ 9987 HOST_WIDE_INT hi, lo; 9988 9989 offset += INTVAL (XEXP (base, 1)); 9990 base = XEXP (base, 0); 9991 9992 /* Rework the address into a legal sequence of insns. */ 9993 /* Valid range for lo is -4095 -> 4095 */ 9994 lo = (offset >= 0 9995 ? (offset & 0xfff) 9996 : -((-offset) & 0xfff)); 9997 9998 /* Corner case, if lo is the max offset then we would be out of range 9999 once we have added the additional 1 below, so bump the msb into the 10000 pre-loading insn(s). */ 10001 if (lo == 4095) 10002 lo &= 0x7ff; 10003 10004 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff) 10005 ^ (HOST_WIDE_INT) 0x80000000) 10006 - (HOST_WIDE_INT) 0x80000000); 10007 10008 gcc_assert (hi + lo == offset); 10009 10010 if (hi != 0) 10011 { 10012 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); 10013 10014 /* Get the base address; addsi3 knows how to handle constants 10015 that require more than one insn. */ 10016 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi))); 10017 base = base_plus; 10018 offset = lo; 10019 } 10020 } 10021 10022 /* Operands[2] may overlap operands[0] (though it won't overlap 10023 operands[1]), that's why we asked for a DImode reg -- so we can 10024 use the bit that does not overlap. 
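     For example, if operands[2] is the register pair r4:r5 and
     operands[0] happens to be r4, the code below picks r5 as the
     scratch; when there is no overlap it simply uses r4.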
*/ 10025 if (REGNO (operands[2]) == REGNO (operands[0])) 10026 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); 10027 else 10028 scratch = gen_rtx_REG (SImode, REGNO (operands[2])); 10029 10030 emit_insn (gen_zero_extendqisi2 (scratch, 10031 gen_rtx_MEM (QImode, 10032 plus_constant (base, 10033 offset)))); 10034 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0), 10035 gen_rtx_MEM (QImode, 10036 plus_constant (base, 10037 offset + 1)))); 10038 if (!BYTES_BIG_ENDIAN) 10039 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0), 10040 gen_rtx_IOR (SImode, 10041 gen_rtx_ASHIFT 10042 (SImode, 10043 gen_rtx_SUBREG (SImode, operands[0], 0), 10044 GEN_INT (8)), 10045 scratch)); 10046 else 10047 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0), 10048 gen_rtx_IOR (SImode, 10049 gen_rtx_ASHIFT (SImode, scratch, 10050 GEN_INT (8)), 10051 gen_rtx_SUBREG (SImode, operands[0], 0))); 10052 } 10053 10054 /* Handle storing a half-word to memory during reload by synthesizing as two 10055 byte stores. Take care not to clobber the input values until after we 10056 have moved them somewhere safe. This code assumes that if the DImode 10057 scratch in operands[2] overlaps either the input value or output address 10058 in some way, then that value must die in this insn (we absolutely need 10059 two scratch registers for some corner cases). */ 10060 void 10061 arm_reload_out_hi (rtx *operands) 10062 { 10063 rtx ref = operands[0]; 10064 rtx outval = operands[1]; 10065 rtx base, scratch; 10066 HOST_WIDE_INT offset = 0; 10067 10068 if (GET_CODE (ref) == SUBREG) 10069 { 10070 offset = SUBREG_BYTE (ref); 10071 ref = SUBREG_REG (ref); 10072 } 10073 10074 if (GET_CODE (ref) == REG) 10075 { 10076 /* We have a pseudo which has been spilt onto the stack; there 10077 are two cases here: the first where there is a simple 10078 stack-slot replacement and a second where the stack-slot is 10079 out of range, or is used as a subreg. */ 10080 if (reg_equiv_mem[REGNO (ref)]) 10081 { 10082 ref = reg_equiv_mem[REGNO (ref)]; 10083 base = find_replacement (&XEXP (ref, 0)); 10084 } 10085 else 10086 /* The slot is out of range, or was dressed up in a SUBREG. */ 10087 base = reg_equiv_address[REGNO (ref)]; 10088 } 10089 else 10090 base = find_replacement (&XEXP (ref, 0)); 10091 10092 scratch = gen_rtx_REG (SImode, REGNO (operands[2])); 10093 10094 /* Handle the case where the address is too complex to be offset by 1. */ 10095 if (GET_CODE (base) == MINUS 10096 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT)) 10097 { 10098 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); 10099 10100 /* Be careful not to destroy OUTVAL. */ 10101 if (reg_overlap_mentioned_p (base_plus, outval)) 10102 { 10103 /* Updating base_plus might destroy outval, see if we can 10104 swap the scratch and base_plus. */ 10105 if (!reg_overlap_mentioned_p (scratch, outval)) 10106 { 10107 rtx tmp = scratch; 10108 scratch = base_plus; 10109 base_plus = tmp; 10110 } 10111 else 10112 { 10113 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2])); 10114 10115 /* Be conservative and copy OUTVAL into the scratch now, 10116 this should only be necessary if outval is a subreg 10117 of something larger than a word. */ 10118 /* XXX Might this clobber base? I can't see how it can, 10119 since scratch is known to overlap with OUTVAL, and 10120 must be wider than a word. 
*/ 10121 emit_insn (gen_movhi (scratch_hi, outval)); 10122 outval = scratch_hi; 10123 } 10124 } 10125 10126 emit_set_insn (base_plus, base); 10127 base = base_plus; 10128 } 10129 else if (GET_CODE (base) == PLUS) 10130 { 10131 /* The addend must be CONST_INT, or we would have dealt with it above. */ 10132 HOST_WIDE_INT hi, lo; 10133 10134 offset += INTVAL (XEXP (base, 1)); 10135 base = XEXP (base, 0); 10136 10137 /* Rework the address into a legal sequence of insns. */ 10138 /* Valid range for lo is -4095 -> 4095 */ 10139 lo = (offset >= 0 10140 ? (offset & 0xfff) 10141 : -((-offset) & 0xfff)); 10142 10143 /* Corner case, if lo is the max offset then we would be out of range 10144 once we have added the additional 1 below, so bump the msb into the 10145 pre-loading insn(s). */ 10146 if (lo == 4095) 10147 lo &= 0x7ff; 10148 10149 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff) 10150 ^ (HOST_WIDE_INT) 0x80000000) 10151 - (HOST_WIDE_INT) 0x80000000); 10152 10153 gcc_assert (hi + lo == offset); 10154 10155 if (hi != 0) 10156 { 10157 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); 10158 10159 /* Be careful not to destroy OUTVAL. */ 10160 if (reg_overlap_mentioned_p (base_plus, outval)) 10161 { 10162 /* Updating base_plus might destroy outval, see if we 10163 can swap the scratch and base_plus. */ 10164 if (!reg_overlap_mentioned_p (scratch, outval)) 10165 { 10166 rtx tmp = scratch; 10167 scratch = base_plus; 10168 base_plus = tmp; 10169 } 10170 else 10171 { 10172 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2])); 10173 10174 /* Be conservative and copy outval into scratch now, 10175 this should only be necessary if outval is a 10176 subreg of something larger than a word. */ 10177 /* XXX Might this clobber base? I can't see how it 10178 can, since scratch is known to overlap with 10179 outval. */ 10180 emit_insn (gen_movhi (scratch_hi, outval)); 10181 outval = scratch_hi; 10182 } 10183 } 10184 10185 /* Get the base address; addsi3 knows how to handle constants 10186 that require more than one insn. */ 10187 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi))); 10188 base = base_plus; 10189 offset = lo; 10190 } 10191 } 10192 10193 if (BYTES_BIG_ENDIAN) 10194 { 10195 emit_insn (gen_movqi (gen_rtx_MEM (QImode, 10196 plus_constant (base, offset + 1)), 10197 gen_lowpart (QImode, outval))); 10198 emit_insn (gen_lshrsi3 (scratch, 10199 gen_rtx_SUBREG (SImode, outval, 0), 10200 GEN_INT (8))); 10201 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)), 10202 gen_lowpart (QImode, scratch))); 10203 } 10204 else 10205 { 10206 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)), 10207 gen_lowpart (QImode, outval))); 10208 emit_insn (gen_lshrsi3 (scratch, 10209 gen_rtx_SUBREG (SImode, outval, 0), 10210 GEN_INT (8))); 10211 emit_insn (gen_movqi (gen_rtx_MEM (QImode, 10212 plus_constant (base, offset + 1)), 10213 gen_lowpart (QImode, scratch))); 10214 } 10215 } 10216 10217 /* Return true if a type must be passed in memory. For AAPCS, small aggregates 10218 (padded to the size of a word) should be passed in a register. */ 10219 10220 static bool 10221 arm_must_pass_in_stack (enum machine_mode mode, const_tree type) 10222 { 10223 if (TARGET_AAPCS_BASED) 10224 return must_pass_in_stack_var_size (mode, type); 10225 else 10226 return must_pass_in_stack_var_size_or_pad (mode, type); 10227 } 10228 10229 10230 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE). 10231 Return true if an argument passed on the stack should be padded upwards, 10232 i.e. 
if the least-significant byte has useful data. 10233 For legacy APCS ABIs we use the default. For AAPCS based ABIs small 10234 aggregate types are placed in the lowest memory address. */ 10235 10236 bool 10237 arm_pad_arg_upward (enum machine_mode mode, const_tree type) 10238 { 10239 if (!TARGET_AAPCS_BASED) 10240 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward; 10241 10242 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type)) 10243 return false; 10244 10245 return true; 10246 } 10247 10248 10249 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST). 10250 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant 10251 byte of the register has useful data, and return the opposite if the 10252 most significant byte does. 10253 For AAPCS, small aggregates and small complex types are always padded 10254 upwards. */ 10255 10256 bool 10257 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, 10258 tree type, int first ATTRIBUTE_UNUSED) 10259 { 10260 if (TARGET_AAPCS_BASED 10261 && BYTES_BIG_ENDIAN 10262 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE) 10263 && int_size_in_bytes (type) <= 4) 10264 return true; 10265 10266 /* Otherwise, use default padding. */ 10267 return !BYTES_BIG_ENDIAN; 10268 } 10269 10270 10271 /* Print a symbolic form of X to the debug file, F. */ 10272 static void 10273 arm_print_value (FILE *f, rtx x) 10274 { 10275 switch (GET_CODE (x)) 10276 { 10277 case CONST_INT: 10278 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x)); 10279 return; 10280 10281 case CONST_DOUBLE: 10282 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3)); 10283 return; 10284 10285 case CONST_VECTOR: 10286 { 10287 int i; 10288 10289 fprintf (f, "<"); 10290 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++) 10291 { 10292 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i))); 10293 if (i < (CONST_VECTOR_NUNITS (x) - 1)) 10294 fputc (',', f); 10295 } 10296 fprintf (f, ">"); 10297 } 10298 return; 10299 10300 case CONST_STRING: 10301 fprintf (f, "\"%s\"", XSTR (x, 0)); 10302 return; 10303 10304 case SYMBOL_REF: 10305 fprintf (f, "`%s'", XSTR (x, 0)); 10306 return; 10307 10308 case LABEL_REF: 10309 fprintf (f, "L%d", INSN_UID (XEXP (x, 0))); 10310 return; 10311 10312 case CONST: 10313 arm_print_value (f, XEXP (x, 0)); 10314 return; 10315 10316 case PLUS: 10317 arm_print_value (f, XEXP (x, 0)); 10318 fprintf (f, "+"); 10319 arm_print_value (f, XEXP (x, 1)); 10320 return; 10321 10322 case PC: 10323 fprintf (f, "pc"); 10324 return; 10325 10326 default: 10327 fprintf (f, "????"); 10328 return; 10329 } 10330 } 10331 10332 /* Routines for manipulation of the constant pool. */ 10333 10334 /* Arm instructions cannot load a large constant directly into a 10335 register; they have to come from a pc relative load. The constant 10336 must therefore be placed in the addressable range of the pc 10337 relative load. Depending on the precise pc relative load 10338 instruction the range is somewhere between 256 bytes and 4k. This 10339 means that we often have to dump a constant inside a function, and 10340 generate code to branch around it. 10341 10342 It is important to minimize this, since the branches will slow 10343 things down and make the code larger. 10344 10345 Normally we can hide the table after an existing unconditional 10346 branch so that there is no interruption of the flow, but in the 10347 worst case the code looks like this: 10348 10349 ldr rn, L1 10350 ... 10351 b L2 10352 align 10353 L1: .long value 10354 L2: 10355 ... 
10356 10357 ldr rn, L3 10358 ... 10359 b L4 10360 align 10361 L3: .long value 10362 L4: 10363 ... 10364 10365 We fix this by performing a scan after scheduling, which notices 10366 which instructions need to have their operands fetched from the 10367 constant table and builds the table. 10368 10369 The algorithm starts by building a table of all the constants that 10370 need fixing up and all the natural barriers in the function (places 10371 where a constant table can be dropped without breaking the flow). 10372 For each fixup we note how far the pc-relative replacement will be 10373 able to reach and the offset of the instruction into the function. 10374 10375 Having built the table we then group the fixes together to form 10376 tables that are as large as possible (subject to addressing 10377 constraints) and emit each table of constants after the last 10378 barrier that is within range of all the instructions in the group. 10379 If a group does not contain a barrier, then we forcibly create one 10380 by inserting a jump instruction into the flow. Once the table has 10381 been inserted, the insns are then modified to reference the 10382 relevant entry in the pool. 10383 10384 Possible enhancements to the algorithm (not implemented) are: 10385 10386 1) For some processors and object formats, there may be benefit in 10387 aligning the pools to the start of cache lines; this alignment 10388 would need to be taken into account when calculating addressability 10389 of a pool. */ 10390 10391 /* These typedefs are located at the start of this file, so that 10392 they can be used in the prototypes there. This comment is to 10393 remind readers of that fact so that the following structures 10394 can be understood more easily. 10395 10396 typedef struct minipool_node Mnode; 10397 typedef struct minipool_fixup Mfix; */ 10398 10399 struct minipool_node 10400 { 10401 /* Doubly linked chain of entries. */ 10402 Mnode * next; 10403 Mnode * prev; 10404 /* The maximum offset into the code that this entry can be placed. While 10405 pushing fixes for forward references, all entries are sorted in order 10406 of increasing max_address. */ 10407 HOST_WIDE_INT max_address; 10408 /* Similarly for an entry inserted for a backwards ref. */ 10409 HOST_WIDE_INT min_address; 10410 /* The number of fixes referencing this entry. This can become zero 10411 if we "unpush" an entry. In this case we ignore the entry when we 10412 come to emit the code. */ 10413 int refcount; 10414 /* The offset from the start of the minipool. */ 10415 HOST_WIDE_INT offset; 10416 /* The value in table. */ 10417 rtx value; 10418 /* The mode of value. */ 10419 enum machine_mode mode; 10420 /* The size of the value. With iWMMXt enabled 10421 sizes > 4 also imply an alignment of 8-bytes. */ 10422 int fix_size; 10423 }; 10424 10425 struct minipool_fixup 10426 { 10427 Mfix * next; 10428 rtx insn; 10429 HOST_WIDE_INT address; 10430 rtx * loc; 10431 enum machine_mode mode; 10432 int fix_size; 10433 rtx value; 10434 Mnode * minipool; 10435 HOST_WIDE_INT forwards; 10436 HOST_WIDE_INT backwards; 10437 }; 10438 10439 /* Fixes less than a word need padding out to a word boundary. */ 10440 #define MINIPOOL_FIX_SIZE(mode) \ 10441 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4) 10442 10443 static Mnode * minipool_vector_head; 10444 static Mnode * minipool_vector_tail; 10445 static rtx minipool_vector_label; 10446 static int minipool_pad; 10447 10448 /* The linked list of all minipool fixes required for this function. 
*/ 10449 Mfix * minipool_fix_head; 10450 Mfix * minipool_fix_tail; 10451 /* The fix entry for the current minipool, once it has been placed. */ 10452 Mfix * minipool_barrier; 10453 10454 /* Determines if INSN is the start of a jump table. Returns the end 10455 of the TABLE or NULL_RTX. */ 10456 static rtx 10457 is_jump_table (rtx insn) 10458 { 10459 rtx table; 10460 10461 if (GET_CODE (insn) == JUMP_INSN 10462 && JUMP_LABEL (insn) != NULL 10463 && ((table = next_real_insn (JUMP_LABEL (insn))) 10464 == next_real_insn (insn)) 10465 && table != NULL 10466 && GET_CODE (table) == JUMP_INSN 10467 && (GET_CODE (PATTERN (table)) == ADDR_VEC 10468 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC)) 10469 return table; 10470 10471 return NULL_RTX; 10472 } 10473 10474 #ifndef JUMP_TABLES_IN_TEXT_SECTION 10475 #define JUMP_TABLES_IN_TEXT_SECTION 0 10476 #endif 10477 10478 static HOST_WIDE_INT 10479 get_jump_table_size (rtx insn) 10480 { 10481 /* ADDR_VECs only take room if read-only data does into the text 10482 section. */ 10483 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section) 10484 { 10485 rtx body = PATTERN (insn); 10486 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0; 10487 HOST_WIDE_INT size; 10488 HOST_WIDE_INT modesize; 10489 10490 modesize = GET_MODE_SIZE (GET_MODE (body)); 10491 size = modesize * XVECLEN (body, elt); 10492 switch (modesize) 10493 { 10494 case 1: 10495 /* Round up size of TBB table to a halfword boundary. */ 10496 size = (size + 1) & ~(HOST_WIDE_INT)1; 10497 break; 10498 case 2: 10499 /* No padding necessary for TBH. */ 10500 break; 10501 case 4: 10502 /* Add two bytes for alignment on Thumb. */ 10503 if (TARGET_THUMB) 10504 size += 2; 10505 break; 10506 default: 10507 gcc_unreachable (); 10508 } 10509 return size; 10510 } 10511 10512 return 0; 10513 } 10514 10515 /* Move a minipool fix MP from its current location to before MAX_MP. 10516 If MAX_MP is NULL, then MP doesn't need moving, but the addressing 10517 constraints may need updating. */ 10518 static Mnode * 10519 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp, 10520 HOST_WIDE_INT max_address) 10521 { 10522 /* The code below assumes these are different. */ 10523 gcc_assert (mp != max_mp); 10524 10525 if (max_mp == NULL) 10526 { 10527 if (max_address < mp->max_address) 10528 mp->max_address = max_address; 10529 } 10530 else 10531 { 10532 if (max_address > max_mp->max_address - mp->fix_size) 10533 mp->max_address = max_mp->max_address - mp->fix_size; 10534 else 10535 mp->max_address = max_address; 10536 10537 /* Unlink MP from its current position. Since max_mp is non-null, 10538 mp->prev must be non-null. */ 10539 mp->prev->next = mp->next; 10540 if (mp->next != NULL) 10541 mp->next->prev = mp->prev; 10542 else 10543 minipool_vector_tail = mp->prev; 10544 10545 /* Re-insert it before MAX_MP. */ 10546 mp->next = max_mp; 10547 mp->prev = max_mp->prev; 10548 max_mp->prev = mp; 10549 10550 if (mp->prev != NULL) 10551 mp->prev->next = mp; 10552 else 10553 minipool_vector_head = mp; 10554 } 10555 10556 /* Save the new entry. */ 10557 max_mp = mp; 10558 10559 /* Scan over the preceding entries and adjust their addresses as 10560 required. */ 10561 while (mp->prev != NULL 10562 && mp->prev->max_address > mp->max_address - mp->prev->fix_size) 10563 { 10564 mp->prev->max_address = mp->max_address - mp->prev->fix_size; 10565 mp = mp->prev; 10566 } 10567 10568 return max_mp; 10569 } 10570 10571 /* Add a constant to the minipool for a forward reference. 
Returns the 10572 node added or NULL if the constant will not fit in this pool. */ 10573 static Mnode * 10574 add_minipool_forward_ref (Mfix *fix) 10575 { 10576 /* If set, max_mp is the first pool_entry that has a lower 10577 constraint than the one we are trying to add. */ 10578 Mnode * max_mp = NULL; 10579 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad; 10580 Mnode * mp; 10581 10582 /* If the minipool starts before the end of FIX->INSN then this FIX 10583 can not be placed into the current pool. Furthermore, adding the 10584 new constant pool entry may cause the pool to start FIX_SIZE bytes 10585 earlier. */ 10586 if (minipool_vector_head && 10587 (fix->address + get_attr_length (fix->insn) 10588 >= minipool_vector_head->max_address - fix->fix_size)) 10589 return NULL; 10590 10591 /* Scan the pool to see if a constant with the same value has 10592 already been added. While we are doing this, also note the 10593 location where we must insert the constant if it doesn't already 10594 exist. */ 10595 for (mp = minipool_vector_head; mp != NULL; mp = mp->next) 10596 { 10597 if (GET_CODE (fix->value) == GET_CODE (mp->value) 10598 && fix->mode == mp->mode 10599 && (GET_CODE (fix->value) != CODE_LABEL 10600 || (CODE_LABEL_NUMBER (fix->value) 10601 == CODE_LABEL_NUMBER (mp->value))) 10602 && rtx_equal_p (fix->value, mp->value)) 10603 { 10604 /* More than one fix references this entry. */ 10605 mp->refcount++; 10606 return move_minipool_fix_forward_ref (mp, max_mp, max_address); 10607 } 10608 10609 /* Note the insertion point if necessary. */ 10610 if (max_mp == NULL 10611 && mp->max_address > max_address) 10612 max_mp = mp; 10613 10614 /* If we are inserting an 8-bytes aligned quantity and 10615 we have not already found an insertion point, then 10616 make sure that all such 8-byte aligned quantities are 10617 placed at the start of the pool. */ 10618 if (ARM_DOUBLEWORD_ALIGN 10619 && max_mp == NULL 10620 && fix->fix_size >= 8 10621 && mp->fix_size < 8) 10622 { 10623 max_mp = mp; 10624 max_address = mp->max_address; 10625 } 10626 } 10627 10628 /* The value is not currently in the minipool, so we need to create 10629 a new entry for it. If MAX_MP is NULL, the entry will be put on 10630 the end of the list since the placement is less constrained than 10631 any existing entry. Otherwise, we insert the new fix before 10632 MAX_MP and, if necessary, adjust the constraints on the other 10633 entries. */ 10634 mp = XNEW (Mnode); 10635 mp->fix_size = fix->fix_size; 10636 mp->mode = fix->mode; 10637 mp->value = fix->value; 10638 mp->refcount = 1; 10639 /* Not yet required for a backwards ref. */ 10640 mp->min_address = -65536; 10641 10642 if (max_mp == NULL) 10643 { 10644 mp->max_address = max_address; 10645 mp->next = NULL; 10646 mp->prev = minipool_vector_tail; 10647 10648 if (mp->prev == NULL) 10649 { 10650 minipool_vector_head = mp; 10651 minipool_vector_label = gen_label_rtx (); 10652 } 10653 else 10654 mp->prev->next = mp; 10655 10656 minipool_vector_tail = mp; 10657 } 10658 else 10659 { 10660 if (max_address > max_mp->max_address - mp->fix_size) 10661 mp->max_address = max_mp->max_address - mp->fix_size; 10662 else 10663 mp->max_address = max_address; 10664 10665 mp->next = max_mp; 10666 mp->prev = max_mp->prev; 10667 max_mp->prev = mp; 10668 if (mp->prev != NULL) 10669 mp->prev->next = mp; 10670 else 10671 minipool_vector_head = mp; 10672 } 10673 10674 /* Save the new entry. 
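     The loop below then walks back over the earlier entries, capping
     each one's max_address so that it still fits in front of the entries
     that follow it: e.g. if this entry must be placed by address A and
     the previous entry is 4 bytes, the previous entry's limit becomes
     A - 4.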
*/ 10675 max_mp = mp; 10676 10677 /* Scan over the preceding entries and adjust their addresses as 10678 required. */ 10679 while (mp->prev != NULL 10680 && mp->prev->max_address > mp->max_address - mp->prev->fix_size) 10681 { 10682 mp->prev->max_address = mp->max_address - mp->prev->fix_size; 10683 mp = mp->prev; 10684 } 10685 10686 return max_mp; 10687 } 10688 10689 static Mnode * 10690 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp, 10691 HOST_WIDE_INT min_address) 10692 { 10693 HOST_WIDE_INT offset; 10694 10695 /* The code below assumes these are different. */ 10696 gcc_assert (mp != min_mp); 10697 10698 if (min_mp == NULL) 10699 { 10700 if (min_address > mp->min_address) 10701 mp->min_address = min_address; 10702 } 10703 else 10704 { 10705 /* We will adjust this below if it is too loose. */ 10706 mp->min_address = min_address; 10707 10708 /* Unlink MP from its current position. Since min_mp is non-null, 10709 mp->next must be non-null. */ 10710 mp->next->prev = mp->prev; 10711 if (mp->prev != NULL) 10712 mp->prev->next = mp->next; 10713 else 10714 minipool_vector_head = mp->next; 10715 10716 /* Reinsert it after MIN_MP. */ 10717 mp->prev = min_mp; 10718 mp->next = min_mp->next; 10719 min_mp->next = mp; 10720 if (mp->next != NULL) 10721 mp->next->prev = mp; 10722 else 10723 minipool_vector_tail = mp; 10724 } 10725 10726 min_mp = mp; 10727 10728 offset = 0; 10729 for (mp = minipool_vector_head; mp != NULL; mp = mp->next) 10730 { 10731 mp->offset = offset; 10732 if (mp->refcount > 0) 10733 offset += mp->fix_size; 10734 10735 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size) 10736 mp->next->min_address = mp->min_address + mp->fix_size; 10737 } 10738 10739 return min_mp; 10740 } 10741 10742 /* Add a constant to the minipool for a backward reference. Returns the 10743 node added or NULL if the constant will not fit in this pool. 10744 10745 Note that the code for insertion for a backwards reference can be 10746 somewhat confusing because the calculated offsets for each fix do 10747 not take into account the size of the pool (which is still under 10748 construction. */ 10749 static Mnode * 10750 add_minipool_backward_ref (Mfix *fix) 10751 { 10752 /* If set, min_mp is the last pool_entry that has a lower constraint 10753 than the one we are trying to add. */ 10754 Mnode *min_mp = NULL; 10755 /* This can be negative, since it is only a constraint. */ 10756 HOST_WIDE_INT min_address = fix->address - fix->backwards; 10757 Mnode *mp; 10758 10759 /* If we can't reach the current pool from this insn, or if we can't 10760 insert this entry at the end of the pool without pushing other 10761 fixes out of range, then we don't try. This ensures that we 10762 can't fail later on. */ 10763 if (min_address >= minipool_barrier->address 10764 || (minipool_vector_tail->min_address + fix->fix_size 10765 >= minipool_barrier->address)) 10766 return NULL; 10767 10768 /* Scan the pool to see if a constant with the same value has 10769 already been added. While we are doing this, also note the 10770 location where we must insert the constant if it doesn't already 10771 exist. 
*/ 10772 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev) 10773 { 10774 if (GET_CODE (fix->value) == GET_CODE (mp->value) 10775 && fix->mode == mp->mode 10776 && (GET_CODE (fix->value) != CODE_LABEL 10777 || (CODE_LABEL_NUMBER (fix->value) 10778 == CODE_LABEL_NUMBER (mp->value))) 10779 && rtx_equal_p (fix->value, mp->value) 10780 /* Check that there is enough slack to move this entry to the 10781 end of the table (this is conservative). */ 10782 && (mp->max_address 10783 > (minipool_barrier->address 10784 + minipool_vector_tail->offset 10785 + minipool_vector_tail->fix_size))) 10786 { 10787 mp->refcount++; 10788 return move_minipool_fix_backward_ref (mp, min_mp, min_address); 10789 } 10790 10791 if (min_mp != NULL) 10792 mp->min_address += fix->fix_size; 10793 else 10794 { 10795 /* Note the insertion point if necessary. */ 10796 if (mp->min_address < min_address) 10797 { 10798 /* For now, we do not allow the insertion of 8-byte alignment 10799 requiring nodes anywhere but at the start of the pool. */ 10800 if (ARM_DOUBLEWORD_ALIGN 10801 && fix->fix_size >= 8 && mp->fix_size < 8) 10802 return NULL; 10803 else 10804 min_mp = mp; 10805 } 10806 else if (mp->max_address 10807 < minipool_barrier->address + mp->offset + fix->fix_size) 10808 { 10809 /* Inserting before this entry would push the fix beyond 10810 its maximum address (which can happen if we have 10811 re-located a forwards fix); force the new fix to come 10812 after it. */ 10813 if (ARM_DOUBLEWORD_ALIGN 10814 && fix->fix_size >= 8 && mp->fix_size < 8) 10815 return NULL; 10816 else 10817 { 10818 min_mp = mp; 10819 min_address = mp->min_address + fix->fix_size; 10820 } 10821 } 10822 /* Do not insert a non-8-byte aligned quantity before 8-byte 10823 aligned quantities. */ 10824 else if (ARM_DOUBLEWORD_ALIGN 10825 && fix->fix_size < 8 10826 && mp->fix_size >= 8) 10827 { 10828 min_mp = mp; 10829 min_address = mp->min_address + fix->fix_size; 10830 } 10831 } 10832 } 10833 10834 /* We need to create a new entry. */ 10835 mp = XNEW (Mnode); 10836 mp->fix_size = fix->fix_size; 10837 mp->mode = fix->mode; 10838 mp->value = fix->value; 10839 mp->refcount = 1; 10840 mp->max_address = minipool_barrier->address + 65536; 10841 10842 mp->min_address = min_address; 10843 10844 if (min_mp == NULL) 10845 { 10846 mp->prev = NULL; 10847 mp->next = minipool_vector_head; 10848 10849 if (mp->next == NULL) 10850 { 10851 minipool_vector_tail = mp; 10852 minipool_vector_label = gen_label_rtx (); 10853 } 10854 else 10855 mp->next->prev = mp; 10856 10857 minipool_vector_head = mp; 10858 } 10859 else 10860 { 10861 mp->next = min_mp->next; 10862 mp->prev = min_mp; 10863 min_mp->next = mp; 10864 10865 if (mp->next != NULL) 10866 mp->next->prev = mp; 10867 else 10868 minipool_vector_tail = mp; 10869 } 10870 10871 /* Save the new entry. */ 10872 min_mp = mp; 10873 10874 if (mp->prev) 10875 mp = mp->prev; 10876 else 10877 mp->offset = 0; 10878 10879 /* Scan over the following entries and adjust their offsets. 
*/ 10880 while (mp->next != NULL) 10881 { 10882 if (mp->next->min_address < mp->min_address + mp->fix_size) 10883 mp->next->min_address = mp->min_address + mp->fix_size; 10884 10885 if (mp->refcount) 10886 mp->next->offset = mp->offset + mp->fix_size; 10887 else 10888 mp->next->offset = mp->offset; 10889 10890 mp = mp->next; 10891 } 10892 10893 return min_mp; 10894 } 10895 10896 static void 10897 assign_minipool_offsets (Mfix *barrier) 10898 { 10899 HOST_WIDE_INT offset = 0; 10900 Mnode *mp; 10901 10902 minipool_barrier = barrier; 10903 10904 for (mp = minipool_vector_head; mp != NULL; mp = mp->next) 10905 { 10906 mp->offset = offset; 10907 10908 if (mp->refcount > 0) 10909 offset += mp->fix_size; 10910 } 10911 } 10912 10913 /* Output the literal table */ 10914 static void 10915 dump_minipool (rtx scan) 10916 { 10917 Mnode * mp; 10918 Mnode * nmp; 10919 int align64 = 0; 10920 10921 if (ARM_DOUBLEWORD_ALIGN) 10922 for (mp = minipool_vector_head; mp != NULL; mp = mp->next) 10923 if (mp->refcount > 0 && mp->fix_size >= 8) 10924 { 10925 align64 = 1; 10926 break; 10927 } 10928 10929 if (dump_file) 10930 fprintf (dump_file, 10931 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n", 10932 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4); 10933 10934 scan = emit_label_after (gen_label_rtx (), scan); 10935 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan); 10936 scan = emit_label_after (minipool_vector_label, scan); 10937 10938 for (mp = minipool_vector_head; mp != NULL; mp = nmp) 10939 { 10940 if (mp->refcount > 0) 10941 { 10942 if (dump_file) 10943 { 10944 fprintf (dump_file, 10945 ";; Offset %u, min %ld, max %ld ", 10946 (unsigned) mp->offset, (unsigned long) mp->min_address, 10947 (unsigned long) mp->max_address); 10948 arm_print_value (dump_file, mp->value); 10949 fputc ('\n', dump_file); 10950 } 10951 10952 switch (mp->fix_size) 10953 { 10954 #ifdef HAVE_consttable_1 10955 case 1: 10956 scan = emit_insn_after (gen_consttable_1 (mp->value), scan); 10957 break; 10958 10959 #endif 10960 #ifdef HAVE_consttable_2 10961 case 2: 10962 scan = emit_insn_after (gen_consttable_2 (mp->value), scan); 10963 break; 10964 10965 #endif 10966 #ifdef HAVE_consttable_4 10967 case 4: 10968 scan = emit_insn_after (gen_consttable_4 (mp->value), scan); 10969 break; 10970 10971 #endif 10972 #ifdef HAVE_consttable_8 10973 case 8: 10974 scan = emit_insn_after (gen_consttable_8 (mp->value), scan); 10975 break; 10976 10977 #endif 10978 #ifdef HAVE_consttable_16 10979 case 16: 10980 scan = emit_insn_after (gen_consttable_16 (mp->value), scan); 10981 break; 10982 10983 #endif 10984 default: 10985 gcc_unreachable (); 10986 } 10987 } 10988 10989 nmp = mp->next; 10990 free (mp); 10991 } 10992 10993 minipool_vector_head = minipool_vector_tail = NULL; 10994 scan = emit_insn_after (gen_consttable_end (), scan); 10995 scan = emit_barrier_after (scan); 10996 } 10997 10998 /* Return the cost of forcibly inserting a barrier after INSN. */ 10999 static int 11000 arm_barrier_cost (rtx insn) 11001 { 11002 /* Basing the location of the pool on the loop depth is preferable, 11003 but at the moment, the basic block information seems to be 11004 corrupt by this stage of the compilation. 
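     As a rough worked example of the heuristic below: the base cost is
     50; an insn whose next non-note insn is a CODE_LABEL gets 20 knocked
     off and a JUMP_INSN a further 10, so a jump immediately before a
     label costs 20 while an ordinary insn mid-block costs 50, and
     create_fix_barrier keeps the candidate with the lowest cost.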
*/ 11005 int base_cost = 50; 11006 rtx next = next_nonnote_insn (insn); 11007 11008 if (next != NULL && GET_CODE (next) == CODE_LABEL) 11009 base_cost -= 20; 11010 11011 switch (GET_CODE (insn)) 11012 { 11013 case CODE_LABEL: 11014 /* It will always be better to place the table before the label, rather 11015 than after it. */ 11016 return 50; 11017 11018 case INSN: 11019 case CALL_INSN: 11020 return base_cost; 11021 11022 case JUMP_INSN: 11023 return base_cost - 10; 11024 11025 default: 11026 return base_cost + 10; 11027 } 11028 } 11029 11030 /* Find the best place in the insn stream in the range 11031 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier. 11032 Create the barrier by inserting a jump and add a new fix entry for 11033 it. */ 11034 static Mfix * 11035 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address) 11036 { 11037 HOST_WIDE_INT count = 0; 11038 rtx barrier; 11039 rtx from = fix->insn; 11040 /* The instruction after which we will insert the jump. */ 11041 rtx selected = NULL; 11042 int selected_cost; 11043 /* The address at which the jump instruction will be placed. */ 11044 HOST_WIDE_INT selected_address; 11045 Mfix * new_fix; 11046 HOST_WIDE_INT max_count = max_address - fix->address; 11047 rtx label = gen_label_rtx (); 11048 11049 selected_cost = arm_barrier_cost (from); 11050 selected_address = fix->address; 11051 11052 while (from && count < max_count) 11053 { 11054 rtx tmp; 11055 int new_cost; 11056 11057 /* This code shouldn't have been called if there was a natural barrier 11058 within range. */ 11059 gcc_assert (GET_CODE (from) != BARRIER); 11060 11061 /* Count the length of this insn. */ 11062 count += get_attr_length (from); 11063 11064 /* If there is a jump table, add its length. */ 11065 tmp = is_jump_table (from); 11066 if (tmp != NULL) 11067 { 11068 count += get_jump_table_size (tmp); 11069 11070 /* Jump tables aren't in a basic block, so base the cost on 11071 the dispatch insn. If we select this location, we will 11072 still put the pool after the table. */ 11073 new_cost = arm_barrier_cost (from); 11074 11075 if (count < max_count 11076 && (!selected || new_cost <= selected_cost)) 11077 { 11078 selected = tmp; 11079 selected_cost = new_cost; 11080 selected_address = fix->address + count; 11081 } 11082 11083 /* Continue after the dispatch table. */ 11084 from = NEXT_INSN (tmp); 11085 continue; 11086 } 11087 11088 new_cost = arm_barrier_cost (from); 11089 11090 if (count < max_count 11091 && (!selected || new_cost <= selected_cost)) 11092 { 11093 selected = from; 11094 selected_cost = new_cost; 11095 selected_address = fix->address + count; 11096 } 11097 11098 from = NEXT_INSN (from); 11099 } 11100 11101 /* Make sure that we found a place to insert the jump. */ 11102 gcc_assert (selected); 11103 11104 /* Create a new JUMP_INSN that branches around a barrier. */ 11105 from = emit_jump_insn_after (gen_jump (label), selected); 11106 JUMP_LABEL (from) = label; 11107 barrier = emit_barrier_after (from); 11108 emit_label_after (label, barrier); 11109 11110 /* Create a minipool barrier entry for the new barrier. */ 11111 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix)); 11112 new_fix->insn = barrier; 11113 new_fix->address = selected_address; 11114 new_fix->next = fix->next; 11115 fix->next = new_fix; 11116 11117 return new_fix; 11118 } 11119 11120 /* Record that there is a natural barrier in the insn stream at 11121 ADDRESS. 
*/ 11122 static void 11123 push_minipool_barrier (rtx insn, HOST_WIDE_INT address) 11124 { 11125 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix)); 11126 11127 fix->insn = insn; 11128 fix->address = address; 11129 11130 fix->next = NULL; 11131 if (minipool_fix_head != NULL) 11132 minipool_fix_tail->next = fix; 11133 else 11134 minipool_fix_head = fix; 11135 11136 minipool_fix_tail = fix; 11137 } 11138 11139 /* Record INSN, which will need fixing up to load a value from the 11140 minipool. ADDRESS is the offset of the insn since the start of the 11141 function; LOC is a pointer to the part of the insn which requires 11142 fixing; VALUE is the constant that must be loaded, which is of type 11143 MODE. */ 11144 static void 11145 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc, 11146 enum machine_mode mode, rtx value) 11147 { 11148 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix)); 11149 11150 fix->insn = insn; 11151 fix->address = address; 11152 fix->loc = loc; 11153 fix->mode = mode; 11154 fix->fix_size = MINIPOOL_FIX_SIZE (mode); 11155 fix->value = value; 11156 fix->forwards = get_attr_pool_range (insn); 11157 fix->backwards = get_attr_neg_pool_range (insn); 11158 fix->minipool = NULL; 11159 11160 /* If an insn doesn't have a range defined for it, then it isn't 11161 expecting to be reworked by this code. Better to stop now than 11162 to generate duff assembly code. */ 11163 gcc_assert (fix->forwards || fix->backwards); 11164 11165 /* If an entry requires 8-byte alignment then assume all constant pools 11166 require 4 bytes of padding. Trying to do this later on a per-pool 11167 basis is awkward because existing pool entries have to be modified. */ 11168 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8) 11169 minipool_pad = 4; 11170 11171 if (dump_file) 11172 { 11173 fprintf (dump_file, 11174 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ", 11175 GET_MODE_NAME (mode), 11176 INSN_UID (insn), (unsigned long) address, 11177 -1 * (long)fix->backwards, (long)fix->forwards); 11178 arm_print_value (dump_file, fix->value); 11179 fprintf (dump_file, "\n"); 11180 } 11181 11182 /* Add it to the chain of fixes. */ 11183 fix->next = NULL; 11184 11185 if (minipool_fix_head != NULL) 11186 minipool_fix_tail->next = fix; 11187 else 11188 minipool_fix_head = fix; 11189 11190 minipool_fix_tail = fix; 11191 } 11192 11193 /* Return the cost of synthesizing a 64-bit constant VAL inline. 11194 Returns the number of insns needed, or 99 if we don't know how to 11195 do it. */ 11196 int 11197 arm_const_double_inline_cost (rtx val) 11198 { 11199 rtx lowpart, highpart; 11200 enum machine_mode mode; 11201 11202 mode = GET_MODE (val); 11203 11204 if (mode == VOIDmode) 11205 mode = DImode; 11206 11207 gcc_assert (GET_MODE_SIZE (mode) == 8); 11208 11209 lowpart = gen_lowpart (SImode, val); 11210 highpart = gen_highpart_mode (SImode, mode, val); 11211 11212 gcc_assert (GET_CODE (lowpart) == CONST_INT); 11213 gcc_assert (GET_CODE (highpart) == CONST_INT); 11214 11215 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart), 11216 NULL_RTX, NULL_RTX, 0, 0) 11217 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart), 11218 NULL_RTX, NULL_RTX, 0, 0)); 11219 } 11220 11221 /* Return true if it is worthwhile to split a 64-bit constant into two 11222 32-bit operations. This is the case if optimizing for size, or 11223 if we have load delay slots, or if one 32-bit part can be done with 11224 a single data operation. 
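     For example, the DImode constant 0x000000ff00000001 has halves 0xff
     and 0x1, both of which satisfy const_ok_for_arm, so building it with
     two immediate moves is preferable to a literal-pool load even when
     not optimizing for size.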
*/ 11225 bool 11226 arm_const_double_by_parts (rtx val) 11227 { 11228 enum machine_mode mode = GET_MODE (val); 11229 rtx part; 11230 11231 if (optimize_size || arm_ld_sched) 11232 return true; 11233 11234 if (mode == VOIDmode) 11235 mode = DImode; 11236 11237 part = gen_highpart_mode (SImode, mode, val); 11238 11239 gcc_assert (GET_CODE (part) == CONST_INT); 11240 11241 if (const_ok_for_arm (INTVAL (part)) 11242 || const_ok_for_arm (~INTVAL (part))) 11243 return true; 11244 11245 part = gen_lowpart (SImode, val); 11246 11247 gcc_assert (GET_CODE (part) == CONST_INT); 11248 11249 if (const_ok_for_arm (INTVAL (part)) 11250 || const_ok_for_arm (~INTVAL (part))) 11251 return true; 11252 11253 return false; 11254 } 11255 11256 /* Scan INSN and note any of its operands that need fixing. 11257 If DO_PUSHES is false we do not actually push any of the fixups 11258 needed. The function returns TRUE if any fixups were needed/pushed. 11259 This is used by arm_memory_load_p() which needs to know about loads 11260 of constants that will be converted into minipool loads. */ 11261 static bool 11262 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes) 11263 { 11264 bool result = false; 11265 int opno; 11266 11267 extract_insn (insn); 11268 11269 if (!constrain_operands (1)) 11270 fatal_insn_not_found (insn); 11271 11272 if (recog_data.n_alternatives == 0) 11273 return false; 11274 11275 /* Fill in recog_op_alt with information about the constraints of 11276 this insn. */ 11277 preprocess_constraints (); 11278 11279 for (opno = 0; opno < recog_data.n_operands; opno++) 11280 { 11281 /* Things we need to fix can only occur in inputs. */ 11282 if (recog_data.operand_type[opno] != OP_IN) 11283 continue; 11284 11285 /* If this alternative is a memory reference, then any mention 11286 of constants in this alternative is really to fool reload 11287 into allowing us to accept one there. We need to fix them up 11288 now so that we output the right code. */ 11289 if (recog_op_alt[opno][which_alternative].memory_ok) 11290 { 11291 rtx op = recog_data.operand[opno]; 11292 11293 if (CONSTANT_P (op)) 11294 { 11295 if (do_pushes) 11296 push_minipool_fix (insn, address, recog_data.operand_loc[opno], 11297 recog_data.operand_mode[opno], op); 11298 result = true; 11299 } 11300 else if (GET_CODE (op) == MEM 11301 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF 11302 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0))) 11303 { 11304 if (do_pushes) 11305 { 11306 rtx cop = avoid_constant_pool_reference (op); 11307 11308 /* Casting the address of something to a mode narrower 11309 than a word can cause avoid_constant_pool_reference() 11310 to return the pool reference itself. That's no good to 11311 us here. Lets just hope that we can use the 11312 constant pool value directly. */ 11313 if (op == cop) 11314 cop = get_pool_constant (XEXP (op, 0)); 11315 11316 push_minipool_fix (insn, address, 11317 recog_data.operand_loc[opno], 11318 recog_data.operand_mode[opno], cop); 11319 } 11320 11321 result = true; 11322 } 11323 } 11324 } 11325 11326 return result; 11327 } 11328 11329 /* Gcc puts the pool in the wrong place for ARM, since we can only 11330 load addresses a limited distance around the pc. We do some 11331 special munging to move the constant pool values to the correct 11332 point in the code. 
*/ 11333 static void 11334 arm_reorg (void) 11335 { 11336 rtx insn; 11337 HOST_WIDE_INT address = 0; 11338 Mfix * fix; 11339 11340 minipool_fix_head = minipool_fix_tail = NULL; 11341 11342 /* The first insn must always be a note, or the code below won't 11343 scan it properly. */ 11344 insn = get_insns (); 11345 gcc_assert (GET_CODE (insn) == NOTE); 11346 minipool_pad = 0; 11347 11348 /* Scan all the insns and record the operands that will need fixing. */ 11349 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn)) 11350 { 11351 if (TARGET_CIRRUS_FIX_INVALID_INSNS 11352 && (arm_cirrus_insn_p (insn) 11353 || GET_CODE (insn) == JUMP_INSN 11354 || arm_memory_load_p (insn))) 11355 cirrus_reorg (insn); 11356 11357 if (GET_CODE (insn) == BARRIER) 11358 push_minipool_barrier (insn, address); 11359 else if (INSN_P (insn)) 11360 { 11361 rtx table; 11362 11363 note_invalid_constants (insn, address, true); 11364 address += get_attr_length (insn); 11365 11366 /* If the insn is a vector jump, add the size of the table 11367 and skip the table. */ 11368 if ((table = is_jump_table (insn)) != NULL) 11369 { 11370 address += get_jump_table_size (table); 11371 insn = table; 11372 } 11373 } 11374 } 11375 11376 fix = minipool_fix_head; 11377 11378 /* Now scan the fixups and perform the required changes. */ 11379 while (fix) 11380 { 11381 Mfix * ftmp; 11382 Mfix * fdel; 11383 Mfix * last_added_fix; 11384 Mfix * last_barrier = NULL; 11385 Mfix * this_fix; 11386 11387 /* Skip any further barriers before the next fix. */ 11388 while (fix && GET_CODE (fix->insn) == BARRIER) 11389 fix = fix->next; 11390 11391 /* No more fixes. */ 11392 if (fix == NULL) 11393 break; 11394 11395 last_added_fix = NULL; 11396 11397 for (ftmp = fix; ftmp; ftmp = ftmp->next) 11398 { 11399 if (GET_CODE (ftmp->insn) == BARRIER) 11400 { 11401 if (ftmp->address >= minipool_vector_head->max_address) 11402 break; 11403 11404 last_barrier = ftmp; 11405 } 11406 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL) 11407 break; 11408 11409 last_added_fix = ftmp; /* Keep track of the last fix added. */ 11410 } 11411 11412 /* If we found a barrier, drop back to that; any fixes that we 11413 could have reached but come after the barrier will now go in 11414 the next mini-pool. */ 11415 if (last_barrier != NULL) 11416 { 11417 /* Reduce the refcount for those fixes that won't go into this 11418 pool after all. */ 11419 for (fdel = last_barrier->next; 11420 fdel && fdel != ftmp; 11421 fdel = fdel->next) 11422 { 11423 fdel->minipool->refcount--; 11424 fdel->minipool = NULL; 11425 } 11426 11427 ftmp = last_barrier; 11428 } 11429 else 11430 { 11431 /* ftmp is first fix that we can't fit into this pool and 11432 there no natural barriers that we could use. Insert a 11433 new barrier in the code somewhere between the previous 11434 fix and this one, and arrange to jump around it. */ 11435 HOST_WIDE_INT max_address; 11436 11437 /* The last item on the list of fixes must be a barrier, so 11438 we can never run off the end of the list of fixes without 11439 last_barrier being set. */ 11440 gcc_assert (ftmp); 11441 11442 max_address = minipool_vector_head->max_address; 11443 /* Check that there isn't another fix that is in range that 11444 we couldn't fit into this pool because the pool was 11445 already too large: we need to put the pool before such an 11446 instruction. The pool itself may come just after the 11447 fix because create_fix_barrier also allows space for a 11448 jump instruction. 
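     For example, if FTMP sits at address 1000 but the pool head could
     legally be placed as late as address 2000, the range passed to
     create_fix_barrier is capped at 1001, forcing the new barrier (and
     hence the pool) to be placed around FTMP rather than anywhere up to
     2000.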
*/ 11449 if (ftmp->address < max_address) 11450 max_address = ftmp->address + 1; 11451 11452 last_barrier = create_fix_barrier (last_added_fix, max_address); 11453 } 11454 11455 assign_minipool_offsets (last_barrier); 11456 11457 while (ftmp) 11458 { 11459 if (GET_CODE (ftmp->insn) != BARRIER 11460 && ((ftmp->minipool = add_minipool_backward_ref (ftmp)) 11461 == NULL)) 11462 break; 11463 11464 ftmp = ftmp->next; 11465 } 11466 11467 /* Scan over the fixes we have identified for this pool, fixing them 11468 up and adding the constants to the pool itself. */ 11469 for (this_fix = fix; this_fix && ftmp != this_fix; 11470 this_fix = this_fix->next) 11471 if (GET_CODE (this_fix->insn) != BARRIER) 11472 { 11473 rtx addr 11474 = plus_constant (gen_rtx_LABEL_REF (VOIDmode, 11475 minipool_vector_label), 11476 this_fix->minipool->offset); 11477 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr); 11478 } 11479 11480 dump_minipool (last_barrier->insn); 11481 fix = ftmp; 11482 } 11483 11484 /* From now on we must synthesize any constants that we can't handle 11485 directly. This can happen if the RTL gets split during final 11486 instruction generation. */ 11487 after_arm_reorg = 1; 11488 11489 /* Free the minipool memory. */ 11490 obstack_free (&minipool_obstack, minipool_startobj); 11491 } 11492 11493 /* Routines to output assembly language. */ 11494 11495 /* If the rtx is the correct value then return the string of the number. 11496 In this way we can ensure that valid double constants are generated even 11497 when cross compiling. */ 11498 const char * 11499 fp_immediate_constant (rtx x) 11500 { 11501 REAL_VALUE_TYPE r; 11502 int i; 11503 11504 if (!fp_consts_inited) 11505 init_fp_table (); 11506 11507 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 11508 for (i = 0; i < 8; i++) 11509 if (REAL_VALUES_EQUAL (r, values_fp[i])) 11510 return strings_fp[i]; 11511 11512 gcc_unreachable (); 11513 } 11514 11515 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */ 11516 static const char * 11517 fp_const_from_val (REAL_VALUE_TYPE *r) 11518 { 11519 int i; 11520 11521 if (!fp_consts_inited) 11522 init_fp_table (); 11523 11524 for (i = 0; i < 8; i++) 11525 if (REAL_VALUES_EQUAL (*r, values_fp[i])) 11526 return strings_fp[i]; 11527 11528 gcc_unreachable (); 11529 } 11530 11531 /* Output the operands of a LDM/STM instruction to STREAM. 11532 MASK is the ARM register set mask of which only bits 0-15 are important. 11533 REG is the base register, either the frame pointer or the stack pointer, 11534 INSTR is the possibly suffixed load or store instruction. 11535 RFE is nonzero if the instruction should also copy spsr to cpsr. */ 11536 11537 static void 11538 print_multi_reg (FILE *stream, const char *instr, unsigned reg, 11539 unsigned long mask, int rfe) 11540 { 11541 unsigned i; 11542 bool not_first = FALSE; 11543 11544 gcc_assert (!rfe || (mask & (1 << PC_REGNUM))); 11545 fputc ('\t', stream); 11546 asm_fprintf (stream, instr, reg); 11547 fputc ('{', stream); 11548 11549 for (i = 0; i <= LAST_ARM_REGNUM; i++) 11550 if (mask & (1 << i)) 11551 { 11552 if (not_first) 11553 fprintf (stream, ", "); 11554 11555 asm_fprintf (stream, "%r", i); 11556 not_first = TRUE; 11557 } 11558 11559 if (rfe) 11560 fprintf (stream, "}^\n"); 11561 else 11562 fprintf (stream, "}\n"); 11563 } 11564 11565 11566 /* Output a FLDMD instruction to STREAM. 11567 BASE if the register containing the address. 11568 REG and COUNT specify the register range. 11569 Extra registers may be added to avoid hardware bugs. 
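For instance, a call with BASE = SP, REG = 8 and COUNT = 3 emits roughly

	fldmfdd	sp!, {d8, d9, d10}

(register numbers purely illustrative).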
11570 11571 We output FLDMD even for ARMv5 VFP implementations. Although 11572 FLDMD is technically not supported until ARMv6, it is believed 11573 that all VFP implementations support its use in this context. */ 11574 11575 static void 11576 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count) 11577 { 11578 int i; 11579 11580 /* Workaround ARM10 VFPr1 bug. */ 11581 if (count == 2 && !arm_arch6) 11582 { 11583 if (reg == 15) 11584 reg--; 11585 count++; 11586 } 11587 11588 /* FLDMD may not load more than 16 doubleword registers at a time. Split the 11589 load into multiple parts if we have to handle more than 16 registers. */ 11590 if (count > 16) 11591 { 11592 vfp_output_fldmd (stream, base, reg, 16); 11593 vfp_output_fldmd (stream, base, reg + 16, count - 16); 11594 return; 11595 } 11596 11597 fputc ('\t', stream); 11598 asm_fprintf (stream, "fldmfdd\t%r!, {", base); 11599 11600 for (i = reg; i < reg + count; i++) 11601 { 11602 if (i > reg) 11603 fputs (", ", stream); 11604 asm_fprintf (stream, "d%d", i); 11605 } 11606 fputs ("}\n", stream); 11607 11608 } 11609 11610 11611 /* Output the assembly for a store multiple. */ 11612 11613 const char * 11614 vfp_output_fstmd (rtx * operands) 11615 { 11616 char pattern[100]; 11617 int p; 11618 int base; 11619 int i; 11620 11621 strcpy (pattern, "fstmfdd\t%m0!, {%P1"); 11622 p = strlen (pattern); 11623 11624 gcc_assert (GET_CODE (operands[1]) == REG); 11625 11626 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2; 11627 for (i = 1; i < XVECLEN (operands[2], 0); i++) 11628 { 11629 p += sprintf (&pattern[p], ", d%d", base + i); 11630 } 11631 strcpy (&pattern[p], "}"); 11632 11633 output_asm_insn (pattern, operands); 11634 return ""; 11635 } 11636 11637 11638 /* Emit RTL to save block of VFP register pairs to the stack. Returns the 11639 number of bytes pushed. */ 11640 11641 static int 11642 vfp_emit_fstmd (int base_reg, int count) 11643 { 11644 rtx par; 11645 rtx dwarf; 11646 rtx tmp, reg; 11647 int i; 11648 11649 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two 11650 register pairs are stored by a store multiple insn. We avoid this 11651 by pushing an extra pair. */ 11652 if (count == 2 && !arm_arch6) 11653 { 11654 if (base_reg == LAST_VFP_REGNUM - 3) 11655 base_reg -= 2; 11656 count++; 11657 } 11658 11659 /* FSTMD may not store more than 16 doubleword registers at once. Split 11660 larger stores into multiple parts (up to a maximum of two, in 11661 practice). */ 11662 if (count > 16) 11663 { 11664 int saved; 11665 /* NOTE: base_reg is an internal register number, so each D register 11666 counts as 2. 
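Hence the recursive call below steps over the first 16 D registers by adding 32 to base_reg.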
*/ 11667 saved = vfp_emit_fstmd (base_reg + 32, count - 16); 11668 saved += vfp_emit_fstmd (base_reg, 16); 11669 return saved; 11670 } 11671 11672 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); 11673 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1)); 11674 11675 reg = gen_rtx_REG (DFmode, base_reg); 11676 base_reg += 2; 11677 11678 XVECEXP (par, 0, 0) 11679 = gen_rtx_SET (VOIDmode, 11680 gen_frame_mem 11681 (BLKmode, 11682 gen_rtx_PRE_MODIFY (Pmode, 11683 stack_pointer_rtx, 11684 plus_constant 11685 (stack_pointer_rtx, 11686 - (count * 8))) 11687 ), 11688 gen_rtx_UNSPEC (BLKmode, 11689 gen_rtvec (1, reg), 11690 UNSPEC_PUSH_MULT)); 11691 11692 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, 11693 plus_constant (stack_pointer_rtx, -(count * 8))); 11694 RTX_FRAME_RELATED_P (tmp) = 1; 11695 XVECEXP (dwarf, 0, 0) = tmp; 11696 11697 tmp = gen_rtx_SET (VOIDmode, 11698 gen_frame_mem (DFmode, stack_pointer_rtx), 11699 reg); 11700 RTX_FRAME_RELATED_P (tmp) = 1; 11701 XVECEXP (dwarf, 0, 1) = tmp; 11702 11703 for (i = 1; i < count; i++) 11704 { 11705 reg = gen_rtx_REG (DFmode, base_reg); 11706 base_reg += 2; 11707 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg); 11708 11709 tmp = gen_rtx_SET (VOIDmode, 11710 gen_frame_mem (DFmode, 11711 plus_constant (stack_pointer_rtx, 11712 i * 8)), 11713 reg); 11714 RTX_FRAME_RELATED_P (tmp) = 1; 11715 XVECEXP (dwarf, 0, i + 1) = tmp; 11716 } 11717 11718 par = emit_insn (par); 11719 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); 11720 RTX_FRAME_RELATED_P (par) = 1; 11721 11722 return count * 8; 11723 } 11724 11725 /* Emit a call instruction with pattern PAT. ADDR is the address of 11726 the call target. */ 11727 11728 void 11729 arm_emit_call_insn (rtx pat, rtx addr) 11730 { 11731 rtx insn; 11732 11733 insn = emit_call_insn (pat); 11734 11735 /* The PIC register is live on entry to VxWorks PIC PLT entries. 11736 If the call might use such an entry, add a use of the PIC register 11737 to the instruction's CALL_INSN_FUNCTION_USAGE. */ 11738 if (TARGET_VXWORKS_RTP 11739 && flag_pic 11740 && GET_CODE (addr) == SYMBOL_REF 11741 && (SYMBOL_REF_DECL (addr) 11742 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr)) 11743 : !SYMBOL_REF_LOCAL_P (addr))) 11744 { 11745 require_pic_register (); 11746 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg); 11747 } 11748 } 11749 11750 /* Output a 'call' insn. */ 11751 const char * 11752 output_call (rtx *operands) 11753 { 11754 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */ 11755 11756 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */ 11757 if (REGNO (operands[0]) == LR_REGNUM) 11758 { 11759 operands[0] = gen_rtx_REG (SImode, IP_REGNUM); 11760 output_asm_insn ("mov%?\t%0, %|lr", operands); 11761 } 11762 11763 output_asm_insn ("mov%?\t%|lr, %|pc", operands); 11764 11765 if (TARGET_INTERWORK || arm_arch4t) 11766 output_asm_insn ("bx%?\t%0", operands); 11767 else 11768 output_asm_insn ("mov%?\t%|pc, %0", operands); 11769 11770 return ""; 11771 } 11772 11773 /* Output a 'call' insn that is a reference in memory. This is 11774 disabled for ARMv5 and we prefer a blx instead because otherwise 11775 there's a significant performance overhead. 
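In the simplest (non-interworking) case the sequence emitted below is roughly

	mov	lr, pc
	ldr	pc, <target mem>

and the mov must come first: reading PC yields the address two instructions ahead, so LR ends up pointing just past the ldr, which is the desired return address.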
*/ 11776 const char * 11777 output_call_mem (rtx *operands) 11778 { 11779 gcc_assert (!arm_arch5); 11780 if (TARGET_INTERWORK) 11781 { 11782 output_asm_insn ("ldr%?\t%|ip, %0", operands); 11783 output_asm_insn ("mov%?\t%|lr, %|pc", operands); 11784 output_asm_insn ("bx%?\t%|ip", operands); 11785 } 11786 else if (regno_use_in (LR_REGNUM, operands[0])) 11787 { 11788 /* LR is used in the memory address. We load the address in the 11789 first instruction. It's safe to use IP as the target of the 11790 load since the call will kill it anyway. */ 11791 output_asm_insn ("ldr%?\t%|ip, %0", operands); 11792 output_asm_insn ("mov%?\t%|lr, %|pc", operands); 11793 if (arm_arch4t) 11794 output_asm_insn ("bx%?\t%|ip", operands); 11795 else 11796 output_asm_insn ("mov%?\t%|pc, %|ip", operands); 11797 } 11798 else 11799 { 11800 output_asm_insn ("mov%?\t%|lr, %|pc", operands); 11801 output_asm_insn ("ldr%?\t%|pc, %0", operands); 11802 } 11803 11804 return ""; 11805 } 11806 11807 11808 /* Output a move from arm registers to an fpa registers. 11809 OPERANDS[0] is an fpa register. 11810 OPERANDS[1] is the first registers of an arm register pair. */ 11811 const char * 11812 output_mov_long_double_fpa_from_arm (rtx *operands) 11813 { 11814 int arm_reg0 = REGNO (operands[1]); 11815 rtx ops[3]; 11816 11817 gcc_assert (arm_reg0 != IP_REGNUM); 11818 11819 ops[0] = gen_rtx_REG (SImode, arm_reg0); 11820 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0); 11821 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0); 11822 11823 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops); 11824 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands); 11825 11826 return ""; 11827 } 11828 11829 /* Output a move from an fpa register to arm registers. 11830 OPERANDS[0] is the first registers of an arm register pair. 11831 OPERANDS[1] is an fpa register. */ 11832 const char * 11833 output_mov_long_double_arm_from_fpa (rtx *operands) 11834 { 11835 int arm_reg0 = REGNO (operands[0]); 11836 rtx ops[3]; 11837 11838 gcc_assert (arm_reg0 != IP_REGNUM); 11839 11840 ops[0] = gen_rtx_REG (SImode, arm_reg0); 11841 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0); 11842 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0); 11843 11844 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands); 11845 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops); 11846 return ""; 11847 } 11848 11849 /* Output a move from arm registers to arm registers of a long double 11850 OPERANDS[0] is the destination. 11851 OPERANDS[1] is the source. */ 11852 const char * 11853 output_mov_long_double_arm_from_arm (rtx *operands) 11854 { 11855 /* We have to be careful here because the two might overlap. */ 11856 int dest_start = REGNO (operands[0]); 11857 int src_start = REGNO (operands[1]); 11858 rtx ops[2]; 11859 int i; 11860 11861 if (dest_start < src_start) 11862 { 11863 for (i = 0; i < 3; i++) 11864 { 11865 ops[0] = gen_rtx_REG (SImode, dest_start + i); 11866 ops[1] = gen_rtx_REG (SImode, src_start + i); 11867 output_asm_insn ("mov%?\t%0, %1", ops); 11868 } 11869 } 11870 else 11871 { 11872 for (i = 2; i >= 0; i--) 11873 { 11874 ops[0] = gen_rtx_REG (SImode, dest_start + i); 11875 ops[1] = gen_rtx_REG (SImode, src_start + i); 11876 output_asm_insn ("mov%?\t%0, %1", ops); 11877 } 11878 } 11879 11880 return ""; 11881 } 11882 11883 void 11884 arm_emit_movpair (rtx dest, rtx src) 11885 { 11886 /* If the src is an immediate, simplify it. 
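The low 16 bits are set first; if any of the upper 16 bits are nonzero they are then patched in with a zero_extract of bits 16-31, which corresponds to a movw/movt pair on cores that provide those instructions.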
*/ 11887 if (CONST_INT_P (src)) 11888 { 11889 HOST_WIDE_INT val = INTVAL (src); 11890 emit_set_insn (dest, GEN_INT (val & 0x0000ffff)); 11891 if ((val >> 16) & 0x0000ffff) 11892 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16), 11893 GEN_INT (16)), 11894 GEN_INT ((val >> 16) & 0x0000ffff)); 11895 return; 11896 } 11897 emit_set_insn (dest, gen_rtx_HIGH (SImode, src)); 11898 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src)); 11899 } 11900 11901 /* Output a move from arm registers to an fpa register. 11902 OPERANDS[0] is an fpa register. 11903 OPERANDS[1] is the first register of an arm register pair. */ 11904 const char * 11905 output_mov_double_fpa_from_arm (rtx *operands) 11906 { 11907 int arm_reg0 = REGNO (operands[1]); 11908 rtx ops[2]; 11909 11910 gcc_assert (arm_reg0 != IP_REGNUM); 11911 11912 ops[0] = gen_rtx_REG (SImode, arm_reg0); 11913 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0); 11914 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops); 11915 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands); 11916 return ""; 11917 } 11918 11919 /* Output a move from an fpa register to arm registers. 11920 OPERANDS[0] is the first register of an arm register pair. 11921 OPERANDS[1] is an fpa register. */ 11922 const char * 11923 output_mov_double_arm_from_fpa (rtx *operands) 11924 { 11925 int arm_reg0 = REGNO (operands[0]); 11926 rtx ops[2]; 11927 11928 gcc_assert (arm_reg0 != IP_REGNUM); 11929 11930 ops[0] = gen_rtx_REG (SImode, arm_reg0); 11931 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0); 11932 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands); 11933 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops); 11934 return ""; 11935 } 11936 11937 /* Output a move between double words. 11938 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM 11939 or MEM<-REG and all MEMs must be offsettable addresses. */ 11940 const char * 11941 output_move_double (rtx *operands) 11942 { 11943 enum rtx_code code0 = GET_CODE (operands[0]); 11944 enum rtx_code code1 = GET_CODE (operands[1]); 11945 rtx otherops[3]; 11946 11947 if (code0 == REG) 11948 { 11949 unsigned int reg0 = REGNO (operands[0]); 11950 11951 otherops[0] = gen_rtx_REG (SImode, 1 + reg0); 11952 11953 gcc_assert (code1 == MEM); /* Constraints should ensure this. */ 11954 11955 switch (GET_CODE (XEXP (operands[1], 0))) 11956 { 11957 case REG: 11958 if (TARGET_LDRD 11959 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0)))) 11960 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands); 11961 else 11962 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands); 11963 break; 11964 11965 case PRE_INC: 11966 gcc_assert (TARGET_LDRD); 11967 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands); 11968 break; 11969 11970 case PRE_DEC: 11971 if (TARGET_LDRD) 11972 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands); 11973 else 11974 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands); 11975 break; 11976 11977 case POST_INC: 11978 if (TARGET_LDRD) 11979 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands); 11980 else 11981 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands); 11982 break; 11983 11984 case POST_DEC: 11985 gcc_assert (TARGET_LDRD); 11986 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands); 11987 break; 11988 11989 case PRE_MODIFY: 11990 case POST_MODIFY: 11991 /* Autoincrement addressing modes should never have overlapping 11992 base and destination registers, and overlapping index registers 11993 are already prohibited, so this doesn't need to worry about 11994 fix_cm3_ldrd.
*/ 11995 otherops[0] = operands[0]; 11996 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0); 11997 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1); 11998 11999 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY) 12000 { 12001 if (reg_overlap_mentioned_p (otherops[0], otherops[2])) 12002 { 12003 /* Registers overlap so split out the increment. */ 12004 output_asm_insn ("add%?\t%1, %1, %2", otherops); 12005 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops); 12006 } 12007 else 12008 { 12009 /* Use a single insn if we can. 12010 FIXME: IWMMXT allows offsets larger than ldrd can 12011 handle, fix these up with a pair of ldr. */ 12012 if (TARGET_THUMB2 12013 || GET_CODE (otherops[2]) != CONST_INT 12014 || (INTVAL (otherops[2]) > -256 12015 && INTVAL (otherops[2]) < 256)) 12016 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops); 12017 else 12018 { 12019 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops); 12020 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); 12021 } 12022 } 12023 } 12024 else 12025 { 12026 /* Use a single insn if we can. 12027 FIXME: IWMMXT allows offsets larger than ldrd can handle, 12028 fix these up with a pair of ldr. */ 12029 if (TARGET_THUMB2 12030 || GET_CODE (otherops[2]) != CONST_INT 12031 || (INTVAL (otherops[2]) > -256 12032 && INTVAL (otherops[2]) < 256)) 12033 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops); 12034 else 12035 { 12036 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); 12037 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops); 12038 } 12039 } 12040 break; 12041 12042 case LABEL_REF: 12043 case CONST: 12044 /* We might be able to use ldrd %0, %1 here. However the range is 12045 different to ldr/adr, and it is broken on some ARMv7-M 12046 implementations. */ 12047 /* Use the second register of the pair to avoid problematic 12048 overlap. */ 12049 otherops[1] = operands[1]; 12050 output_asm_insn ("adr%?\t%0, %1", otherops); 12051 operands[1] = otherops[0]; 12052 if (TARGET_LDRD) 12053 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands); 12054 else 12055 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands); 12056 break; 12057 12058 /* ??? This needs checking for thumb2. */ 12059 default: 12060 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1), 12061 GET_MODE (XEXP (XEXP (operands[1], 0), 1)))) 12062 { 12063 otherops[0] = operands[0]; 12064 otherops[1] = XEXP (XEXP (operands[1], 0), 0); 12065 otherops[2] = XEXP (XEXP (operands[1], 0), 1); 12066 12067 if (GET_CODE (XEXP (operands[1], 0)) == PLUS) 12068 { 12069 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD) 12070 { 12071 switch ((int) INTVAL (otherops[2])) 12072 { 12073 case -8: 12074 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops); 12075 return ""; 12076 case -4: 12077 if (TARGET_THUMB2) 12078 break; 12079 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops); 12080 return ""; 12081 case 4: 12082 if (TARGET_THUMB2) 12083 break; 12084 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops); 12085 return ""; 12086 } 12087 } 12088 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1); 12089 operands[1] = otherops[0]; 12090 if (TARGET_LDRD 12091 && (GET_CODE (otherops[2]) == REG 12092 || TARGET_THUMB2 12093 || (GET_CODE (otherops[2]) == CONST_INT 12094 && INTVAL (otherops[2]) > -256 12095 && INTVAL (otherops[2]) < 256))) 12096 { 12097 if (reg_overlap_mentioned_p (operands[0], 12098 otherops[2])) 12099 { 12100 rtx tmp; 12101 /* Swap base and index registers over to 12102 avoid a conflict. 
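(The address computed is unchanged, since base plus index is commutative.)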
*/ 12103 tmp = otherops[1]; 12104 otherops[1] = otherops[2]; 12105 otherops[2] = tmp; 12106 } 12107 /* If both registers conflict, it will usually 12108 have been fixed by a splitter. */ 12109 if (reg_overlap_mentioned_p (operands[0], otherops[2]) 12110 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1]))) 12111 { 12112 output_asm_insn ("add%?\t%0, %1, %2", otherops); 12113 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands); 12114 } 12115 else 12116 { 12117 otherops[0] = operands[0]; 12118 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops); 12119 } 12120 return ""; 12121 } 12122 12123 if (GET_CODE (otherops[2]) == CONST_INT) 12124 { 12125 if (!(const_ok_for_arm (INTVAL (otherops[2])))) 12126 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops); 12127 else 12128 output_asm_insn ("add%?\t%0, %1, %2", otherops); 12129 } 12130 else 12131 output_asm_insn ("add%?\t%0, %1, %2", otherops); 12132 } 12133 else 12134 output_asm_insn ("sub%?\t%0, %1, %2", otherops); 12135 12136 if (TARGET_LDRD) 12137 return "ldr%(d%)\t%0, [%1]"; 12138 12139 return "ldm%(ia%)\t%1, %M0"; 12140 } 12141 else 12142 { 12143 otherops[1] = adjust_address (operands[1], SImode, 4); 12144 /* Take care of overlapping base/data reg. */ 12145 if (reg_mentioned_p (operands[0], operands[1])) 12146 { 12147 output_asm_insn ("ldr%?\t%0, %1", otherops); 12148 output_asm_insn ("ldr%?\t%0, %1", operands); 12149 } 12150 else 12151 { 12152 output_asm_insn ("ldr%?\t%0, %1", operands); 12153 output_asm_insn ("ldr%?\t%0, %1", otherops); 12154 } 12155 } 12156 } 12157 } 12158 else 12159 { 12160 /* Constraints should ensure this. */ 12161 gcc_assert (code0 == MEM && code1 == REG); 12162 gcc_assert (REGNO (operands[1]) != IP_REGNUM); 12163 12164 switch (GET_CODE (XEXP (operands[0], 0))) 12165 { 12166 case REG: 12167 if (TARGET_LDRD) 12168 output_asm_insn ("str%(d%)\t%1, [%m0]", operands); 12169 else 12170 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands); 12171 break; 12172 12173 case PRE_INC: 12174 gcc_assert (TARGET_LDRD); 12175 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands); 12176 break; 12177 12178 case PRE_DEC: 12179 if (TARGET_LDRD) 12180 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands); 12181 else 12182 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands); 12183 break; 12184 12185 case POST_INC: 12186 if (TARGET_LDRD) 12187 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands); 12188 else 12189 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands); 12190 break; 12191 12192 case POST_DEC: 12193 gcc_assert (TARGET_LDRD); 12194 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands); 12195 break; 12196 12197 case PRE_MODIFY: 12198 case POST_MODIFY: 12199 otherops[0] = operands[1]; 12200 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0); 12201 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1); 12202 12203 /* IWMMXT allows offsets larger than ldrd can handle, 12204 fix these up with a pair of ldr. 
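Since this is the store half of the pattern, the fix-up below is really a pair of plain str instructions, with the base-register update folded into one of them.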
*/ 12205 if (!TARGET_THUMB2 12206 && GET_CODE (otherops[2]) == CONST_INT 12207 && (INTVAL(otherops[2]) <= -256 12208 || INTVAL(otherops[2]) >= 256)) 12209 { 12210 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) 12211 { 12212 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops); 12213 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops); 12214 } 12215 else 12216 { 12217 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops); 12218 output_asm_insn ("str%?\t%0, [%1], %2", otherops); 12219 } 12220 } 12221 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) 12222 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops); 12223 else 12224 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops); 12225 break; 12226 12227 case PLUS: 12228 otherops[2] = XEXP (XEXP (operands[0], 0), 1); 12229 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD) 12230 { 12231 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1))) 12232 { 12233 case -8: 12234 output_asm_insn ("stm%(db%)\t%m0, %M1", operands); 12235 return ""; 12236 12237 case -4: 12238 if (TARGET_THUMB2) 12239 break; 12240 output_asm_insn ("stm%(da%)\t%m0, %M1", operands); 12241 return ""; 12242 12243 case 4: 12244 if (TARGET_THUMB2) 12245 break; 12246 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands); 12247 return ""; 12248 } 12249 } 12250 if (TARGET_LDRD 12251 && (GET_CODE (otherops[2]) == REG 12252 || TARGET_THUMB2 12253 || (GET_CODE (otherops[2]) == CONST_INT 12254 && INTVAL (otherops[2]) > -256 12255 && INTVAL (otherops[2]) < 256))) 12256 { 12257 otherops[0] = operands[1]; 12258 otherops[1] = XEXP (XEXP (operands[0], 0), 0); 12259 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops); 12260 return ""; 12261 } 12262 /* Fall through */ 12263 12264 default: 12265 otherops[0] = adjust_address (operands[0], SImode, 4); 12266 otherops[1] = operands[1]; 12267 output_asm_insn ("str%?\t%1, %0", operands); 12268 output_asm_insn ("str%?\t%H1, %0", otherops); 12269 } 12270 } 12271 12272 return ""; 12273 } 12274 12275 /* Output a move, load or store for quad-word vectors in ARM registers. Only 12276 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */ 12277 12278 const char * 12279 output_move_quad (rtx *operands) 12280 { 12281 if (REG_P (operands[0])) 12282 { 12283 /* Load, or reg->reg move. */ 12284 12285 if (MEM_P (operands[1])) 12286 { 12287 switch (GET_CODE (XEXP (operands[1], 0))) 12288 { 12289 case REG: 12290 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands); 12291 break; 12292 12293 case LABEL_REF: 12294 case CONST: 12295 output_asm_insn ("adr%?\t%0, %1", operands); 12296 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands); 12297 break; 12298 12299 default: 12300 gcc_unreachable (); 12301 } 12302 } 12303 else 12304 { 12305 rtx ops[2]; 12306 int dest, src, i; 12307 12308 gcc_assert (REG_P (operands[1])); 12309 12310 dest = REGNO (operands[0]); 12311 src = REGNO (operands[1]); 12312 12313 /* This seems pretty dumb, but hopefully GCC won't try to do it 12314 very often.
*/ 12315 if (dest < src) 12316 for (i = 0; i < 4; i++) 12317 { 12318 ops[0] = gen_rtx_REG (SImode, dest + i); 12319 ops[1] = gen_rtx_REG (SImode, src + i); 12320 output_asm_insn ("mov%?\t%0, %1", ops); 12321 } 12322 else 12323 for (i = 3; i >= 0; i--) 12324 { 12325 ops[0] = gen_rtx_REG (SImode, dest + i); 12326 ops[1] = gen_rtx_REG (SImode, src + i); 12327 output_asm_insn ("mov%?\t%0, %1", ops); 12328 } 12329 } 12330 } 12331 else 12332 { 12333 gcc_assert (MEM_P (operands[0])); 12334 gcc_assert (REG_P (operands[1])); 12335 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0])); 12336 12337 switch (GET_CODE (XEXP (operands[0], 0))) 12338 { 12339 case REG: 12340 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands); 12341 break; 12342 12343 default: 12344 gcc_unreachable (); 12345 } 12346 } 12347 12348 return ""; 12349 } 12350 12351 /* Output a VFP load or store instruction. */ 12352 12353 const char * 12354 output_move_vfp (rtx *operands) 12355 { 12356 rtx reg, mem, addr, ops[2]; 12357 int load = REG_P (operands[0]); 12358 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; 12359 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT; 12360 const char *templ; 12361 char buff[50]; 12362 enum machine_mode mode; 12363 12364 reg = operands[!load]; 12365 mem = operands[load]; 12366 12367 mode = GET_MODE (reg); 12368 12369 gcc_assert (REG_P (reg)); 12370 gcc_assert (IS_VFP_REGNUM (REGNO (reg))); 12371 gcc_assert (mode == SFmode 12372 || mode == DFmode 12373 || mode == SImode 12374 || mode == DImode 12375 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode))); 12376 gcc_assert (MEM_P (mem)); 12377 12378 addr = XEXP (mem, 0); 12379 12380 switch (GET_CODE (addr)) 12381 { 12382 case PRE_DEC: 12383 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s"; 12384 ops[0] = XEXP (addr, 0); 12385 ops[1] = reg; 12386 break; 12387 12388 case POST_INC: 12389 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s"; 12390 ops[0] = XEXP (addr, 0); 12391 ops[1] = reg; 12392 break; 12393 12394 default: 12395 templ = "f%s%c%%?\t%%%s0, %%1%s"; 12396 ops[0] = reg; 12397 ops[1] = mem; 12398 break; 12399 } 12400 12401 sprintf (buff, templ, 12402 load ? "ld" : "st", 12403 dp ? 'd' : 's', 12404 dp ? "P" : "", 12405 integer_p ? "\t%@ int" : ""); 12406 output_asm_insn (buff, ops); 12407 12408 return ""; 12409 } 12410 12411 /* Output a Neon quad-word load or store, or a load or store for 12412 larger structure modes. 12413 12414 WARNING: The ordering of elements is weird in big-endian mode, 12415 because we use VSTM, as required by the EABI. GCC RTL defines 12416 element ordering based on in-memory order. This can differ 12417 from the architectural ordering of elements within a NEON register. 12418 The intrinsics defined in arm_neon.h use the NEON register element 12419 ordering, not the GCC RTL element ordering. 12420 12421 For example, the in-memory ordering of a big-endian quadword 12422 vector with 16-bit elements when stored from register pair {d0,d1} 12423 will be (lowest address first, d0[N] is NEON register element N): 12424 12425 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]] 12426 12427 When necessary, quadword registers (dN, dN+1) are moved to ARM 12428 registers from rN in the order: 12429 12430 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2) 12431 12432 So that STM/LDM can be used on vectors in ARM registers, and the 12433 same memory layout will result as if VSTM/VLDM were used.
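For label or reg+offset addresses the code below falls back to one 64-bit transfer per D register; if one of those transfers would use the destination register as part of its address, it is deferred until last so that the address is not clobbered before it is used.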
*/ 12434 12435 const char * 12436 output_move_neon (rtx *operands) 12437 { 12438 rtx reg, mem, addr, ops[2]; 12439 int regno, load = REG_P (operands[0]); 12440 const char *templ; 12441 char buff[50]; 12442 enum machine_mode mode; 12443 12444 reg = operands[!load]; 12445 mem = operands[load]; 12446 12447 mode = GET_MODE (reg); 12448 12449 gcc_assert (REG_P (reg)); 12450 regno = REGNO (reg); 12451 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno) 12452 || NEON_REGNO_OK_FOR_QUAD (regno)); 12453 gcc_assert (VALID_NEON_DREG_MODE (mode) 12454 || VALID_NEON_QREG_MODE (mode) 12455 || VALID_NEON_STRUCT_MODE (mode)); 12456 gcc_assert (MEM_P (mem)); 12457 12458 addr = XEXP (mem, 0); 12459 12460 /* Strip off const from addresses like (const (plus (...))). */ 12461 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS) 12462 addr = XEXP (addr, 0); 12463 12464 switch (GET_CODE (addr)) 12465 { 12466 case POST_INC: 12467 templ = "v%smia%%?\t%%0!, %%h1"; 12468 ops[0] = XEXP (addr, 0); 12469 ops[1] = reg; 12470 break; 12471 12472 case PRE_DEC: 12473 /* FIXME: We should be using vld1/vst1 here in BE mode? */ 12474 templ = "v%smdb%%?\t%%0!, %%h1"; 12475 ops[0] = XEXP (addr, 0); 12476 ops[1] = reg; 12477 break; 12478 12479 case POST_MODIFY: 12480 /* FIXME: Not currently enabled in neon_vector_mem_operand. */ 12481 gcc_unreachable (); 12482 12483 case LABEL_REF: 12484 case PLUS: 12485 { 12486 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2; 12487 int i; 12488 int overlap = -1; 12489 for (i = 0; i < nregs; i++) 12490 { 12491 /* We're only using DImode here because it's a convenient size. */ 12492 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i); 12493 ops[1] = adjust_address (mem, DImode, 8 * i); 12494 if (reg_overlap_mentioned_p (ops[0], mem)) 12495 { 12496 gcc_assert (overlap == -1); 12497 overlap = i; 12498 } 12499 else 12500 { 12501 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st"); 12502 output_asm_insn (buff, ops); 12503 } 12504 } 12505 if (overlap != -1) 12506 { 12507 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap); 12508 ops[1] = adjust_address (mem, SImode, 8 * overlap); 12509 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st"); 12510 output_asm_insn (buff, ops); 12511 } 12512 12513 return ""; 12514 } 12515 12516 default: 12517 templ = "v%smia%%?\t%%m0, %%h1"; 12518 ops[0] = mem; 12519 ops[1] = reg; 12520 } 12521 12522 sprintf (buff, templ, load ? "ld" : "st"); 12523 output_asm_insn (buff, ops); 12524 12525 return ""; 12526 } 12527 12528 /* Compute and return the length of neon_mov<mode>, where <mode> is 12529 one of VSTRUCT modes: EI, OI, CI or XI. */ 12530 int 12531 arm_attr_length_move_neon (rtx insn) 12532 { 12533 rtx reg, mem, addr; 12534 int load; 12535 enum machine_mode mode; 12536 12537 extract_insn_cached (insn); 12538 12539 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) 12540 { 12541 mode = GET_MODE (recog_data.operand[0]); 12542 switch (mode) 12543 { 12544 case EImode: 12545 case OImode: 12546 return 8; 12547 case CImode: 12548 return 12; 12549 case XImode: 12550 return 16; 12551 default: 12552 gcc_unreachable (); 12553 } 12554 } 12555 12556 load = REG_P (recog_data.operand[0]); 12557 reg = recog_data.operand[!load]; 12558 mem = recog_data.operand[load]; 12559 12560 gcc_assert (MEM_P (mem)); 12561 12562 mode = GET_MODE (reg); 12563 addr = XEXP (mem, 0); 12564 12565 /* Strip off const from addresses like (const (plus (...))). 
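After stripping, a LABEL_REF or PLUS address means the move was expanded as one transfer per D register, so the length computed below is 4 bytes per transfer; any other address form is a single 4-byte instruction.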
*/ 12566 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS) 12567 addr = XEXP (addr, 0); 12568 12569 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS) 12570 { 12571 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2; 12572 return insns * 4; 12573 } 12574 else 12575 return 4; 12576 } 12577 12578 /* Output an ADD r, s, #n where n may be too big for one instruction. 12579 If adding zero to one register, output nothing. */ 12580 const char * 12581 output_add_immediate (rtx *operands) 12582 { 12583 HOST_WIDE_INT n = INTVAL (operands[2]); 12584 12585 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1])) 12586 { 12587 if (n < 0) 12588 output_multi_immediate (operands, 12589 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2, 12590 -n); 12591 else 12592 output_multi_immediate (operands, 12593 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2, 12594 n); 12595 } 12596 12597 return ""; 12598 } 12599 12600 /* Output a multiple immediate operation. 12601 OPERANDS is the vector of operands referred to in the output patterns. 12602 INSTR1 is the output pattern to use for the first constant. 12603 INSTR2 is the output pattern to use for subsequent constants. 12604 IMMED_OP is the index of the constant slot in OPERANDS. 12605 N is the constant value. */ 12606 static const char * 12607 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2, 12608 int immed_op, HOST_WIDE_INT n) 12609 { 12610 #if HOST_BITS_PER_WIDE_INT > 32 12611 n &= 0xffffffff; 12612 #endif 12613 12614 if (n == 0) 12615 { 12616 /* Quick and easy output. */ 12617 operands[immed_op] = const0_rtx; 12618 output_asm_insn (instr1, operands); 12619 } 12620 else 12621 { 12622 int i; 12623 const char * instr = instr1; 12624 12625 /* Note that n is never zero here (which would give no output). */ 12626 for (i = 0; i < 32; i += 2) 12627 { 12628 if (n & (3 << i)) 12629 { 12630 operands[immed_op] = GEN_INT (n & (255 << i)); 12631 output_asm_insn (instr, operands); 12632 instr = instr2; 12633 i += 6; 12634 } 12635 } 12636 } 12637 12638 return ""; 12639 } 12640 12641 /* Return the name of a shifter operation. */ 12642 static const char * 12643 arm_shift_nmem(enum rtx_code code) 12644 { 12645 switch (code) 12646 { 12647 case ASHIFT: 12648 return ARM_LSL_NAME; 12649 12650 case ASHIFTRT: 12651 return "asr"; 12652 12653 case LSHIFTRT: 12654 return "lsr"; 12655 12656 case ROTATERT: 12657 return "ror"; 12658 12659 default: 12660 abort(); 12661 } 12662 } 12663 12664 /* Return the appropriate ARM instruction for the operation code. 12665 The returned result should not be overwritten. OP is the rtx of the 12666 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator 12667 was shifted. */ 12668 const char * 12669 arithmetic_instr (rtx op, int shift_first_arg) 12670 { 12671 switch (GET_CODE (op)) 12672 { 12673 case PLUS: 12674 return "add"; 12675 12676 case MINUS: 12677 return shift_first_arg ? "rsb" : "sub"; 12678 12679 case IOR: 12680 return "orr"; 12681 12682 case XOR: 12683 return "eor"; 12684 12685 case AND: 12686 return "and"; 12687 12688 case ASHIFT: 12689 case ASHIFTRT: 12690 case LSHIFTRT: 12691 case ROTATERT: 12692 return arm_shift_nmem(GET_CODE(op)); 12693 12694 default: 12695 gcc_unreachable (); 12696 } 12697 } 12698 12699 /* Ensure valid constant shifts and return the appropriate shift mnemonic 12700 for the operation code. The returned result should not be overwritten. 12701 OP is the rtx code of the shift. 
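A MULT by a power of two is also accepted here and treated as a left shift.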
12702 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant 12703 shift. */ 12704 static const char * 12705 shift_op (rtx op, HOST_WIDE_INT *amountp) 12706 { 12707 const char * mnem; 12708 enum rtx_code code = GET_CODE (op); 12709 12710 switch (GET_CODE (XEXP (op, 1))) 12711 { 12712 case REG: 12713 case SUBREG: 12714 *amountp = -1; 12715 break; 12716 12717 case CONST_INT: 12718 *amountp = INTVAL (XEXP (op, 1)); 12719 break; 12720 12721 default: 12722 gcc_unreachable (); 12723 } 12724 12725 switch (code) 12726 { 12727 case ROTATE: 12728 gcc_assert (*amountp != -1); 12729 *amountp = 32 - *amountp; 12730 code = ROTATERT; 12731 12732 /* Fall through. */ 12733 12734 case ASHIFT: 12735 case ASHIFTRT: 12736 case LSHIFTRT: 12737 case ROTATERT: 12738 mnem = arm_shift_nmem(code); 12739 break; 12740 12741 case MULT: 12742 /* We never have to worry about the amount being other than a 12743 power of 2, since this case can never be reloaded from a reg. */ 12744 gcc_assert (*amountp != -1); 12745 *amountp = int_log2 (*amountp); 12746 return ARM_LSL_NAME; 12747 12748 default: 12749 gcc_unreachable (); 12750 } 12751 12752 if (*amountp != -1) 12753 { 12754 /* This is not 100% correct, but follows from the desire to merge 12755 multiplication by a power of 2 with the recognizer for a 12756 shift. >=32 is not a valid shift for "lsl", so we must try and 12757 output a shift that produces the correct arithmetical result. 12758 Using lsr #32 is identical except for the fact that the carry bit 12759 is not set correctly if we set the flags; but we never use the 12760 carry bit from such an operation, so we can ignore that. */ 12761 if (code == ROTATERT) 12762 /* Rotate is just modulo 32. */ 12763 *amountp &= 31; 12764 else if (*amountp != (*amountp & 31)) 12765 { 12766 if (code == ASHIFT) 12767 mnem = "lsr"; 12768 *amountp = 32; 12769 } 12770 12771 /* Shifts of 0 are no-ops. */ 12772 if (*amountp == 0) 12773 return NULL; 12774 } 12775 12776 return mnem; 12777 } 12778 12779 /* Obtain the shift from the POWER of two. */ 12780 12781 static HOST_WIDE_INT 12782 int_log2 (HOST_WIDE_INT power) 12783 { 12784 HOST_WIDE_INT shift = 0; 12785 12786 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0) 12787 { 12788 gcc_assert (shift <= 31); 12789 shift++; 12790 } 12791 12792 return shift; 12793 } 12794 12795 /* Output a .ascii pseudo-op, keeping track of lengths. This is 12796 because /bin/as is horribly restrictive. The judgement about 12797 whether or not each character is 'printable' (and can be output as 12798 is) or not (and must be printed with an octal escape) must be made 12799 with reference to the *host* character set -- the situation is 12800 similar to that discussed in the comments above pp_c_char in 12801 c-pretty-print.c. 
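Characters that are not printable on the host are emitted as three-digit octal escapes, and the string is closed and restarted whenever it reaches MAX_ASCII_LEN characters so that no single .ascii directive grows too long.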
*/ 12802 12803 #define MAX_ASCII_LEN 51 12804 12805 void 12806 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len) 12807 { 12808 int i; 12809 int len_so_far = 0; 12810 12811 fputs ("\t.ascii\t\"", stream); 12812 12813 for (i = 0; i < len; i++) 12814 { 12815 int c = p[i]; 12816 12817 if (len_so_far >= MAX_ASCII_LEN) 12818 { 12819 fputs ("\"\n\t.ascii\t\"", stream); 12820 len_so_far = 0; 12821 } 12822 12823 if (ISPRINT (c)) 12824 { 12825 if (c == '\\' || c == '\"') 12826 { 12827 putc ('\\', stream); 12828 len_so_far++; 12829 } 12830 putc (c, stream); 12831 len_so_far++; 12832 } 12833 else 12834 { 12835 fprintf (stream, "\\%03o", c); 12836 len_so_far += 4; 12837 } 12838 } 12839 12840 fputs ("\"\n", stream); 12841 } 12842 12843 /* Compute the register save mask for registers 0 through 12 12844 inclusive. This code is used by arm_compute_save_reg_mask. */ 12845 12846 static unsigned long 12847 arm_compute_save_reg0_reg12_mask (void) 12848 { 12849 unsigned long func_type = arm_current_func_type (); 12850 unsigned long save_reg_mask = 0; 12851 unsigned int reg; 12852 12853 if (IS_INTERRUPT (func_type)) 12854 { 12855 unsigned int max_reg; 12856 /* Interrupt functions must not corrupt any registers, 12857 even call clobbered ones. If this is a leaf function 12858 we can just examine the registers used by the RTL, but 12859 otherwise we have to assume that whatever function is 12860 called might clobber anything, and so we have to save 12861 all the call-clobbered registers as well. */ 12862 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ) 12863 /* FIQ handlers have registers r8 - r12 banked, so 12864 we only need to check r0 - r7, Normal ISRs only 12865 bank r14 and r15, so we must check up to r12. 12866 r13 is the stack pointer which is always preserved, 12867 so we do not need to consider it here. */ 12868 max_reg = 7; 12869 else 12870 max_reg = 12; 12871 12872 for (reg = 0; reg <= max_reg; reg++) 12873 if (df_regs_ever_live_p (reg) 12874 || (! current_function_is_leaf && call_used_regs[reg])) 12875 save_reg_mask |= (1 << reg); 12876 12877 /* Also save the pic base register if necessary. */ 12878 if (flag_pic 12879 && !TARGET_SINGLE_PIC_BASE 12880 && arm_pic_register != INVALID_REGNUM 12881 && crtl->uses_pic_offset_table) 12882 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM; 12883 } 12884 else if (IS_VOLATILE(func_type)) 12885 { 12886 /* For noreturn functions we historically omitted register saves 12887 altogether. However this really messes up debugging. As a 12888 compromise save just the frame pointers. Combined with the link 12889 register saved elsewhere this should be sufficient to get 12890 a backtrace. */ 12891 if (frame_pointer_needed) 12892 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM; 12893 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM)) 12894 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM; 12895 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM)) 12896 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM; 12897 } 12898 else 12899 { 12900 /* In the normal case we only need to save those registers 12901 which are call saved and which are used by this function. */ 12902 for (reg = 0; reg <= 11; reg++) 12903 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg]) 12904 save_reg_mask |= (1 << reg); 12905 12906 /* Handle the frame pointer as a special case. */ 12907 if (frame_pointer_needed) 12908 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM; 12909 12910 /* If we aren't loading the PIC register, 12911 don't stack it even though it may be live. 
*/ 12912 if (flag_pic 12913 && !TARGET_SINGLE_PIC_BASE 12914 && arm_pic_register != INVALID_REGNUM 12915 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM) 12916 || crtl->uses_pic_offset_table)) 12917 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM; 12918 12919 /* The prologue will copy SP into R0, so save it. */ 12920 if (IS_STACKALIGN (func_type)) 12921 save_reg_mask |= 1; 12922 } 12923 12924 /* Save registers so the exception handler can modify them. */ 12925 if (crtl->calls_eh_return) 12926 { 12927 unsigned int i; 12928 12929 for (i = 0; ; i++) 12930 { 12931 reg = EH_RETURN_DATA_REGNO (i); 12932 if (reg == INVALID_REGNUM) 12933 break; 12934 save_reg_mask |= 1 << reg; 12935 } 12936 } 12937 12938 return save_reg_mask; 12939 } 12940 12941 12942 /* Compute the number of bytes used to store the static chain register on the 12943 stack, above the stack frame. We need to know this accurately to get the 12944 alignment of the rest of the stack frame correct. */ 12945 12946 static int arm_compute_static_chain_stack_bytes (void) 12947 { 12948 unsigned long func_type = arm_current_func_type (); 12949 int static_chain_stack_bytes = 0; 12950 12951 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM && 12952 IS_NESTED (func_type) && 12953 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0) 12954 static_chain_stack_bytes = 4; 12955 12956 return static_chain_stack_bytes; 12957 } 12958 12959 12960 /* Compute a bit mask of which registers need to be 12961 saved on the stack for the current function. 12962 This is used by arm_get_frame_offsets, which may add extra registers. */ 12963 12964 static unsigned long 12965 arm_compute_save_reg_mask (void) 12966 { 12967 unsigned int save_reg_mask = 0; 12968 unsigned long func_type = arm_current_func_type (); 12969 unsigned int reg; 12970 12971 if (IS_NAKED (func_type)) 12972 /* This should never really happen. */ 12973 return 0; 12974 12975 /* If we are creating a stack frame, then we must save the frame pointer, 12976 IP (which will hold the old stack pointer), LR and the PC. */ 12977 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) 12978 save_reg_mask |= 12979 (1 << ARM_HARD_FRAME_POINTER_REGNUM) 12980 | (1 << IP_REGNUM) 12981 | (1 << LR_REGNUM) 12982 | (1 << PC_REGNUM); 12983 12984 save_reg_mask |= arm_compute_save_reg0_reg12_mask (); 12985 12986 /* Decide if we need to save the link register. 12987 Interrupt routines have their own banked link register, 12988 so they never need to save it. 12989 Otherwise if we do not use the link register we do not need to save 12990 it. If we are pushing other registers onto the stack however, we 12991 can save an instruction in the epilogue by pushing the link register 12992 now and then popping it back into the PC. This incurs extra memory 12993 accesses though, so we only do it when optimizing for size, and only 12994 if we know that we will not need a fancy return sequence. */ 12995 if (df_regs_ever_live_p (LR_REGNUM) 12996 || (save_reg_mask 12997 && optimize_size 12998 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL 12999 && !crtl->calls_eh_return)) 13000 save_reg_mask |= 1 << LR_REGNUM; 13001 13002 if (cfun->machine->lr_save_eliminated) 13003 save_reg_mask &= ~ (1 << LR_REGNUM); 13004 13005 if (TARGET_REALLY_IWMMXT 13006 && ((bit_count (save_reg_mask) 13007 + ARM_NUM_INTS (crtl->args.pretend_args_size + 13008 arm_compute_static_chain_stack_bytes()) 13009 ) % 2) != 0) 13010 { 13011 /* The total number of registers that are going to be pushed 13012 onto the stack is odd. 
We need to ensure that the stack 13013 is 64-bit aligned before we start to save iWMMXt registers, 13014 and also before we start to create locals. (A local variable 13015 might be a double or long long which we will load/store using 13016 an iWMMXt instruction). Therefore we need to push another 13017 ARM register, so that the stack will be 64-bit aligned. We 13018 try to avoid using the arg registers (r0 -r3) as they might be 13019 used to pass values in a tail call. */ 13020 for (reg = 4; reg <= 12; reg++) 13021 if ((save_reg_mask & (1 << reg)) == 0) 13022 break; 13023 13024 if (reg <= 12) 13025 save_reg_mask |= (1 << reg); 13026 else 13027 { 13028 cfun->machine->sibcall_blocked = 1; 13029 save_reg_mask |= (1 << 3); 13030 } 13031 } 13032 13033 /* We may need to push an additional register for use initializing the 13034 PIC base register. */ 13035 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic 13036 && (save_reg_mask & THUMB2_WORK_REGS) == 0) 13037 { 13038 reg = thumb_find_work_register (1 << 4); 13039 if (!call_used_regs[reg]) 13040 save_reg_mask |= (1 << reg); 13041 } 13042 13043 return save_reg_mask; 13044 } 13045 13046 13047 /* Compute a bit mask of which registers need to be 13048 saved on the stack for the current function. */ 13049 static unsigned long 13050 thumb1_compute_save_reg_mask (void) 13051 { 13052 unsigned long mask; 13053 unsigned reg; 13054 13055 mask = 0; 13056 for (reg = 0; reg < 12; reg ++) 13057 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) 13058 mask |= 1 << reg; 13059 13060 if (flag_pic 13061 && !TARGET_SINGLE_PIC_BASE 13062 && arm_pic_register != INVALID_REGNUM 13063 && crtl->uses_pic_offset_table) 13064 mask |= 1 << PIC_OFFSET_TABLE_REGNUM; 13065 13066 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */ 13067 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) 13068 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM; 13069 13070 /* LR will also be pushed if any lo regs are pushed. */ 13071 if (mask & 0xff || thumb_force_lr_save ()) 13072 mask |= (1 << LR_REGNUM); 13073 13074 /* Make sure we have a low work register if we need one. 13075 We will need one if we are going to push a high register, 13076 but we are not currently intending to push a low register. */ 13077 if ((mask & 0xff) == 0 13078 && ((mask & 0x0f00) || TARGET_BACKTRACE)) 13079 { 13080 /* Use thumb_find_work_register to choose which register 13081 we will use. If the register is live then we will 13082 have to push it. Use LAST_LO_REGNUM as our fallback 13083 choice for the register to select. */ 13084 reg = thumb_find_work_register (1 << LAST_LO_REGNUM); 13085 /* Make sure the register returned by thumb_find_work_register is 13086 not part of the return value. */ 13087 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ()) 13088 reg = LAST_LO_REGNUM; 13089 13090 if (! call_used_regs[reg]) 13091 mask |= 1 << reg; 13092 } 13093 13094 /* The 504 below is 8 bytes less than 512 because there are two possible 13095 alignment words. We can't tell here if they will be present or not so we 13096 have to play it safe and assume that they are. */ 13097 if ((CALLER_INTERWORKING_SLOT_SIZE + 13098 ROUND_UP_WORD (get_frame_size ()) + 13099 crtl->outgoing_args_size) >= 504) 13100 { 13101 /* This is the same as the code in thumb1_expand_prologue() which 13102 determines which register to use for stack decrement. 
*/ 13103 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++) 13104 if (mask & (1 << reg)) 13105 break; 13106 13107 if (reg > LAST_LO_REGNUM) 13108 { 13109 /* Make sure we have a register available for stack decrement. */ 13110 mask |= 1 << LAST_LO_REGNUM; 13111 } 13112 } 13113 13114 return mask; 13115 } 13116 13117 13118 /* Return the number of bytes required to save VFP registers. */ 13119 static int 13120 arm_get_vfp_saved_size (void) 13121 { 13122 unsigned int regno; 13123 int count; 13124 int saved; 13125 13126 saved = 0; 13127 /* Space for saved VFP registers. */ 13128 if (TARGET_HARD_FLOAT && TARGET_VFP) 13129 { 13130 count = 0; 13131 for (regno = FIRST_VFP_REGNUM; 13132 regno < LAST_VFP_REGNUM; 13133 regno += 2) 13134 { 13135 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno]) 13136 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1])) 13137 { 13138 if (count > 0) 13139 { 13140 /* Workaround ARM10 VFPr1 bug. */ 13141 if (count == 2 && !arm_arch6) 13142 count++; 13143 saved += count * 8; 13144 } 13145 count = 0; 13146 } 13147 else 13148 count++; 13149 } 13150 if (count > 0) 13151 { 13152 if (count == 2 && !arm_arch6) 13153 count++; 13154 saved += count * 8; 13155 } 13156 } 13157 return saved; 13158 } 13159 13160 13161 /* Generate a function exit sequence. If REALLY_RETURN is false, then do 13162 everything bar the final return instruction. */ 13163 const char * 13164 output_return_instruction (rtx operand, int really_return, int reverse) 13165 { 13166 char conditional[10]; 13167 char instr[100]; 13168 unsigned reg; 13169 unsigned long live_regs_mask; 13170 unsigned long func_type; 13171 arm_stack_offsets *offsets; 13172 13173 func_type = arm_current_func_type (); 13174 13175 if (IS_NAKED (func_type)) 13176 return ""; 13177 13178 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN) 13179 { 13180 /* If this function was declared non-returning, and we have 13181 found a tail call, then we have to trust that the called 13182 function won't return. */ 13183 if (really_return) 13184 { 13185 rtx ops[2]; 13186 13187 /* Otherwise, trap an attempted return by aborting. */ 13188 ops[0] = operand; 13189 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" 13190 : "abort"); 13191 assemble_external_libcall (ops[1]); 13192 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops); 13193 } 13194 13195 return ""; 13196 } 13197 13198 gcc_assert (!cfun->calls_alloca || really_return); 13199 13200 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd'); 13201 13202 cfun->machine->return_used_this_function = 1; 13203 13204 offsets = arm_get_frame_offsets (); 13205 live_regs_mask = offsets->saved_regs_mask; 13206 13207 if (live_regs_mask) 13208 { 13209 const char * return_reg; 13210 13211 /* If we do not have any special requirements for function exit 13212 (e.g. interworking) then we can load the return address 13213 directly into the PC. Otherwise we must load it into LR. */ 13214 if (really_return 13215 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK)) 13216 return_reg = reg_names[PC_REGNUM]; 13217 else 13218 return_reg = reg_names[LR_REGNUM]; 13219 13220 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM)) 13221 { 13222 /* There are three possible reasons for the IP register 13223 being saved. 1) a stack frame was created, in which case 13224 IP contains the old stack pointer, or 2) an ISR routine 13225 corrupted it, or 3) it was saved to align the stack on 13226 iWMMXt. In case 1, restore IP into SP, otherwise just 13227 restore IP. 
*/ 13228 if (frame_pointer_needed) 13229 { 13230 live_regs_mask &= ~ (1 << IP_REGNUM); 13231 live_regs_mask |= (1 << SP_REGNUM); 13232 } 13233 else 13234 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT); 13235 } 13236 13237 /* On some ARM architectures it is faster to use LDR rather than 13238 LDM to load a single register. On other architectures, the 13239 cost is the same. In 26 bit mode, or for exception handlers, 13240 we have to use LDM to load the PC so that the CPSR is also 13241 restored. */ 13242 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++) 13243 if (live_regs_mask == (1U << reg)) 13244 break; 13245 13246 if (reg <= LAST_ARM_REGNUM 13247 && (reg != LR_REGNUM 13248 || ! really_return 13249 || ! IS_INTERRUPT (func_type))) 13250 { 13251 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional, 13252 (reg == LR_REGNUM) ? return_reg : reg_names[reg]); 13253 } 13254 else 13255 { 13256 char *p; 13257 int first = 1; 13258 13259 /* Generate the load multiple instruction to restore the 13260 registers. Note we can get here, even if 13261 frame_pointer_needed is true, but only if sp already 13262 points to the base of the saved core registers. */ 13263 if (live_regs_mask & (1 << SP_REGNUM)) 13264 { 13265 unsigned HOST_WIDE_INT stack_adjust; 13266 13267 stack_adjust = offsets->outgoing_args - offsets->saved_regs; 13268 gcc_assert (stack_adjust == 0 || stack_adjust == 4); 13269 13270 if (stack_adjust && arm_arch5 && TARGET_ARM) 13271 if (TARGET_UNIFIED_ASM) 13272 sprintf (instr, "ldmib%s\t%%|sp, {", conditional); 13273 else 13274 sprintf (instr, "ldm%sib\t%%|sp, {", conditional); 13275 else 13276 { 13277 /* If we can't use ldmib (SA110 bug), 13278 then try to pop r3 instead. */ 13279 if (stack_adjust) 13280 live_regs_mask |= 1 << 3; 13281 13282 if (TARGET_UNIFIED_ASM) 13283 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional); 13284 else 13285 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional); 13286 } 13287 } 13288 else 13289 if (TARGET_UNIFIED_ASM) 13290 sprintf (instr, "pop%s\t{", conditional); 13291 else 13292 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional); 13293 13294 p = instr + strlen (instr); 13295 13296 for (reg = 0; reg <= SP_REGNUM; reg++) 13297 if (live_regs_mask & (1 << reg)) 13298 { 13299 int l = strlen (reg_names[reg]); 13300 13301 if (first) 13302 first = 0; 13303 else 13304 { 13305 memcpy (p, ", ", 2); 13306 p += 2; 13307 } 13308 13309 memcpy (p, "%|", 2); 13310 memcpy (p + 2, reg_names[reg], l); 13311 p += l + 2; 13312 } 13313 13314 if (live_regs_mask & (1 << LR_REGNUM)) 13315 { 13316 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg); 13317 /* If returning from an interrupt, restore the CPSR. */ 13318 if (IS_INTERRUPT (func_type)) 13319 strcat (p, "^"); 13320 } 13321 else 13322 strcpy (p, "}"); 13323 } 13324 13325 output_asm_insn (instr, & operand); 13326 13327 /* See if we need to generate an extra instruction to 13328 perform the actual function return. */ 13329 if (really_return 13330 && func_type != ARM_FT_INTERWORKED 13331 && (live_regs_mask & (1 << LR_REGNUM)) != 0) 13332 { 13333 /* The return has already been handled 13334 by loading the LR into the PC. */ 13335 really_return = 0; 13336 } 13337 } 13338 13339 if (really_return) 13340 { 13341 switch ((int) ARM_FUNC_TYPE (func_type)) 13342 { 13343 case ARM_FT_ISR: 13344 case ARM_FT_FIQ: 13345 /* ??? This is wrong for unified assembly syntax. 
*/ 13346 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional); 13347 break; 13348 13349 case ARM_FT_INTERWORKED: 13350 sprintf (instr, "bx%s\t%%|lr", conditional); 13351 break; 13352 13353 case ARM_FT_EXCEPTION: 13354 /* ??? This is wrong for unified assembly syntax. */ 13355 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional); 13356 break; 13357 13358 default: 13359 /* Use bx if it's available. */ 13360 if (arm_arch5 || arm_arch4t) 13361 sprintf (instr, "bx%s\t%%|lr", conditional); 13362 else 13363 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional); 13364 break; 13365 } 13366 13367 output_asm_insn (instr, & operand); 13368 } 13369 13370 return ""; 13371 } 13372 13373 /* Write the function name into the code section, directly preceding 13374 the function prologue. 13375 13376 Code will be output similar to this: 13377 t0 13378 .ascii "arm_poke_function_name", 0 13379 .align 13380 t1 13381 .word 0xff000000 + (t1 - t0) 13382 arm_poke_function_name 13383 mov ip, sp 13384 stmfd sp!, {fp, ip, lr, pc} 13385 sub fp, ip, #4 13386 13387 When performing a stack backtrace, code can inspect the value 13388 of 'pc' stored at 'fp' + 0. If the trace function then looks 13389 at location pc - 12 and the top 8 bits are set, then we know 13390 that there is a function name embedded immediately preceding this 13391 location and has length ((pc[-3]) & 0xff000000). 13392 13393 We assume that pc is declared as a pointer to an unsigned long. 13394 13395 It is of no benefit to output the function name if we are assembling 13396 a leaf function. These function types will not contain a stack 13397 backtrace structure, therefore it is not possible to determine the 13398 function name. */ 13399 void 13400 arm_poke_function_name (FILE *stream, const char *name) 13401 { 13402 unsigned long alignlength; 13403 unsigned long length; 13404 rtx x; 13405 13406 length = strlen (name) + 1; 13407 alignlength = ROUND_UP_WORD (length); 13408 13409 ASM_OUTPUT_ASCII (stream, name, length); 13410 ASM_OUTPUT_ALIGN (stream, 2); 13411 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength); 13412 assemble_aligned_integer (UNITS_PER_WORD, x); 13413 } 13414 13415 /* Place some comments into the assembler stream 13416 describing the current function. */ 13417 static void 13418 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size) 13419 { 13420 unsigned long func_type; 13421 13422 if (TARGET_THUMB1) 13423 { 13424 thumb1_output_function_prologue (f, frame_size); 13425 return; 13426 } 13427 13428 /* Sanity check. 
*/ 13429 gcc_assert (!arm_ccfsm_state && !arm_target_insn); 13430 13431 func_type = arm_current_func_type (); 13432 13433 switch ((int) ARM_FUNC_TYPE (func_type)) 13434 { 13435 default: 13436 case ARM_FT_NORMAL: 13437 break; 13438 case ARM_FT_INTERWORKED: 13439 asm_fprintf (f, "\t%@ Function supports interworking.\n"); 13440 break; 13441 case ARM_FT_ISR: 13442 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n"); 13443 break; 13444 case ARM_FT_FIQ: 13445 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n"); 13446 break; 13447 case ARM_FT_EXCEPTION: 13448 asm_fprintf (f, "\t%@ ARM Exception Handler.\n"); 13449 break; 13450 } 13451 13452 if (IS_NAKED (func_type)) 13453 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n"); 13454 13455 if (IS_VOLATILE (func_type)) 13456 asm_fprintf (f, "\t%@ Volatile: function does not return.\n"); 13457 13458 if (IS_NESTED (func_type)) 13459 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n"); 13460 if (IS_STACKALIGN (func_type)) 13461 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n"); 13462 13463 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n", 13464 crtl->args.size, 13465 crtl->args.pretend_args_size, frame_size); 13466 13467 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n", 13468 frame_pointer_needed, 13469 cfun->machine->uses_anonymous_args); 13470 13471 if (cfun->machine->lr_save_eliminated) 13472 asm_fprintf (f, "\t%@ link register save eliminated.\n"); 13473 13474 if (crtl->calls_eh_return) 13475 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n"); 13476 13477 } 13478 13479 const char * 13480 arm_output_epilogue (rtx sibling) 13481 { 13482 int reg; 13483 unsigned long saved_regs_mask; 13484 unsigned long func_type; 13485 /* Floats_offset is the offset from the "virtual" frame. In an APCS 13486 frame that is $fp + 4 for a non-variadic function. */ 13487 int floats_offset = 0; 13488 rtx operands[3]; 13489 FILE * f = asm_out_file; 13490 unsigned int lrm_count = 0; 13491 int really_return = (sibling == NULL); 13492 int start_reg; 13493 arm_stack_offsets *offsets; 13494 13495 /* If we have already generated the return instruction 13496 then it is futile to generate anything else. */ 13497 if (use_return_insn (FALSE, sibling) && 13498 (cfun->machine->return_used_this_function != 0)) 13499 return ""; 13500 13501 func_type = arm_current_func_type (); 13502 13503 if (IS_NAKED (func_type)) 13504 /* Naked functions don't have epilogues. */ 13505 return ""; 13506 13507 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN) 13508 { 13509 rtx op; 13510 13511 /* A volatile function should never return. Call abort. */ 13512 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort"); 13513 assemble_external_libcall (op); 13514 output_asm_insn ("bl\t%a0", &op); 13515 13516 return ""; 13517 } 13518 13519 /* If we are throwing an exception, then we really must be doing a 13520 return, so we can't tail-call. */ 13521 gcc_assert (!crtl->calls_eh_return || really_return); 13522 13523 offsets = arm_get_frame_offsets (); 13524 saved_regs_mask = offsets->saved_regs_mask; 13525 13526 if (TARGET_IWMMXT) 13527 lrm_count = bit_count (saved_regs_mask); 13528 13529 floats_offset = offsets->saved_args; 13530 /* Compute how far away the floats will be. 
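For instance (an illustrative figure, not taken from the sources): if ten core registers are set in saved_regs_mask, the loop below advances floats_offset by 4 bytes per register, i.e. by 40 bytes, before the FPA/VFP restore code subtracts it from the frame pointer.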
*/ 13531 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++) 13532 if (saved_regs_mask & (1 << reg)) 13533 floats_offset += 4; 13534 13535 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) 13536 { 13537 /* This variable is for the Virtual Frame Pointer, not VFP regs. */ 13538 int vfp_offset = offsets->frame; 13539 13540 if (TARGET_FPA_EMU2) 13541 { 13542 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) 13543 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) 13544 { 13545 floats_offset += 12; 13546 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n", 13547 reg, FP_REGNUM, floats_offset - vfp_offset); 13548 } 13549 } 13550 else 13551 { 13552 start_reg = LAST_FPA_REGNUM; 13553 13554 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) 13555 { 13556 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) 13557 { 13558 floats_offset += 12; 13559 13560 /* We can't unstack more than four registers at once. */ 13561 if (start_reg - reg == 3) 13562 { 13563 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n", 13564 reg, FP_REGNUM, floats_offset - vfp_offset); 13565 start_reg = reg - 1; 13566 } 13567 } 13568 else 13569 { 13570 if (reg != start_reg) 13571 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n", 13572 reg + 1, start_reg - reg, 13573 FP_REGNUM, floats_offset - vfp_offset); 13574 start_reg = reg - 1; 13575 } 13576 } 13577 13578 /* Just in case the last register checked also needs unstacking. */ 13579 if (reg != start_reg) 13580 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n", 13581 reg + 1, start_reg - reg, 13582 FP_REGNUM, floats_offset - vfp_offset); 13583 } 13584 13585 if (TARGET_HARD_FLOAT && TARGET_VFP) 13586 { 13587 int saved_size; 13588 13589 /* The fldmd insns do not have base+offset addressing 13590 modes, so we use IP to hold the address. */ 13591 saved_size = arm_get_vfp_saved_size (); 13592 13593 if (saved_size > 0) 13594 { 13595 floats_offset += saved_size; 13596 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM, 13597 FP_REGNUM, floats_offset - vfp_offset); 13598 } 13599 start_reg = FIRST_VFP_REGNUM; 13600 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2) 13601 { 13602 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) 13603 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1])) 13604 { 13605 if (start_reg != reg) 13606 vfp_output_fldmd (f, IP_REGNUM, 13607 (start_reg - FIRST_VFP_REGNUM) / 2, 13608 (reg - start_reg) / 2); 13609 start_reg = reg + 2; 13610 } 13611 } 13612 if (start_reg != reg) 13613 vfp_output_fldmd (f, IP_REGNUM, 13614 (start_reg - FIRST_VFP_REGNUM) / 2, 13615 (reg - start_reg) / 2); 13616 } 13617 13618 if (TARGET_IWMMXT) 13619 { 13620 /* The frame pointer is guaranteed to be non-double-word aligned. 13621 This is because it is set to (old_stack_pointer - 4) and the 13622 old_stack_pointer was double word aligned. Thus the offset to 13623 the iWMMXt registers to be loaded must also be non-double-word 13624 sized, so that the resultant address *is* double-word aligned. 13625 We can ignore floats_offset since that was already included in 13626 the live_regs_mask. */ 13627 lrm_count += (lrm_count % 2 ? 2 : 1); 13628 13629 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--) 13630 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) 13631 { 13632 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n", 13633 reg, FP_REGNUM, lrm_count * 4); 13634 lrm_count += 2; 13635 } 13636 } 13637 13638 /* saved_regs_mask should contain the IP, which at the time of stack 13639 frame generation actually contains the old stack pointer. 
So a 13640 quick way to unwind the stack is just to pop the IP register directly 13641 into the stack pointer. */ 13642 gcc_assert (saved_regs_mask & (1 << IP_REGNUM)); 13643 saved_regs_mask &= ~ (1 << IP_REGNUM); 13644 saved_regs_mask |= (1 << SP_REGNUM); 13645 13646 /* There are two registers left in saved_regs_mask - LR and PC. We 13647 only need to restore the LR register (the return address), but to 13648 save time we can load it directly into the PC, unless we need a 13649 special function exit sequence, or we are not really returning. */ 13650 if (really_return 13651 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL 13652 && !crtl->calls_eh_return) 13653 /* Delete the LR from the register mask, so that the LR on 13654 the stack is loaded into the PC in the register mask. */ 13655 saved_regs_mask &= ~ (1 << LR_REGNUM); 13656 else 13657 saved_regs_mask &= ~ (1 << PC_REGNUM); 13658 13659 /* We must use SP as the base register, because SP is one of the 13660 registers being restored. If an interrupt or page fault 13661 happens in the ldm instruction, the SP might or might not 13662 have been restored. That would be bad, as then SP will no 13663 longer indicate the safe area of stack, and we can get stack 13664 corruption. Using SP as the base register means that it will 13665 be reset correctly to the original value, should an interrupt 13666 occur. If the stack pointer already points at the right 13667 place, then omit the subtraction. */ 13668 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask)) 13669 || cfun->calls_alloca) 13670 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM, 13671 4 * bit_count (saved_regs_mask)); 13672 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0); 13673 13674 if (IS_INTERRUPT (func_type)) 13675 /* Interrupt handlers will have pushed the 13676 IP onto the stack, so restore it now. */ 13677 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0); 13678 } 13679 else 13680 { 13681 /* This branch is executed for ARM mode (non-apcs frames) and 13682 Thumb-2 mode. Frame layout is essentially the same for those 13683 cases, except that in ARM mode the frame pointer points to the 13684 first saved register, while in Thumb-2 mode the frame pointer points 13685 to the last saved register. 13686 13687 It is possible to make the frame pointer point to the last saved 13688 register in both cases, and remove some conditionals below. 13689 That means that the fp setup in the prologue would be just "mov fp, sp" 13690 and the sp restore in the epilogue would be just "mov sp, fp", whereas 13691 now we have to use add/sub in those cases. However, the value 13692 of that would be marginal, as both mov and add/sub are 32-bit 13693 in ARM mode, and it would require extra conditionals 13694 in arm_expand_prologue to distinguish the ARM-apcs-frame case 13695 (where the frame pointer is required to point at the first register) 13696 from the ARM-non-apcs-frame case. Therefore, such a change is postponed 13697 until a real need arises. */ 13698 unsigned HOST_WIDE_INT amount; 13699 int rfe; 13700 /* Restore stack pointer if necessary. */ 13701 if (TARGET_ARM && frame_pointer_needed) 13702 { 13703 operands[0] = stack_pointer_rtx; 13704 operands[1] = hard_frame_pointer_rtx; 13705 13706 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs); 13707 output_add_immediate (operands); 13708 } 13709 else 13710 { 13711 if (frame_pointer_needed) 13712 { 13713 /* For Thumb-2 restore sp from the frame pointer. 13714 Operand restrictions mean we have to increment FP, then copy 13715 to SP.
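For example (illustrative only): with 8 bytes between the saved registers and the locals base, this comes out as an add of #8 to the frame-pointer register followed by a mov of it into sp.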
*/ 13716 amount = offsets->locals_base - offsets->saved_regs; 13717 operands[0] = hard_frame_pointer_rtx; 13718 } 13719 else 13720 { 13721 unsigned long count; 13722 operands[0] = stack_pointer_rtx; 13723 amount = offsets->outgoing_args - offsets->saved_regs; 13724 /* pop call clobbered registers if it avoids a 13725 separate stack adjustment. */ 13726 count = offsets->saved_regs - offsets->saved_args; 13727 if (optimize_size 13728 && count != 0 13729 && !crtl->calls_eh_return 13730 && bit_count(saved_regs_mask) * 4 == count 13731 && !IS_INTERRUPT (func_type) 13732 && !crtl->tail_call_emit) 13733 { 13734 unsigned long mask; 13735 /* Preserve return values, of any size. */ 13736 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1; 13737 mask ^= 0xf; 13738 mask &= ~saved_regs_mask; 13739 reg = 0; 13740 while (bit_count (mask) * 4 > amount) 13741 { 13742 while ((mask & (1 << reg)) == 0) 13743 reg++; 13744 mask &= ~(1 << reg); 13745 } 13746 if (bit_count (mask) * 4 == amount) { 13747 amount = 0; 13748 saved_regs_mask |= mask; 13749 } 13750 } 13751 } 13752 13753 if (amount) 13754 { 13755 operands[1] = operands[0]; 13756 operands[2] = GEN_INT (amount); 13757 output_add_immediate (operands); 13758 } 13759 if (frame_pointer_needed) 13760 asm_fprintf (f, "\tmov\t%r, %r\n", 13761 SP_REGNUM, HARD_FRAME_POINTER_REGNUM); 13762 } 13763 13764 if (TARGET_FPA_EMU2) 13765 { 13766 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++) 13767 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) 13768 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n", 13769 reg, SP_REGNUM); 13770 } 13771 else 13772 { 13773 start_reg = FIRST_FPA_REGNUM; 13774 13775 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++) 13776 { 13777 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) 13778 { 13779 if (reg - start_reg == 3) 13780 { 13781 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n", 13782 start_reg, SP_REGNUM); 13783 start_reg = reg + 1; 13784 } 13785 } 13786 else 13787 { 13788 if (reg != start_reg) 13789 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n", 13790 start_reg, reg - start_reg, 13791 SP_REGNUM); 13792 13793 start_reg = reg + 1; 13794 } 13795 } 13796 13797 /* Just in case the last register checked also needs unstacking. */ 13798 if (reg != start_reg) 13799 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n", 13800 start_reg, reg - start_reg, SP_REGNUM); 13801 } 13802 13803 if (TARGET_HARD_FLOAT && TARGET_VFP) 13804 { 13805 int end_reg = LAST_VFP_REGNUM + 1; 13806 13807 /* Scan the registers in reverse order. We need to match 13808 any groupings made in the prologue and generate matching 13809 pop operations. */ 13810 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2) 13811 { 13812 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) 13813 && (!df_regs_ever_live_p (reg + 1) 13814 || call_used_regs[reg + 1])) 13815 { 13816 if (end_reg > reg + 2) 13817 vfp_output_fldmd (f, SP_REGNUM, 13818 (reg + 2 - FIRST_VFP_REGNUM) / 2, 13819 (end_reg - (reg + 2)) / 2); 13820 end_reg = reg; 13821 } 13822 } 13823 if (end_reg > reg + 2) 13824 vfp_output_fldmd (f, SP_REGNUM, 0, 13825 (end_reg - (reg + 2)) / 2); 13826 } 13827 13828 if (TARGET_IWMMXT) 13829 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++) 13830 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) 13831 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM); 13832 13833 /* If we can, restore the LR into the PC. 
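That is, when the conditions below hold, LR is removed from saved_regs_mask and PC is added in its place, so the final load (an illustrative "ldmfd sp!, {r4-r11, pc}", say) also performs the function return.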
*/ 13834 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED 13835 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL) 13836 && !IS_STACKALIGN (func_type) 13837 && really_return 13838 && crtl->args.pretend_args_size == 0 13839 && saved_regs_mask & (1 << LR_REGNUM) 13840 && !crtl->calls_eh_return) 13841 { 13842 saved_regs_mask &= ~ (1 << LR_REGNUM); 13843 saved_regs_mask |= (1 << PC_REGNUM); 13844 rfe = IS_INTERRUPT (func_type); 13845 } 13846 else 13847 rfe = 0; 13848 13849 /* Load the registers off the stack. If we only have one register 13850 to load use the LDR instruction - it is faster. For Thumb-2 13851 always use pop and the assembler will pick the best instruction.*/ 13852 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM) 13853 && !IS_INTERRUPT(func_type)) 13854 { 13855 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM); 13856 } 13857 else if (saved_regs_mask) 13858 { 13859 if (saved_regs_mask & (1 << SP_REGNUM)) 13860 /* Note - write back to the stack register is not enabled 13861 (i.e. "ldmfd sp!..."). We know that the stack pointer is 13862 in the list of registers and if we add writeback the 13863 instruction becomes UNPREDICTABLE. */ 13864 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 13865 rfe); 13866 else if (TARGET_ARM) 13867 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask, 13868 rfe); 13869 else 13870 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0); 13871 } 13872 13873 if (crtl->args.pretend_args_size) 13874 { 13875 /* Unwind the pre-pushed regs. */ 13876 operands[0] = operands[1] = stack_pointer_rtx; 13877 operands[2] = GEN_INT (crtl->args.pretend_args_size); 13878 output_add_immediate (operands); 13879 } 13880 } 13881 13882 /* We may have already restored PC directly from the stack. */ 13883 if (!really_return || saved_regs_mask & (1 << PC_REGNUM)) 13884 return ""; 13885 13886 /* Stack adjustment for exception handler. */ 13887 if (crtl->calls_eh_return) 13888 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM, 13889 ARM_EH_STACKADJ_REGNUM); 13890 13891 /* Generate the return instruction. */ 13892 switch ((int) ARM_FUNC_TYPE (func_type)) 13893 { 13894 case ARM_FT_ISR: 13895 case ARM_FT_FIQ: 13896 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM); 13897 break; 13898 13899 case ARM_FT_EXCEPTION: 13900 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM); 13901 break; 13902 13903 case ARM_FT_INTERWORKED: 13904 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM); 13905 break; 13906 13907 default: 13908 if (IS_STACKALIGN (func_type)) 13909 { 13910 /* See comment in arm_expand_prologue. */ 13911 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0); 13912 } 13913 if (arm_arch5 || arm_arch4t) 13914 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM); 13915 else 13916 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM); 13917 break; 13918 } 13919 13920 return ""; 13921 } 13922 13923 static void 13924 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, 13925 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED) 13926 { 13927 arm_stack_offsets *offsets; 13928 13929 if (TARGET_THUMB1) 13930 { 13931 int regno; 13932 13933 /* Emit any call-via-reg trampolines that are needed for v4t support 13934 of call_reg and call_value_reg type insns. 
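Each trampoline is simply a local label followed by an indirect branch, e.g. (illustrative) ".L4: bx r4", emitted once for each register that was actually used as an indirect call target.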
*/ 13935 for (regno = 0; regno < LR_REGNUM; regno++) 13936 { 13937 rtx label = cfun->machine->call_via[regno]; 13938 13939 if (label != NULL) 13940 { 13941 switch_to_section (function_section (current_function_decl)); 13942 targetm.asm_out.internal_label (asm_out_file, "L", 13943 CODE_LABEL_NUMBER (label)); 13944 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno); 13945 } 13946 } 13947 13948 /* ??? Probably not safe to set this here, since it assumes that a 13949 function will be emitted as assembly immediately after we generate 13950 RTL for it. This does not happen for inline functions. */ 13951 cfun->machine->return_used_this_function = 0; 13952 } 13953 else /* TARGET_32BIT */ 13954 { 13955 /* We need to take into account any stack-frame rounding. */ 13956 offsets = arm_get_frame_offsets (); 13957 13958 gcc_assert (!use_return_insn (FALSE, NULL) 13959 || (cfun->machine->return_used_this_function != 0) 13960 || offsets->saved_regs == offsets->outgoing_args 13961 || frame_pointer_needed); 13962 13963 /* Reset the ARM-specific per-function variables. */ 13964 after_arm_reorg = 0; 13965 } 13966 } 13967 13968 /* Generate and emit an insn that we will recognize as a push_multi. 13969 Unfortunately, since this insn does not reflect very well the actual 13970 semantics of the operation, we need to annotate the insn for the benefit 13971 of DWARF2 frame unwind information. */ 13972 static rtx 13973 emit_multi_reg_push (unsigned long mask) 13974 { 13975 int num_regs = 0; 13976 int num_dwarf_regs; 13977 int i, j; 13978 rtx par; 13979 rtx dwarf; 13980 int dwarf_par_index; 13981 rtx tmp, reg; 13982 13983 for (i = 0; i <= LAST_ARM_REGNUM; i++) 13984 if (mask & (1 << i)) 13985 num_regs++; 13986 13987 gcc_assert (num_regs && num_regs <= 16); 13988 13989 /* We don't record the PC in the dwarf frame information. */ 13990 num_dwarf_regs = num_regs; 13991 if (mask & (1 << PC_REGNUM)) 13992 num_dwarf_regs--; 13993 13994 /* For the body of the insn we are going to generate an UNSPEC in 13995 parallel with several USEs. This allows the insn to be recognized 13996 by the push_multi pattern in the arm.md file. 13997 13998 The body of the insn looks something like this: 13999 14000 (parallel [ 14001 (set (mem:BLK (pre_modify:SI (reg:SI sp) 14002 (const_int:SI <num>))) 14003 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT)) 14004 (use (reg:SI XX)) 14005 (use (reg:SI YY)) 14006 ... 14007 ]) 14008 14009 For the frame note however, we try to be more explicit and actually 14010 show each register being stored into the stack frame, plus a (single) 14011 decrement of the stack pointer. We do it this way in order to be 14012 friendly to the stack unwinding code, which only wants to see a single 14013 stack decrement per instruction. The RTL we generate for the note looks 14014 something like this: 14015 14016 (sequence [ 14017 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20))) 14018 (set (mem:SI (reg:SI sp)) (reg:SI r4)) 14019 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX)) 14020 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY)) 14021 ... 14022 ]) 14023 14024 FIXME:: In an ideal world the PRE_MODIFY would not exist and 14025 instead we'd have a parallel expression detailing all 14026 the stores to the various memory addresses so that debug 14027 information is more up-to-date. Remember however while writing 14028 this to take care of the constraints with the push instruction. 14029 14030 Note also that this has to be taken care of for the VFP registers. 14031 14032 For more see PR43399. 
*/ 14033 14034 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs)); 14035 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1)); 14036 dwarf_par_index = 1; 14037 14038 for (i = 0; i <= LAST_ARM_REGNUM; i++) 14039 { 14040 if (mask & (1 << i)) 14041 { 14042 reg = gen_rtx_REG (SImode, i); 14043 14044 XVECEXP (par, 0, 0) 14045 = gen_rtx_SET (VOIDmode, 14046 gen_frame_mem 14047 (BLKmode, 14048 gen_rtx_PRE_MODIFY (Pmode, 14049 stack_pointer_rtx, 14050 plus_constant 14051 (stack_pointer_rtx, 14052 -4 * num_regs)) 14053 ), 14054 gen_rtx_UNSPEC (BLKmode, 14055 gen_rtvec (1, reg), 14056 UNSPEC_PUSH_MULT)); 14057 14058 if (i != PC_REGNUM) 14059 { 14060 tmp = gen_rtx_SET (VOIDmode, 14061 gen_frame_mem (SImode, stack_pointer_rtx), 14062 reg); 14063 RTX_FRAME_RELATED_P (tmp) = 1; 14064 XVECEXP (dwarf, 0, dwarf_par_index) = tmp; 14065 dwarf_par_index++; 14066 } 14067 14068 break; 14069 } 14070 } 14071 14072 for (j = 1, i++; j < num_regs; i++) 14073 { 14074 if (mask & (1 << i)) 14075 { 14076 reg = gen_rtx_REG (SImode, i); 14077 14078 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg); 14079 14080 if (i != PC_REGNUM) 14081 { 14082 tmp 14083 = gen_rtx_SET (VOIDmode, 14084 gen_frame_mem 14085 (SImode, 14086 plus_constant (stack_pointer_rtx, 14087 4 * j)), 14088 reg); 14089 RTX_FRAME_RELATED_P (tmp) = 1; 14090 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp; 14091 } 14092 14093 j++; 14094 } 14095 } 14096 14097 par = emit_insn (par); 14098 14099 tmp = gen_rtx_SET (VOIDmode, 14100 stack_pointer_rtx, 14101 plus_constant (stack_pointer_rtx, -4 * num_regs)); 14102 RTX_FRAME_RELATED_P (tmp) = 1; 14103 XVECEXP (dwarf, 0, 0) = tmp; 14104 14105 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); 14106 14107 return par; 14108 } 14109 14110 /* Calculate the size of the return value that is passed in registers. 
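For example: a function returning a 64-bit long long (DImode, in r0/r1) gives 8, while one returning a plain int gives 4.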
*/ 14111 static unsigned 14112 arm_size_return_regs (void) 14113 { 14114 enum machine_mode mode; 14115 14116 if (crtl->return_rtx != 0) 14117 mode = GET_MODE (crtl->return_rtx); 14118 else 14119 mode = DECL_MODE (DECL_RESULT (current_function_decl)); 14120 14121 return GET_MODE_SIZE (mode); 14122 } 14123 14124 static rtx 14125 emit_sfm (int base_reg, int count) 14126 { 14127 rtx par; 14128 rtx dwarf; 14129 rtx tmp, reg; 14130 int i; 14131 14132 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); 14133 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1)); 14134 14135 reg = gen_rtx_REG (XFmode, base_reg++); 14136 14137 XVECEXP (par, 0, 0) 14138 = gen_rtx_SET (VOIDmode, 14139 gen_frame_mem 14140 (BLKmode, 14141 gen_rtx_PRE_MODIFY (Pmode, 14142 stack_pointer_rtx, 14143 plus_constant 14144 (stack_pointer_rtx, 14145 -12 * count)) 14146 ), 14147 gen_rtx_UNSPEC (BLKmode, 14148 gen_rtvec (1, reg), 14149 UNSPEC_PUSH_MULT)); 14150 tmp = gen_rtx_SET (VOIDmode, 14151 gen_frame_mem (XFmode, stack_pointer_rtx), reg); 14152 RTX_FRAME_RELATED_P (tmp) = 1; 14153 XVECEXP (dwarf, 0, 1) = tmp; 14154 14155 for (i = 1; i < count; i++) 14156 { 14157 reg = gen_rtx_REG (XFmode, base_reg++); 14158 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg); 14159 14160 tmp = gen_rtx_SET (VOIDmode, 14161 gen_frame_mem (XFmode, 14162 plus_constant (stack_pointer_rtx, 14163 i * 12)), 14164 reg); 14165 RTX_FRAME_RELATED_P (tmp) = 1; 14166 XVECEXP (dwarf, 0, i + 1) = tmp; 14167 } 14168 14169 tmp = gen_rtx_SET (VOIDmode, 14170 stack_pointer_rtx, 14171 plus_constant (stack_pointer_rtx, -12 * count)); 14172 14173 RTX_FRAME_RELATED_P (tmp) = 1; 14174 XVECEXP (dwarf, 0, 0) = tmp; 14175 14176 par = emit_insn (par); 14177 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); 14178 14179 return par; 14180 } 14181 14182 14183 /* Return true if the current function needs to save/restore LR. */ 14184 14185 static bool 14186 thumb_force_lr_save (void) 14187 { 14188 return !cfun->machine->lr_save_eliminated 14189 && (!leaf_function_p () 14190 || thumb_far_jump_used_p () 14191 || df_regs_ever_live_p (LR_REGNUM)); 14192 } 14193 14194 14195 /* Compute the distance from register FROM to register TO. 14196 These can be the arg pointer (26), the soft frame pointer (25), 14197 the stack pointer (13) or the hard frame pointer (11). 14198 In thumb mode r7 is used as the soft frame pointer, if needed. 14199 Typical stack layout looks like this: 14200 14201 old stack pointer -> | | 14202 ---- 14203 | | \ 14204 | | saved arguments for 14205 | | vararg functions 14206 | | / 14207 -- 14208 hard FP & arg pointer -> | | \ 14209 | | stack 14210 | | frame 14211 | | / 14212 -- 14213 | | \ 14214 | | call saved 14215 | | registers 14216 soft frame pointer -> | | / 14217 -- 14218 | | \ 14219 | | local 14220 | | variables 14221 locals base pointer -> | | / 14222 -- 14223 | | \ 14224 | | outgoing 14225 | | arguments 14226 current stack pointer -> | | / 14227 -- 14228 14229 For a given function some or all of these stack components 14230 may not be needed, giving rise to the possibility of 14231 eliminating some of the registers. 14232 14233 The values returned by this function must reflect the behavior 14234 of arm_expand_prologue() and arm_compute_save_reg_mask(). 14235 14236 The sign of the number returned reflects the direction of stack 14237 growth, so the values are positive for all eliminations except 14238 from the soft frame pointer to the hard frame pointer. 
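As an illustration (not part of the original comment): with nothing pushed at all, eliminating the arg pointer into the stack pointer yields outgoing_args - (saved_args + 4), i.e. -4, exactly as noted in arm_compute_initial_elimination_offset below.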
14239 14240 SFP may point just inside the local variables block to ensure correct 14241 alignment. */ 14242 14243 14244 /* Calculate stack offsets. These are used to calculate register elimination 14245 offsets and in prologue/epilogue code. Also calculates which registers 14246 should be saved. */ 14247 14248 static arm_stack_offsets * 14249 arm_get_frame_offsets (void) 14250 { 14251 struct arm_stack_offsets *offsets; 14252 unsigned long func_type; 14253 int leaf; 14254 int saved; 14255 int core_saved; 14256 HOST_WIDE_INT frame_size; 14257 int i; 14258 14259 offsets = &cfun->machine->stack_offsets; 14260 14261 /* We need to know if we are a leaf function. Unfortunately, it 14262 is possible to be called after start_sequence has been called, 14263 which causes get_insns to return the insns for the sequence, 14264 not the function, which will cause leaf_function_p to return 14265 the incorrect result. 14266 14267 Fortunately, we only need to know about leaf functions once reload has completed, and the 14268 frame size cannot be changed after that time, so we can safely 14269 use the cached value. */ 14270 14271 if (reload_completed) 14272 return offsets; 14273 14274 /* Initially this is the size of the local variables. It will be translated 14275 into an offset once we have determined the size of preceding data. */ 14276 frame_size = ROUND_UP_WORD (get_frame_size ()); 14277 14278 leaf = leaf_function_p (); 14279 14280 /* Space for variadic functions. */ 14281 offsets->saved_args = crtl->args.pretend_args_size; 14282 14283 /* In Thumb mode this is incorrect, but never used. */ 14284 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) + 14285 arm_compute_static_chain_stack_bytes(); 14286 14287 if (TARGET_32BIT) 14288 { 14289 unsigned int regno; 14290 14291 offsets->saved_regs_mask = arm_compute_save_reg_mask (); 14292 core_saved = bit_count (offsets->saved_regs_mask) * 4; 14293 saved = core_saved; 14294 14295 /* We know that SP will be doubleword aligned on entry, and we must 14296 preserve that condition at any subroutine call. We also require the 14297 soft frame pointer to be doubleword aligned. */ 14298 14299 if (TARGET_REALLY_IWMMXT) 14300 { 14301 /* Check for the call-saved iWMMXt registers. */ 14302 for (regno = FIRST_IWMMXT_REGNUM; 14303 regno <= LAST_IWMMXT_REGNUM; 14304 regno++) 14305 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) 14306 saved += 8; 14307 } 14308 14309 func_type = arm_current_func_type (); 14310 if (! IS_VOLATILE (func_type)) 14311 { 14312 /* Space for saved FPA registers. */ 14313 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++) 14314 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) 14315 saved += 12; 14316 14317 /* Space for saved VFP registers. */ 14318 if (TARGET_HARD_FLOAT && TARGET_VFP) 14319 saved += arm_get_vfp_saved_size (); 14320 } 14321 } 14322 else /* TARGET_THUMB1 */ 14323 { 14324 offsets->saved_regs_mask = thumb1_compute_save_reg_mask (); 14325 core_saved = bit_count (offsets->saved_regs_mask) * 4; 14326 saved = core_saved; 14327 if (TARGET_BACKTRACE) 14328 saved += 16; 14329 } 14330 14331 /* Saved registers include the stack frame. */ 14332 offsets->saved_regs = offsets->saved_args + saved + 14333 arm_compute_static_chain_stack_bytes(); 14334 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE; 14335 /* A leaf function does not need any stack alignment if it has nothing 14336 on the stack.
*/ 14337 if (leaf && frame_size == 0) 14338 { 14339 offsets->outgoing_args = offsets->soft_frame; 14340 offsets->locals_base = offsets->soft_frame; 14341 return offsets; 14342 } 14343 14344 /* Ensure SFP has the correct alignment. */ 14345 if (ARM_DOUBLEWORD_ALIGN 14346 && (offsets->soft_frame & 7)) 14347 { 14348 offsets->soft_frame += 4; 14349 /* Try to align stack by pushing an extra reg. Don't bother doing this 14350 when there is a stack frame as the alignment will be rolled into 14351 the normal stack adjustment. */ 14352 if (frame_size + crtl->outgoing_args_size == 0) 14353 { 14354 int reg = -1; 14355 14356 /* If it is safe to use r3, then do so. This sometimes 14357 generates better code on Thumb-2 by avoiding the need to 14358 use 32-bit push/pop instructions. */ 14359 if (!crtl->tail_call_emit 14360 && arm_size_return_regs () <= 12 14361 && (offsets->saved_regs_mask & (1 << 3)) == 0) 14362 { 14363 reg = 3; 14364 } 14365 else 14366 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) 14367 { 14368 if ((offsets->saved_regs_mask & (1 << i)) == 0) 14369 { 14370 reg = i; 14371 break; 14372 } 14373 } 14374 14375 if (reg != -1) 14376 { 14377 offsets->saved_regs += 4; 14378 offsets->saved_regs_mask |= (1 << reg); 14379 } 14380 } 14381 } 14382 14383 offsets->locals_base = offsets->soft_frame + frame_size; 14384 offsets->outgoing_args = (offsets->locals_base 14385 + crtl->outgoing_args_size); 14386 14387 if (ARM_DOUBLEWORD_ALIGN) 14388 { 14389 /* Ensure SP remains doubleword aligned. */ 14390 if (offsets->outgoing_args & 7) 14391 offsets->outgoing_args += 4; 14392 gcc_assert (!(offsets->outgoing_args & 7)); 14393 } 14394 14395 return offsets; 14396 } 14397 14398 14399 /* Calculate the relative offsets for the different stack pointers. Positive 14400 offsets are in the direction of stack growth. */ 14401 14402 HOST_WIDE_INT 14403 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to) 14404 { 14405 arm_stack_offsets *offsets; 14406 14407 offsets = arm_get_frame_offsets (); 14408 14409 /* OK, now we have enough information to compute the distances. 14410 There must be an entry in these switch tables for each pair 14411 of registers in ELIMINABLE_REGS, even if some of the entries 14412 seem to be redundant or useless. */ 14413 switch (from) 14414 { 14415 case ARG_POINTER_REGNUM: 14416 switch (to) 14417 { 14418 case THUMB_HARD_FRAME_POINTER_REGNUM: 14419 return 0; 14420 14421 case FRAME_POINTER_REGNUM: 14422 /* This is the reverse of the soft frame pointer 14423 to hard frame pointer elimination below. */ 14424 return offsets->soft_frame - offsets->saved_args; 14425 14426 case ARM_HARD_FRAME_POINTER_REGNUM: 14427 /* This is only non-zero in the case where the static chain register 14428 is stored above the frame. */ 14429 return offsets->frame - offsets->saved_args - 4; 14430 14431 case STACK_POINTER_REGNUM: 14432 /* If nothing has been pushed on the stack at all 14433 then this will return -4. This *is* correct! */ 14434 return offsets->outgoing_args - (offsets->saved_args + 4); 14435 14436 default: 14437 gcc_unreachable (); 14438 } 14439 gcc_unreachable (); 14440 14441 case FRAME_POINTER_REGNUM: 14442 switch (to) 14443 { 14444 case THUMB_HARD_FRAME_POINTER_REGNUM: 14445 return 0; 14446 14447 case ARM_HARD_FRAME_POINTER_REGNUM: 14448 /* The hard frame pointer points to the top entry in the 14449 stack frame. The soft frame pointer to the bottom entry 14450 in the stack frame. If there is no stack frame at all, 14451 then they are identical. 
*/ 14452 14453 return offsets->frame - offsets->soft_frame; 14454 14455 case STACK_POINTER_REGNUM: 14456 return offsets->outgoing_args - offsets->soft_frame; 14457 14458 default: 14459 gcc_unreachable (); 14460 } 14461 gcc_unreachable (); 14462 14463 default: 14464 /* You cannot eliminate from the stack pointer. 14465 In theory you could eliminate from the hard frame 14466 pointer to the stack pointer, but this will never 14467 happen, since if a stack frame is not needed the 14468 hard frame pointer will never be used. */ 14469 gcc_unreachable (); 14470 } 14471 } 14472 14473 /* Given FROM and TO register numbers, say whether this elimination is 14474 allowed. Frame pointer elimination is automatically handled. 14475 14476 All eliminations are permissible. Note that ARG_POINTER_REGNUM and 14477 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame 14478 pointer, we must eliminate FRAME_POINTER_REGNUM into 14479 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or 14480 ARG_POINTER_REGNUM. */ 14481 14482 bool 14483 arm_can_eliminate (const int from, const int to) 14484 { 14485 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false : 14486 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false : 14487 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false : 14488 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false : 14489 true); 14490 } 14491 14492 /* Emit RTL to save coprocessor registers on function entry. Returns the 14493 number of bytes pushed. */ 14494 14495 static int 14496 arm_save_coproc_regs(void) 14497 { 14498 int saved_size = 0; 14499 unsigned reg; 14500 unsigned start_reg; 14501 rtx insn; 14502 14503 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--) 14504 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg]) 14505 { 14506 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); 14507 insn = gen_rtx_MEM (V2SImode, insn); 14508 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg)); 14509 RTX_FRAME_RELATED_P (insn) = 1; 14510 saved_size += 8; 14511 } 14512 14513 /* Save any floating point call-saved registers used by this 14514 function. 
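For instance (illustrative): FPA registers are stored 12 bytes apiece using sfm groups of up to four (48 bytes per full group), while VFP d-registers are pushed in contiguous even/odd pairs via vfp_emit_fstmd.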
*/ 14515 if (TARGET_FPA_EMU2) 14516 { 14517 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) 14518 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) 14519 { 14520 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); 14521 insn = gen_rtx_MEM (XFmode, insn); 14522 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg)); 14523 RTX_FRAME_RELATED_P (insn) = 1; 14524 saved_size += 12; 14525 } 14526 } 14527 else 14528 { 14529 start_reg = LAST_FPA_REGNUM; 14530 14531 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) 14532 { 14533 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) 14534 { 14535 if (start_reg - reg == 3) 14536 { 14537 insn = emit_sfm (reg, 4); 14538 RTX_FRAME_RELATED_P (insn) = 1; 14539 saved_size += 48; 14540 start_reg = reg - 1; 14541 } 14542 } 14543 else 14544 { 14545 if (start_reg != reg) 14546 { 14547 insn = emit_sfm (reg + 1, start_reg - reg); 14548 RTX_FRAME_RELATED_P (insn) = 1; 14549 saved_size += (start_reg - reg) * 12; 14550 } 14551 start_reg = reg - 1; 14552 } 14553 } 14554 14555 if (start_reg != reg) 14556 { 14557 insn = emit_sfm (reg + 1, start_reg - reg); 14558 saved_size += (start_reg - reg) * 12; 14559 RTX_FRAME_RELATED_P (insn) = 1; 14560 } 14561 } 14562 if (TARGET_HARD_FLOAT && TARGET_VFP) 14563 { 14564 start_reg = FIRST_VFP_REGNUM; 14565 14566 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2) 14567 { 14568 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) 14569 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1])) 14570 { 14571 if (start_reg != reg) 14572 saved_size += vfp_emit_fstmd (start_reg, 14573 (reg - start_reg) / 2); 14574 start_reg = reg + 2; 14575 } 14576 } 14577 if (start_reg != reg) 14578 saved_size += vfp_emit_fstmd (start_reg, 14579 (reg - start_reg) / 2); 14580 } 14581 return saved_size; 14582 } 14583 14584 14585 /* Set the Thumb frame pointer from the stack pointer. */ 14586 14587 static void 14588 thumb_set_frame_pointer (arm_stack_offsets *offsets) 14589 { 14590 HOST_WIDE_INT amount; 14591 rtx insn, dwarf; 14592 14593 amount = offsets->outgoing_args - offsets->locals_base; 14594 if (amount < 1024) 14595 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, 14596 stack_pointer_rtx, GEN_INT (amount))); 14597 else 14598 { 14599 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount))); 14600 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1 14601 expects the first two operands to be the same. */ 14602 if (TARGET_THUMB2) 14603 { 14604 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, 14605 stack_pointer_rtx, 14606 hard_frame_pointer_rtx)); 14607 } 14608 else 14609 { 14610 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, 14611 hard_frame_pointer_rtx, 14612 stack_pointer_rtx)); 14613 } 14614 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, 14615 plus_constant (stack_pointer_rtx, amount)); 14616 RTX_FRAME_RELATED_P (dwarf) = 1; 14617 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); 14618 } 14619 14620 RTX_FRAME_RELATED_P (insn) = 1; 14621 } 14622 14623 /* Generate the prologue instructions for entry into an ARM or Thumb-2 14624 function. 
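A typical APCS-frame prologue produced here looks like (an illustrative sketch; compare the arm_poke_function_name comment above): mov ip, sp; stmfd sp!, {fp, ip, lr, pc}; sub fp, ip, #4; followed by a sub of sp for any locals and outgoing arguments.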
*/ 14625 void 14626 arm_expand_prologue (void) 14627 { 14628 rtx amount; 14629 rtx insn; 14630 rtx ip_rtx; 14631 unsigned long live_regs_mask; 14632 unsigned long func_type; 14633 int fp_offset = 0; 14634 int saved_pretend_args = 0; 14635 int saved_regs = 0; 14636 unsigned HOST_WIDE_INT args_to_push; 14637 arm_stack_offsets *offsets; 14638 14639 func_type = arm_current_func_type (); 14640 14641 /* Naked functions don't have prologues. */ 14642 if (IS_NAKED (func_type)) 14643 return; 14644 14645 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */ 14646 args_to_push = crtl->args.pretend_args_size; 14647 14648 /* Compute which register we will have to save onto the stack. */ 14649 offsets = arm_get_frame_offsets (); 14650 live_regs_mask = offsets->saved_regs_mask; 14651 14652 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM); 14653 14654 if (IS_STACKALIGN (func_type)) 14655 { 14656 rtx dwarf; 14657 rtx r0; 14658 rtx r1; 14659 /* Handle a word-aligned stack pointer. We generate the following: 14660 14661 mov r0, sp 14662 bic r1, r0, #7 14663 mov sp, r1 14664 <save and restore r0 in normal prologue/epilogue> 14665 mov sp, r0 14666 bx lr 14667 14668 The unwinder doesn't need to know about the stack realignment. 14669 Just tell it we saved SP in r0. */ 14670 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0); 14671 14672 r0 = gen_rtx_REG (SImode, 0); 14673 r1 = gen_rtx_REG (SImode, 1); 14674 /* Use a real rtvec rather than NULL_RTVEC so the rest of the 14675 compiler won't choke. */ 14676 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN); 14677 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf); 14678 insn = gen_movsi (r0, stack_pointer_rtx); 14679 RTX_FRAME_RELATED_P (insn) = 1; 14680 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); 14681 emit_insn (insn); 14682 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7))); 14683 emit_insn (gen_movsi (stack_pointer_rtx, r1)); 14684 } 14685 14686 /* For APCS frames, if IP register is clobbered 14687 when creating frame, save that register in a special 14688 way. */ 14689 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) 14690 { 14691 if (IS_INTERRUPT (func_type)) 14692 { 14693 /* Interrupt functions must not corrupt any registers. 14694 Creating a frame pointer however, corrupts the IP 14695 register, so we must push it first. */ 14696 insn = emit_multi_reg_push (1 << IP_REGNUM); 14697 14698 /* Do not set RTX_FRAME_RELATED_P on this insn. 14699 The dwarf stack unwinding code only wants to see one 14700 stack decrement per function, and this is not it. If 14701 this instruction is labeled as being part of the frame 14702 creation sequence then dwarf2out_frame_debug_expr will 14703 die when it encounters the assignment of IP to FP 14704 later on, since the use of SP here establishes SP as 14705 the CFA register and not IP. 14706 14707 Anyway this instruction is not really part of the stack 14708 frame creation although it is part of the prologue. */ 14709 } 14710 else if (IS_NESTED (func_type)) 14711 { 14712 /* The Static chain register is the same as the IP register 14713 used as a scratch register during stack frame creation. 14714 To get around this need to find somewhere to store IP 14715 whilst the frame is being created. We try the following 14716 places in order: 14717 14718 1. The last argument register. 14719 2. A slot on the stack above the frame. (This only 14720 works if the function is not a varargs function). 14721 3. Register r3, after pushing the argument registers 14722 onto the stack. 
14723 14724 Note - we only need to tell the dwarf2 backend about the SP 14725 adjustment in the second variant; the static chain register 14726 doesn't need to be unwound, as it doesn't contain a value 14727 inherited from the caller. */ 14728 14729 if (df_regs_ever_live_p (3) == false) 14730 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx); 14731 else if (args_to_push == 0) 14732 { 14733 rtx dwarf; 14734 14735 gcc_assert(arm_compute_static_chain_stack_bytes() == 4); 14736 saved_regs += 4; 14737 14738 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx); 14739 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx); 14740 fp_offset = 4; 14741 14742 /* Just tell the dwarf backend that we adjusted SP. */ 14743 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, 14744 plus_constant (stack_pointer_rtx, 14745 -fp_offset)); 14746 RTX_FRAME_RELATED_P (insn) = 1; 14747 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); 14748 } 14749 else 14750 { 14751 /* Store the args on the stack. */ 14752 if (cfun->machine->uses_anonymous_args) 14753 insn = emit_multi_reg_push 14754 ((0xf0 >> (args_to_push / 4)) & 0xf); 14755 else 14756 insn = emit_insn 14757 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, 14758 GEN_INT (- args_to_push))); 14759 14760 RTX_FRAME_RELATED_P (insn) = 1; 14761 14762 saved_pretend_args = 1; 14763 fp_offset = args_to_push; 14764 args_to_push = 0; 14765 14766 /* Now reuse r3 to preserve IP. */ 14767 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx); 14768 } 14769 } 14770 14771 insn = emit_set_insn (ip_rtx, 14772 plus_constant (stack_pointer_rtx, fp_offset)); 14773 RTX_FRAME_RELATED_P (insn) = 1; 14774 } 14775 14776 if (args_to_push) 14777 { 14778 /* Push the argument registers, or reserve space for them. */ 14779 if (cfun->machine->uses_anonymous_args) 14780 insn = emit_multi_reg_push 14781 ((0xf0 >> (args_to_push / 4)) & 0xf); 14782 else 14783 insn = emit_insn 14784 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, 14785 GEN_INT (- args_to_push))); 14786 RTX_FRAME_RELATED_P (insn) = 1; 14787 } 14788 14789 /* If this is an interrupt service routine, and the link register 14790 is going to be pushed, and we're not generating extra 14791 push of IP (needed when frame is needed and frame layout if apcs), 14792 subtracting four from LR now will mean that the function return 14793 can be done with a single instruction. */ 14794 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ) 14795 && (live_regs_mask & (1 << LR_REGNUM)) != 0 14796 && !(frame_pointer_needed && TARGET_APCS_FRAME) 14797 && TARGET_ARM) 14798 { 14799 rtx lr = gen_rtx_REG (SImode, LR_REGNUM); 14800 14801 emit_set_insn (lr, plus_constant (lr, -4)); 14802 } 14803 14804 if (live_regs_mask) 14805 { 14806 saved_regs += bit_count (live_regs_mask) * 4; 14807 if (optimize_size && !frame_pointer_needed 14808 && saved_regs == offsets->saved_regs - offsets->saved_args) 14809 { 14810 /* If no coprocessor registers are being pushed and we don't have 14811 to worry about a frame pointer then push extra registers to 14812 create the stack frame. This is done is a way that does not 14813 alter the frame layout, so is independent of the epilogue. 
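For example (illustrative): if the remaining frame is 8 bytes and r0 and r1 are not otherwise needed, they are added to live_regs_mask so that the push itself creates the frame and no separate "sub sp, sp, #8" is required.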
*/ 14814 int n; 14815 int frame; 14816 n = 0; 14817 while (n < 8 && (live_regs_mask & (1 << n)) == 0) 14818 n++; 14819 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs); 14820 if (frame && n * 4 >= frame) 14821 { 14822 n = frame / 4; 14823 live_regs_mask |= (1 << n) - 1; 14824 saved_regs += frame; 14825 } 14826 } 14827 insn = emit_multi_reg_push (live_regs_mask); 14828 RTX_FRAME_RELATED_P (insn) = 1; 14829 } 14830 14831 if (! IS_VOLATILE (func_type)) 14832 saved_regs += arm_save_coproc_regs (); 14833 14834 if (frame_pointer_needed && TARGET_ARM) 14835 { 14836 /* Create the new frame pointer. */ 14837 if (TARGET_APCS_FRAME) 14838 { 14839 insn = GEN_INT (-(4 + args_to_push + fp_offset)); 14840 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn)); 14841 RTX_FRAME_RELATED_P (insn) = 1; 14842 14843 if (IS_NESTED (func_type)) 14844 { 14845 /* Recover the static chain register. */ 14846 if (!df_regs_ever_live_p (3) 14847 || saved_pretend_args) 14848 insn = gen_rtx_REG (SImode, 3); 14849 else /* if (crtl->args.pretend_args_size == 0) */ 14850 { 14851 insn = plus_constant (hard_frame_pointer_rtx, 4); 14852 insn = gen_frame_mem (SImode, insn); 14853 } 14854 emit_set_insn (ip_rtx, insn); 14855 /* Add a USE to stop propagate_one_insn() from barfing. */ 14856 emit_insn (gen_prologue_use (ip_rtx)); 14857 } 14858 } 14859 else 14860 { 14861 insn = GEN_INT (saved_regs - 4); 14862 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, 14863 stack_pointer_rtx, insn)); 14864 RTX_FRAME_RELATED_P (insn) = 1; 14865 } 14866 } 14867 14868 if (offsets->outgoing_args != offsets->saved_args + saved_regs) 14869 { 14870 /* This add can produce multiple insns for a large constant, so we 14871 need to get tricky. */ 14872 rtx last = get_last_insn (); 14873 14874 amount = GEN_INT (offsets->saved_args + saved_regs 14875 - offsets->outgoing_args); 14876 14877 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, 14878 amount)); 14879 do 14880 { 14881 last = last ? NEXT_INSN (last) : get_insns (); 14882 RTX_FRAME_RELATED_P (last) = 1; 14883 } 14884 while (last != insn); 14885 14886 /* If the frame pointer is needed, emit a special barrier that 14887 will prevent the scheduler from moving stores to the frame 14888 before the stack adjustment. */ 14889 if (frame_pointer_needed) 14890 insn = emit_insn (gen_stack_tie (stack_pointer_rtx, 14891 hard_frame_pointer_rtx)); 14892 } 14893 14894 14895 if (frame_pointer_needed && TARGET_THUMB2) 14896 thumb_set_frame_pointer (offsets); 14897 14898 if (flag_pic && arm_pic_register != INVALID_REGNUM) 14899 { 14900 unsigned long mask; 14901 14902 mask = live_regs_mask; 14903 mask &= THUMB2_WORK_REGS; 14904 if (!IS_NESTED (func_type)) 14905 mask |= (1 << IP_REGNUM); 14906 arm_load_pic_register (mask); 14907 } 14908 14909 /* If we are profiling, make sure no instructions are scheduled before 14910 the call to mcount. Similarly if the user has requested no 14911 scheduling in the prolog. Similarly if we want non-call exceptions 14912 using the EABI unwinder, to prevent faulting instructions from being 14913 swapped with a stack adjustment. */ 14914 if (crtl->profile || !TARGET_SCHED_PROLOG 14915 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions)) 14916 emit_insn (gen_blockage ()); 14917 14918 /* If the link register is being kept alive, with the return address in it, 14919 then make sure that it does not get reused by the ce2 pass. 
*/ 14920 if ((live_regs_mask & (1 << LR_REGNUM)) == 0) 14921 cfun->machine->lr_save_eliminated = 1; 14922 } 14923 14924 /* Print condition code to STREAM. Helper function for arm_print_operand. */ 14925 static void 14926 arm_print_condition (FILE *stream) 14927 { 14928 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4) 14929 { 14930 /* Branch conversion is not implemented for Thumb-2. */ 14931 if (TARGET_THUMB) 14932 { 14933 output_operand_lossage ("predicated Thumb instruction"); 14934 return; 14935 } 14936 if (current_insn_predicate != NULL) 14937 { 14938 output_operand_lossage 14939 ("predicated instruction in conditional sequence"); 14940 return; 14941 } 14942 14943 fputs (arm_condition_codes[arm_current_cc], stream); 14944 } 14945 else if (current_insn_predicate) 14946 { 14947 enum arm_cond_code code; 14948 14949 if (TARGET_THUMB1) 14950 { 14951 output_operand_lossage ("predicated Thumb instruction"); 14952 return; 14953 } 14954 14955 code = get_arm_condition_code (current_insn_predicate); 14956 fputs (arm_condition_codes[code], stream); 14957 } 14958 } 14959 14960 14961 /* If CODE is 'd', then the X is a condition operand and the instruction 14962 should only be executed if the condition is true. 14963 if CODE is 'D', then the X is a condition operand and the instruction 14964 should only be executed if the condition is false: however, if the mode 14965 of the comparison is CCFPEmode, then always execute the instruction -- we 14966 do this because in these circumstances !GE does not necessarily imply LT; 14967 in these cases the instruction pattern will take care to make sure that 14968 an instruction containing %d will follow, thereby undoing the effects of 14969 doing this instruction unconditionally. 14970 If CODE is 'N' then X is a floating point operand that must be negated 14971 before output. 14972 If CODE is 'B' then output a bitwise inverted value of X (a const int). 14973 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */ 14974 void 14975 arm_print_operand (FILE *stream, rtx x, int code) 14976 { 14977 switch (code) 14978 { 14979 case '@': 14980 fputs (ASM_COMMENT_START, stream); 14981 return; 14982 14983 case '_': 14984 fputs (user_label_prefix, stream); 14985 return; 14986 14987 case '|': 14988 fputs (REGISTER_PREFIX, stream); 14989 return; 14990 14991 case '?': 14992 arm_print_condition (stream); 14993 return; 14994 14995 case '(': 14996 /* Nothing in unified syntax, otherwise the current condition code. */ 14997 if (!TARGET_UNIFIED_ASM) 14998 arm_print_condition (stream); 14999 break; 15000 15001 case ')': 15002 /* The current condition code in unified syntax, otherwise nothing. */ 15003 if (TARGET_UNIFIED_ASM) 15004 arm_print_condition (stream); 15005 break; 15006 15007 case '.': 15008 /* The current condition code for a condition code setting instruction. 15009 Preceded by 's' in unified syntax, otherwise followed by 's'. */ 15010 if (TARGET_UNIFIED_ASM) 15011 { 15012 fputc('s', stream); 15013 arm_print_condition (stream); 15014 } 15015 else 15016 { 15017 arm_print_condition (stream); 15018 fputc('s', stream); 15019 } 15020 return; 15021 15022 case '!': 15023 /* If the instruction is conditionally executed then print 15024 the current condition code, otherwise print 's'. */ 15025 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM); 15026 if (current_insn_predicate) 15027 arm_print_condition (stream); 15028 else 15029 fputc('s', stream); 15030 break; 15031 15032 /* %# is a "break" sequence. It doesn't output anything, but is used to 15033 separate e.g. 
operand numbers from following text, if that text consists 15034 of further digits which we don't want to be part of the operand 15035 number. */ 15036 case '#': 15037 return; 15038 15039 case 'N': 15040 { 15041 REAL_VALUE_TYPE r; 15042 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 15043 r = REAL_VALUE_NEGATE (r); 15044 fprintf (stream, "%s", fp_const_from_val (&r)); 15045 } 15046 return; 15047 15048 /* An integer or symbol address without a preceding # sign. */ 15049 case 'c': 15050 switch (GET_CODE (x)) 15051 { 15052 case CONST_INT: 15053 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 15054 break; 15055 15056 case SYMBOL_REF: 15057 output_addr_const (stream, x); 15058 break; 15059 15060 default: 15061 gcc_unreachable (); 15062 } 15063 return; 15064 15065 case 'B': 15066 if (GET_CODE (x) == CONST_INT) 15067 { 15068 HOST_WIDE_INT val; 15069 val = ARM_SIGN_EXTEND (~INTVAL (x)); 15070 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val); 15071 } 15072 else 15073 { 15074 putc ('~', stream); 15075 output_addr_const (stream, x); 15076 } 15077 return; 15078 15079 case 'L': 15080 /* The low 16 bits of an immediate constant. */ 15081 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff); 15082 return; 15083 15084 case 'i': 15085 fprintf (stream, "%s", arithmetic_instr (x, 1)); 15086 return; 15087 15088 /* Truncate Cirrus shift counts. */ 15089 case 's': 15090 if (GET_CODE (x) == CONST_INT) 15091 { 15092 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f); 15093 return; 15094 } 15095 arm_print_operand (stream, x, 0); 15096 return; 15097 15098 case 'I': 15099 fprintf (stream, "%s", arithmetic_instr (x, 0)); 15100 return; 15101 15102 case 'S': 15103 { 15104 HOST_WIDE_INT val; 15105 const char *shift; 15106 15107 if (!shift_operator (x, SImode)) 15108 { 15109 output_operand_lossage ("invalid shift operand"); 15110 break; 15111 } 15112 15113 shift = shift_op (x, &val); 15114 15115 if (shift) 15116 { 15117 fprintf (stream, ", %s ", shift); 15118 if (val == -1) 15119 arm_print_operand (stream, XEXP (x, 1), 0); 15120 else 15121 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val); 15122 } 15123 } 15124 return; 15125 15126 /* An explanation of the 'Q', 'R' and 'H' register operands: 15127 15128 In a pair of registers containing a DI or DF value the 'Q' 15129 operand returns the register number of the register containing 15130 the least significant part of the value. The 'R' operand returns 15131 the register number of the register containing the most 15132 significant part of the value. 15133 15134 The 'H' operand returns the higher of the two register numbers. 15135 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the 15136 same as the 'Q' operand, since the most significant part of the 15137 value is held in the lower number register. The reverse is true 15138 on systems where WORDS_BIG_ENDIAN is false. 15139 15140 The purpose of these operands is to distinguish between cases 15141 where the endian-ness of the values is important (for example 15142 when they are added together), and cases where the endian-ness 15143 is irrelevant, but the order of register operations is important. 15144 For example when loading a value from memory into a register 15145 pair, the endian-ness does not matter. Provided that the value 15146 from the lower memory address is put into the lower numbered 15147 register, and the value from the higher address is put into the 15148 higher numbered register, the load will work regardless of whether 15149 the value being loaded is big-wordian or little-wordian. 
The 15150 order of the two register loads can matter however, if the address 15151 of the memory location is actually held in one of the registers 15152 being overwritten by the load. */ 15153 case 'Q': 15154 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) 15155 { 15156 output_operand_lossage ("invalid operand for code '%c'", code); 15157 return; 15158 } 15159 15160 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)); 15161 return; 15162 15163 case 'R': 15164 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) 15165 { 15166 output_operand_lossage ("invalid operand for code '%c'", code); 15167 return; 15168 } 15169 15170 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)); 15171 return; 15172 15173 case 'H': 15174 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) 15175 { 15176 output_operand_lossage ("invalid operand for code '%c'", code); 15177 return; 15178 } 15179 15180 asm_fprintf (stream, "%r", REGNO (x) + 1); 15181 return; 15182 15183 case 'J': 15184 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) 15185 { 15186 output_operand_lossage ("invalid operand for code '%c'", code); 15187 return; 15188 } 15189 15190 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2)); 15191 return; 15192 15193 case 'K': 15194 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) 15195 { 15196 output_operand_lossage ("invalid operand for code '%c'", code); 15197 return; 15198 } 15199 15200 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3)); 15201 return; 15202 15203 case 'm': 15204 asm_fprintf (stream, "%r", 15205 GET_CODE (XEXP (x, 0)) == REG 15206 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0))); 15207 return; 15208 15209 case 'M': 15210 asm_fprintf (stream, "{%r-%r}", 15211 REGNO (x), 15212 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1); 15213 return; 15214 15215 /* Like 'M', but writing doubleword vector registers, for use by Neon 15216 insns. */ 15217 case 'h': 15218 { 15219 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2; 15220 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2; 15221 if (numregs == 1) 15222 asm_fprintf (stream, "{d%d}", regno); 15223 else 15224 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1); 15225 } 15226 return; 15227 15228 case 'd': 15229 /* CONST_TRUE_RTX means always -- that's the default. */ 15230 if (x == const_true_rtx) 15231 return; 15232 15233 if (!COMPARISON_P (x)) 15234 { 15235 output_operand_lossage ("invalid operand for code '%c'", code); 15236 return; 15237 } 15238 15239 fputs (arm_condition_codes[get_arm_condition_code (x)], 15240 stream); 15241 return; 15242 15243 case 'D': 15244 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever 15245 want to do that. */ 15246 if (x == const_true_rtx) 15247 { 15248 output_operand_lossage ("instruction never executed"); 15249 return; 15250 } 15251 if (!COMPARISON_P (x)) 15252 { 15253 output_operand_lossage ("invalid operand for code '%c'", code); 15254 return; 15255 } 15256 15257 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE 15258 (get_arm_condition_code (x))], 15259 stream); 15260 return; 15261 15262 /* Cirrus registers can be accessed in a variety of ways: 15263 single floating point (f) 15264 double floating point (d) 15265 32bit integer (fx) 15266 64bit integer (dx). */ 15267 case 'W': /* Cirrus register in F mode. */ 15268 case 'X': /* Cirrus register in D mode. */ 15269 case 'Y': /* Cirrus register in FX mode. */ 15270 case 'Z': /* Cirrus register in DX mode. 
*/ 15271 gcc_assert (GET_CODE (x) == REG 15272 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS); 15273 15274 fprintf (stream, "mv%s%s", 15275 code == 'W' ? "f" 15276 : code == 'X' ? "d" 15277 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2); 15278 15279 return; 15280 15281 /* Print cirrus register in the mode specified by the register's mode. */ 15282 case 'V': 15283 { 15284 int mode = GET_MODE (x); 15285 15286 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS) 15287 { 15288 output_operand_lossage ("invalid operand for code '%c'", code); 15289 return; 15290 } 15291 15292 fprintf (stream, "mv%s%s", 15293 mode == DFmode ? "d" 15294 : mode == SImode ? "fx" 15295 : mode == DImode ? "dx" 15296 : "f", reg_names[REGNO (x)] + 2); 15297 15298 return; 15299 } 15300 15301 case 'U': 15302 if (GET_CODE (x) != REG 15303 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM 15304 || REGNO (x) > LAST_IWMMXT_GR_REGNUM) 15305 /* Bad value for wCG register number. */ 15306 { 15307 output_operand_lossage ("invalid operand for code '%c'", code); 15308 return; 15309 } 15310 15311 else 15312 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM); 15313 return; 15314 15315 /* Print an iWMMXt control register name. */ 15316 case 'w': 15317 if (GET_CODE (x) != CONST_INT 15318 || INTVAL (x) < 0 15319 || INTVAL (x) >= 16) 15320 /* Bad value for wC register number. */ 15321 { 15322 output_operand_lossage ("invalid operand for code '%c'", code); 15323 return; 15324 } 15325 15326 else 15327 { 15328 static const char * wc_reg_names [16] = 15329 { 15330 "wCID", "wCon", "wCSSF", "wCASF", 15331 "wC4", "wC5", "wC6", "wC7", 15332 "wCGR0", "wCGR1", "wCGR2", "wCGR3", 15333 "wC12", "wC13", "wC14", "wC15" 15334 }; 15335 15336 fprintf (stream, wc_reg_names [INTVAL (x)]); 15337 } 15338 return; 15339 15340 /* Print the high single-precision register of a VFP double-precision 15341 register. */ 15342 case 'p': 15343 { 15344 int mode = GET_MODE (x); 15345 int regno; 15346 15347 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG) 15348 { 15349 output_operand_lossage ("invalid operand for code '%c'", code); 15350 return; 15351 } 15352 15353 regno = REGNO (x); 15354 if (!VFP_REGNO_OK_FOR_DOUBLE (regno)) 15355 { 15356 output_operand_lossage ("invalid operand for code '%c'", code); 15357 return; 15358 } 15359 15360 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1); 15361 } 15362 return; 15363 15364 /* Print a VFP/Neon double precision or quad precision register name. */ 15365 case 'P': 15366 case 'q': 15367 { 15368 int mode = GET_MODE (x); 15369 int is_quad = (code == 'q'); 15370 int regno; 15371 15372 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8)) 15373 { 15374 output_operand_lossage ("invalid operand for code '%c'", code); 15375 return; 15376 } 15377 15378 if (GET_CODE (x) != REG 15379 || !IS_VFP_REGNUM (REGNO (x))) 15380 { 15381 output_operand_lossage ("invalid operand for code '%c'", code); 15382 return; 15383 } 15384 15385 regno = REGNO (x); 15386 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno)) 15387 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno))) 15388 { 15389 output_operand_lossage ("invalid operand for code '%c'", code); 15390 return; 15391 } 15392 15393 fprintf (stream, "%c%d", is_quad ? 'q' : 'd', 15394 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1)); 15395 } 15396 return; 15397 15398 /* These two codes print the low/high doubleword register of a Neon quad 15399 register, respectively. For pair-structure types, can also print 15400 low/high quadword registers. 
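     As an illustrative example: Neon overlays q<n> on d<2n> and d<2n+1>, so
     for a 16-byte value held in q2, %e prints d4 and %f prints d5.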
*/ 15401 case 'e': 15402 case 'f': 15403 { 15404 int mode = GET_MODE (x); 15405 int regno; 15406 15407 if ((GET_MODE_SIZE (mode) != 16 15408 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG) 15409 { 15410 output_operand_lossage ("invalid operand for code '%c'", code); 15411 return; 15412 } 15413 15414 regno = REGNO (x); 15415 if (!NEON_REGNO_OK_FOR_QUAD (regno)) 15416 { 15417 output_operand_lossage ("invalid operand for code '%c'", code); 15418 return; 15419 } 15420 15421 if (GET_MODE_SIZE (mode) == 16) 15422 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1) 15423 + (code == 'f' ? 1 : 0)); 15424 else 15425 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2) 15426 + (code == 'f' ? 1 : 0)); 15427 } 15428 return; 15429 15430 /* Print a VFPv3 floating-point constant, represented as an integer 15431 index. */ 15432 case 'G': 15433 { 15434 int index = vfp3_const_double_index (x); 15435 gcc_assert (index != -1); 15436 fprintf (stream, "%d", index); 15437 } 15438 return; 15439 15440 /* Print bits representing opcode features for Neon. 15441 15442 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed 15443 and polynomials as unsigned. 15444 15445 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers. 15446 15447 Bit 2 is 1 for rounding functions, 0 otherwise. */ 15448 15449 /* Identify the type as 's', 'u', 'p' or 'f'. */ 15450 case 'T': 15451 { 15452 HOST_WIDE_INT bits = INTVAL (x); 15453 fputc ("uspf"[bits & 3], stream); 15454 } 15455 return; 15456 15457 /* Likewise, but signed and unsigned integers are both 'i'. */ 15458 case 'F': 15459 { 15460 HOST_WIDE_INT bits = INTVAL (x); 15461 fputc ("iipf"[bits & 3], stream); 15462 } 15463 return; 15464 15465 /* As for 'T', but emit 'u' instead of 'p'. */ 15466 case 't': 15467 { 15468 HOST_WIDE_INT bits = INTVAL (x); 15469 fputc ("usuf"[bits & 3], stream); 15470 } 15471 return; 15472 15473 /* Bit 2: rounding (vs none). */ 15474 case 'O': 15475 { 15476 HOST_WIDE_INT bits = INTVAL (x); 15477 fputs ((bits & 4) != 0 ? "r" : "", stream); 15478 } 15479 return; 15480 15481 /* Memory operand for vld1/vst1 instruction. */ 15482 case 'A': 15483 { 15484 rtx addr; 15485 bool postinc = FALSE; 15486 gcc_assert (GET_CODE (x) == MEM); 15487 addr = XEXP (x, 0); 15488 if (GET_CODE (addr) == POST_INC) 15489 { 15490 postinc = 1; 15491 addr = XEXP (addr, 0); 15492 } 15493 asm_fprintf (stream, "[%r]", REGNO (addr)); 15494 if (postinc) 15495 fputs("!", stream); 15496 } 15497 return; 15498 15499 /* Translate an S register number into a D register number and element index. */ 15500 case 'y': 15501 { 15502 int mode = GET_MODE (x); 15503 int regno; 15504 15505 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG) 15506 { 15507 output_operand_lossage ("invalid operand for code '%c'", code); 15508 return; 15509 } 15510 15511 regno = REGNO (x); 15512 if (!VFP_REGNO_OK_FOR_SINGLE (regno)) 15513 { 15514 output_operand_lossage ("invalid operand for code '%c'", code); 15515 return; 15516 } 15517 15518 regno = regno - FIRST_VFP_REGNUM; 15519 fprintf (stream, "d%d[%d]", regno / 2, regno % 2); 15520 } 15521 return; 15522 15523 /* Register specifier for vld1.16/vst1.16. Translate the S register 15524 number into a D register number and element index. 
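     For instance (an illustrative case), an HImode operand held in s5 is
     printed as d2[2]: s5 is the high half of d2, and the low 16-bit lane of
     that half is element 2 of d2.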
*/ 15525 case 'z': 15526 { 15527 int mode = GET_MODE (x); 15528 int regno; 15529 15530 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG) 15531 { 15532 output_operand_lossage ("invalid operand for code '%c'", code); 15533 return; 15534 } 15535 15536 regno = REGNO (x); 15537 if (!VFP_REGNO_OK_FOR_SINGLE (regno)) 15538 { 15539 output_operand_lossage ("invalid operand for code '%c'", code); 15540 return; 15541 } 15542 15543 regno = regno - FIRST_VFP_REGNUM; 15544 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0)); 15545 } 15546 return; 15547 15548 default: 15549 if (x == 0) 15550 { 15551 output_operand_lossage ("missing operand"); 15552 return; 15553 } 15554 15555 switch (GET_CODE (x)) 15556 { 15557 case REG: 15558 asm_fprintf (stream, "%r", REGNO (x)); 15559 break; 15560 15561 case MEM: 15562 output_memory_reference_mode = GET_MODE (x); 15563 output_address (XEXP (x, 0)); 15564 break; 15565 15566 case CONST_DOUBLE: 15567 if (TARGET_NEON) 15568 { 15569 char fpstr[20]; 15570 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x), 15571 sizeof (fpstr), 0, 1); 15572 fprintf (stream, "#%s", fpstr); 15573 } 15574 else 15575 fprintf (stream, "#%s", fp_immediate_constant (x)); 15576 break; 15577 15578 default: 15579 gcc_assert (GET_CODE (x) != NEG); 15580 fputc ('#', stream); 15581 if (GET_CODE (x) == HIGH) 15582 { 15583 fputs (":lower16:", stream); 15584 x = XEXP (x, 0); 15585 } 15586 15587 output_addr_const (stream, x); 15588 break; 15589 } 15590 } 15591 } 15592 15593 /* Target hook for assembling integer objects. The ARM version needs to 15594 handle word-sized values specially. */ 15595 static bool 15596 arm_assemble_integer (rtx x, unsigned int size, int aligned_p) 15597 { 15598 enum machine_mode mode; 15599 15600 if (size == UNITS_PER_WORD && aligned_p) 15601 { 15602 fputs ("\t.word\t", asm_out_file); 15603 output_addr_const (asm_out_file, x); 15604 15605 /* Mark symbols as position independent. We only do this in the 15606 .text segment, not in the .data segment. */ 15607 if (NEED_GOT_RELOC && flag_pic && making_const_table && 15608 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)) 15609 { 15610 /* See legitimize_pic_address for an explanation of the 15611 TARGET_VXWORKS_RTP check. */ 15612 if (TARGET_VXWORKS_RTP 15613 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x))) 15614 fputs ("(GOT)", asm_out_file); 15615 else 15616 fputs ("(GOTOFF)", asm_out_file); 15617 } 15618 fputc ('\n', asm_out_file); 15619 return true; 15620 } 15621 15622 mode = GET_MODE (x); 15623 15624 if (arm_vector_mode_supported_p (mode)) 15625 { 15626 int i, units; 15627 15628 gcc_assert (GET_CODE (x) == CONST_VECTOR); 15629 15630 units = CONST_VECTOR_NUNITS (x); 15631 size = GET_MODE_SIZE (GET_MODE_INNER (mode)); 15632 15633 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 15634 for (i = 0; i < units; i++) 15635 { 15636 rtx elt = CONST_VECTOR_ELT (x, i); 15637 assemble_integer 15638 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1); 15639 } 15640 else 15641 for (i = 0; i < units; i++) 15642 { 15643 rtx elt = CONST_VECTOR_ELT (x, i); 15644 REAL_VALUE_TYPE rval; 15645 15646 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt); 15647 15648 assemble_real 15649 (rval, GET_MODE_INNER (mode), 15650 i == 0 ? 
BIGGEST_ALIGNMENT : size * BITS_PER_UNIT); 15651 } 15652 15653 return true; 15654 } 15655 15656 return default_assemble_integer (x, size, aligned_p); 15657 } 15658 15659 static void 15660 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor) 15661 { 15662 section *s; 15663 15664 if (!TARGET_AAPCS_BASED) 15665 { 15666 (is_ctor ? 15667 default_named_section_asm_out_constructor 15668 : default_named_section_asm_out_destructor) (symbol, priority); 15669 return; 15670 } 15671 15672 /* Put these in the .init_array section, using a special relocation. */ 15673 if (priority != DEFAULT_INIT_PRIORITY) 15674 { 15675 char buf[18]; 15676 sprintf (buf, "%s.%.5u", 15677 is_ctor ? ".init_array" : ".fini_array", 15678 priority); 15679 s = get_section (buf, SECTION_WRITE, NULL_TREE); 15680 } 15681 else if (is_ctor) 15682 s = ctors_section; 15683 else 15684 s = dtors_section; 15685 15686 switch_to_section (s); 15687 assemble_align (POINTER_SIZE); 15688 fputs ("\t.word\t", asm_out_file); 15689 output_addr_const (asm_out_file, symbol); 15690 fputs ("(target1)\n", asm_out_file); 15691 } 15692 15693 /* Add a function to the list of static constructors. */ 15694 15695 static void 15696 arm_elf_asm_constructor (rtx symbol, int priority) 15697 { 15698 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true); 15699 } 15700 15701 /* Add a function to the list of static destructors. */ 15702 15703 static void 15704 arm_elf_asm_destructor (rtx symbol, int priority) 15705 { 15706 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false); 15707 } 15708 15709 /* A finite state machine takes care of noticing whether or not instructions 15710 can be conditionally executed, and thus decrease execution time and code 15711 size by deleting branch instructions. The fsm is controlled by 15712 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */ 15713 15714 /* The state of the fsm controlling condition codes are: 15715 0: normal, do nothing special 15716 1: make ASM_OUTPUT_OPCODE not output this instruction 15717 2: make ASM_OUTPUT_OPCODE not output this instruction 15718 3: make instructions conditional 15719 4: make instructions conditional 15720 15721 State transitions (state->state by whom under condition): 15722 0 -> 1 final_prescan_insn if the `target' is a label 15723 0 -> 2 final_prescan_insn if the `target' is an unconditional branch 15724 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch 15725 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch 15726 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached 15727 (the target label has CODE_LABEL_NUMBER equal to arm_target_label). 15728 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached 15729 (the target insn is arm_target_insn). 15730 15731 If the jump clobbers the conditions then we use states 2 and 4. 15732 15733 A similar thing can be done with conditional return insns. 15734 15735 XXX In case the `target' is an unconditional branch, this conditionalising 15736 of the instructions always reduces code size, but not always execution 15737 time. But then, I want to reduce the code size to somewhere near what 15738 /bin/cc produces. */ 15739 15740 /* In addition to this, state is maintained for Thumb-2 COND_EXEC 15741 instructions. When a COND_EXEC instruction is seen the subsequent 15742 instructions are scanned so that multiple conditional instructions can be 15743 combined into a single IT block. 
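   For example (illustrative only), a COND_EXEC insn predicated on EQ
   followed by one predicated on NE can be merged into a two-insn block that
   thumb2_asm_output_opcode below emits as "ite eq".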
   arm_condexec_count and arm_condexec_mask specify the length and true/false
   mask for the IT block.  These will be decremented/zeroed by
   arm_asm_output_opcode as the insns are output.  */

/* Returns the index of the ARM condition code string in
   `arm_condition_codes'.  COMPARISON should be an rtx like
   `(eq (...) (...))'.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
                           XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      gcc_assert (comp_code == EQ || comp_code == NE);

      if (comp_code == EQ)
        return ARM_INVERSE_CONDITION_CODE (code);
      return code;

    case CC_NOOVmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_PL;
        case LT: return ARM_MI;
        default: gcc_unreachable ();
        }

    case CC_Zmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        default: gcc_unreachable ();
        }

    case CC_Nmode:
      switch (comp_code)
        {
        case NE: return ARM_MI;
        case EQ: return ARM_PL;
        default: gcc_unreachable ();
        }

    case CCFPEmode:
    case CCFPmode:
      /* These encodings assume that AC=1 in the FPA system control
         byte.  This allows us to handle all cases except UNEQ and
         LTGT.  */
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LS;
        case LT: return ARM_MI;
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case ORDERED: return ARM_VC;
        case UNORDERED: return ARM_VS;
        case UNLT: return ARM_LT;
        case UNLE: return ARM_LE;
        case UNGT: return ARM_HI;
        case UNGE: return ARM_PL;
        /* UNEQ and LTGT do not have a representation.  */
        case UNEQ: /* Fall through.  */
        case LTGT: /* Fall through.
*/ 15829 default: gcc_unreachable (); 15830 } 15831 15832 case CC_SWPmode: 15833 switch (comp_code) 15834 { 15835 case NE: return ARM_NE; 15836 case EQ: return ARM_EQ; 15837 case GE: return ARM_LE; 15838 case GT: return ARM_LT; 15839 case LE: return ARM_GE; 15840 case LT: return ARM_GT; 15841 case GEU: return ARM_LS; 15842 case GTU: return ARM_CC; 15843 case LEU: return ARM_CS; 15844 case LTU: return ARM_HI; 15845 default: gcc_unreachable (); 15846 } 15847 15848 case CC_Cmode: 15849 switch (comp_code) 15850 { 15851 case LTU: return ARM_CS; 15852 case GEU: return ARM_CC; 15853 default: gcc_unreachable (); 15854 } 15855 15856 case CCmode: 15857 switch (comp_code) 15858 { 15859 case NE: return ARM_NE; 15860 case EQ: return ARM_EQ; 15861 case GE: return ARM_GE; 15862 case GT: return ARM_GT; 15863 case LE: return ARM_LE; 15864 case LT: return ARM_LT; 15865 case GEU: return ARM_CS; 15866 case GTU: return ARM_HI; 15867 case LEU: return ARM_LS; 15868 case LTU: return ARM_CC; 15869 default: gcc_unreachable (); 15870 } 15871 15872 default: gcc_unreachable (); 15873 } 15874 } 15875 15876 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed 15877 instructions. */ 15878 void 15879 thumb2_final_prescan_insn (rtx insn) 15880 { 15881 rtx first_insn = insn; 15882 rtx body = PATTERN (insn); 15883 rtx predicate; 15884 enum arm_cond_code code; 15885 int n; 15886 int mask; 15887 15888 /* Remove the previous insn from the count of insns to be output. */ 15889 if (arm_condexec_count) 15890 arm_condexec_count--; 15891 15892 /* Nothing to do if we are already inside a conditional block. */ 15893 if (arm_condexec_count) 15894 return; 15895 15896 if (GET_CODE (body) != COND_EXEC) 15897 return; 15898 15899 /* Conditional jumps are implemented directly. */ 15900 if (GET_CODE (insn) == JUMP_INSN) 15901 return; 15902 15903 predicate = COND_EXEC_TEST (body); 15904 arm_current_cc = get_arm_condition_code (predicate); 15905 15906 n = get_attr_ce_count (insn); 15907 arm_condexec_count = 1; 15908 arm_condexec_mask = (1 << n) - 1; 15909 arm_condexec_masklen = n; 15910 /* See if subsequent instructions can be combined into the same block. */ 15911 for (;;) 15912 { 15913 insn = next_nonnote_insn (insn); 15914 15915 /* Jumping into the middle of an IT block is illegal, so a label or 15916 barrier terminates the block. */ 15917 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN) 15918 break; 15919 15920 body = PATTERN (insn); 15921 /* USE and CLOBBER aren't really insns, so just skip them. */ 15922 if (GET_CODE (body) == USE 15923 || GET_CODE (body) == CLOBBER) 15924 continue; 15925 15926 /* ??? Recognize conditional jumps, and combine them with IT blocks. */ 15927 if (GET_CODE (body) != COND_EXEC) 15928 break; 15929 /* Allow up to 4 conditionally executed instructions in a block. */ 15930 n = get_attr_ce_count (insn); 15931 if (arm_condexec_masklen + n > 4) 15932 break; 15933 15934 predicate = COND_EXEC_TEST (body); 15935 code = get_arm_condition_code (predicate); 15936 mask = (1 << n) - 1; 15937 if (arm_current_cc == code) 15938 arm_condexec_mask |= (mask << arm_condexec_masklen); 15939 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code)) 15940 break; 15941 15942 arm_condexec_count++; 15943 arm_condexec_masklen += n; 15944 15945 /* A jump must be the last instruction in a conditional block. 
*/ 15946 if (GET_CODE(insn) == JUMP_INSN) 15947 break; 15948 } 15949 /* Restore recog_data (getting the attributes of other insns can 15950 destroy this array, but final.c assumes that it remains intact 15951 across this call). */ 15952 extract_constrain_insn_cached (first_insn); 15953 } 15954 15955 void 15956 arm_final_prescan_insn (rtx insn) 15957 { 15958 /* BODY will hold the body of INSN. */ 15959 rtx body = PATTERN (insn); 15960 15961 /* This will be 1 if trying to repeat the trick, and things need to be 15962 reversed if it appears to fail. */ 15963 int reverse = 0; 15964 15965 /* If we start with a return insn, we only succeed if we find another one. */ 15966 int seeking_return = 0; 15967 15968 /* START_INSN will hold the insn from where we start looking. This is the 15969 first insn after the following code_label if REVERSE is true. */ 15970 rtx start_insn = insn; 15971 15972 /* If in state 4, check if the target branch is reached, in order to 15973 change back to state 0. */ 15974 if (arm_ccfsm_state == 4) 15975 { 15976 if (insn == arm_target_insn) 15977 { 15978 arm_target_insn = NULL; 15979 arm_ccfsm_state = 0; 15980 } 15981 return; 15982 } 15983 15984 /* If in state 3, it is possible to repeat the trick, if this insn is an 15985 unconditional branch to a label, and immediately following this branch 15986 is the previous target label which is only used once, and the label this 15987 branch jumps to is not too far off. */ 15988 if (arm_ccfsm_state == 3) 15989 { 15990 if (simplejump_p (insn)) 15991 { 15992 start_insn = next_nonnote_insn (start_insn); 15993 if (GET_CODE (start_insn) == BARRIER) 15994 { 15995 /* XXX Isn't this always a barrier? */ 15996 start_insn = next_nonnote_insn (start_insn); 15997 } 15998 if (GET_CODE (start_insn) == CODE_LABEL 15999 && CODE_LABEL_NUMBER (start_insn) == arm_target_label 16000 && LABEL_NUSES (start_insn) == 1) 16001 reverse = TRUE; 16002 else 16003 return; 16004 } 16005 else if (GET_CODE (body) == RETURN) 16006 { 16007 start_insn = next_nonnote_insn (start_insn); 16008 if (GET_CODE (start_insn) == BARRIER) 16009 start_insn = next_nonnote_insn (start_insn); 16010 if (GET_CODE (start_insn) == CODE_LABEL 16011 && CODE_LABEL_NUMBER (start_insn) == arm_target_label 16012 && LABEL_NUSES (start_insn) == 1) 16013 { 16014 reverse = TRUE; 16015 seeking_return = 1; 16016 } 16017 else 16018 return; 16019 } 16020 else 16021 return; 16022 } 16023 16024 gcc_assert (!arm_ccfsm_state || reverse); 16025 if (GET_CODE (insn) != JUMP_INSN) 16026 return; 16027 16028 /* This jump might be paralleled with a clobber of the condition codes 16029 the jump should always come first */ 16030 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) 16031 body = XVECEXP (body, 0, 0); 16032 16033 if (reverse 16034 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC 16035 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE)) 16036 { 16037 int insns_skipped; 16038 int fail = FALSE, succeed = FALSE; 16039 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */ 16040 int then_not_else = TRUE; 16041 rtx this_insn = start_insn, label = 0; 16042 16043 /* Register the insn jumped to. 
*/ 16044 if (reverse) 16045 { 16046 if (!seeking_return) 16047 label = XEXP (SET_SRC (body), 0); 16048 } 16049 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF) 16050 label = XEXP (XEXP (SET_SRC (body), 1), 0); 16051 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF) 16052 { 16053 label = XEXP (XEXP (SET_SRC (body), 2), 0); 16054 then_not_else = FALSE; 16055 } 16056 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN) 16057 seeking_return = 1; 16058 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN) 16059 { 16060 seeking_return = 1; 16061 then_not_else = FALSE; 16062 } 16063 else 16064 gcc_unreachable (); 16065 16066 /* See how many insns this branch skips, and what kind of insns. If all 16067 insns are okay, and the label or unconditional branch to the same 16068 label is not too far away, succeed. */ 16069 for (insns_skipped = 0; 16070 !fail && !succeed && insns_skipped++ < max_insns_skipped;) 16071 { 16072 rtx scanbody; 16073 16074 this_insn = next_nonnote_insn (this_insn); 16075 if (!this_insn) 16076 break; 16077 16078 switch (GET_CODE (this_insn)) 16079 { 16080 case CODE_LABEL: 16081 /* Succeed if it is the target label, otherwise fail since 16082 control falls in from somewhere else. */ 16083 if (this_insn == label) 16084 { 16085 arm_ccfsm_state = 1; 16086 succeed = TRUE; 16087 } 16088 else 16089 fail = TRUE; 16090 break; 16091 16092 case BARRIER: 16093 /* Succeed if the following insn is the target label. 16094 Otherwise fail. 16095 If return insns are used then the last insn in a function 16096 will be a barrier. */ 16097 this_insn = next_nonnote_insn (this_insn); 16098 if (this_insn && this_insn == label) 16099 { 16100 arm_ccfsm_state = 1; 16101 succeed = TRUE; 16102 } 16103 else 16104 fail = TRUE; 16105 break; 16106 16107 case CALL_INSN: 16108 /* The AAPCS says that conditional calls should not be 16109 used since they make interworking inefficient (the 16110 linker can't transform BL<cond> into BLX). That's 16111 only a problem if the machine has BLX. */ 16112 if (arm_arch5) 16113 { 16114 fail = TRUE; 16115 break; 16116 } 16117 16118 /* Succeed if the following insn is the target label, or 16119 if the following two insns are a barrier and the 16120 target label. */ 16121 this_insn = next_nonnote_insn (this_insn); 16122 if (this_insn && GET_CODE (this_insn) == BARRIER) 16123 this_insn = next_nonnote_insn (this_insn); 16124 16125 if (this_insn && this_insn == label 16126 && insns_skipped < max_insns_skipped) 16127 { 16128 arm_ccfsm_state = 1; 16129 succeed = TRUE; 16130 } 16131 else 16132 fail = TRUE; 16133 break; 16134 16135 case JUMP_INSN: 16136 /* If this is an unconditional branch to the same label, succeed. 16137 If it is to another label, do nothing. If it is conditional, 16138 fail. */ 16139 /* XXX Probably, the tests for SET and the PC are 16140 unnecessary. */ 16141 16142 scanbody = PATTERN (this_insn); 16143 if (GET_CODE (scanbody) == SET 16144 && GET_CODE (SET_DEST (scanbody)) == PC) 16145 { 16146 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF 16147 && XEXP (SET_SRC (scanbody), 0) == label && !reverse) 16148 { 16149 arm_ccfsm_state = 2; 16150 succeed = TRUE; 16151 } 16152 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE) 16153 fail = TRUE; 16154 } 16155 /* Fail if a conditional return is undesirable (e.g. on a 16156 StrongARM), but still allow this if optimizing for size. 
*/ 16157 else if (GET_CODE (scanbody) == RETURN 16158 && !use_return_insn (TRUE, NULL) 16159 && !optimize_size) 16160 fail = TRUE; 16161 else if (GET_CODE (scanbody) == RETURN 16162 && seeking_return) 16163 { 16164 arm_ccfsm_state = 2; 16165 succeed = TRUE; 16166 } 16167 else if (GET_CODE (scanbody) == PARALLEL) 16168 { 16169 switch (get_attr_conds (this_insn)) 16170 { 16171 case CONDS_NOCOND: 16172 break; 16173 default: 16174 fail = TRUE; 16175 break; 16176 } 16177 } 16178 else 16179 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */ 16180 16181 break; 16182 16183 case INSN: 16184 /* Instructions using or affecting the condition codes make it 16185 fail. */ 16186 scanbody = PATTERN (this_insn); 16187 if (!(GET_CODE (scanbody) == SET 16188 || GET_CODE (scanbody) == PARALLEL) 16189 || get_attr_conds (this_insn) != CONDS_NOCOND) 16190 fail = TRUE; 16191 16192 /* A conditional cirrus instruction must be followed by 16193 a non Cirrus instruction. However, since we 16194 conditionalize instructions in this function and by 16195 the time we get here we can't add instructions 16196 (nops), because shorten_branches() has already been 16197 called, we will disable conditionalizing Cirrus 16198 instructions to be safe. */ 16199 if (GET_CODE (scanbody) != USE 16200 && GET_CODE (scanbody) != CLOBBER 16201 && get_attr_cirrus (this_insn) != CIRRUS_NOT) 16202 fail = TRUE; 16203 break; 16204 16205 default: 16206 break; 16207 } 16208 } 16209 if (succeed) 16210 { 16211 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse)) 16212 arm_target_label = CODE_LABEL_NUMBER (label); 16213 else 16214 { 16215 gcc_assert (seeking_return || arm_ccfsm_state == 2); 16216 16217 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE) 16218 { 16219 this_insn = next_nonnote_insn (this_insn); 16220 gcc_assert (!this_insn 16221 || (GET_CODE (this_insn) != BARRIER 16222 && GET_CODE (this_insn) != CODE_LABEL)); 16223 } 16224 if (!this_insn) 16225 { 16226 /* Oh, dear! we ran off the end.. give up. */ 16227 extract_constrain_insn_cached (insn); 16228 arm_ccfsm_state = 0; 16229 arm_target_insn = NULL; 16230 return; 16231 } 16232 arm_target_insn = this_insn; 16233 } 16234 16235 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from 16236 what it was. */ 16237 if (!reverse) 16238 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0)); 16239 16240 if (reverse || then_not_else) 16241 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc); 16242 } 16243 16244 /* Restore recog_data (getting the attributes of other insns can 16245 destroy this array, but final.c assumes that it remains intact 16246 across this call. */ 16247 extract_constrain_insn_cached (insn); 16248 } 16249 } 16250 16251 /* Output IT instructions. */ 16252 void 16253 thumb2_asm_output_opcode (FILE * stream) 16254 { 16255 char buff[5]; 16256 int n; 16257 16258 if (arm_condexec_mask) 16259 { 16260 for (n = 0; n < arm_condexec_masklen; n++) 16261 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e'; 16262 buff[n] = 0; 16263 asm_fprintf(stream, "i%s\t%s\n\t", buff, 16264 arm_condition_codes[arm_current_cc]); 16265 arm_condexec_mask = 0; 16266 } 16267 } 16268 16269 /* Returns true if REGNO is a valid register 16270 for holding a quantity of type MODE. 
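   For instance, under the rules below a DImode value must start in one of
   r0-r6 when compiling for Thumb-1, and, when ldrd is available, doubleword
   values in core registers must start at an even register number (both
   purely illustrative consequences of the checks that follow).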
*/
int
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
            || (TARGET_HARD_FLOAT && TARGET_VFP
                && regno == VFPCC_REGNUM));

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_MAVERICK
      && IS_CIRRUS_REGNUM (regno))
    /* We have outlawed SI values in Cirrus registers because they
       reside in the lower 32 bits, but SF values reside in the
       upper 32 bits.  This causes gcc all sorts of grief.  We can't
       even split the registers into pairs because Cirrus SI values
       get sign extended to 64 bits -- aldyh.  */
    return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
        return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
         putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
        return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
        return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
               || (VALID_NEON_QREG_MODE (mode)
                   && NEON_REGNO_OK_FOR_QUAD (regno))
               || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
               || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
               || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
               || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
               || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
        return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
        return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs so that we can
     use ldrd.  Do not allow very large Neon structure opaque modes in
     general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
           && ARM_NUM_REGS (mode) <= 4;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  /* The only registers left are the FPA registers
     which we only allow to hold FP values.  */
  return (TARGET_HARD_FLOAT && TARGET_FPA
          && GET_MODE_CLASS (mode) == MODE_FLOAT
          && regno >= FIRST_FPA_REGNUM
          && regno <= LAST_FPA_REGNUM);
}

/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.
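   So, to give an illustrative example, r5 is classed as LO_REGS when
   compiling for Thumb-1 or Thumb-2 but as GENERAL_REGS in ARM state, while
   r10 is HI_REGS for Thumb and GENERAL_REGS for ARM.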
*/ 16355 16356 enum reg_class 16357 arm_regno_class (int regno) 16358 { 16359 if (TARGET_THUMB1) 16360 { 16361 if (regno == STACK_POINTER_REGNUM) 16362 return STACK_REG; 16363 if (regno == CC_REGNUM) 16364 return CC_REG; 16365 if (regno < 8) 16366 return LO_REGS; 16367 return HI_REGS; 16368 } 16369 16370 if (TARGET_THUMB2 && regno < 8) 16371 return LO_REGS; 16372 16373 if ( regno <= LAST_ARM_REGNUM 16374 || regno == FRAME_POINTER_REGNUM 16375 || regno == ARG_POINTER_REGNUM) 16376 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS; 16377 16378 if (regno == CC_REGNUM || regno == VFPCC_REGNUM) 16379 return TARGET_THUMB2 ? CC_REG : NO_REGS; 16380 16381 if (IS_CIRRUS_REGNUM (regno)) 16382 return CIRRUS_REGS; 16383 16384 if (IS_VFP_REGNUM (regno)) 16385 { 16386 if (regno <= D7_VFP_REGNUM) 16387 return VFP_D0_D7_REGS; 16388 else if (regno <= LAST_LO_VFP_REGNUM) 16389 return VFP_LO_REGS; 16390 else 16391 return VFP_HI_REGS; 16392 } 16393 16394 if (IS_IWMMXT_REGNUM (regno)) 16395 return IWMMXT_REGS; 16396 16397 if (IS_IWMMXT_GR_REGNUM (regno)) 16398 return IWMMXT_GR_REGS; 16399 16400 return FPA_REGS; 16401 } 16402 16403 /* Handle a special case when computing the offset 16404 of an argument from the frame pointer. */ 16405 int 16406 arm_debugger_arg_offset (int value, rtx addr) 16407 { 16408 rtx insn; 16409 16410 /* We are only interested if dbxout_parms() failed to compute the offset. */ 16411 if (value != 0) 16412 return 0; 16413 16414 /* We can only cope with the case where the address is held in a register. */ 16415 if (GET_CODE (addr) != REG) 16416 return 0; 16417 16418 /* If we are using the frame pointer to point at the argument, then 16419 an offset of 0 is correct. */ 16420 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM) 16421 return 0; 16422 16423 /* If we are using the stack pointer to point at the 16424 argument, then an offset of 0 is correct. */ 16425 /* ??? Check this is consistent with thumb2 frame layout. */ 16426 if ((TARGET_THUMB || !frame_pointer_needed) 16427 && REGNO (addr) == SP_REGNUM) 16428 return 0; 16429 16430 /* Oh dear. The argument is pointed to by a register rather 16431 than being held in a register, or being stored at a known 16432 offset from the frame pointer. Since GDB only understands 16433 those two kinds of argument we must translate the address 16434 held in the register into an offset from the frame pointer. 16435 We do this by searching through the insns for the function 16436 looking to see where this register gets its value. If the 16437 register is initialized from the frame pointer plus an offset 16438 then we are in luck and we can continue, otherwise we give up. 16439 16440 This code is exercised by producing debugging information 16441 for a function with arguments like this: 16442 16443 double func (double a, double b, int c, double d) {return d;} 16444 16445 Without this code the stab for parameter 'd' will be set to 16446 an offset of 0 from the frame pointer, rather than 8. */ 16447 16448 /* The if() statement says: 16449 16450 If the insn is a normal instruction 16451 and if the insn is setting the value in a register 16452 and if the register being set is the register holding the address of the argument 16453 and if the address is computing by an addition 16454 that involves adding to a register 16455 which is the frame pointer 16456 a constant integer 16457 16458 then... 
*/ 16459 16460 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 16461 { 16462 if ( GET_CODE (insn) == INSN 16463 && GET_CODE (PATTERN (insn)) == SET 16464 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr) 16465 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS 16466 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG 16467 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM 16468 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT 16469 ) 16470 { 16471 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1)); 16472 16473 break; 16474 } 16475 } 16476 16477 if (value == 0) 16478 { 16479 debug_rtx (addr); 16480 warning (0, "unable to compute real location of stacked parameter"); 16481 value = 8; /* XXX magic hack */ 16482 } 16483 16484 return value; 16485 } 16486 16487 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \ 16488 do \ 16489 { \ 16490 if ((MASK) & insn_flags) \ 16491 add_builtin_function ((NAME), (TYPE), (CODE), \ 16492 BUILT_IN_MD, NULL, NULL_TREE); \ 16493 } \ 16494 while (0) 16495 16496 struct builtin_description 16497 { 16498 const unsigned int mask; 16499 const enum insn_code icode; 16500 const char * const name; 16501 const enum arm_builtins code; 16502 const enum rtx_code comparison; 16503 const unsigned int flag; 16504 }; 16505 16506 static const struct builtin_description bdesc_2arg[] = 16507 { 16508 #define IWMMXT_BUILTIN(code, string, builtin) \ 16509 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \ 16510 ARM_BUILTIN_##builtin, UNKNOWN, 0 }, 16511 16512 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) 16513 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH) 16514 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW) 16515 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB) 16516 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH) 16517 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW) 16518 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB) 16519 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH) 16520 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW) 16521 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB) 16522 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH) 16523 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW) 16524 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB) 16525 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH) 16526 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW) 16527 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB) 16528 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH) 16529 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW) 16530 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL) 16531 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM) 16532 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM) 16533 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB) 16534 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH) 16535 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW) 16536 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB) 16537 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH) 16538 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW) 16539 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB) 16540 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH) 16541 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW) 16542 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB) 16543 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB) 16544 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH) 16545 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH) 16546 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW) 16547 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW) 16548 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB) 16549 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB) 16550 IWMMXT_BUILTIN 
(uminv4hi3, "wminuh", WMINUH) 16551 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH) 16552 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW) 16553 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW) 16554 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND) 16555 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN) 16556 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR) 16557 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR) 16558 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B) 16559 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H) 16560 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR) 16561 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR) 16562 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB) 16563 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH) 16564 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW) 16565 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB) 16566 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH) 16567 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW) 16568 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS) 16569 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU) 16570 16571 #define IWMMXT_BUILTIN2(code, builtin) \ 16572 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, 16573 16574 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) 16575 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS) 16576 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS) 16577 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS) 16578 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS) 16579 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) 16580 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH) 16581 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI) 16582 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW) 16583 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI) 16584 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD) 16585 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI) 16586 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH) 16587 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI) 16588 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW) 16589 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI) 16590 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD) 16591 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI) 16592 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH) 16593 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI) 16594 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW) 16595 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI) 16596 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD) 16597 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI) 16598 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH) 16599 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI) 16600 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW) 16601 IWMMXT_BUILTIN2 (rorv2si3, WRORWI) 16602 IWMMXT_BUILTIN2 (rordi3_di, WRORD) 16603 IWMMXT_BUILTIN2 (rordi3, WRORDI) 16604 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) 16605 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) 16606 }; 16607 16608 static const struct builtin_description bdesc_1arg[] = 16609 { 16610 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB) 16611 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH) 16612 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW) 16613 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB) 16614 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH) 16615 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW) 16616 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB) 16617 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH) 16618 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW) 16619 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB) 16620 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH) 16621 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW) 
16622 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB) 16623 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH) 16624 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW) 16625 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB) 16626 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH) 16627 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW) 16628 }; 16629 16630 /* Set up all the iWMMXt builtins. This is 16631 not called if TARGET_IWMMXT is zero. */ 16632 16633 static void 16634 arm_init_iwmmxt_builtins (void) 16635 { 16636 const struct builtin_description * d; 16637 size_t i; 16638 tree endlink = void_list_node; 16639 16640 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); 16641 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); 16642 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); 16643 16644 tree int_ftype_int 16645 = build_function_type (integer_type_node, 16646 tree_cons (NULL_TREE, integer_type_node, endlink)); 16647 tree v8qi_ftype_v8qi_v8qi_int 16648 = build_function_type (V8QI_type_node, 16649 tree_cons (NULL_TREE, V8QI_type_node, 16650 tree_cons (NULL_TREE, V8QI_type_node, 16651 tree_cons (NULL_TREE, 16652 integer_type_node, 16653 endlink)))); 16654 tree v4hi_ftype_v4hi_int 16655 = build_function_type (V4HI_type_node, 16656 tree_cons (NULL_TREE, V4HI_type_node, 16657 tree_cons (NULL_TREE, integer_type_node, 16658 endlink))); 16659 tree v2si_ftype_v2si_int 16660 = build_function_type (V2SI_type_node, 16661 tree_cons (NULL_TREE, V2SI_type_node, 16662 tree_cons (NULL_TREE, integer_type_node, 16663 endlink))); 16664 tree v2si_ftype_di_di 16665 = build_function_type (V2SI_type_node, 16666 tree_cons (NULL_TREE, long_long_integer_type_node, 16667 tree_cons (NULL_TREE, long_long_integer_type_node, 16668 endlink))); 16669 tree di_ftype_di_int 16670 = build_function_type (long_long_integer_type_node, 16671 tree_cons (NULL_TREE, long_long_integer_type_node, 16672 tree_cons (NULL_TREE, integer_type_node, 16673 endlink))); 16674 tree di_ftype_di_int_int 16675 = build_function_type (long_long_integer_type_node, 16676 tree_cons (NULL_TREE, long_long_integer_type_node, 16677 tree_cons (NULL_TREE, integer_type_node, 16678 tree_cons (NULL_TREE, 16679 integer_type_node, 16680 endlink)))); 16681 tree int_ftype_v8qi 16682 = build_function_type (integer_type_node, 16683 tree_cons (NULL_TREE, V8QI_type_node, 16684 endlink)); 16685 tree int_ftype_v4hi 16686 = build_function_type (integer_type_node, 16687 tree_cons (NULL_TREE, V4HI_type_node, 16688 endlink)); 16689 tree int_ftype_v2si 16690 = build_function_type (integer_type_node, 16691 tree_cons (NULL_TREE, V2SI_type_node, 16692 endlink)); 16693 tree int_ftype_v8qi_int 16694 = build_function_type (integer_type_node, 16695 tree_cons (NULL_TREE, V8QI_type_node, 16696 tree_cons (NULL_TREE, integer_type_node, 16697 endlink))); 16698 tree int_ftype_v4hi_int 16699 = build_function_type (integer_type_node, 16700 tree_cons (NULL_TREE, V4HI_type_node, 16701 tree_cons (NULL_TREE, integer_type_node, 16702 endlink))); 16703 tree int_ftype_v2si_int 16704 = build_function_type (integer_type_node, 16705 tree_cons (NULL_TREE, V2SI_type_node, 16706 tree_cons (NULL_TREE, integer_type_node, 16707 endlink))); 16708 tree v8qi_ftype_v8qi_int_int 16709 = build_function_type (V8QI_type_node, 16710 tree_cons (NULL_TREE, V8QI_type_node, 16711 tree_cons (NULL_TREE, integer_type_node, 16712 tree_cons (NULL_TREE, 16713 integer_type_node, 
16714 endlink)))); 16715 tree v4hi_ftype_v4hi_int_int 16716 = build_function_type (V4HI_type_node, 16717 tree_cons (NULL_TREE, V4HI_type_node, 16718 tree_cons (NULL_TREE, integer_type_node, 16719 tree_cons (NULL_TREE, 16720 integer_type_node, 16721 endlink)))); 16722 tree v2si_ftype_v2si_int_int 16723 = build_function_type (V2SI_type_node, 16724 tree_cons (NULL_TREE, V2SI_type_node, 16725 tree_cons (NULL_TREE, integer_type_node, 16726 tree_cons (NULL_TREE, 16727 integer_type_node, 16728 endlink)))); 16729 /* Miscellaneous. */ 16730 tree v8qi_ftype_v4hi_v4hi 16731 = build_function_type (V8QI_type_node, 16732 tree_cons (NULL_TREE, V4HI_type_node, 16733 tree_cons (NULL_TREE, V4HI_type_node, 16734 endlink))); 16735 tree v4hi_ftype_v2si_v2si 16736 = build_function_type (V4HI_type_node, 16737 tree_cons (NULL_TREE, V2SI_type_node, 16738 tree_cons (NULL_TREE, V2SI_type_node, 16739 endlink))); 16740 tree v2si_ftype_v4hi_v4hi 16741 = build_function_type (V2SI_type_node, 16742 tree_cons (NULL_TREE, V4HI_type_node, 16743 tree_cons (NULL_TREE, V4HI_type_node, 16744 endlink))); 16745 tree v2si_ftype_v8qi_v8qi 16746 = build_function_type (V2SI_type_node, 16747 tree_cons (NULL_TREE, V8QI_type_node, 16748 tree_cons (NULL_TREE, V8QI_type_node, 16749 endlink))); 16750 tree v4hi_ftype_v4hi_di 16751 = build_function_type (V4HI_type_node, 16752 tree_cons (NULL_TREE, V4HI_type_node, 16753 tree_cons (NULL_TREE, 16754 long_long_integer_type_node, 16755 endlink))); 16756 tree v2si_ftype_v2si_di 16757 = build_function_type (V2SI_type_node, 16758 tree_cons (NULL_TREE, V2SI_type_node, 16759 tree_cons (NULL_TREE, 16760 long_long_integer_type_node, 16761 endlink))); 16762 tree void_ftype_int_int 16763 = build_function_type (void_type_node, 16764 tree_cons (NULL_TREE, integer_type_node, 16765 tree_cons (NULL_TREE, integer_type_node, 16766 endlink))); 16767 tree di_ftype_void 16768 = build_function_type (long_long_unsigned_type_node, endlink); 16769 tree di_ftype_v8qi 16770 = build_function_type (long_long_integer_type_node, 16771 tree_cons (NULL_TREE, V8QI_type_node, 16772 endlink)); 16773 tree di_ftype_v4hi 16774 = build_function_type (long_long_integer_type_node, 16775 tree_cons (NULL_TREE, V4HI_type_node, 16776 endlink)); 16777 tree di_ftype_v2si 16778 = build_function_type (long_long_integer_type_node, 16779 tree_cons (NULL_TREE, V2SI_type_node, 16780 endlink)); 16781 tree v2si_ftype_v4hi 16782 = build_function_type (V2SI_type_node, 16783 tree_cons (NULL_TREE, V4HI_type_node, 16784 endlink)); 16785 tree v4hi_ftype_v8qi 16786 = build_function_type (V4HI_type_node, 16787 tree_cons (NULL_TREE, V8QI_type_node, 16788 endlink)); 16789 16790 tree di_ftype_di_v4hi_v4hi 16791 = build_function_type (long_long_unsigned_type_node, 16792 tree_cons (NULL_TREE, 16793 long_long_unsigned_type_node, 16794 tree_cons (NULL_TREE, V4HI_type_node, 16795 tree_cons (NULL_TREE, 16796 V4HI_type_node, 16797 endlink)))); 16798 16799 tree di_ftype_v4hi_v4hi 16800 = build_function_type (long_long_unsigned_type_node, 16801 tree_cons (NULL_TREE, V4HI_type_node, 16802 tree_cons (NULL_TREE, V4HI_type_node, 16803 endlink))); 16804 16805 /* Normal vector binops. 
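     For example, the "waddb" entry in bdesc_2arg above uses CODE_FOR_addv8qi3,
     whose operands are V8QImode, so the loop below should give
     __builtin_arm_waddb the v8qi_ftype_v8qi_v8qi type (two V8QI arguments,
     one V8QI result).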
*/ 16806 tree v8qi_ftype_v8qi_v8qi 16807 = build_function_type (V8QI_type_node, 16808 tree_cons (NULL_TREE, V8QI_type_node, 16809 tree_cons (NULL_TREE, V8QI_type_node, 16810 endlink))); 16811 tree v4hi_ftype_v4hi_v4hi 16812 = build_function_type (V4HI_type_node, 16813 tree_cons (NULL_TREE, V4HI_type_node, 16814 tree_cons (NULL_TREE, V4HI_type_node, 16815 endlink))); 16816 tree v2si_ftype_v2si_v2si 16817 = build_function_type (V2SI_type_node, 16818 tree_cons (NULL_TREE, V2SI_type_node, 16819 tree_cons (NULL_TREE, V2SI_type_node, 16820 endlink))); 16821 tree di_ftype_di_di 16822 = build_function_type (long_long_unsigned_type_node, 16823 tree_cons (NULL_TREE, long_long_unsigned_type_node, 16824 tree_cons (NULL_TREE, 16825 long_long_unsigned_type_node, 16826 endlink))); 16827 16828 /* Add all builtins that are more or less simple operations on two 16829 operands. */ 16830 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 16831 { 16832 /* Use one of the operands; the target can have a different mode for 16833 mask-generating compares. */ 16834 enum machine_mode mode; 16835 tree type; 16836 16837 if (d->name == 0) 16838 continue; 16839 16840 mode = insn_data[d->icode].operand[1].mode; 16841 16842 switch (mode) 16843 { 16844 case V8QImode: 16845 type = v8qi_ftype_v8qi_v8qi; 16846 break; 16847 case V4HImode: 16848 type = v4hi_ftype_v4hi_v4hi; 16849 break; 16850 case V2SImode: 16851 type = v2si_ftype_v2si_v2si; 16852 break; 16853 case DImode: 16854 type = di_ftype_di_di; 16855 break; 16856 16857 default: 16858 gcc_unreachable (); 16859 } 16860 16861 def_mbuiltin (d->mask, d->name, type, d->code); 16862 } 16863 16864 /* Add the remaining MMX insns with somewhat more complicated types. */ 16865 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO); 16866 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX); 16867 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX); 16868 16869 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH); 16870 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW); 16871 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD); 16872 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI); 16873 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI); 16874 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI); 16875 16876 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH); 16877 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW); 16878 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD); 16879 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI); 16880 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI); 16881 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI); 16882 16883 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH); 16884 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW); 16885 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD); 16886 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI); 16887 def_mbuiltin (FL_IWMMXT, 
"__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI); 16888 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI); 16889 16890 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH); 16891 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW); 16892 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD); 16893 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI); 16894 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI); 16895 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI); 16896 16897 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH); 16898 16899 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB); 16900 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH); 16901 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ); 16902 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ); 16903 16904 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB); 16905 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH); 16906 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW); 16907 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB); 16908 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH); 16909 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW); 16910 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB); 16911 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH); 16912 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW); 16913 16914 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB); 16915 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH); 16916 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW); 16917 16918 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB); 16919 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH); 16920 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW); 16921 16922 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS); 16923 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS); 16924 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS); 16925 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS); 16926 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS); 16927 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS); 16928 16929 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB); 16930 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH); 16931 def_mbuiltin (FL_IWMMXT, 
"__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW); 16932 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB); 16933 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH); 16934 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW); 16935 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB); 16936 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH); 16937 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW); 16938 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB); 16939 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH); 16940 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW); 16941 16942 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS); 16943 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ); 16944 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU); 16945 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ); 16946 16947 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN); 16948 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA); 16949 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH); 16950 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB); 16951 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT); 16952 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB); 16953 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT); 16954 } 16955 16956 static void 16957 arm_init_tls_builtins (void) 16958 { 16959 tree ftype, decl; 16960 16961 ftype = build_function_type (ptr_type_node, void_list_node); 16962 decl = add_builtin_function ("__builtin_thread_pointer", ftype, 16963 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD, 16964 NULL, NULL_TREE); 16965 TREE_NOTHROW (decl) = 1; 16966 TREE_READONLY (decl) = 1; 16967 } 16968 16969 enum neon_builtin_type_bits { 16970 T_V8QI = 0x0001, 16971 T_V4HI = 0x0002, 16972 T_V2SI = 0x0004, 16973 T_V2SF = 0x0008, 16974 T_DI = 0x0010, 16975 T_V16QI = 0x0020, 16976 T_V8HI = 0x0040, 16977 T_V4SI = 0x0080, 16978 T_V4SF = 0x0100, 16979 T_V2DI = 0x0200, 16980 T_TI = 0x0400, 16981 T_EI = 0x0800, 16982 T_OI = 0x1000 16983 }; 16984 16985 #define v8qi_UP T_V8QI 16986 #define v4hi_UP T_V4HI 16987 #define v2si_UP T_V2SI 16988 #define v2sf_UP T_V2SF 16989 #define di_UP T_DI 16990 #define v16qi_UP T_V16QI 16991 #define v8hi_UP T_V8HI 16992 #define v4si_UP T_V4SI 16993 #define v4sf_UP T_V4SF 16994 #define v2di_UP T_V2DI 16995 #define ti_UP T_TI 16996 #define ei_UP T_EI 16997 #define oi_UP T_OI 16998 16999 #define UP(X) X##_UP 17000 17001 #define T_MAX 13 17002 17003 typedef enum { 17004 NEON_BINOP, 17005 NEON_TERNOP, 17006 NEON_UNOP, 17007 NEON_GETLANE, 17008 NEON_SETLANE, 17009 NEON_CREATE, 17010 NEON_DUP, 17011 NEON_DUPLANE, 17012 NEON_COMBINE, 17013 NEON_SPLIT, 17014 NEON_LANEMUL, 17015 NEON_LANEMULL, 17016 NEON_LANEMULH, 17017 NEON_LANEMAC, 17018 NEON_SCALARMUL, 17019 NEON_SCALARMULL, 17020 
NEON_SCALARMULH, 17021 NEON_SCALARMAC, 17022 NEON_CONVERT, 17023 NEON_FIXCONV, 17024 NEON_SELECT, 17025 NEON_RESULTPAIR, 17026 NEON_REINTERP, 17027 NEON_VTBL, 17028 NEON_VTBX, 17029 NEON_LOAD1, 17030 NEON_LOAD1LANE, 17031 NEON_STORE1, 17032 NEON_STORE1LANE, 17033 NEON_LOADSTRUCT, 17034 NEON_LOADSTRUCTLANE, 17035 NEON_STORESTRUCT, 17036 NEON_STORESTRUCTLANE, 17037 NEON_LOGICBINOP, 17038 NEON_SHIFTINSERT, 17039 NEON_SHIFTIMM, 17040 NEON_SHIFTACC 17041 } neon_itype; 17042 17043 typedef struct { 17044 const char *name; 17045 const neon_itype itype; 17046 const int bits; 17047 const enum insn_code codes[T_MAX]; 17048 const unsigned int num_vars; 17049 unsigned int base_fcode; 17050 } neon_builtin_datum; 17051 17052 #define CF(N,X) CODE_FOR_neon_##N##X 17053 17054 #define VAR1(T, N, A) \ 17055 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0 17056 #define VAR2(T, N, A, B) \ 17057 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0 17058 #define VAR3(T, N, A, B, C) \ 17059 #N, NEON_##T, UP (A) | UP (B) | UP (C), \ 17060 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0 17061 #define VAR4(T, N, A, B, C, D) \ 17062 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \ 17063 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0 17064 #define VAR5(T, N, A, B, C, D, E) \ 17065 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \ 17066 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0 17067 #define VAR6(T, N, A, B, C, D, E, F) \ 17068 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \ 17069 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0 17070 #define VAR7(T, N, A, B, C, D, E, F, G) \ 17071 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \ 17072 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ 17073 CF (N, G) }, 7, 0 17074 #define VAR8(T, N, A, B, C, D, E, F, G, H) \ 17075 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ 17076 | UP (H), \ 17077 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ 17078 CF (N, G), CF (N, H) }, 8, 0 17079 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ 17080 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ 17081 | UP (H) | UP (I), \ 17082 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ 17083 CF (N, G), CF (N, H), CF (N, I) }, 9, 0 17084 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ 17085 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ 17086 | UP (H) | UP (I) | UP (J), \ 17087 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ 17088 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0 17089 17090 /* The mode entries in the following table correspond to the "key" type of the 17091 instruction variant, i.e. equivalent to that which would be specified after 17092 the assembler mnemonic, which usually refers to the last vector operand. 17093 (Signed/unsigned/polynomial types are not differentiated between though, and 17094 are all mapped onto the same mode for a given element size.) The modes 17095 listed per instruction should be the same as those defined for that 17096 instruction's pattern in neon.md. 17097 WARNING: Variants should be listed in the same increasing order as 17098 neon_builtin_type_bits. 
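   As an illustrative expansion of the VARn/CF macros above (a sketch, not
   an extra table entry), an element written as

     { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) }

   stands for

     { "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
       { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
         CODE_FOR_neon_vaddlv2si }, 3, 0 }

   i.e. one neon_builtin_datum covering three variants; its base_fcode
   field is filled in later by arm_init_neon_builtins.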
*/ 17099 17100 static neon_builtin_datum neon_builtin_data[] = 17101 { 17102 { VAR10 (BINOP, vadd, 17103 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17104 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) }, 17105 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) }, 17106 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, 17107 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, 17108 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) }, 17109 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17110 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17111 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) }, 17112 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17113 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) }, 17114 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) }, 17115 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) }, 17116 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) }, 17117 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) }, 17118 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) }, 17119 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) }, 17120 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) }, 17121 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) }, 17122 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) }, 17123 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) }, 17124 { VAR2 (BINOP, vqdmull, v4hi, v2si) }, 17125 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, 17126 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, 17127 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, 17128 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) }, 17129 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) }, 17130 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) }, 17131 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, 17132 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, 17133 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, 17134 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) }, 17135 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, 17136 { VAR10 (BINOP, vsub, 17137 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17138 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) }, 17139 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) }, 17140 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, 17141 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, 17142 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) }, 17143 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17144 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17145 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17146 { VAR2 (BINOP, vcage, v2sf, v4sf) }, 17147 { VAR2 (BINOP, vcagt, v2sf, v4sf) }, 17148 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, 17149 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17150 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) }, 17151 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, 17152 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) }, 17153 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17154 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17155 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) }, 17156 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, 17157 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, 17158 { VAR4 
(BINOP, vpmax, v8qi, v4hi, v2si, v2sf) }, 17159 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) }, 17160 { VAR2 (BINOP, vrecps, v2sf, v4sf) }, 17161 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) }, 17162 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, 17163 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, 17164 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17165 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, 17166 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17167 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, 17168 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, 17169 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, 17170 { VAR2 (UNOP, vcnt, v8qi, v16qi) }, 17171 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) }, 17172 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) }, 17173 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, 17174 /* FIXME: vget_lane supports more variants than this! */ 17175 { VAR10 (GETLANE, vget_lane, 17176 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17177 { VAR10 (SETLANE, vset_lane, 17178 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17179 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) }, 17180 { VAR10 (DUP, vdup_n, 17181 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17182 { VAR10 (DUPLANE, vdup_lane, 17183 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17184 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) }, 17185 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) }, 17186 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) }, 17187 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) }, 17188 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) }, 17189 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) }, 17190 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) }, 17191 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17192 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17193 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) }, 17194 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) }, 17195 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17196 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) }, 17197 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) }, 17198 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17199 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17200 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) }, 17201 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) }, 17202 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17203 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) }, 17204 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) }, 17205 { VAR10 (BINOP, vext, 17206 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17207 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17208 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) }, 17209 { VAR2 (UNOP, vrev16, v8qi, v16qi) }, 17210 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) }, 17211 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) }, 17212 { VAR10 (SELECT, vbsl, 17213 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17214 { VAR1 (VTBL, vtbl1, v8qi) }, 17215 { VAR1 (VTBL, vtbl2, v8qi) }, 17216 { VAR1 (VTBL, vtbl3, v8qi) }, 17217 { VAR1 (VTBL, vtbl4, v8qi) }, 17218 { VAR1 (VTBX, vtbx1, v8qi) }, 17219 { VAR1 (VTBX, vtbx2, v8qi) }, 17220 { VAR1 (VTBX, vtbx3, v8qi) }, 17221 { VAR1 (VTBX, vtbx4, v8qi) }, 17222 { VAR8 
(RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17223 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17224 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, 17225 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) }, 17226 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) }, 17227 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) }, 17228 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) }, 17229 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) }, 17230 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) }, 17231 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) }, 17232 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) }, 17233 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) }, 17234 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) }, 17235 { VAR10 (LOAD1, vld1, 17236 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17237 { VAR10 (LOAD1LANE, vld1_lane, 17238 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17239 { VAR10 (LOAD1, vld1_dup, 17240 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17241 { VAR10 (STORE1, vst1, 17242 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17243 { VAR10 (STORE1LANE, vst1_lane, 17244 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17245 { VAR9 (LOADSTRUCT, 17246 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, 17247 { VAR7 (LOADSTRUCTLANE, vld2_lane, 17248 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17249 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) }, 17250 { VAR9 (STORESTRUCT, vst2, 17251 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, 17252 { VAR7 (STORESTRUCTLANE, vst2_lane, 17253 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17254 { VAR9 (LOADSTRUCT, 17255 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, 17256 { VAR7 (LOADSTRUCTLANE, vld3_lane, 17257 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17258 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) }, 17259 { VAR9 (STORESTRUCT, vst3, 17260 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, 17261 { VAR7 (STORESTRUCTLANE, vst3_lane, 17262 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17263 { VAR9 (LOADSTRUCT, vld4, 17264 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, 17265 { VAR7 (LOADSTRUCTLANE, vld4_lane, 17266 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17267 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) }, 17268 { VAR9 (STORESTRUCT, vst4, 17269 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, 17270 { VAR7 (STORESTRUCTLANE, vst4_lane, 17271 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, 17272 { VAR10 (LOGICBINOP, vand, 17273 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17274 { VAR10 (LOGICBINOP, vorr, 17275 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17276 { VAR10 (BINOP, veor, 17277 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17278 { VAR10 (LOGICBINOP, vbic, 17279 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, 17280 { VAR10 (LOGICBINOP, vorn, 17281 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) } 17282 }; 17283 17284 #undef CF 17285 #undef VAR1 17286 #undef VAR2 17287 #undef VAR3 17288 #undef VAR4 17289 #undef VAR5 17290 #undef VAR6 17291 #undef VAR7 17292 #undef VAR8 17293 #undef VAR9 17294 #undef VAR10 17295 17296 static void 17297 
arm_init_neon_builtins (void) 17298 { 17299 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE; 17300 17301 tree neon_intQI_type_node; 17302 tree neon_intHI_type_node; 17303 tree neon_polyQI_type_node; 17304 tree neon_polyHI_type_node; 17305 tree neon_intSI_type_node; 17306 tree neon_intDI_type_node; 17307 tree neon_float_type_node; 17308 17309 tree intQI_pointer_node; 17310 tree intHI_pointer_node; 17311 tree intSI_pointer_node; 17312 tree intDI_pointer_node; 17313 tree float_pointer_node; 17314 17315 tree const_intQI_node; 17316 tree const_intHI_node; 17317 tree const_intSI_node; 17318 tree const_intDI_node; 17319 tree const_float_node; 17320 17321 tree const_intQI_pointer_node; 17322 tree const_intHI_pointer_node; 17323 tree const_intSI_pointer_node; 17324 tree const_intDI_pointer_node; 17325 tree const_float_pointer_node; 17326 17327 tree V8QI_type_node; 17328 tree V4HI_type_node; 17329 tree V2SI_type_node; 17330 tree V2SF_type_node; 17331 tree V16QI_type_node; 17332 tree V8HI_type_node; 17333 tree V4SI_type_node; 17334 tree V4SF_type_node; 17335 tree V2DI_type_node; 17336 17337 tree intUQI_type_node; 17338 tree intUHI_type_node; 17339 tree intUSI_type_node; 17340 tree intUDI_type_node; 17341 17342 tree intEI_type_node; 17343 tree intOI_type_node; 17344 tree intCI_type_node; 17345 tree intXI_type_node; 17346 17347 tree V8QI_pointer_node; 17348 tree V4HI_pointer_node; 17349 tree V2SI_pointer_node; 17350 tree V2SF_pointer_node; 17351 tree V16QI_pointer_node; 17352 tree V8HI_pointer_node; 17353 tree V4SI_pointer_node; 17354 tree V4SF_pointer_node; 17355 tree V2DI_pointer_node; 17356 17357 tree void_ftype_pv8qi_v8qi_v8qi; 17358 tree void_ftype_pv4hi_v4hi_v4hi; 17359 tree void_ftype_pv2si_v2si_v2si; 17360 tree void_ftype_pv2sf_v2sf_v2sf; 17361 tree void_ftype_pdi_di_di; 17362 tree void_ftype_pv16qi_v16qi_v16qi; 17363 tree void_ftype_pv8hi_v8hi_v8hi; 17364 tree void_ftype_pv4si_v4si_v4si; 17365 tree void_ftype_pv4sf_v4sf_v4sf; 17366 tree void_ftype_pv2di_v2di_v2di; 17367 17368 tree reinterp_ftype_dreg[5][5]; 17369 tree reinterp_ftype_qreg[5][5]; 17370 tree dreg_types[5], qreg_types[5]; 17371 17372 /* Create distinguished type nodes for NEON vector element types, 17373 and pointers to values of such types, so we can detect them later. */ 17374 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); 17375 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode)); 17376 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); 17377 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode)); 17378 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode)); 17379 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode)); 17380 neon_float_type_node = make_node (REAL_TYPE); 17381 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE; 17382 layout_type (neon_float_type_node); 17383 17384 /* Define typedefs which exactly correspond to the modes we are basing vector 17385 types on. If you change these names you'll need to change 17386 the table used by arm_mangle_type too. 
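   For example, "__builtin_neon_qi" below names the QImode element type;
   arm_neon.h is expected to wrap these typedefs into the user-visible
   vector types, roughly along the lines of

     typedef __builtin_neon_qi int8x8_t __attribute__ ((vector_size (8)));

   (a sketch of the intended usage, not a definition made here), which is
   why renaming them also affects mangling.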
*/ 17387 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node, 17388 "__builtin_neon_qi"); 17389 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node, 17390 "__builtin_neon_hi"); 17391 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node, 17392 "__builtin_neon_si"); 17393 (*lang_hooks.types.register_builtin_type) (neon_float_type_node, 17394 "__builtin_neon_sf"); 17395 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node, 17396 "__builtin_neon_di"); 17397 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node, 17398 "__builtin_neon_poly8"); 17399 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node, 17400 "__builtin_neon_poly16"); 17401 17402 intQI_pointer_node = build_pointer_type (neon_intQI_type_node); 17403 intHI_pointer_node = build_pointer_type (neon_intHI_type_node); 17404 intSI_pointer_node = build_pointer_type (neon_intSI_type_node); 17405 intDI_pointer_node = build_pointer_type (neon_intDI_type_node); 17406 float_pointer_node = build_pointer_type (neon_float_type_node); 17407 17408 /* Next create constant-qualified versions of the above types. */ 17409 const_intQI_node = build_qualified_type (neon_intQI_type_node, 17410 TYPE_QUAL_CONST); 17411 const_intHI_node = build_qualified_type (neon_intHI_type_node, 17412 TYPE_QUAL_CONST); 17413 const_intSI_node = build_qualified_type (neon_intSI_type_node, 17414 TYPE_QUAL_CONST); 17415 const_intDI_node = build_qualified_type (neon_intDI_type_node, 17416 TYPE_QUAL_CONST); 17417 const_float_node = build_qualified_type (neon_float_type_node, 17418 TYPE_QUAL_CONST); 17419 17420 const_intQI_pointer_node = build_pointer_type (const_intQI_node); 17421 const_intHI_pointer_node = build_pointer_type (const_intHI_node); 17422 const_intSI_pointer_node = build_pointer_type (const_intSI_node); 17423 const_intDI_pointer_node = build_pointer_type (const_intDI_node); 17424 const_float_pointer_node = build_pointer_type (const_float_node); 17425 17426 /* Now create vector types based on our NEON element types. */ 17427 /* 64-bit vectors. */ 17428 V8QI_type_node = 17429 build_vector_type_for_mode (neon_intQI_type_node, V8QImode); 17430 V4HI_type_node = 17431 build_vector_type_for_mode (neon_intHI_type_node, V4HImode); 17432 V2SI_type_node = 17433 build_vector_type_for_mode (neon_intSI_type_node, V2SImode); 17434 V2SF_type_node = 17435 build_vector_type_for_mode (neon_float_type_node, V2SFmode); 17436 /* 128-bit vectors. */ 17437 V16QI_type_node = 17438 build_vector_type_for_mode (neon_intQI_type_node, V16QImode); 17439 V8HI_type_node = 17440 build_vector_type_for_mode (neon_intHI_type_node, V8HImode); 17441 V4SI_type_node = 17442 build_vector_type_for_mode (neon_intSI_type_node, V4SImode); 17443 V4SF_type_node = 17444 build_vector_type_for_mode (neon_float_type_node, V4SFmode); 17445 V2DI_type_node = 17446 build_vector_type_for_mode (neon_intDI_type_node, V2DImode); 17447 17448 /* Unsigned integer types for various mode sizes. 
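     Like the signed element types above, these are distinct nodes rather
   than the standard C type nodes, presumably so that they too can be
   recognized later.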
  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
					     "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
					     "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
					     "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
					     "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
					     "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
					     "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
					     "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
					     "__builtin_neon_xi");

  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);

  /* Operations which return results as pairs.
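     These function types serve the NEON_RESULTPAIR builtins (vtrn, vzip
   and vuzp): for instance __builtin_neon_vtrnv8qi is given the type
   void (V8QI *, V8QI, V8QI) and writes both result vectors back through
   the pointer operand (see neon_emit_pair_result_insn further down).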
*/ 17492 void_ftype_pv8qi_v8qi_v8qi = 17493 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node, 17494 V8QI_type_node, NULL); 17495 void_ftype_pv4hi_v4hi_v4hi = 17496 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node, 17497 V4HI_type_node, NULL); 17498 void_ftype_pv2si_v2si_v2si = 17499 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node, 17500 V2SI_type_node, NULL); 17501 void_ftype_pv2sf_v2sf_v2sf = 17502 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node, 17503 V2SF_type_node, NULL); 17504 void_ftype_pdi_di_di = 17505 build_function_type_list (void_type_node, intDI_pointer_node, 17506 neon_intDI_type_node, neon_intDI_type_node, NULL); 17507 void_ftype_pv16qi_v16qi_v16qi = 17508 build_function_type_list (void_type_node, V16QI_pointer_node, 17509 V16QI_type_node, V16QI_type_node, NULL); 17510 void_ftype_pv8hi_v8hi_v8hi = 17511 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node, 17512 V8HI_type_node, NULL); 17513 void_ftype_pv4si_v4si_v4si = 17514 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node, 17515 V4SI_type_node, NULL); 17516 void_ftype_pv4sf_v4sf_v4sf = 17517 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node, 17518 V4SF_type_node, NULL); 17519 void_ftype_pv2di_v2di_v2di = 17520 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node, 17521 V2DI_type_node, NULL); 17522 17523 dreg_types[0] = V8QI_type_node; 17524 dreg_types[1] = V4HI_type_node; 17525 dreg_types[2] = V2SI_type_node; 17526 dreg_types[3] = V2SF_type_node; 17527 dreg_types[4] = neon_intDI_type_node; 17528 17529 qreg_types[0] = V16QI_type_node; 17530 qreg_types[1] = V8HI_type_node; 17531 qreg_types[2] = V4SI_type_node; 17532 qreg_types[3] = V4SF_type_node; 17533 qreg_types[4] = V2DI_type_node; 17534 17535 for (i = 0; i < 5; i++) 17536 { 17537 int j; 17538 for (j = 0; j < 5; j++) 17539 { 17540 reinterp_ftype_dreg[i][j] 17541 = build_function_type_list (dreg_types[i], dreg_types[j], NULL); 17542 reinterp_ftype_qreg[i][j] 17543 = build_function_type_list (qreg_types[i], qreg_types[j], NULL); 17544 } 17545 } 17546 17547 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++) 17548 { 17549 neon_builtin_datum *d = &neon_builtin_data[i]; 17550 unsigned int j, codeidx = 0; 17551 17552 d->base_fcode = fcode; 17553 17554 for (j = 0; j < T_MAX; j++) 17555 { 17556 const char* const modenames[] = { 17557 "v8qi", "v4hi", "v2si", "v2sf", "di", 17558 "v16qi", "v8hi", "v4si", "v4sf", "v2di" 17559 }; 17560 char namebuf[60]; 17561 tree ftype = NULL; 17562 enum insn_code icode; 17563 int is_load = 0, is_store = 0; 17564 17565 if ((d->bits & (1 << j)) == 0) 17566 continue; 17567 17568 icode = d->codes[codeidx++]; 17569 17570 switch (d->itype) 17571 { 17572 case NEON_LOAD1: 17573 case NEON_LOAD1LANE: 17574 case NEON_LOADSTRUCT: 17575 case NEON_LOADSTRUCTLANE: 17576 is_load = 1; 17577 /* Fall through. */ 17578 case NEON_STORE1: 17579 case NEON_STORE1LANE: 17580 case NEON_STORESTRUCT: 17581 case NEON_STORESTRUCTLANE: 17582 if (!is_load) 17583 is_store = 1; 17584 /* Fall through. 
*/ 17585 case NEON_UNOP: 17586 case NEON_BINOP: 17587 case NEON_LOGICBINOP: 17588 case NEON_SHIFTINSERT: 17589 case NEON_TERNOP: 17590 case NEON_GETLANE: 17591 case NEON_SETLANE: 17592 case NEON_CREATE: 17593 case NEON_DUP: 17594 case NEON_DUPLANE: 17595 case NEON_SHIFTIMM: 17596 case NEON_SHIFTACC: 17597 case NEON_COMBINE: 17598 case NEON_SPLIT: 17599 case NEON_CONVERT: 17600 case NEON_FIXCONV: 17601 case NEON_LANEMUL: 17602 case NEON_LANEMULL: 17603 case NEON_LANEMULH: 17604 case NEON_LANEMAC: 17605 case NEON_SCALARMUL: 17606 case NEON_SCALARMULL: 17607 case NEON_SCALARMULH: 17608 case NEON_SCALARMAC: 17609 case NEON_SELECT: 17610 case NEON_VTBL: 17611 case NEON_VTBX: 17612 { 17613 int k; 17614 tree return_type = void_type_node, args = void_list_node; 17615 17616 /* Build a function type directly from the insn_data for this 17617 builtin. The build_function_type() function takes care of 17618 removing duplicates for us. */ 17619 for (k = insn_data[icode].n_operands - 1; k >= 0; k--) 17620 { 17621 tree eltype; 17622 17623 if (is_load && k == 1) 17624 { 17625 /* Neon load patterns always have the memory operand 17626 (a SImode pointer) in the operand 1 position. We 17627 want a const pointer to the element type in that 17628 position. */ 17629 gcc_assert (insn_data[icode].operand[k].mode == SImode); 17630 17631 switch (1 << j) 17632 { 17633 case T_V8QI: 17634 case T_V16QI: 17635 eltype = const_intQI_pointer_node; 17636 break; 17637 17638 case T_V4HI: 17639 case T_V8HI: 17640 eltype = const_intHI_pointer_node; 17641 break; 17642 17643 case T_V2SI: 17644 case T_V4SI: 17645 eltype = const_intSI_pointer_node; 17646 break; 17647 17648 case T_V2SF: 17649 case T_V4SF: 17650 eltype = const_float_pointer_node; 17651 break; 17652 17653 case T_DI: 17654 case T_V2DI: 17655 eltype = const_intDI_pointer_node; 17656 break; 17657 17658 default: gcc_unreachable (); 17659 } 17660 } 17661 else if (is_store && k == 0) 17662 { 17663 /* Similarly, Neon store patterns use operand 0 as 17664 the memory location to store to (a SImode pointer). 17665 Use a pointer to the element type of the store in 17666 that position. */ 17667 gcc_assert (insn_data[icode].operand[k].mode == SImode); 17668 17669 switch (1 << j) 17670 { 17671 case T_V8QI: 17672 case T_V16QI: 17673 eltype = intQI_pointer_node; 17674 break; 17675 17676 case T_V4HI: 17677 case T_V8HI: 17678 eltype = intHI_pointer_node; 17679 break; 17680 17681 case T_V2SI: 17682 case T_V4SI: 17683 eltype = intSI_pointer_node; 17684 break; 17685 17686 case T_V2SF: 17687 case T_V4SF: 17688 eltype = float_pointer_node; 17689 break; 17690 17691 case T_DI: 17692 case T_V2DI: 17693 eltype = intDI_pointer_node; 17694 break; 17695 17696 default: gcc_unreachable (); 17697 } 17698 } 17699 else 17700 { 17701 switch (insn_data[icode].operand[k].mode) 17702 { 17703 case VOIDmode: eltype = void_type_node; break; 17704 /* Scalars. */ 17705 case QImode: eltype = neon_intQI_type_node; break; 17706 case HImode: eltype = neon_intHI_type_node; break; 17707 case SImode: eltype = neon_intSI_type_node; break; 17708 case SFmode: eltype = neon_float_type_node; break; 17709 case DImode: eltype = neon_intDI_type_node; break; 17710 case TImode: eltype = intTI_type_node; break; 17711 case EImode: eltype = intEI_type_node; break; 17712 case OImode: eltype = intOI_type_node; break; 17713 case CImode: eltype = intCI_type_node; break; 17714 case XImode: eltype = intXI_type_node; break; 17715 /* 64-bit vectors. 
*/ 17716 case V8QImode: eltype = V8QI_type_node; break; 17717 case V4HImode: eltype = V4HI_type_node; break; 17718 case V2SImode: eltype = V2SI_type_node; break; 17719 case V2SFmode: eltype = V2SF_type_node; break; 17720 /* 128-bit vectors. */ 17721 case V16QImode: eltype = V16QI_type_node; break; 17722 case V8HImode: eltype = V8HI_type_node; break; 17723 case V4SImode: eltype = V4SI_type_node; break; 17724 case V4SFmode: eltype = V4SF_type_node; break; 17725 case V2DImode: eltype = V2DI_type_node; break; 17726 default: gcc_unreachable (); 17727 } 17728 } 17729 17730 if (k == 0 && !is_store) 17731 return_type = eltype; 17732 else 17733 args = tree_cons (NULL_TREE, eltype, args); 17734 } 17735 17736 ftype = build_function_type (return_type, args); 17737 } 17738 break; 17739 17740 case NEON_RESULTPAIR: 17741 { 17742 switch (insn_data[icode].operand[1].mode) 17743 { 17744 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; 17745 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; 17746 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; 17747 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; 17748 case DImode: ftype = void_ftype_pdi_di_di; break; 17749 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; 17750 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; 17751 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; 17752 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; 17753 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; 17754 default: gcc_unreachable (); 17755 } 17756 } 17757 break; 17758 17759 case NEON_REINTERP: 17760 { 17761 /* We iterate over 5 doubleword types, then 5 quadword 17762 types. */ 17763 int rhs = j % 5; 17764 switch (insn_data[icode].operand[0].mode) 17765 { 17766 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; 17767 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break; 17768 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break; 17769 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break; 17770 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break; 17771 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break; 17772 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break; 17773 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break; 17774 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break; 17775 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break; 17776 default: gcc_unreachable (); 17777 } 17778 } 17779 break; 17780 17781 default: 17782 gcc_unreachable (); 17783 } 17784 17785 gcc_assert (ftype != NULL); 17786 17787 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]); 17788 17789 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL, 17790 NULL_TREE); 17791 } 17792 } 17793 } 17794 17795 static void 17796 arm_init_fp16_builtins (void) 17797 { 17798 tree fp16_type = make_node (REAL_TYPE); 17799 TYPE_PRECISION (fp16_type) = 16; 17800 layout_type (fp16_type); 17801 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16"); 17802 } 17803 17804 static void 17805 arm_init_builtins (void) 17806 { 17807 arm_init_tls_builtins (); 17808 17809 if (TARGET_REALLY_IWMMXT) 17810 arm_init_iwmmxt_builtins (); 17811 17812 if (TARGET_NEON) 17813 arm_init_neon_builtins (); 17814 17815 if (arm_fp16_format) 17816 arm_init_fp16_builtins (); 17817 } 17818 17819 /* Implement TARGET_INVALID_PARAMETER_TYPE. 
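   __fp16 is a storage-only format here, so a declaration such as

     void f (__fp16 x);

   is rejected with the message below (the check keys off the 16-bit
   precision given to the type by arm_init_fp16_builtins).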
*/ 17820 17821 static const char * 17822 arm_invalid_parameter_type (const_tree t) 17823 { 17824 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) 17825 return N_("function parameters cannot have __fp16 type"); 17826 return NULL; 17827 } 17828 17829 /* Implement TARGET_INVALID_PARAMETER_TYPE. */ 17830 17831 static const char * 17832 arm_invalid_return_type (const_tree t) 17833 { 17834 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) 17835 return N_("functions cannot return __fp16 type"); 17836 return NULL; 17837 } 17838 17839 /* Implement TARGET_PROMOTED_TYPE. */ 17840 17841 static tree 17842 arm_promoted_type (const_tree t) 17843 { 17844 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) 17845 return float_type_node; 17846 return NULL_TREE; 17847 } 17848 17849 /* Implement TARGET_CONVERT_TO_TYPE. 17850 Specifically, this hook implements the peculiarity of the ARM 17851 half-precision floating-point C semantics that requires conversions between 17852 __fp16 to or from double to do an intermediate conversion to float. */ 17853 17854 static tree 17855 arm_convert_to_type (tree type, tree expr) 17856 { 17857 tree fromtype = TREE_TYPE (expr); 17858 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type)) 17859 return NULL_TREE; 17860 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32) 17861 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32)) 17862 return convert (type, convert (float_type_node, expr)); 17863 return NULL_TREE; 17864 } 17865 17866 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. 17867 This simply adds HFmode as a supported mode; even though we don't 17868 implement arithmetic on this type directly, it's supported by 17869 optabs conversions, much the way the double-word arithmetic is 17870 special-cased in the default hook. */ 17871 17872 static bool 17873 arm_scalar_mode_supported_p (enum machine_mode mode) 17874 { 17875 if (mode == HFmode) 17876 return (arm_fp16_format != ARM_FP16_FORMAT_NONE); 17877 else 17878 return default_scalar_mode_supported_p (mode); 17879 } 17880 17881 /* Errors in the source file can cause expand_expr to return const0_rtx 17882 where we expect a vector. To avoid crashing, use one of the vector 17883 clear instructions. */ 17884 17885 static rtx 17886 safe_vector_operand (rtx x, enum machine_mode mode) 17887 { 17888 if (x != const0_rtx) 17889 return x; 17890 x = gen_reg_rtx (mode); 17891 17892 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x 17893 : gen_rtx_SUBREG (DImode, x, 0))); 17894 return x; 17895 } 17896 17897 /* Subroutine of arm_expand_builtin to take care of binop insns. */ 17898 17899 static rtx 17900 arm_expand_binop_builtin (enum insn_code icode, 17901 tree exp, rtx target) 17902 { 17903 rtx pat; 17904 tree arg0 = CALL_EXPR_ARG (exp, 0); 17905 tree arg1 = CALL_EXPR_ARG (exp, 1); 17906 rtx op0 = expand_normal (arg0); 17907 rtx op1 = expand_normal (arg1); 17908 enum machine_mode tmode = insn_data[icode].operand[0].mode; 17909 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 17910 enum machine_mode mode1 = insn_data[icode].operand[2].mode; 17911 17912 if (VECTOR_MODE_P (mode0)) 17913 op0 = safe_vector_operand (op0, mode0); 17914 if (VECTOR_MODE_P (mode1)) 17915 op1 = safe_vector_operand (op1, mode1); 17916 17917 if (! target 17918 || GET_MODE (target) != tmode 17919 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 17920 target = gen_reg_rtx (tmode); 17921 17922 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1); 17923 17924 if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) 17925 op0 = copy_to_mode_reg (mode0, op0); 17926 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 17927 op1 = copy_to_mode_reg (mode1, op1); 17928 17929 pat = GEN_FCN (icode) (target, op0, op1); 17930 if (! pat) 17931 return 0; 17932 emit_insn (pat); 17933 return target; 17934 } 17935 17936 /* Subroutine of arm_expand_builtin to take care of unop insns. */ 17937 17938 static rtx 17939 arm_expand_unop_builtin (enum insn_code icode, 17940 tree exp, rtx target, int do_load) 17941 { 17942 rtx pat; 17943 tree arg0 = CALL_EXPR_ARG (exp, 0); 17944 rtx op0 = expand_normal (arg0); 17945 enum machine_mode tmode = insn_data[icode].operand[0].mode; 17946 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 17947 17948 if (! target 17949 || GET_MODE (target) != tmode 17950 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 17951 target = gen_reg_rtx (tmode); 17952 if (do_load) 17953 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 17954 else 17955 { 17956 if (VECTOR_MODE_P (mode0)) 17957 op0 = safe_vector_operand (op0, mode0); 17958 17959 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 17960 op0 = copy_to_mode_reg (mode0, op0); 17961 } 17962 17963 pat = GEN_FCN (icode) (target, op0); 17964 if (! pat) 17965 return 0; 17966 emit_insn (pat); 17967 return target; 17968 } 17969 17970 static int 17971 neon_builtin_compare (const void *a, const void *b) 17972 { 17973 const neon_builtin_datum *const key = (const neon_builtin_datum *) a; 17974 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b; 17975 unsigned int soughtcode = key->base_fcode; 17976 17977 if (soughtcode >= memb->base_fcode 17978 && soughtcode < memb->base_fcode + memb->num_vars) 17979 return 0; 17980 else if (soughtcode < memb->base_fcode) 17981 return -1; 17982 else 17983 return 1; 17984 } 17985 17986 static enum insn_code 17987 locate_neon_builtin_icode (int fcode, neon_itype *itype) 17988 { 17989 neon_builtin_datum key, *found; 17990 int idx; 17991 17992 key.base_fcode = fcode; 17993 found = (neon_builtin_datum *) 17994 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data), 17995 sizeof (neon_builtin_data[0]), neon_builtin_compare); 17996 gcc_assert (found); 17997 idx = fcode - (int) found->base_fcode; 17998 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars); 17999 18000 if (itype) 18001 *itype = found->itype; 18002 18003 return found->codes[idx]; 18004 } 18005 18006 typedef enum { 18007 NEON_ARG_COPY_TO_REG, 18008 NEON_ARG_CONSTANT, 18009 NEON_ARG_STOP 18010 } builtin_arg; 18011 18012 #define NEON_MAX_BUILTIN_ARGS 5 18013 18014 /* Expand a Neon builtin. */ 18015 static rtx 18016 arm_expand_neon_args (rtx target, int icode, int have_retval, 18017 tree exp, ...) 
18018 { 18019 va_list ap; 18020 rtx pat; 18021 tree arg[NEON_MAX_BUILTIN_ARGS]; 18022 rtx op[NEON_MAX_BUILTIN_ARGS]; 18023 enum machine_mode tmode = insn_data[icode].operand[0].mode; 18024 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS]; 18025 int argc = 0; 18026 18027 if (have_retval 18028 && (!target 18029 || GET_MODE (target) != tmode 18030 || !(*insn_data[icode].operand[0].predicate) (target, tmode))) 18031 target = gen_reg_rtx (tmode); 18032 18033 va_start (ap, exp); 18034 18035 for (;;) 18036 { 18037 builtin_arg thisarg = (builtin_arg) va_arg (ap, int); 18038 18039 if (thisarg == NEON_ARG_STOP) 18040 break; 18041 else 18042 { 18043 arg[argc] = CALL_EXPR_ARG (exp, argc); 18044 op[argc] = expand_normal (arg[argc]); 18045 mode[argc] = insn_data[icode].operand[argc + have_retval].mode; 18046 18047 switch (thisarg) 18048 { 18049 case NEON_ARG_COPY_TO_REG: 18050 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/ 18051 if (!(*insn_data[icode].operand[argc + have_retval].predicate) 18052 (op[argc], mode[argc])) 18053 op[argc] = copy_to_mode_reg (mode[argc], op[argc]); 18054 break; 18055 18056 case NEON_ARG_CONSTANT: 18057 /* FIXME: This error message is somewhat unhelpful. */ 18058 if (!(*insn_data[icode].operand[argc + have_retval].predicate) 18059 (op[argc], mode[argc])) 18060 error ("argument must be a constant"); 18061 break; 18062 18063 case NEON_ARG_STOP: 18064 gcc_unreachable (); 18065 } 18066 18067 argc++; 18068 } 18069 } 18070 18071 va_end (ap); 18072 18073 if (have_retval) 18074 switch (argc) 18075 { 18076 case 1: 18077 pat = GEN_FCN (icode) (target, op[0]); 18078 break; 18079 18080 case 2: 18081 pat = GEN_FCN (icode) (target, op[0], op[1]); 18082 break; 18083 18084 case 3: 18085 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]); 18086 break; 18087 18088 case 4: 18089 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]); 18090 break; 18091 18092 case 5: 18093 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]); 18094 break; 18095 18096 default: 18097 gcc_unreachable (); 18098 } 18099 else 18100 switch (argc) 18101 { 18102 case 1: 18103 pat = GEN_FCN (icode) (op[0]); 18104 break; 18105 18106 case 2: 18107 pat = GEN_FCN (icode) (op[0], op[1]); 18108 break; 18109 18110 case 3: 18111 pat = GEN_FCN (icode) (op[0], op[1], op[2]); 18112 break; 18113 18114 case 4: 18115 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); 18116 break; 18117 18118 case 5: 18119 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); 18120 break; 18121 18122 default: 18123 gcc_unreachable (); 18124 } 18125 18126 if (!pat) 18127 return 0; 18128 18129 emit_insn (pat); 18130 18131 return target; 18132 } 18133 18134 /* Expand a Neon builtin. These are "special" because they don't have symbolic 18135 constants defined per-instruction or per instruction-variant. Instead, the 18136 required info is looked up in the table neon_builtin_data. 
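   For instance, a call to __builtin_neon_vaddv8qi maps back (via
   locate_neon_builtin_icode) to the "vadd" entry, yielding
   CODE_FOR_neon_vaddv8qi and itype NEON_BINOP, and is then expanded as

     arm_expand_neon_args (target, CODE_FOR_neon_vaddv8qi, 1, exp,
                           NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
                           NEON_ARG_CONSTANT, NEON_ARG_STOP);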
*/ 18137 static rtx 18138 arm_expand_neon_builtin (int fcode, tree exp, rtx target) 18139 { 18140 neon_itype itype; 18141 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype); 18142 18143 switch (itype) 18144 { 18145 case NEON_UNOP: 18146 case NEON_CONVERT: 18147 case NEON_DUPLANE: 18148 return arm_expand_neon_args (target, icode, 1, exp, 18149 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); 18150 18151 case NEON_BINOP: 18152 case NEON_SETLANE: 18153 case NEON_SCALARMUL: 18154 case NEON_SCALARMULL: 18155 case NEON_SCALARMULH: 18156 case NEON_SHIFTINSERT: 18157 case NEON_LOGICBINOP: 18158 return arm_expand_neon_args (target, icode, 1, exp, 18159 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, 18160 NEON_ARG_STOP); 18161 18162 case NEON_TERNOP: 18163 return arm_expand_neon_args (target, icode, 1, exp, 18164 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, 18165 NEON_ARG_CONSTANT, NEON_ARG_STOP); 18166 18167 case NEON_GETLANE: 18168 case NEON_FIXCONV: 18169 case NEON_SHIFTIMM: 18170 return arm_expand_neon_args (target, icode, 1, exp, 18171 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, 18172 NEON_ARG_STOP); 18173 18174 case NEON_CREATE: 18175 return arm_expand_neon_args (target, icode, 1, exp, 18176 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); 18177 18178 case NEON_DUP: 18179 case NEON_SPLIT: 18180 case NEON_REINTERP: 18181 return arm_expand_neon_args (target, icode, 1, exp, 18182 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); 18183 18184 case NEON_COMBINE: 18185 case NEON_VTBL: 18186 return arm_expand_neon_args (target, icode, 1, exp, 18187 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); 18188 18189 case NEON_RESULTPAIR: 18190 return arm_expand_neon_args (target, icode, 0, exp, 18191 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, 18192 NEON_ARG_STOP); 18193 18194 case NEON_LANEMUL: 18195 case NEON_LANEMULL: 18196 case NEON_LANEMULH: 18197 return arm_expand_neon_args (target, icode, 1, exp, 18198 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, 18199 NEON_ARG_CONSTANT, NEON_ARG_STOP); 18200 18201 case NEON_LANEMAC: 18202 return arm_expand_neon_args (target, icode, 1, exp, 18203 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, 18204 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP); 18205 18206 case NEON_SHIFTACC: 18207 return arm_expand_neon_args (target, icode, 1, exp, 18208 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, 18209 NEON_ARG_CONSTANT, NEON_ARG_STOP); 18210 18211 case NEON_SCALARMAC: 18212 return arm_expand_neon_args (target, icode, 1, exp, 18213 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, 18214 NEON_ARG_CONSTANT, NEON_ARG_STOP); 18215 18216 case NEON_SELECT: 18217 case NEON_VTBX: 18218 return arm_expand_neon_args (target, icode, 1, exp, 18219 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, 18220 NEON_ARG_STOP); 18221 18222 case NEON_LOAD1: 18223 case NEON_LOADSTRUCT: 18224 return arm_expand_neon_args (target, icode, 1, exp, 18225 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); 18226 18227 case NEON_LOAD1LANE: 18228 case NEON_LOADSTRUCTLANE: 18229 return arm_expand_neon_args (target, icode, 1, exp, 18230 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, 18231 NEON_ARG_STOP); 18232 18233 case NEON_STORE1: 18234 case NEON_STORESTRUCT: 18235 return arm_expand_neon_args (target, icode, 0, exp, 18236 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); 18237 18238 case NEON_STORE1LANE: 18239 case 
NEON_STORESTRUCTLANE: 18240 return arm_expand_neon_args (target, icode, 0, exp, 18241 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, 18242 NEON_ARG_STOP); 18243 } 18244 18245 gcc_unreachable (); 18246 } 18247 18248 /* Emit code to reinterpret one Neon type as another, without altering bits. */ 18249 void 18250 neon_reinterpret (rtx dest, rtx src) 18251 { 18252 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); 18253 } 18254 18255 /* Emit code to place a Neon pair result in memory locations (with equal 18256 registers). */ 18257 void 18258 neon_emit_pair_result_insn (enum machine_mode mode, 18259 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr, 18260 rtx op1, rtx op2) 18261 { 18262 rtx mem = gen_rtx_MEM (mode, destaddr); 18263 rtx tmp1 = gen_reg_rtx (mode); 18264 rtx tmp2 = gen_reg_rtx (mode); 18265 18266 emit_insn (intfn (tmp1, op1, op2, tmp2)); 18267 18268 emit_move_insn (mem, tmp1); 18269 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); 18270 emit_move_insn (mem, tmp2); 18271 } 18272 18273 /* Set up operands for a register copy from src to dest, taking care not to 18274 clobber registers in the process. 18275 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't 18276 be called with a large N, so that should be OK. */ 18277 18278 void 18279 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count) 18280 { 18281 unsigned int copied = 0, opctr = 0; 18282 unsigned int done = (1 << count) - 1; 18283 unsigned int i, j; 18284 18285 while (copied != done) 18286 { 18287 for (i = 0; i < count; i++) 18288 { 18289 int good = 1; 18290 18291 for (j = 0; good && j < count; j++) 18292 if (i != j && (copied & (1 << j)) == 0 18293 && reg_overlap_mentioned_p (src[j], dest[i])) 18294 good = 0; 18295 18296 if (good) 18297 { 18298 operands[opctr++] = dest[i]; 18299 operands[opctr++] = src[i]; 18300 copied |= 1 << i; 18301 } 18302 } 18303 } 18304 18305 gcc_assert (opctr == count * 2); 18306 } 18307 18308 /* Expand an expression EXP that calls a built-in function, 18309 with result going to TARGET if that's convenient 18310 (and in mode MODE if that's convenient). 18311 SUBTARGET may be used as the target for computing one of EXP's operands. 18312 IGNORE is nonzero if the value is to be ignored. */ 18313 18314 static rtx 18315 arm_expand_builtin (tree exp, 18316 rtx target, 18317 rtx subtarget ATTRIBUTE_UNUSED, 18318 enum machine_mode mode ATTRIBUTE_UNUSED, 18319 int ignore ATTRIBUTE_UNUSED) 18320 { 18321 const struct builtin_description * d; 18322 enum insn_code icode; 18323 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 18324 tree arg0; 18325 tree arg1; 18326 tree arg2; 18327 rtx op0; 18328 rtx op1; 18329 rtx op2; 18330 rtx pat; 18331 int fcode = DECL_FUNCTION_CODE (fndecl); 18332 size_t i; 18333 enum machine_mode tmode; 18334 enum machine_mode mode0; 18335 enum machine_mode mode1; 18336 enum machine_mode mode2; 18337 18338 if (fcode >= ARM_BUILTIN_NEON_BASE) 18339 return arm_expand_neon_builtin (fcode, exp, target); 18340 18341 switch (fcode) 18342 { 18343 case ARM_BUILTIN_TEXTRMSB: 18344 case ARM_BUILTIN_TEXTRMUB: 18345 case ARM_BUILTIN_TEXTRMSH: 18346 case ARM_BUILTIN_TEXTRMUH: 18347 case ARM_BUILTIN_TEXTRMSW: 18348 case ARM_BUILTIN_TEXTRMUW: 18349 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb 18350 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub 18351 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh 18352 : fcode == ARM_BUILTIN_TEXTRMUH ? 
CODE_FOR_iwmmxt_textrmuh 18353 : CODE_FOR_iwmmxt_textrmw); 18354 18355 arg0 = CALL_EXPR_ARG (exp, 0); 18356 arg1 = CALL_EXPR_ARG (exp, 1); 18357 op0 = expand_normal (arg0); 18358 op1 = expand_normal (arg1); 18359 tmode = insn_data[icode].operand[0].mode; 18360 mode0 = insn_data[icode].operand[1].mode; 18361 mode1 = insn_data[icode].operand[2].mode; 18362 18363 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 18364 op0 = copy_to_mode_reg (mode0, op0); 18365 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 18366 { 18367 /* @@@ better error message */ 18368 error ("selector must be an immediate"); 18369 return gen_reg_rtx (tmode); 18370 } 18371 if (target == 0 18372 || GET_MODE (target) != tmode 18373 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 18374 target = gen_reg_rtx (tmode); 18375 pat = GEN_FCN (icode) (target, op0, op1); 18376 if (! pat) 18377 return 0; 18378 emit_insn (pat); 18379 return target; 18380 18381 case ARM_BUILTIN_TINSRB: 18382 case ARM_BUILTIN_TINSRH: 18383 case ARM_BUILTIN_TINSRW: 18384 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb 18385 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh 18386 : CODE_FOR_iwmmxt_tinsrw); 18387 arg0 = CALL_EXPR_ARG (exp, 0); 18388 arg1 = CALL_EXPR_ARG (exp, 1); 18389 arg2 = CALL_EXPR_ARG (exp, 2); 18390 op0 = expand_normal (arg0); 18391 op1 = expand_normal (arg1); 18392 op2 = expand_normal (arg2); 18393 tmode = insn_data[icode].operand[0].mode; 18394 mode0 = insn_data[icode].operand[1].mode; 18395 mode1 = insn_data[icode].operand[2].mode; 18396 mode2 = insn_data[icode].operand[3].mode; 18397 18398 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 18399 op0 = copy_to_mode_reg (mode0, op0); 18400 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 18401 op1 = copy_to_mode_reg (mode1, op1); 18402 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) 18403 { 18404 /* @@@ better error message */ 18405 error ("selector must be an immediate"); 18406 return const0_rtx; 18407 } 18408 if (target == 0 18409 || GET_MODE (target) != tmode 18410 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 18411 target = gen_reg_rtx (tmode); 18412 pat = GEN_FCN (icode) (target, op0, op1, op2); 18413 if (! pat) 18414 return 0; 18415 emit_insn (pat); 18416 return target; 18417 18418 case ARM_BUILTIN_SETWCX: 18419 arg0 = CALL_EXPR_ARG (exp, 0); 18420 arg1 = CALL_EXPR_ARG (exp, 1); 18421 op0 = force_reg (SImode, expand_normal (arg0)); 18422 op1 = expand_normal (arg1); 18423 emit_insn (gen_iwmmxt_tmcr (op1, op0)); 18424 return 0; 18425 18426 case ARM_BUILTIN_GETWCX: 18427 arg0 = CALL_EXPR_ARG (exp, 0); 18428 op0 = expand_normal (arg0); 18429 target = gen_reg_rtx (SImode); 18430 emit_insn (gen_iwmmxt_tmrc (target, op0)); 18431 return target; 18432 18433 case ARM_BUILTIN_WSHUFH: 18434 icode = CODE_FOR_iwmmxt_wshufh; 18435 arg0 = CALL_EXPR_ARG (exp, 0); 18436 arg1 = CALL_EXPR_ARG (exp, 1); 18437 op0 = expand_normal (arg0); 18438 op1 = expand_normal (arg1); 18439 tmode = insn_data[icode].operand[0].mode; 18440 mode1 = insn_data[icode].operand[1].mode; 18441 mode2 = insn_data[icode].operand[2].mode; 18442 18443 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 18444 op0 = copy_to_mode_reg (mode1, op0); 18445 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 18446 { 18447 /* @@@ better error message */ 18448 error ("mask must be an immediate"); 18449 return const0_rtx; 18450 } 18451 if (target == 0 18452 || GET_MODE (target) != tmode 18453 || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) 18454 target = gen_reg_rtx (tmode); 18455 pat = GEN_FCN (icode) (target, op0, op1); 18456 if (! pat) 18457 return 0; 18458 emit_insn (pat); 18459 return target; 18460 18461 case ARM_BUILTIN_WSADB: 18462 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target); 18463 case ARM_BUILTIN_WSADH: 18464 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target); 18465 case ARM_BUILTIN_WSADBZ: 18466 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target); 18467 case ARM_BUILTIN_WSADHZ: 18468 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target); 18469 18470 /* Several three-argument builtins. */ 18471 case ARM_BUILTIN_WMACS: 18472 case ARM_BUILTIN_WMACU: 18473 case ARM_BUILTIN_WALIGN: 18474 case ARM_BUILTIN_TMIA: 18475 case ARM_BUILTIN_TMIAPH: 18476 case ARM_BUILTIN_TMIATT: 18477 case ARM_BUILTIN_TMIATB: 18478 case ARM_BUILTIN_TMIABT: 18479 case ARM_BUILTIN_TMIABB: 18480 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs 18481 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu 18482 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia 18483 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph 18484 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb 18485 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt 18486 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb 18487 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt 18488 : CODE_FOR_iwmmxt_walign); 18489 arg0 = CALL_EXPR_ARG (exp, 0); 18490 arg1 = CALL_EXPR_ARG (exp, 1); 18491 arg2 = CALL_EXPR_ARG (exp, 2); 18492 op0 = expand_normal (arg0); 18493 op1 = expand_normal (arg1); 18494 op2 = expand_normal (arg2); 18495 tmode = insn_data[icode].operand[0].mode; 18496 mode0 = insn_data[icode].operand[1].mode; 18497 mode1 = insn_data[icode].operand[2].mode; 18498 mode2 = insn_data[icode].operand[3].mode; 18499 18500 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 18501 op0 = copy_to_mode_reg (mode0, op0); 18502 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 18503 op1 = copy_to_mode_reg (mode1, op1); 18504 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) 18505 op2 = copy_to_mode_reg (mode2, op2); 18506 if (target == 0 18507 || GET_MODE (target) != tmode 18508 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 18509 target = gen_reg_rtx (tmode); 18510 pat = GEN_FCN (icode) (target, op0, op1, op2); 18511 if (! pat) 18512 return 0; 18513 emit_insn (pat); 18514 return target; 18515 18516 case ARM_BUILTIN_WZERO: 18517 target = gen_reg_rtx (DImode); 18518 emit_insn (gen_iwmmxt_clrdi (target)); 18519 return target; 18520 18521 case ARM_BUILTIN_THREAD_POINTER: 18522 return arm_load_tp (target); 18523 18524 default: 18525 break; 18526 } 18527 18528 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 18529 if (d->code == (const enum arm_builtins) fcode) 18530 return arm_expand_binop_builtin (d->icode, exp, target); 18531 18532 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) 18533 if (d->code == (const enum arm_builtins) fcode) 18534 return arm_expand_unop_builtin (d->icode, exp, target, 0); 18535 18536 /* @@@ Should really do something sensible here. */ 18537 return NULL_RTX; 18538 } 18539 18540 /* Return the number (counting from 0) of 18541 the least significant set bit in MASK. 
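   For example, number_of_first_bit_set (0x14) is 2, since the lowest set
   bit of binary 10100 is bit 2.  MASK must be nonzero; the loop below
   never checks for that.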
*/ 18542 18543 inline static int 18544 number_of_first_bit_set (unsigned mask) 18545 { 18546 int bit; 18547 18548 for (bit = 0; 18549 (mask & (1 << bit)) == 0; 18550 ++bit) 18551 continue; 18552 18553 return bit; 18554 } 18555 18556 /* Emit code to push or pop registers to or from the stack. F is the 18557 assembly file. MASK is the registers to push or pop. PUSH is 18558 nonzero if we should push, and zero if we should pop. For debugging 18559 output, if pushing, adjust CFA_OFFSET by the amount of space added 18560 to the stack. REAL_REGS should have the same number of bits set as 18561 MASK, and will be used instead (in the same order) to describe which 18562 registers were saved - this is used to mark the save slots when we 18563 push high registers after moving them to low registers. */ 18564 static void 18565 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset, 18566 unsigned long real_regs) 18567 { 18568 int regno; 18569 int lo_mask = mask & 0xFF; 18570 int pushed_words = 0; 18571 18572 gcc_assert (mask); 18573 18574 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM))) 18575 { 18576 /* Special case. Do not generate a POP PC statement here, do it in 18577 thumb_exit() */ 18578 thumb_exit (f, -1); 18579 return; 18580 } 18581 18582 if (ARM_EABI_UNWIND_TABLES && push) 18583 { 18584 fprintf (f, "\t.save\t{"); 18585 for (regno = 0; regno < 15; regno++) 18586 { 18587 if (real_regs & (1 << regno)) 18588 { 18589 if (real_regs & ((1 << regno) -1)) 18590 fprintf (f, ", "); 18591 asm_fprintf (f, "%r", regno); 18592 } 18593 } 18594 fprintf (f, "}\n"); 18595 } 18596 18597 fprintf (f, "\t%s\t{", push ? "push" : "pop"); 18598 18599 /* Look at the low registers first. */ 18600 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1) 18601 { 18602 if (lo_mask & 1) 18603 { 18604 asm_fprintf (f, "%r", regno); 18605 18606 if ((lo_mask & ~1) != 0) 18607 fprintf (f, ", "); 18608 18609 pushed_words++; 18610 } 18611 } 18612 18613 if (push && (mask & (1 << LR_REGNUM))) 18614 { 18615 /* Catch pushing the LR. */ 18616 if (mask & 0xFF) 18617 fprintf (f, ", "); 18618 18619 asm_fprintf (f, "%r", LR_REGNUM); 18620 18621 pushed_words++; 18622 } 18623 else if (!push && (mask & (1 << PC_REGNUM))) 18624 { 18625 /* Catch popping the PC. */ 18626 if (TARGET_INTERWORK || TARGET_BACKTRACE 18627 || crtl->calls_eh_return) 18628 { 18629 /* The PC is never poped directly, instead 18630 it is popped into r3 and then BX is used. */ 18631 fprintf (f, "}\n"); 18632 18633 thumb_exit (f, -1); 18634 18635 return; 18636 } 18637 else 18638 { 18639 if (mask & 0xFF) 18640 fprintf (f, ", "); 18641 18642 asm_fprintf (f, "%r", PC_REGNUM); 18643 } 18644 } 18645 18646 fprintf (f, "}\n"); 18647 18648 if (push && pushed_words && dwarf2out_do_frame ()) 18649 { 18650 char *l = dwarf2out_cfi_label (false); 18651 int pushed_mask = real_regs; 18652 18653 *cfa_offset += pushed_words * 4; 18654 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset); 18655 18656 pushed_words = 0; 18657 pushed_mask = real_regs; 18658 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1) 18659 { 18660 if (pushed_mask & 1) 18661 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset); 18662 } 18663 } 18664 } 18665 18666 /* Generate code to return from a thumb function. 18667 If 'reg_containing_return_addr' is -1, then the return address is 18668 actually on the stack, at the stack pointer. 
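
   For illustration only (not taken from the original sources), the two
   simplest sequences this function can emit are

	pop	{pc}		@ return address is on the stack
	bx	lr		@ return address still in LR, nothing to pop

   the remaining paths below have to shuffle the popped frame pointer
   and stack pointer through low registers before the final bx.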
*/ 18669 static void 18670 thumb_exit (FILE *f, int reg_containing_return_addr) 18671 { 18672 unsigned regs_available_for_popping; 18673 unsigned regs_to_pop; 18674 int pops_needed; 18675 unsigned available; 18676 unsigned required; 18677 int mode; 18678 int size; 18679 int restore_a4 = FALSE; 18680 18681 /* Compute the registers we need to pop. */ 18682 regs_to_pop = 0; 18683 pops_needed = 0; 18684 18685 if (reg_containing_return_addr == -1) 18686 { 18687 regs_to_pop |= 1 << LR_REGNUM; 18688 ++pops_needed; 18689 } 18690 18691 if (TARGET_BACKTRACE) 18692 { 18693 /* Restore the (ARM) frame pointer and stack pointer. */ 18694 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM); 18695 pops_needed += 2; 18696 } 18697 18698 /* If there is nothing to pop then just emit the BX instruction and 18699 return. */ 18700 if (pops_needed == 0) 18701 { 18702 if (crtl->calls_eh_return) 18703 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); 18704 18705 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); 18706 return; 18707 } 18708 /* Otherwise if we are not supporting interworking and we have not created 18709 a backtrace structure and the function was not entered in ARM mode then 18710 just pop the return address straight into the PC. */ 18711 else if (!TARGET_INTERWORK 18712 && !TARGET_BACKTRACE 18713 && !is_called_in_ARM_mode (current_function_decl) 18714 && !crtl->calls_eh_return) 18715 { 18716 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM); 18717 return; 18718 } 18719 18720 /* Find out how many of the (return) argument registers we can corrupt. */ 18721 regs_available_for_popping = 0; 18722 18723 /* If returning via __builtin_eh_return, the bottom three registers 18724 all contain information needed for the return. */ 18725 if (crtl->calls_eh_return) 18726 size = 12; 18727 else 18728 { 18729 /* If we can deduce the registers used from the function's 18730 return value. This is more reliable that examining 18731 df_regs_ever_live_p () because that will be set if the register is 18732 ever used in the function, not just if the register is used 18733 to hold a return value. */ 18734 18735 if (crtl->return_rtx != 0) 18736 mode = GET_MODE (crtl->return_rtx); 18737 else 18738 mode = DECL_MODE (DECL_RESULT (current_function_decl)); 18739 18740 size = GET_MODE_SIZE (mode); 18741 18742 if (size == 0) 18743 { 18744 /* In a void function we can use any argument register. 18745 In a function that returns a structure on the stack 18746 we can use the second and third argument registers. */ 18747 if (mode == VOIDmode) 18748 regs_available_for_popping = 18749 (1 << ARG_REGISTER (1)) 18750 | (1 << ARG_REGISTER (2)) 18751 | (1 << ARG_REGISTER (3)); 18752 else 18753 regs_available_for_popping = 18754 (1 << ARG_REGISTER (2)) 18755 | (1 << ARG_REGISTER (3)); 18756 } 18757 else if (size <= 4) 18758 regs_available_for_popping = 18759 (1 << ARG_REGISTER (2)) 18760 | (1 << ARG_REGISTER (3)); 18761 else if (size <= 8) 18762 regs_available_for_popping = 18763 (1 << ARG_REGISTER (3)); 18764 } 18765 18766 /* Match registers to be popped with registers into which we pop them. */ 18767 for (available = regs_available_for_popping, 18768 required = regs_to_pop; 18769 required != 0 && available != 0; 18770 available &= ~(available & - available), 18771 required &= ~(required & - required)) 18772 -- pops_needed; 18773 18774 /* If we have any popping registers left over, remove them. 
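
   (Illustrative aside, not part of the original comment: the matching
   loop above retires one register from each mask per iteration using
   the usual lowest-set-bit idiom.  For example, with available == 0x0c,
   available & -available is 0x04, so that bit is cleared and the next
   iteration sees available == 0x08.)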
 */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		 regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.
*/ 18866 regs_available_for_popping |= (1 << frame_pointer); 18867 } 18868 } 18869 18870 /* If we still have registers left on the stack, but we no longer have 18871 any registers into which we can pop them, then we must move the return 18872 address into the link register and make available the register that 18873 contained it. */ 18874 if (regs_available_for_popping == 0 && pops_needed > 0) 18875 { 18876 regs_available_for_popping |= 1 << reg_containing_return_addr; 18877 18878 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, 18879 reg_containing_return_addr); 18880 18881 reg_containing_return_addr = LR_REGNUM; 18882 } 18883 18884 /* If we have registers left on the stack then pop some more. 18885 We know that at most we will want to pop FP and SP. */ 18886 if (pops_needed > 0) 18887 { 18888 int popped_into; 18889 int move_to; 18890 18891 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL, 18892 regs_available_for_popping); 18893 18894 /* We have popped either FP or SP. 18895 Move whichever one it is into the correct register. */ 18896 popped_into = number_of_first_bit_set (regs_available_for_popping); 18897 move_to = number_of_first_bit_set (regs_to_pop); 18898 18899 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into); 18900 18901 regs_to_pop &= ~(1 << move_to); 18902 18903 --pops_needed; 18904 } 18905 18906 /* If we still have not popped everything then we must have only 18907 had one register available to us and we are now popping the SP. */ 18908 if (pops_needed > 0) 18909 { 18910 int popped_into; 18911 18912 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL, 18913 regs_available_for_popping); 18914 18915 popped_into = number_of_first_bit_set (regs_available_for_popping); 18916 18917 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into); 18918 /* 18919 assert (regs_to_pop == (1 << STACK_POINTER)) 18920 assert (pops_needed == 1) 18921 */ 18922 } 18923 18924 /* If necessary restore the a4 register. */ 18925 if (restore_a4) 18926 { 18927 if (reg_containing_return_addr != LR_REGNUM) 18928 { 18929 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM); 18930 reg_containing_return_addr = LR_REGNUM; 18931 } 18932 18933 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM); 18934 } 18935 18936 if (crtl->calls_eh_return) 18937 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); 18938 18939 /* Return to caller. */ 18940 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); 18941 } 18942 18943 18944 void 18945 thumb1_final_prescan_insn (rtx insn) 18946 { 18947 if (flag_print_asm_name) 18948 asm_fprintf (asm_out_file, "%@ 0x%04x\n", 18949 INSN_ADDRESSES (INSN_UID (insn))); 18950 } 18951 18952 int 18953 thumb_shiftable_const (unsigned HOST_WIDE_INT val) 18954 { 18955 unsigned HOST_WIDE_INT mask = 0xff; 18956 int i; 18957 18958 val = val & (unsigned HOST_WIDE_INT)0xffffffffu; 18959 if (val == 0) /* XXX */ 18960 return 0; 18961 18962 for (i = 0; i < 25; i++) 18963 if ((val & (mask << i)) == val) 18964 return 1; 18965 18966 return 0; 18967 } 18968 18969 /* Returns nonzero if the current function contains, 18970 or might contain a far jump. */ 18971 static int 18972 thumb_far_jump_used_p (void) 18973 { 18974 rtx insn; 18975 18976 /* This test is only important for leaf functions. */ 18977 /* assert (!leaf_function_p ()); */ 18978 18979 /* If we have already decided that far jumps may be used, 18980 do not bother checking again, and always return true even if 18981 it turns out that they are not being used. 
Once we have made 18982 the decision that far jumps are present (and that hence the link 18983 register will be pushed onto the stack) we cannot go back on it. */ 18984 if (cfun->machine->far_jump_used) 18985 return 1; 18986 18987 /* If this function is not being called from the prologue/epilogue 18988 generation code then it must be being called from the 18989 INITIAL_ELIMINATION_OFFSET macro. */ 18990 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed)) 18991 { 18992 /* In this case we know that we are being asked about the elimination 18993 of the arg pointer register. If that register is not being used, 18994 then there are no arguments on the stack, and we do not have to 18995 worry that a far jump might force the prologue to push the link 18996 register, changing the stack offsets. In this case we can just 18997 return false, since the presence of far jumps in the function will 18998 not affect stack offsets. 18999 19000 If the arg pointer is live (or if it was live, but has now been 19001 eliminated and so set to dead) then we do have to test to see if 19002 the function might contain a far jump. This test can lead to some 19003 false negatives, since before reload is completed, then length of 19004 branch instructions is not known, so gcc defaults to returning their 19005 longest length, which in turn sets the far jump attribute to true. 19006 19007 A false negative will not result in bad code being generated, but it 19008 will result in a needless push and pop of the link register. We 19009 hope that this does not occur too often. 19010 19011 If we need doubleword stack alignment this could affect the other 19012 elimination offsets so we can't risk getting it wrong. */ 19013 if (df_regs_ever_live_p (ARG_POINTER_REGNUM)) 19014 cfun->machine->arg_pointer_live = 1; 19015 else if (!cfun->machine->arg_pointer_live) 19016 return 0; 19017 } 19018 19019 /* Check to see if the function contains a branch 19020 insn with the far jump attribute set. */ 19021 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 19022 { 19023 if (GET_CODE (insn) == JUMP_INSN 19024 /* Ignore tablejump patterns. */ 19025 && GET_CODE (PATTERN (insn)) != ADDR_VEC 19026 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC 19027 && get_attr_far_jump (insn) == FAR_JUMP_YES 19028 ) 19029 { 19030 /* Record the fact that we have decided that 19031 the function does use far jumps. */ 19032 cfun->machine->far_jump_used = 1; 19033 return 1; 19034 } 19035 } 19036 19037 return 0; 19038 } 19039 19040 /* Return nonzero if FUNC must be entered in ARM mode. */ 19041 int 19042 is_called_in_ARM_mode (tree func) 19043 { 19044 gcc_assert (TREE_CODE (func) == FUNCTION_DECL); 19045 19046 /* Ignore the problem about functions whose address is taken. */ 19047 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func)) 19048 return TRUE; 19049 19050 #ifdef ARM_PE 19051 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE; 19052 #else 19053 return FALSE; 19054 #endif 19055 } 19056 19057 /* The bits which aren't usefully expanded as rtl. 
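
   For illustration only (mirroring the prologue sequence quoted for
   complex-2.c further down in this function), the matching epilogue
   has to emit something along the lines of

	pop	{r2, r3}	@ reload the saved values of r8 and r9
	mov	r8, r2
	mov	r9, r3
	pop	{r4, r5, r6, r7, pc}

   i.e. the saved high registers are popped into free low registers and
   moved back up before the remaining registers and the return address
   are popped.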
 */
const char *
thumb_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* We can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pushpop (asm_out_file, mask, 0, NULL, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
		       live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
*/ 19168 if (!had_to_push_lr) 19169 thumb_exit (asm_out_file, LR_REGNUM); 19170 } 19171 else 19172 { 19173 /* Pop everything but the return address. */ 19174 if (live_regs_mask) 19175 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL, 19176 live_regs_mask); 19177 19178 if (had_to_push_lr) 19179 { 19180 if (size > 12) 19181 { 19182 /* We have no free low regs, so save one. */ 19183 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM, 19184 LAST_ARG_REGNUM); 19185 } 19186 19187 /* Get the return address into a temporary register. */ 19188 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL, 19189 1 << LAST_ARG_REGNUM); 19190 19191 if (size > 12) 19192 { 19193 /* Move the return address to lr. */ 19194 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM, 19195 LAST_ARG_REGNUM); 19196 /* Restore the low register. */ 19197 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, 19198 IP_REGNUM); 19199 regno = LR_REGNUM; 19200 } 19201 else 19202 regno = LAST_ARG_REGNUM; 19203 } 19204 else 19205 regno = LR_REGNUM; 19206 19207 /* Remove the argument registers that were pushed onto the stack. */ 19208 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n", 19209 SP_REGNUM, SP_REGNUM, 19210 crtl->args.pretend_args_size); 19211 19212 thumb_exit (asm_out_file, regno); 19213 } 19214 19215 return ""; 19216 } 19217 19218 /* Functions to save and restore machine-specific function data. */ 19219 static struct machine_function * 19220 arm_init_machine_status (void) 19221 { 19222 struct machine_function *machine; 19223 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function)); 19224 19225 #if ARM_FT_UNKNOWN != 0 19226 machine->func_type = ARM_FT_UNKNOWN; 19227 #endif 19228 return machine; 19229 } 19230 19231 /* Return an RTX indicating where the return address to the 19232 calling function can be found. */ 19233 rtx 19234 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) 19235 { 19236 if (count != 0) 19237 return NULL_RTX; 19238 19239 return get_hard_reg_initial_val (Pmode, LR_REGNUM); 19240 } 19241 19242 /* Do anything needed before RTL is emitted for each function. */ 19243 void 19244 arm_init_expanders (void) 19245 { 19246 /* Arrange to initialize and mark the machine per-function status. */ 19247 init_machine_status = arm_init_machine_status; 19248 19249 /* This is to stop the combine pass optimizing away the alignment 19250 adjustment of va_arg. */ 19251 /* ??? It is claimed that this should not be necessary. */ 19252 if (cfun) 19253 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY); 19254 } 19255 19256 19257 /* Like arm_compute_initial_elimination offset. Simpler because there 19258 isn't an ABI specified frame pointer for Thumb. Instead, we set it 19259 to point at the base of the local variables after static stack 19260 space for a function has been allocated. 
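
   A worked example with purely hypothetical offsets: if
   arm_get_frame_offsets () returned saved_args == 0, saved_regs == 24,
   soft_frame == 24, locals_base == 40 and outgoing_args == 56, then

     ARG_POINTER_REGNUM   to STACK_POINTER_REGNUM  is  56 - 0  == 56
     ARG_POINTER_REGNUM   to FRAME_POINTER_REGNUM  is  24 - 0  == 24
     FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM  is  56 - 24 == 32

   matching the subtractions performed in the switch below.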
*/ 19261 19262 HOST_WIDE_INT 19263 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to) 19264 { 19265 arm_stack_offsets *offsets; 19266 19267 offsets = arm_get_frame_offsets (); 19268 19269 switch (from) 19270 { 19271 case ARG_POINTER_REGNUM: 19272 switch (to) 19273 { 19274 case STACK_POINTER_REGNUM: 19275 return offsets->outgoing_args - offsets->saved_args; 19276 19277 case FRAME_POINTER_REGNUM: 19278 return offsets->soft_frame - offsets->saved_args; 19279 19280 case ARM_HARD_FRAME_POINTER_REGNUM: 19281 return offsets->saved_regs - offsets->saved_args; 19282 19283 case THUMB_HARD_FRAME_POINTER_REGNUM: 19284 return offsets->locals_base - offsets->saved_args; 19285 19286 default: 19287 gcc_unreachable (); 19288 } 19289 break; 19290 19291 case FRAME_POINTER_REGNUM: 19292 switch (to) 19293 { 19294 case STACK_POINTER_REGNUM: 19295 return offsets->outgoing_args - offsets->soft_frame; 19296 19297 case ARM_HARD_FRAME_POINTER_REGNUM: 19298 return offsets->saved_regs - offsets->soft_frame; 19299 19300 case THUMB_HARD_FRAME_POINTER_REGNUM: 19301 return offsets->locals_base - offsets->soft_frame; 19302 19303 default: 19304 gcc_unreachable (); 19305 } 19306 break; 19307 19308 default: 19309 gcc_unreachable (); 19310 } 19311 } 19312 19313 /* Generate the rest of a function's prologue. */ 19314 void 19315 thumb1_expand_prologue (void) 19316 { 19317 rtx insn, dwarf; 19318 19319 HOST_WIDE_INT amount; 19320 arm_stack_offsets *offsets; 19321 unsigned long func_type; 19322 int regno; 19323 unsigned long live_regs_mask; 19324 19325 func_type = arm_current_func_type (); 19326 19327 /* Naked functions don't have prologues. */ 19328 if (IS_NAKED (func_type)) 19329 return; 19330 19331 if (IS_INTERRUPT (func_type)) 19332 { 19333 error ("interrupt Service Routines cannot be coded in Thumb mode"); 19334 return; 19335 } 19336 19337 offsets = arm_get_frame_offsets (); 19338 live_regs_mask = offsets->saved_regs_mask; 19339 /* Load the pic register before setting the frame pointer, 19340 so we can use r7 as a temporary work register. */ 19341 if (flag_pic && arm_pic_register != INVALID_REGNUM) 19342 arm_load_pic_register (live_regs_mask); 19343 19344 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) 19345 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM), 19346 stack_pointer_rtx); 19347 19348 amount = offsets->outgoing_args - offsets->saved_regs; 19349 if (amount) 19350 { 19351 if (amount < 512) 19352 { 19353 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, 19354 GEN_INT (- amount))); 19355 RTX_FRAME_RELATED_P (insn) = 1; 19356 } 19357 else 19358 { 19359 rtx reg; 19360 19361 /* The stack decrement is too big for an immediate value in a single 19362 insn. In theory we could issue multiple subtracts, but after 19363 three of them it becomes more space efficient to place the full 19364 value in the constant pool and load into a register. (Also the 19365 ARM debugger really likes to see only one stack decrement per 19366 function). So instead we look for a scratch register into which 19367 we can load the decrement, and then we subtract this from the 19368 stack pointer. Unfortunately on the thumb the only available 19369 scratch registers are the argument registers, and we cannot use 19370 these as they may hold arguments to the function. Instead we 19371 attempt to locate a call preserved register which is used by this 19372 function. 
If we can find one, then we know that it will have 19373 been pushed at the start of the prologue and so we can corrupt 19374 it now. */ 19375 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++) 19376 if (live_regs_mask & (1 << regno)) 19377 break; 19378 19379 gcc_assert(regno <= LAST_LO_REGNUM); 19380 19381 reg = gen_rtx_REG (SImode, regno); 19382 19383 emit_insn (gen_movsi (reg, GEN_INT (- amount))); 19384 19385 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, 19386 stack_pointer_rtx, reg)); 19387 RTX_FRAME_RELATED_P (insn) = 1; 19388 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, 19389 plus_constant (stack_pointer_rtx, 19390 -amount)); 19391 RTX_FRAME_RELATED_P (dwarf) = 1; 19392 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); 19393 } 19394 } 19395 19396 if (frame_pointer_needed) 19397 thumb_set_frame_pointer (offsets); 19398 19399 /* If we are profiling, make sure no instructions are scheduled before 19400 the call to mcount. Similarly if the user has requested no 19401 scheduling in the prolog. Similarly if we want non-call exceptions 19402 using the EABI unwinder, to prevent faulting instructions from being 19403 swapped with a stack adjustment. */ 19404 if (crtl->profile || !TARGET_SCHED_PROLOG 19405 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions)) 19406 emit_insn (gen_blockage ()); 19407 19408 cfun->machine->lr_save_eliminated = !thumb_force_lr_save (); 19409 if (live_regs_mask & 0xff) 19410 cfun->machine->lr_save_eliminated = 0; 19411 } 19412 19413 19414 void 19415 thumb1_expand_epilogue (void) 19416 { 19417 HOST_WIDE_INT amount; 19418 arm_stack_offsets *offsets; 19419 int regno; 19420 19421 /* Naked functions don't have prologues. */ 19422 if (IS_NAKED (arm_current_func_type ())) 19423 return; 19424 19425 offsets = arm_get_frame_offsets (); 19426 amount = offsets->outgoing_args - offsets->saved_regs; 19427 19428 if (frame_pointer_needed) 19429 { 19430 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); 19431 amount = offsets->locals_base - offsets->saved_regs; 19432 } 19433 19434 gcc_assert (amount >= 0); 19435 if (amount) 19436 { 19437 if (amount < 512) 19438 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, 19439 GEN_INT (amount))); 19440 else 19441 { 19442 /* r3 is always free in the epilogue. */ 19443 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM); 19444 19445 emit_insn (gen_movsi (reg, GEN_INT (amount))); 19446 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg)); 19447 } 19448 } 19449 19450 /* Emit a USE (stack_pointer_rtx), so that 19451 the stack adjustment will not be deleted. */ 19452 emit_insn (gen_prologue_use (stack_pointer_rtx)); 19453 19454 if (crtl->profile || !TARGET_SCHED_PROLOG) 19455 emit_insn (gen_blockage ()); 19456 19457 /* Emit a clobber for each insn that will be restored in the epilogue, 19458 so that flow2 will get register lifetimes correct. */ 19459 for (regno = 0; regno < 13; regno++) 19460 if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) 19461 emit_clobber (gen_rtx_REG (SImode, regno)); 19462 19463 if (! 
df_regs_ever_live_p (LR_REGNUM)) 19464 emit_use (gen_rtx_REG (SImode, LR_REGNUM)); 19465 } 19466 19467 static void 19468 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 19469 { 19470 arm_stack_offsets *offsets; 19471 unsigned long live_regs_mask = 0; 19472 unsigned long l_mask; 19473 unsigned high_regs_pushed = 0; 19474 int cfa_offset = 0; 19475 int regno; 19476 19477 if (IS_NAKED (arm_current_func_type ())) 19478 return; 19479 19480 if (is_called_in_ARM_mode (current_function_decl)) 19481 { 19482 const char * name; 19483 19484 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM); 19485 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0)) 19486 == SYMBOL_REF); 19487 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); 19488 19489 /* Generate code sequence to switch us into Thumb mode. */ 19490 /* The .code 32 directive has already been emitted by 19491 ASM_DECLARE_FUNCTION_NAME. */ 19492 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM); 19493 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM); 19494 19495 /* Generate a label, so that the debugger will notice the 19496 change in instruction sets. This label is also used by 19497 the assembler to bypass the ARM code when this function 19498 is called from a Thumb encoded function elsewhere in the 19499 same file. Hence the definition of STUB_NAME here must 19500 agree with the definition in gas/config/tc-arm.c. */ 19501 19502 #define STUB_NAME ".real_start_of" 19503 19504 fprintf (f, "\t.code\t16\n"); 19505 #ifdef ARM_PE 19506 if (arm_dllexport_name_p (name)) 19507 name = arm_strip_name_encoding (name); 19508 #endif 19509 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name); 19510 fprintf (f, "\t.thumb_func\n"); 19511 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name); 19512 } 19513 19514 if (crtl->args.pretend_args_size) 19515 { 19516 /* Output unwind directive for the stack adjustment. */ 19517 if (ARM_EABI_UNWIND_TABLES) 19518 fprintf (f, "\t.pad #%d\n", 19519 crtl->args.pretend_args_size); 19520 19521 if (cfun->machine->uses_anonymous_args) 19522 { 19523 int num_pushes; 19524 19525 fprintf (f, "\tpush\t{"); 19526 19527 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size); 19528 19529 for (regno = LAST_ARG_REGNUM + 1 - num_pushes; 19530 regno <= LAST_ARG_REGNUM; 19531 regno++) 19532 asm_fprintf (f, "%r%s", regno, 19533 regno == LAST_ARG_REGNUM ? "" : ", "); 19534 19535 fprintf (f, "}\n"); 19536 } 19537 else 19538 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", 19539 SP_REGNUM, SP_REGNUM, 19540 crtl->args.pretend_args_size); 19541 19542 /* We don't need to record the stores for unwinding (would it 19543 help the debugger any if we did?), but record the change in 19544 the stack pointer. */ 19545 if (dwarf2out_do_frame ()) 19546 { 19547 char *l = dwarf2out_cfi_label (false); 19548 19549 cfa_offset = cfa_offset + crtl->args.pretend_args_size; 19550 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset); 19551 } 19552 } 19553 19554 /* Get the registers we are going to push. */ 19555 offsets = arm_get_frame_offsets (); 19556 live_regs_mask = offsets->saved_regs_mask; 19557 /* Extract a mask of the ones we can give to the Thumb's push instruction. */ 19558 l_mask = live_regs_mask & 0x40ff; 19559 /* Then count how many other high registers will need to be pushed. */ 19560 high_regs_pushed = bit_count (live_regs_mask & 0x0f00); 19561 19562 if (TARGET_BACKTRACE) 19563 { 19564 unsigned offset; 19565 unsigned work_register; 19566 19567 /* We have been asked to create a stack backtrace structure. 
19568 The code looks like this: 19569 19570 0 .align 2 19571 0 func: 19572 0 sub SP, #16 Reserve space for 4 registers. 19573 2 push {R7} Push low registers. 19574 4 add R7, SP, #20 Get the stack pointer before the push. 19575 6 str R7, [SP, #8] Store the stack pointer (before reserving the space). 19576 8 mov R7, PC Get hold of the start of this code plus 12. 19577 10 str R7, [SP, #16] Store it. 19578 12 mov R7, FP Get hold of the current frame pointer. 19579 14 str R7, [SP, #4] Store it. 19580 16 mov R7, LR Get hold of the current return address. 19581 18 str R7, [SP, #12] Store it. 19582 20 add R7, SP, #16 Point at the start of the backtrace structure. 19583 22 mov FP, R7 Put this value into the frame pointer. */ 19584 19585 work_register = thumb_find_work_register (live_regs_mask); 19586 19587 if (ARM_EABI_UNWIND_TABLES) 19588 asm_fprintf (f, "\t.pad #16\n"); 19589 19590 asm_fprintf 19591 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n", 19592 SP_REGNUM, SP_REGNUM); 19593 19594 if (dwarf2out_do_frame ()) 19595 { 19596 char *l = dwarf2out_cfi_label (false); 19597 19598 cfa_offset = cfa_offset + 16; 19599 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset); 19600 } 19601 19602 if (l_mask) 19603 { 19604 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask); 19605 offset = bit_count (l_mask) * UNITS_PER_WORD; 19606 } 19607 else 19608 offset = 0; 19609 19610 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM, 19611 offset + 16 + crtl->args.pretend_args_size); 19612 19613 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, 19614 offset + 4); 19615 19616 /* Make sure that the instruction fetching the PC is in the right place 19617 to calculate "start of backtrace creation code + 12". */ 19618 if (l_mask) 19619 { 19620 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM); 19621 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, 19622 offset + 12); 19623 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, 19624 ARM_HARD_FRAME_POINTER_REGNUM); 19625 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, 19626 offset); 19627 } 19628 else 19629 { 19630 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, 19631 ARM_HARD_FRAME_POINTER_REGNUM); 19632 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, 19633 offset); 19634 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM); 19635 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, 19636 offset + 12); 19637 } 19638 19639 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM); 19640 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, 19641 offset + 8); 19642 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM, 19643 offset + 12); 19644 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n", 19645 ARM_HARD_FRAME_POINTER_REGNUM, work_register); 19646 } 19647 /* Optimization: If we are not pushing any low registers but we are going 19648 to push some high registers then delay our first push. This will just 19649 be a push of LR and we can combine it with the push of the first high 19650 register. 
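
   For instance (illustrative only; r3 here simply stands for whichever
   work register is chosen), instead of emitting

	push	{lr}
	mov	r3, r8
	push	{r3}

   the delayed form produces the single combined store

	mov	r3, r8
	push	{r3, lr}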
*/ 19651 else if ((l_mask & 0xff) != 0 19652 || (high_regs_pushed == 0 && l_mask)) 19653 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask); 19654 19655 if (high_regs_pushed) 19656 { 19657 unsigned pushable_regs; 19658 unsigned next_hi_reg; 19659 19660 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--) 19661 if (live_regs_mask & (1 << next_hi_reg)) 19662 break; 19663 19664 pushable_regs = l_mask & 0xff; 19665 19666 if (pushable_regs == 0) 19667 pushable_regs = 1 << thumb_find_work_register (live_regs_mask); 19668 19669 while (high_regs_pushed > 0) 19670 { 19671 unsigned long real_regs_mask = 0; 19672 19673 for (regno = LAST_LO_REGNUM; regno >= 0; regno --) 19674 { 19675 if (pushable_regs & (1 << regno)) 19676 { 19677 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg); 19678 19679 high_regs_pushed --; 19680 real_regs_mask |= (1 << next_hi_reg); 19681 19682 if (high_regs_pushed) 19683 { 19684 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM; 19685 next_hi_reg --) 19686 if (live_regs_mask & (1 << next_hi_reg)) 19687 break; 19688 } 19689 else 19690 { 19691 pushable_regs &= ~((1 << regno) - 1); 19692 break; 19693 } 19694 } 19695 } 19696 19697 /* If we had to find a work register and we have not yet 19698 saved the LR then add it to the list of regs to push. */ 19699 if (l_mask == (1 << LR_REGNUM)) 19700 { 19701 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM), 19702 1, &cfa_offset, 19703 real_regs_mask | (1 << LR_REGNUM)); 19704 l_mask = 0; 19705 } 19706 else 19707 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask); 19708 } 19709 } 19710 } 19711 19712 /* Handle the case of a double word load into a low register from 19713 a computed memory address. The computed address may involve a 19714 register which is overwritten by the load. */ 19715 const char * 19716 thumb_load_double_from_address (rtx *operands) 19717 { 19718 rtx addr; 19719 rtx base; 19720 rtx offset; 19721 rtx arg1; 19722 rtx arg2; 19723 19724 gcc_assert (GET_CODE (operands[0]) == REG); 19725 gcc_assert (GET_CODE (operands[1]) == MEM); 19726 19727 /* Get the memory address. */ 19728 addr = XEXP (operands[1], 0); 19729 19730 /* Work out how the memory address is computed. */ 19731 switch (GET_CODE (addr)) 19732 { 19733 case REG: 19734 operands[2] = adjust_address (operands[1], SImode, 4); 19735 19736 if (REGNO (operands[0]) == REGNO (addr)) 19737 { 19738 output_asm_insn ("ldr\t%H0, %2", operands); 19739 output_asm_insn ("ldr\t%0, %1", operands); 19740 } 19741 else 19742 { 19743 output_asm_insn ("ldr\t%0, %1", operands); 19744 output_asm_insn ("ldr\t%H0, %2", operands); 19745 } 19746 break; 19747 19748 case CONST: 19749 /* Compute <address> + 4 for the high order load. */ 19750 operands[2] = adjust_address (operands[1], SImode, 4); 19751 19752 output_asm_insn ("ldr\t%0, %1", operands); 19753 output_asm_insn ("ldr\t%H0, %2", operands); 19754 break; 19755 19756 case PLUS: 19757 arg1 = XEXP (addr, 0); 19758 arg2 = XEXP (addr, 1); 19759 19760 if (CONSTANT_P (arg1)) 19761 base = arg2, offset = arg1; 19762 else 19763 base = arg1, offset = arg2; 19764 19765 gcc_assert (GET_CODE (base) == REG); 19766 19767 /* Catch the case of <address> = <reg> + <reg> */ 19768 if (GET_CODE (offset) == REG) 19769 { 19770 int reg_offset = REGNO (offset); 19771 int reg_base = REGNO (base); 19772 int reg_dest = REGNO (operands[0]); 19773 19774 /* Add the base and offset registers together into the 19775 higher destination register. 
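
   For a concrete (purely illustrative) case, loading the DImode value
   at [r1 + r2] into the pair r4/r5 produces

	add	r5, r1, r2	@ form the address in the high half
	ldr	r4, [r5, #0]	@ low word; r5 still holds the address
	ldr	r5, [r5, #4]	@ high word last, overwriting the address

   which is why the address is built in the higher destination register.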
*/ 19776 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r", 19777 reg_dest + 1, reg_base, reg_offset); 19778 19779 /* Load the lower destination register from the address in 19780 the higher destination register. */ 19781 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]", 19782 reg_dest, reg_dest + 1); 19783 19784 /* Load the higher destination register from its own address 19785 plus 4. */ 19786 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]", 19787 reg_dest + 1, reg_dest + 1); 19788 } 19789 else 19790 { 19791 /* Compute <address> + 4 for the high order load. */ 19792 operands[2] = adjust_address (operands[1], SImode, 4); 19793 19794 /* If the computed address is held in the low order register 19795 then load the high order register first, otherwise always 19796 load the low order register first. */ 19797 if (REGNO (operands[0]) == REGNO (base)) 19798 { 19799 output_asm_insn ("ldr\t%H0, %2", operands); 19800 output_asm_insn ("ldr\t%0, %1", operands); 19801 } 19802 else 19803 { 19804 output_asm_insn ("ldr\t%0, %1", operands); 19805 output_asm_insn ("ldr\t%H0, %2", operands); 19806 } 19807 } 19808 break; 19809 19810 case LABEL_REF: 19811 /* With no registers to worry about we can just load the value 19812 directly. */ 19813 operands[2] = adjust_address (operands[1], SImode, 4); 19814 19815 output_asm_insn ("ldr\t%H0, %2", operands); 19816 output_asm_insn ("ldr\t%0, %1", operands); 19817 break; 19818 19819 default: 19820 gcc_unreachable (); 19821 } 19822 19823 return ""; 19824 } 19825 19826 const char * 19827 thumb_output_move_mem_multiple (int n, rtx *operands) 19828 { 19829 rtx tmp; 19830 19831 switch (n) 19832 { 19833 case 2: 19834 if (REGNO (operands[4]) > REGNO (operands[5])) 19835 { 19836 tmp = operands[4]; 19837 operands[4] = operands[5]; 19838 operands[5] = tmp; 19839 } 19840 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands); 19841 output_asm_insn ("stmia\t%0!, {%4, %5}", operands); 19842 break; 19843 19844 case 3: 19845 if (REGNO (operands[4]) > REGNO (operands[5])) 19846 { 19847 tmp = operands[4]; 19848 operands[4] = operands[5]; 19849 operands[5] = tmp; 19850 } 19851 if (REGNO (operands[5]) > REGNO (operands[6])) 19852 { 19853 tmp = operands[5]; 19854 operands[5] = operands[6]; 19855 operands[6] = tmp; 19856 } 19857 if (REGNO (operands[4]) > REGNO (operands[5])) 19858 { 19859 tmp = operands[4]; 19860 operands[4] = operands[5]; 19861 operands[5] = tmp; 19862 } 19863 19864 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands); 19865 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands); 19866 break; 19867 19868 default: 19869 gcc_unreachable (); 19870 } 19871 19872 return ""; 19873 } 19874 19875 /* Output a call-via instruction for thumb state. */ 19876 const char * 19877 thumb_call_via_reg (rtx reg) 19878 { 19879 int regno = REGNO (reg); 19880 rtx *labelp; 19881 19882 gcc_assert (regno < LR_REGNUM); 19883 19884 /* If we are in the normal text section we can use a single instance 19885 per compilation unit. If we are doing function sections, then we need 19886 an entry per section, since we can't rely on reachability. 
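
   Illustrative output (the label name is hypothetical; in practice a
   generated internal label is used): a call through r4 becomes

	bl	.LCALLVIA_R4
	...
   .LCALLVIA_R4:
	bx	r4

   with the label and its bx emitted once in the plain text section (see
   arm_file_end below) or once per function section, as described above.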
*/ 19887 if (in_section == text_section) 19888 { 19889 thumb_call_reg_needed = 1; 19890 19891 if (thumb_call_via_label[regno] == NULL) 19892 thumb_call_via_label[regno] = gen_label_rtx (); 19893 labelp = thumb_call_via_label + regno; 19894 } 19895 else 19896 { 19897 if (cfun->machine->call_via[regno] == NULL) 19898 cfun->machine->call_via[regno] = gen_label_rtx (); 19899 labelp = cfun->machine->call_via + regno; 19900 } 19901 19902 output_asm_insn ("bl\t%a0", labelp); 19903 return ""; 19904 } 19905 19906 /* Routines for generating rtl. */ 19907 void 19908 thumb_expand_movmemqi (rtx *operands) 19909 { 19910 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0)); 19911 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); 19912 HOST_WIDE_INT len = INTVAL (operands[2]); 19913 HOST_WIDE_INT offset = 0; 19914 19915 while (len >= 12) 19916 { 19917 emit_insn (gen_movmem12b (out, in, out, in)); 19918 len -= 12; 19919 } 19920 19921 if (len >= 8) 19922 { 19923 emit_insn (gen_movmem8b (out, in, out, in)); 19924 len -= 8; 19925 } 19926 19927 if (len >= 4) 19928 { 19929 rtx reg = gen_reg_rtx (SImode); 19930 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in))); 19931 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg)); 19932 len -= 4; 19933 offset += 4; 19934 } 19935 19936 if (len >= 2) 19937 { 19938 rtx reg = gen_reg_rtx (HImode); 19939 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode, 19940 plus_constant (in, offset)))); 19941 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)), 19942 reg)); 19943 len -= 2; 19944 offset += 2; 19945 } 19946 19947 if (len) 19948 { 19949 rtx reg = gen_reg_rtx (QImode); 19950 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode, 19951 plus_constant (in, offset)))); 19952 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)), 19953 reg)); 19954 } 19955 } 19956 19957 void 19958 thumb_reload_out_hi (rtx *operands) 19959 { 19960 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2])); 19961 } 19962 19963 /* Handle reading a half-word from memory during reload. */ 19964 void 19965 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED) 19966 { 19967 gcc_unreachable (); 19968 } 19969 19970 /* Return the length of a function name prefix 19971 that starts with the character 'c'. */ 19972 static int 19973 arm_get_strip_length (int c) 19974 { 19975 switch (c) 19976 { 19977 ARM_NAME_ENCODING_LENGTHS 19978 default: return 0; 19979 } 19980 } 19981 19982 /* Return a pointer to a function's name with any 19983 and all prefix encodings stripped from it. */ 19984 const char * 19985 arm_strip_name_encoding (const char *name) 19986 { 19987 int skip; 19988 19989 while ((skip = arm_get_strip_length (* name))) 19990 name += skip; 19991 19992 return name; 19993 } 19994 19995 /* If there is a '*' anywhere in the name's prefix, then 19996 emit the stripped name verbatim, otherwise prepend an 19997 underscore if leading underscores are being used. 
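
   For example (illustrative only, on a target whose user label prefix
   is an underscore):

     "*foo"  is written out as  foo     the '*' requests verbatim output
     "foo"   is written out as  _foo    the normal prefix is applied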
*/ 19998 void 19999 arm_asm_output_labelref (FILE *stream, const char *name) 20000 { 20001 int skip; 20002 int verbatim = 0; 20003 20004 while ((skip = arm_get_strip_length (* name))) 20005 { 20006 verbatim |= (*name == '*'); 20007 name += skip; 20008 } 20009 20010 if (verbatim) 20011 fputs (name, stream); 20012 else 20013 asm_fprintf (stream, "%U%s", name); 20014 } 20015 20016 static void 20017 arm_file_start (void) 20018 { 20019 int val; 20020 20021 if (TARGET_UNIFIED_ASM) 20022 asm_fprintf (asm_out_file, "\t.syntax unified\n"); 20023 20024 if (TARGET_BPABI) 20025 { 20026 const char *fpu_name; 20027 if (arm_select[0].string) 20028 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string); 20029 else if (arm_select[1].string) 20030 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string); 20031 else 20032 asm_fprintf (asm_out_file, "\t.cpu %s\n", 20033 all_cores[arm_default_cpu].name); 20034 20035 if (TARGET_SOFT_FLOAT) 20036 { 20037 if (TARGET_VFP) 20038 fpu_name = "softvfp"; 20039 else 20040 fpu_name = "softfpa"; 20041 } 20042 else 20043 { 20044 fpu_name = arm_fpu_desc->name; 20045 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP) 20046 { 20047 if (TARGET_HARD_FLOAT) 20048 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n"); 20049 if (TARGET_HARD_FLOAT_ABI) 20050 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n"); 20051 } 20052 } 20053 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name); 20054 20055 /* Some of these attributes only apply when the corresponding features 20056 are used. However we don't have any easy way of figuring this out. 20057 Conservatively record the setting that would have been used. */ 20058 20059 /* Tag_ABI_FP_rounding. */ 20060 if (flag_rounding_math) 20061 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n"); 20062 if (!flag_unsafe_math_optimizations) 20063 { 20064 /* Tag_ABI_FP_denomal. */ 20065 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n"); 20066 /* Tag_ABI_FP_exceptions. */ 20067 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n"); 20068 } 20069 /* Tag_ABI_FP_user_exceptions. */ 20070 if (flag_signaling_nans) 20071 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n"); 20072 /* Tag_ABI_FP_number_model. */ 20073 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n", 20074 flag_finite_math_only ? 1 : 3); 20075 20076 /* Tag_ABI_align8_needed. */ 20077 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n"); 20078 /* Tag_ABI_align8_preserved. */ 20079 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n"); 20080 /* Tag_ABI_enum_size. */ 20081 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n", 20082 flag_short_enums ? 1 : 2); 20083 20084 /* Tag_ABI_optimization_goals. */ 20085 if (optimize_size) 20086 val = 4; 20087 else if (optimize >= 2) 20088 val = 2; 20089 else if (optimize) 20090 val = 1; 20091 else 20092 val = 6; 20093 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); 20094 20095 /* Tag_ABI_FP_16bit_format. */ 20096 if (arm_fp16_format) 20097 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", 20098 (int)arm_fp16_format); 20099 20100 if (arm_lang_output_object_attributes_hook) 20101 arm_lang_output_object_attributes_hook(); 20102 } 20103 default_file_start(); 20104 } 20105 20106 static void 20107 arm_file_end (void) 20108 { 20109 int regno; 20110 20111 if (NEED_INDICATE_EXEC_STACK) 20112 /* Add .note.GNU-stack. */ 20113 file_end_indicate_exec_stack (); 20114 20115 if (! 
thumb_call_reg_needed) 20116 return; 20117 20118 switch_to_section (text_section); 20119 asm_fprintf (asm_out_file, "\t.code 16\n"); 20120 ASM_OUTPUT_ALIGN (asm_out_file, 1); 20121 20122 for (regno = 0; regno < LR_REGNUM; regno++) 20123 { 20124 rtx label = thumb_call_via_label[regno]; 20125 20126 if (label != 0) 20127 { 20128 targetm.asm_out.internal_label (asm_out_file, "L", 20129 CODE_LABEL_NUMBER (label)); 20130 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno); 20131 } 20132 } 20133 } 20134 20135 #ifndef ARM_PE 20136 /* Symbols in the text segment can be accessed without indirecting via the 20137 constant pool; it may take an extra binary operation, but this is still 20138 faster than indirecting via memory. Don't do this when not optimizing, 20139 since we won't be calculating al of the offsets necessary to do this 20140 simplification. */ 20141 20142 static void 20143 arm_encode_section_info (tree decl, rtx rtl, int first) 20144 { 20145 if (optimize > 0 && TREE_CONSTANT (decl)) 20146 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1; 20147 20148 default_encode_section_info (decl, rtl, first); 20149 } 20150 #endif /* !ARM_PE */ 20151 20152 static void 20153 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno) 20154 { 20155 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno 20156 && !strcmp (prefix, "L")) 20157 { 20158 arm_ccfsm_state = 0; 20159 arm_target_insn = NULL; 20160 } 20161 default_internal_label (stream, prefix, labelno); 20162 } 20163 20164 /* Output code to add DELTA to the first argument, and then jump 20165 to FUNCTION. Used for C++ multiple inheritance. */ 20166 static void 20167 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, 20168 HOST_WIDE_INT delta, 20169 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, 20170 tree function) 20171 { 20172 static int thunk_label = 0; 20173 char label[256]; 20174 char labelpc[256]; 20175 int mi_delta = delta; 20176 const char *const mi_op = mi_delta < 0 ? "sub" : "add"; 20177 int shift = 0; 20178 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) 20179 ? 1 : 0); 20180 if (mi_delta < 0) 20181 mi_delta = - mi_delta; 20182 20183 if (TARGET_THUMB1) 20184 { 20185 int labelno = thunk_label++; 20186 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno); 20187 /* Thunks are entered in arm mode when avaiable. */ 20188 if (TARGET_THUMB1_ONLY) 20189 { 20190 /* push r3 so we can use it as a temporary. */ 20191 /* TODO: Omit this save if r3 is not used. */ 20192 fputs ("\tpush {r3}\n", file); 20193 fputs ("\tldr\tr3, ", file); 20194 } 20195 else 20196 { 20197 fputs ("\tldr\tr12, ", file); 20198 } 20199 assemble_name (file, label); 20200 fputc ('\n', file); 20201 if (flag_pic) 20202 { 20203 /* If we are generating PIC, the ldr instruction below loads 20204 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as 20205 the address of the add + 8, so we have: 20206 20207 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8) 20208 = target + 1. 20209 20210 Note that we have "+ 1" because some versions of GNU ld 20211 don't set the low bit of the result for R_ARM_REL32 20212 relocations against thumb function symbols. 20213 On ARMv6M this is +4, not +8. */ 20214 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno); 20215 assemble_name (file, labelpc); 20216 fputs (":\n", file); 20217 if (TARGET_THUMB1_ONLY) 20218 { 20219 /* This is 2 insns after the start of the thunk, so we know it 20220 is 4-byte aligned. 
*/ 20221 fputs ("\tadd\tr3, pc, r3\n", file); 20222 fputs ("\tmov r12, r3\n", file); 20223 } 20224 else 20225 fputs ("\tadd\tr12, pc, r12\n", file); 20226 } 20227 else if (TARGET_THUMB1_ONLY) 20228 fputs ("\tmov r12, r3\n", file); 20229 } 20230 if (TARGET_THUMB1_ONLY) 20231 { 20232 if (mi_delta > 255) 20233 { 20234 fputs ("\tldr\tr3, ", file); 20235 assemble_name (file, label); 20236 fputs ("+4\n", file); 20237 asm_fprintf (file, "\t%s\t%r, %r, r3\n", 20238 mi_op, this_regno, this_regno); 20239 } 20240 else if (mi_delta != 0) 20241 { 20242 asm_fprintf (file, "\t%s\t%r, %r, #%d\n", 20243 mi_op, this_regno, this_regno, 20244 mi_delta); 20245 } 20246 } 20247 else 20248 { 20249 /* TODO: Use movw/movt for large constants when available. */ 20250 while (mi_delta != 0) 20251 { 20252 if ((mi_delta & (3 << shift)) == 0) 20253 shift += 2; 20254 else 20255 { 20256 asm_fprintf (file, "\t%s\t%r, %r, #%d\n", 20257 mi_op, this_regno, this_regno, 20258 mi_delta & (0xff << shift)); 20259 mi_delta &= ~(0xff << shift); 20260 shift += 8; 20261 } 20262 } 20263 } 20264 if (TARGET_THUMB1) 20265 { 20266 if (TARGET_THUMB1_ONLY) 20267 fputs ("\tpop\t{r3}\n", file); 20268 20269 fprintf (file, "\tbx\tr12\n"); 20270 ASM_OUTPUT_ALIGN (file, 2); 20271 assemble_name (file, label); 20272 fputs (":\n", file); 20273 if (flag_pic) 20274 { 20275 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */ 20276 rtx tem = XEXP (DECL_RTL (function), 0); 20277 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7)); 20278 tem = gen_rtx_MINUS (GET_MODE (tem), 20279 tem, 20280 gen_rtx_SYMBOL_REF (Pmode, 20281 ggc_strdup (labelpc))); 20282 assemble_integer (tem, 4, BITS_PER_WORD, 1); 20283 } 20284 else 20285 /* Output ".word .LTHUNKn". */ 20286 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1); 20287 20288 if (TARGET_THUMB1_ONLY && mi_delta > 255) 20289 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1); 20290 } 20291 else 20292 { 20293 fputs ("\tb\t", file); 20294 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); 20295 if (NEED_PLT_RELOC) 20296 fputs ("(PLT)", file); 20297 fputc ('\n', file); 20298 } 20299 } 20300 20301 int 20302 arm_emit_vector_const (FILE *file, rtx x) 20303 { 20304 int i; 20305 const char * pattern; 20306 20307 gcc_assert (GET_CODE (x) == CONST_VECTOR); 20308 20309 switch (GET_MODE (x)) 20310 { 20311 case V2SImode: pattern = "%08x"; break; 20312 case V4HImode: pattern = "%04x"; break; 20313 case V8QImode: pattern = "%02x"; break; 20314 default: gcc_unreachable (); 20315 } 20316 20317 fprintf (file, "0x"); 20318 for (i = CONST_VECTOR_NUNITS (x); i--;) 20319 { 20320 rtx element; 20321 20322 element = CONST_VECTOR_ELT (x, i); 20323 fprintf (file, pattern, INTVAL (element)); 20324 } 20325 20326 return 1; 20327 } 20328 20329 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word. 20330 HFmode constant pool entries are actually loaded with ldr. 
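
   For example (illustrative only): the HFmode constant 1.0 has the IEEE
   half-precision bit pattern 0x3c00, so on a little-endian target the
   pool entry comes out roughly as

	.short	0x3c00
	.short	0		@ padding up to the word loaded by ldr

   while a WORDS_BIG_ENDIAN target emits the two bytes of padding first.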
*/ 20331 void 20332 arm_emit_fp16_const (rtx c) 20333 { 20334 REAL_VALUE_TYPE r; 20335 long bits; 20336 20337 REAL_VALUE_FROM_CONST_DOUBLE (r, c); 20338 bits = real_to_target (NULL, &r, HFmode); 20339 if (WORDS_BIG_ENDIAN) 20340 assemble_zeros (2); 20341 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1); 20342 if (!WORDS_BIG_ENDIAN) 20343 assemble_zeros (2); 20344 } 20345 20346 const char * 20347 arm_output_load_gr (rtx *operands) 20348 { 20349 rtx reg; 20350 rtx offset; 20351 rtx wcgr; 20352 rtx sum; 20353 20354 if (GET_CODE (operands [1]) != MEM 20355 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS 20356 || GET_CODE (reg = XEXP (sum, 0)) != REG 20357 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT 20358 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024))) 20359 return "wldrw%?\t%0, %1"; 20360 20361 /* Fix up an out-of-range load of a GR register. */ 20362 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg); 20363 wcgr = operands[0]; 20364 operands[0] = reg; 20365 output_asm_insn ("ldr%?\t%0, %1", operands); 20366 20367 operands[0] = wcgr; 20368 operands[1] = reg; 20369 output_asm_insn ("tmcr%?\t%0, %1", operands); 20370 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg); 20371 20372 return ""; 20373 } 20374 20375 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. 20376 20377 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last 20378 named arg and all anonymous args onto the stack. 20379 XXX I know the prologue shouldn't be pushing registers, but it is faster 20380 that way. */ 20381 20382 static void 20383 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum, 20384 enum machine_mode mode, 20385 tree type, 20386 int *pretend_size, 20387 int second_time ATTRIBUTE_UNUSED) 20388 { 20389 int nregs; 20390 20391 cfun->machine->uses_anonymous_args = 1; 20392 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) 20393 { 20394 nregs = pcum->aapcs_ncrn; 20395 if ((nregs & 1) && arm_needs_doubleword_align (mode, type)) 20396 nregs++; 20397 } 20398 else 20399 nregs = pcum->nregs; 20400 20401 if (nregs < NUM_ARG_REGS) 20402 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; 20403 } 20404 20405 /* Return nonzero if the CONSUMER instruction (a store) does not need 20406 PRODUCER's value to calculate the address. */ 20407 20408 int 20409 arm_no_early_store_addr_dep (rtx producer, rtx consumer) 20410 { 20411 rtx value = PATTERN (producer); 20412 rtx addr = PATTERN (consumer); 20413 20414 if (GET_CODE (value) == COND_EXEC) 20415 value = COND_EXEC_CODE (value); 20416 if (GET_CODE (value) == PARALLEL) 20417 value = XVECEXP (value, 0, 0); 20418 value = XEXP (value, 0); 20419 if (GET_CODE (addr) == COND_EXEC) 20420 addr = COND_EXEC_CODE (addr); 20421 if (GET_CODE (addr) == PARALLEL) 20422 addr = XVECEXP (addr, 0, 0); 20423 addr = XEXP (addr, 0); 20424 20425 return !reg_overlap_mentioned_p (value, addr); 20426 } 20427 20428 /* Return nonzero if the CONSUMER instruction (an ALU op) does not 20429 have an early register shift value or amount dependency on the 20430 result of PRODUCER. 
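
   As an illustrative example: if the producer is  mov r1, r2  and the
   consumer is  add r0, r3, r1, lsl #2  then the shifted operand depends
   on the producer's result r1, so this returns 0; if the consumer were
   add r0, r1, r3, lsl #2  instead, r1 is only the plain addend, the
   shift operand is independent of it, and the function returns 1.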
*/ 20431 20432 int 20433 arm_no_early_alu_shift_dep (rtx producer, rtx consumer) 20434 { 20435 rtx value = PATTERN (producer); 20436 rtx op = PATTERN (consumer); 20437 rtx early_op; 20438 20439 if (GET_CODE (value) == COND_EXEC) 20440 value = COND_EXEC_CODE (value); 20441 if (GET_CODE (value) == PARALLEL) 20442 value = XVECEXP (value, 0, 0); 20443 value = XEXP (value, 0); 20444 if (GET_CODE (op) == COND_EXEC) 20445 op = COND_EXEC_CODE (op); 20446 if (GET_CODE (op) == PARALLEL) 20447 op = XVECEXP (op, 0, 0); 20448 op = XEXP (op, 1); 20449 20450 early_op = XEXP (op, 0); 20451 /* This is either an actual independent shift, or a shift applied to 20452 the first operand of another operation. We want the whole shift 20453 operation. */ 20454 if (GET_CODE (early_op) == REG) 20455 early_op = op; 20456 20457 return !reg_overlap_mentioned_p (value, early_op); 20458 } 20459 20460 /* Return nonzero if the CONSUMER instruction (an ALU op) does not 20461 have an early register shift value dependency on the result of 20462 PRODUCER. */ 20463 20464 int 20465 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer) 20466 { 20467 rtx value = PATTERN (producer); 20468 rtx op = PATTERN (consumer); 20469 rtx early_op; 20470 20471 if (GET_CODE (value) == COND_EXEC) 20472 value = COND_EXEC_CODE (value); 20473 if (GET_CODE (value) == PARALLEL) 20474 value = XVECEXP (value, 0, 0); 20475 value = XEXP (value, 0); 20476 if (GET_CODE (op) == COND_EXEC) 20477 op = COND_EXEC_CODE (op); 20478 if (GET_CODE (op) == PARALLEL) 20479 op = XVECEXP (op, 0, 0); 20480 op = XEXP (op, 1); 20481 20482 early_op = XEXP (op, 0); 20483 20484 /* This is either an actual independent shift, or a shift applied to 20485 the first operand of another operation. We want the value being 20486 shifted, in either case. */ 20487 if (GET_CODE (early_op) != REG) 20488 early_op = XEXP (early_op, 0); 20489 20490 return !reg_overlap_mentioned_p (value, early_op); 20491 } 20492 20493 /* Return nonzero if the CONSUMER (a mul or mac op) does not 20494 have an early register mult dependency on the result of 20495 PRODUCER. */ 20496 20497 int 20498 arm_no_early_mul_dep (rtx producer, rtx consumer) 20499 { 20500 rtx value = PATTERN (producer); 20501 rtx op = PATTERN (consumer); 20502 20503 if (GET_CODE (value) == COND_EXEC) 20504 value = COND_EXEC_CODE (value); 20505 if (GET_CODE (value) == PARALLEL) 20506 value = XVECEXP (value, 0, 0); 20507 value = XEXP (value, 0); 20508 if (GET_CODE (op) == COND_EXEC) 20509 op = COND_EXEC_CODE (op); 20510 if (GET_CODE (op) == PARALLEL) 20511 op = XVECEXP (op, 0, 0); 20512 op = XEXP (op, 1); 20513 20514 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) 20515 { 20516 if (GET_CODE (XEXP (op, 0)) == MULT) 20517 return !reg_overlap_mentioned_p (value, XEXP (op, 0)); 20518 else 20519 return !reg_overlap_mentioned_p (value, XEXP (op, 1)); 20520 } 20521 20522 return 0; 20523 } 20524 20525 /* We can't rely on the caller doing the proper promotion when 20526 using APCS or ATPCS. 
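   Returning true here for the older ABIs makes arguments declared
   narrower than int be passed as int, rather than trusting the caller to
   have applied the prototype-driven promotion itself.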
*/ 20527 20528 static bool 20529 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED) 20530 { 20531 return !TARGET_AAPCS_BASED; 20532 } 20533 20534 static enum machine_mode 20535 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, 20536 enum machine_mode mode, 20537 int *punsignedp ATTRIBUTE_UNUSED, 20538 const_tree fntype ATTRIBUTE_UNUSED, 20539 int for_return ATTRIBUTE_UNUSED) 20540 { 20541 if (GET_MODE_CLASS (mode) == MODE_INT 20542 && GET_MODE_SIZE (mode) < 4) 20543 return SImode; 20544 20545 return mode; 20546 } 20547 20548 /* AAPCS based ABIs use short enums by default. */ 20549 20550 static bool 20551 arm_default_short_enums (void) 20552 { 20553 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX; 20554 } 20555 20556 20557 /* AAPCS requires that anonymous bitfields affect structure alignment. */ 20558 20559 static bool 20560 arm_align_anon_bitfield (void) 20561 { 20562 return TARGET_AAPCS_BASED; 20563 } 20564 20565 20566 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */ 20567 20568 static tree 20569 arm_cxx_guard_type (void) 20570 { 20571 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node; 20572 } 20573 20574 /* Return non-zero if the consumer (a multiply-accumulate instruction) 20575 has an accumulator dependency on the result of the producer (a 20576 multiplication instruction) and no other dependency on that result. */ 20577 int 20578 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) 20579 { 20580 rtx mul = PATTERN (producer); 20581 rtx mac = PATTERN (consumer); 20582 rtx mul_result; 20583 rtx mac_op0, mac_op1, mac_acc; 20584 20585 if (GET_CODE (mul) == COND_EXEC) 20586 mul = COND_EXEC_CODE (mul); 20587 if (GET_CODE (mac) == COND_EXEC) 20588 mac = COND_EXEC_CODE (mac); 20589 20590 /* Check that mul is of the form (set (...) (mult ...)) 20591 and mla is of the form (set (...) (plus (mult ...) (...))). */ 20592 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT) 20593 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS 20594 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT)) 20595 return 0; 20596 20597 mul_result = XEXP (mul, 0); 20598 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0); 20599 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1); 20600 mac_acc = XEXP (XEXP (mac, 1), 1); 20601 20602 return (reg_overlap_mentioned_p (mul_result, mac_acc) 20603 && !reg_overlap_mentioned_p (mul_result, mac_op0) 20604 && !reg_overlap_mentioned_p (mul_result, mac_op1)); 20605 } 20606 20607 20608 /* The EABI says test the least significant bit of a guard variable. */ 20609 20610 static bool 20611 arm_cxx_guard_mask_bit (void) 20612 { 20613 return TARGET_AAPCS_BASED; 20614 } 20615 20616 20617 /* The EABI specifies that all array cookies are 8 bytes long. */ 20618 20619 static tree 20620 arm_get_cookie_size (tree type) 20621 { 20622 tree size; 20623 20624 if (!TARGET_AAPCS_BASED) 20625 return default_cxx_get_cookie_size (type); 20626 20627 size = build_int_cst (sizetype, 8); 20628 return size; 20629 } 20630 20631 20632 /* The EABI says that array cookies should also contain the element size. */ 20633 20634 static bool 20635 arm_cookie_has_size (void) 20636 { 20637 return TARGET_AAPCS_BASED; 20638 } 20639 20640 20641 /* The EABI says constructors and destructors should return a pointer to 20642 the object constructed/destroyed. 
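   (The generic C++ ABI has them return void; returning "this" lets a
   caller reuse the value without keeping its own copy of the object
   pointer live across the call.)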
*/ 20643 20644 static bool 20645 arm_cxx_cdtor_returns_this (void) 20646 { 20647 return TARGET_AAPCS_BASED; 20648 } 20649 20650 /* The EABI says that an inline function may never be the key 20651 method. */ 20652 20653 static bool 20654 arm_cxx_key_method_may_be_inline (void) 20655 { 20656 return !TARGET_AAPCS_BASED; 20657 } 20658 20659 static void 20660 arm_cxx_determine_class_data_visibility (tree decl) 20661 { 20662 if (!TARGET_AAPCS_BASED 20663 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES) 20664 return; 20665 20666 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data 20667 is exported. However, on systems without dynamic vague linkage, 20668 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */ 20669 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl)) 20670 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; 20671 else 20672 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT; 20673 DECL_VISIBILITY_SPECIFIED (decl) = 1; 20674 } 20675 20676 static bool 20677 arm_cxx_class_data_always_comdat (void) 20678 { 20679 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have 20680 vague linkage if the class has no key function. */ 20681 return !TARGET_AAPCS_BASED; 20682 } 20683 20684 20685 /* The EABI says __aeabi_atexit should be used to register static 20686 destructors. */ 20687 20688 static bool 20689 arm_cxx_use_aeabi_atexit (void) 20690 { 20691 return TARGET_AAPCS_BASED; 20692 } 20693 20694 20695 void 20696 arm_set_return_address (rtx source, rtx scratch) 20697 { 20698 arm_stack_offsets *offsets; 20699 HOST_WIDE_INT delta; 20700 rtx addr; 20701 unsigned long saved_regs; 20702 20703 offsets = arm_get_frame_offsets (); 20704 saved_regs = offsets->saved_regs_mask; 20705 20706 if ((saved_regs & (1 << LR_REGNUM)) == 0) 20707 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); 20708 else 20709 { 20710 if (frame_pointer_needed) 20711 addr = plus_constant(hard_frame_pointer_rtx, -4); 20712 else 20713 { 20714 /* LR will be the first saved register. */ 20715 delta = offsets->outgoing_args - (offsets->frame + 4); 20716 20717 20718 if (delta >= 4096) 20719 { 20720 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx, 20721 GEN_INT (delta & ~4095))); 20722 addr = scratch; 20723 delta &= 4095; 20724 } 20725 else 20726 addr = stack_pointer_rtx; 20727 20728 addr = plus_constant (addr, delta); 20729 } 20730 emit_move_insn (gen_frame_mem (Pmode, addr), source); 20731 } 20732 } 20733 20734 20735 void 20736 thumb_set_return_address (rtx source, rtx scratch) 20737 { 20738 arm_stack_offsets *offsets; 20739 HOST_WIDE_INT delta; 20740 HOST_WIDE_INT limit; 20741 int reg; 20742 rtx addr; 20743 unsigned long mask; 20744 20745 emit_use (source); 20746 20747 offsets = arm_get_frame_offsets (); 20748 mask = offsets->saved_regs_mask; 20749 if (mask & (1 << LR_REGNUM)) 20750 { 20751 limit = 1024; 20752 /* Find the saved regs. */ 20753 if (frame_pointer_needed) 20754 { 20755 delta = offsets->soft_frame - offsets->saved_args; 20756 reg = THUMB_HARD_FRAME_POINTER_REGNUM; 20757 if (TARGET_THUMB1) 20758 limit = 128; 20759 } 20760 else 20761 { 20762 delta = offsets->outgoing_args - offsets->saved_args; 20763 reg = SP_REGNUM; 20764 } 20765 /* Allow for the stack frame. */ 20766 if (TARGET_THUMB1 && TARGET_BACKTRACE) 20767 delta -= 16; 20768 /* The link register is always the first saved register. */ 20769 delta -= 4; 20770 20771 /* Construct the address. 
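   The save slot sits DELTA bytes from the base register chosen above;
   when DELTA is too large for a direct Thumb offset, the address is
   built in SCRATCH first.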
*/ 20772 addr = gen_rtx_REG (SImode, reg); 20773 if (delta > limit) 20774 { 20775 emit_insn (gen_movsi (scratch, GEN_INT (delta))); 20776 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx)); 20777 addr = scratch; 20778 } 20779 else 20780 addr = plus_constant (addr, delta); 20781 20782 emit_move_insn (gen_frame_mem (Pmode, addr), source); 20783 } 20784 else 20785 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); 20786 } 20787 20788 /* Implements target hook vector_mode_supported_p. */ 20789 bool 20790 arm_vector_mode_supported_p (enum machine_mode mode) 20791 { 20792 /* Neon also supports V2SImode, etc. listed in the clause below. */ 20793 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode 20794 || mode == V16QImode || mode == V4SFmode || mode == V2DImode)) 20795 return true; 20796 20797 if ((TARGET_NEON || TARGET_IWMMXT) 20798 && ((mode == V2SImode) 20799 || (mode == V4HImode) 20800 || (mode == V8QImode))) 20801 return true; 20802 20803 return false; 20804 } 20805 20806 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal 20807 ARM insns and therefore guarantee that the shift count is modulo 256. 20808 DImode shifts (those implemented by lib1funcs.asm or by optabs.c) 20809 guarantee no particular behavior for out-of-range counts. */ 20810 20811 static unsigned HOST_WIDE_INT 20812 arm_shift_truncation_mask (enum machine_mode mode) 20813 { 20814 return mode == SImode ? 255 : 0; 20815 } 20816 20817 20818 /* Map internal gcc register numbers to DWARF2 register numbers. */ 20819 20820 unsigned int 20821 arm_dbx_register_number (unsigned int regno) 20822 { 20823 if (regno < 16) 20824 return regno; 20825 20826 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards 20827 compatibility. The EABI defines them as registers 96-103. */ 20828 if (IS_FPA_REGNUM (regno)) 20829 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM; 20830 20831 if (IS_VFP_REGNUM (regno)) 20832 { 20833 /* See comment in arm_dwarf_register_span. */ 20834 if (VFP_REGNO_OK_FOR_SINGLE (regno)) 20835 return 64 + regno - FIRST_VFP_REGNUM; 20836 else 20837 return 256 + (regno - FIRST_VFP_REGNUM) / 2; 20838 } 20839 20840 if (IS_IWMMXT_GR_REGNUM (regno)) 20841 return 104 + regno - FIRST_IWMMXT_GR_REGNUM; 20842 20843 if (IS_IWMMXT_REGNUM (regno)) 20844 return 112 + regno - FIRST_IWMMXT_REGNUM; 20845 20846 gcc_unreachable (); 20847 } 20848 20849 /* Dwarf models VFPv3 registers as 32 64-bit registers. 20850 GCC models tham as 64 32-bit registers, so we need to describe this to 20851 the DWARF generation code. Other registers can use the default. */ 20852 static rtx 20853 arm_dwarf_register_span (rtx rtl) 20854 { 20855 unsigned regno; 20856 int nregs; 20857 int i; 20858 rtx p; 20859 20860 regno = REGNO (rtl); 20861 if (!IS_VFP_REGNUM (regno)) 20862 return NULL_RTX; 20863 20864 /* XXX FIXME: The EABI defines two VFP register ranges: 20865 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent) 20866 256-287: D0-D31 20867 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the 20868 corresponding D register. Until GDB supports this, we shall use the 20869 legacy encodings. We also use these encodings for D0-D15 for 20870 compatibility with older debuggers. 
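   With this scheme d0-d15 are described through their single-precision
   aliases in the 64-95 range, while d16-d31, which have no S-register
   aliases, use 272-287 from the 256-287 range (so d16, for example,
   becomes DWARF register 272).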
*/ 20871 if (VFP_REGNO_OK_FOR_SINGLE (regno)) 20872 return NULL_RTX; 20873 20874 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8; 20875 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs)); 20876 regno = (regno - FIRST_VFP_REGNUM) / 2; 20877 for (i = 0; i < nregs; i++) 20878 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i); 20879 20880 return p; 20881 } 20882 20883 #ifdef TARGET_UNWIND_INFO 20884 /* Emit unwind directives for a store-multiple instruction or stack pointer 20885 push during alignment. 20886 These should only ever be generated by the function prologue code, so 20887 expect them to have a particular form. */ 20888 20889 static void 20890 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p) 20891 { 20892 int i; 20893 HOST_WIDE_INT offset; 20894 HOST_WIDE_INT nregs; 20895 int reg_size; 20896 unsigned reg; 20897 unsigned lastreg; 20898 rtx e; 20899 20900 e = XVECEXP (p, 0, 0); 20901 if (GET_CODE (e) != SET) 20902 abort (); 20903 20904 /* First insn will adjust the stack pointer. */ 20905 if (GET_CODE (e) != SET 20906 || GET_CODE (XEXP (e, 0)) != REG 20907 || REGNO (XEXP (e, 0)) != SP_REGNUM 20908 || GET_CODE (XEXP (e, 1)) != PLUS) 20909 abort (); 20910 20911 offset = -INTVAL (XEXP (XEXP (e, 1), 1)); 20912 nregs = XVECLEN (p, 0) - 1; 20913 20914 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1)); 20915 if (reg < 16) 20916 { 20917 /* The function prologue may also push pc, but not annotate it as it is 20918 never restored. We turn this into a stack pointer adjustment. */ 20919 if (nregs * 4 == offset - 4) 20920 { 20921 fprintf (asm_out_file, "\t.pad #4\n"); 20922 offset -= 4; 20923 } 20924 reg_size = 4; 20925 fprintf (asm_out_file, "\t.save {"); 20926 } 20927 else if (IS_VFP_REGNUM (reg)) 20928 { 20929 reg_size = 8; 20930 fprintf (asm_out_file, "\t.vsave {"); 20931 } 20932 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM) 20933 { 20934 /* FPA registers are done differently. */ 20935 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs); 20936 return; 20937 } 20938 else 20939 /* Unknown register type. */ 20940 abort (); 20941 20942 /* If the stack increment doesn't match the size of the saved registers, 20943 something has gone horribly wrong. */ 20944 if (offset != nregs * reg_size) 20945 abort (); 20946 20947 offset = 0; 20948 lastreg = 0; 20949 /* The remaining insns will describe the stores. */ 20950 for (i = 1; i <= nregs; i++) 20951 { 20952 /* Expect (set (mem <addr>) (reg)). 20953 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */ 20954 e = XVECEXP (p, 0, i); 20955 if (GET_CODE (e) != SET 20956 || GET_CODE (XEXP (e, 0)) != MEM 20957 || GET_CODE (XEXP (e, 1)) != REG) 20958 abort (); 20959 20960 reg = REGNO (XEXP (e, 1)); 20961 if (reg < lastreg) 20962 abort (); 20963 20964 if (i != 1) 20965 fprintf (asm_out_file, ", "); 20966 /* We can't use %r for vfp because we need to use the 20967 double precision register names. */ 20968 if (IS_VFP_REGNUM (reg)) 20969 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2); 20970 else 20971 asm_fprintf (asm_out_file, "%r", reg); 20972 20973 #ifdef ENABLE_CHECKING 20974 /* Check that the addresses are consecutive. 
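   That is, each successive store must be one register size further from
   the stack pointer than the previous one (the first store may be at the
   stack pointer itself).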
*/ 20975 e = XEXP (XEXP (e, 0), 0); 20976 if (GET_CODE (e) == PLUS) 20977 { 20978 offset += reg_size; 20979 if (GET_CODE (XEXP (e, 0)) != REG 20980 || REGNO (XEXP (e, 0)) != SP_REGNUM 20981 || GET_CODE (XEXP (e, 1)) != CONST_INT 20982 || offset != INTVAL (XEXP (e, 1))) 20983 abort (); 20984 } 20985 else if (i != 1 20986 || GET_CODE (e) != REG 20987 || REGNO (e) != SP_REGNUM) 20988 abort (); 20989 #endif 20990 } 20991 fprintf (asm_out_file, "}\n"); 20992 } 20993 20994 /* Emit unwind directives for a SET. */ 20995 20996 static void 20997 arm_unwind_emit_set (FILE * asm_out_file, rtx p) 20998 { 20999 rtx e0; 21000 rtx e1; 21001 unsigned reg; 21002 21003 e0 = XEXP (p, 0); 21004 e1 = XEXP (p, 1); 21005 switch (GET_CODE (e0)) 21006 { 21007 case MEM: 21008 /* Pushing a single register. */ 21009 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC 21010 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG 21011 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM) 21012 abort (); 21013 21014 asm_fprintf (asm_out_file, "\t.save "); 21015 if (IS_VFP_REGNUM (REGNO (e1))) 21016 asm_fprintf(asm_out_file, "{d%d}\n", 21017 (REGNO (e1) - FIRST_VFP_REGNUM) / 2); 21018 else 21019 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1)); 21020 break; 21021 21022 case REG: 21023 if (REGNO (e0) == SP_REGNUM) 21024 { 21025 /* A stack increment. */ 21026 if (GET_CODE (e1) != PLUS 21027 || GET_CODE (XEXP (e1, 0)) != REG 21028 || REGNO (XEXP (e1, 0)) != SP_REGNUM 21029 || GET_CODE (XEXP (e1, 1)) != CONST_INT) 21030 abort (); 21031 21032 asm_fprintf (asm_out_file, "\t.pad #%wd\n", 21033 -INTVAL (XEXP (e1, 1))); 21034 } 21035 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM) 21036 { 21037 HOST_WIDE_INT offset; 21038 21039 if (GET_CODE (e1) == PLUS) 21040 { 21041 if (GET_CODE (XEXP (e1, 0)) != REG 21042 || GET_CODE (XEXP (e1, 1)) != CONST_INT) 21043 abort (); 21044 reg = REGNO (XEXP (e1, 0)); 21045 offset = INTVAL (XEXP (e1, 1)); 21046 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n", 21047 HARD_FRAME_POINTER_REGNUM, reg, 21048 INTVAL (XEXP (e1, 1))); 21049 } 21050 else if (GET_CODE (e1) == REG) 21051 { 21052 reg = REGNO (e1); 21053 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n", 21054 HARD_FRAME_POINTER_REGNUM, reg); 21055 } 21056 else 21057 abort (); 21058 } 21059 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM) 21060 { 21061 /* Move from sp to reg. */ 21062 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0)); 21063 } 21064 else if (GET_CODE (e1) == PLUS 21065 && GET_CODE (XEXP (e1, 0)) == REG 21066 && REGNO (XEXP (e1, 0)) == SP_REGNUM 21067 && GET_CODE (XEXP (e1, 1)) == CONST_INT) 21068 { 21069 /* Set reg to offset from sp. */ 21070 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n", 21071 REGNO (e0), (int)INTVAL(XEXP (e1, 1))); 21072 } 21073 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN) 21074 { 21075 /* Stack pointer save before alignment. */ 21076 reg = REGNO (e0); 21077 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n", 21078 reg + 0x90, reg); 21079 } 21080 else 21081 abort (); 21082 break; 21083 21084 default: 21085 abort (); 21086 } 21087 } 21088 21089 21090 /* Emit unwind directives for the given insn. 
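   Only frame-related insns are annotated.  As an illustrative (not
   exhaustive) example, a prologue containing

	push	{r4, r5, lr}
	sub	sp, sp, #16

   would typically be annotated with

	.save	{r4, r5, lr}
	.pad	#16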
*/ 21091 21092 static void 21093 arm_unwind_emit (FILE * asm_out_file, rtx insn) 21094 { 21095 rtx pat; 21096 21097 if (!ARM_EABI_UNWIND_TABLES) 21098 return; 21099 21100 if (!(flag_unwind_tables || crtl->uses_eh_lsda) 21101 && (TREE_NOTHROW (current_function_decl) 21102 || crtl->all_throwers_are_sibcalls)) 21103 return; 21104 21105 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn)) 21106 return; 21107 21108 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX); 21109 if (pat) 21110 pat = XEXP (pat, 0); 21111 else 21112 pat = PATTERN (insn); 21113 21114 switch (GET_CODE (pat)) 21115 { 21116 case SET: 21117 arm_unwind_emit_set (asm_out_file, pat); 21118 break; 21119 21120 case SEQUENCE: 21121 /* Store multiple. */ 21122 arm_unwind_emit_sequence (asm_out_file, pat); 21123 break; 21124 21125 default: 21126 abort(); 21127 } 21128 } 21129 21130 21131 /* Output a reference from a function exception table to the type_info 21132 object X. The EABI specifies that the symbol should be relocated by 21133 an R_ARM_TARGET2 relocation. */ 21134 21135 static bool 21136 arm_output_ttype (rtx x) 21137 { 21138 fputs ("\t.word\t", asm_out_file); 21139 output_addr_const (asm_out_file, x); 21140 /* Use special relocations for symbol references. */ 21141 if (GET_CODE (x) != CONST_INT) 21142 fputs ("(TARGET2)", asm_out_file); 21143 fputc ('\n', asm_out_file); 21144 21145 return TRUE; 21146 } 21147 #endif /* TARGET_UNWIND_INFO */ 21148 21149 21150 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic 21151 stack alignment. */ 21152 21153 static void 21154 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index) 21155 { 21156 rtx unspec = SET_SRC (pattern); 21157 gcc_assert (GET_CODE (unspec) == UNSPEC); 21158 21159 switch (index) 21160 { 21161 case UNSPEC_STACK_ALIGN: 21162 /* ??? We should set the CFA = (SP & ~7). At this point we haven't 21163 put anything on the stack, so hopefully it won't matter. 21164 CFA = SP will be correct after alignment. */ 21165 dwarf2out_reg_save_reg (label, stack_pointer_rtx, 21166 SET_DEST (pattern)); 21167 break; 21168 default: 21169 gcc_unreachable (); 21170 } 21171 } 21172 21173 21174 /* Output unwind directives for the start/end of a function. */ 21175 21176 void 21177 arm_output_fn_unwind (FILE * f, bool prologue) 21178 { 21179 if (!ARM_EABI_UNWIND_TABLES) 21180 return; 21181 21182 if (prologue) 21183 fputs ("\t.fnstart\n", f); 21184 else 21185 { 21186 /* If this function will never be unwound, then mark it as such. 21187 The came condition is used in arm_unwind_emit to suppress 21188 the frame annotations. 
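   When EABI unwind tables are enabled every function is still bracketed
   by .fnstart/.fnend; a function that will never be unwound simply gets
   an extra .cantunwind directive immediately before its .fnend.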
*/ 21189 if (!(flag_unwind_tables || crtl->uses_eh_lsda) 21190 && (TREE_NOTHROW (current_function_decl) 21191 || crtl->all_throwers_are_sibcalls)) 21192 fputs("\t.cantunwind\n", f); 21193 21194 fputs ("\t.fnend\n", f); 21195 } 21196 } 21197 21198 static bool 21199 arm_emit_tls_decoration (FILE *fp, rtx x) 21200 { 21201 enum tls_reloc reloc; 21202 rtx val; 21203 21204 val = XVECEXP (x, 0, 0); 21205 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1)); 21206 21207 output_addr_const (fp, val); 21208 21209 switch (reloc) 21210 { 21211 case TLS_GD32: 21212 fputs ("(tlsgd)", fp); 21213 break; 21214 case TLS_LDM32: 21215 fputs ("(tlsldm)", fp); 21216 break; 21217 case TLS_LDO32: 21218 fputs ("(tlsldo)", fp); 21219 break; 21220 case TLS_IE32: 21221 fputs ("(gottpoff)", fp); 21222 break; 21223 case TLS_LE32: 21224 fputs ("(tpoff)", fp); 21225 break; 21226 default: 21227 gcc_unreachable (); 21228 } 21229 21230 switch (reloc) 21231 { 21232 case TLS_GD32: 21233 case TLS_LDM32: 21234 case TLS_IE32: 21235 fputs (" + (. - ", fp); 21236 output_addr_const (fp, XVECEXP (x, 0, 2)); 21237 fputs (" - ", fp); 21238 output_addr_const (fp, XVECEXP (x, 0, 3)); 21239 fputc (')', fp); 21240 break; 21241 default: 21242 break; 21243 } 21244 21245 return TRUE; 21246 } 21247 21248 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */ 21249 21250 static void 21251 arm_output_dwarf_dtprel (FILE *file, int size, rtx x) 21252 { 21253 gcc_assert (size == 4); 21254 fputs ("\t.word\t", file); 21255 output_addr_const (file, x); 21256 fputs ("(tlsldo)", file); 21257 } 21258 21259 bool 21260 arm_output_addr_const_extra (FILE *fp, rtx x) 21261 { 21262 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) 21263 return arm_emit_tls_decoration (fp, x); 21264 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL) 21265 { 21266 char label[256]; 21267 int labelno = INTVAL (XVECEXP (x, 0, 0)); 21268 21269 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno); 21270 assemble_name_raw (fp, label); 21271 21272 return TRUE; 21273 } 21274 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF) 21275 { 21276 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_"); 21277 if (GOT_PCREL) 21278 fputs ("+.", fp); 21279 fputs ("-(", fp); 21280 output_addr_const (fp, XVECEXP (x, 0, 0)); 21281 fputc (')', fp); 21282 return TRUE; 21283 } 21284 else if (GET_CODE (x) == CONST_VECTOR) 21285 return arm_emit_vector_const (fp, x); 21286 21287 return FALSE; 21288 } 21289 21290 /* Output assembly for a shift instruction. 21291 SET_FLAGS determines how the instruction modifies the condition codes. 21292 0 - Do not set condition codes. 21293 1 - Set condition codes. 21294 2 - Use smallest instruction. */ 21295 const char * 21296 arm_output_shift(rtx * operands, int set_flags) 21297 { 21298 char pattern[100]; 21299 static const char flag_chars[3] = {'?', '.', '!'}; 21300 const char *shift; 21301 HOST_WIDE_INT val; 21302 char c; 21303 21304 c = flag_chars[set_flags]; 21305 if (TARGET_UNIFIED_ASM) 21306 { 21307 shift = shift_op(operands[3], &val); 21308 if (shift) 21309 { 21310 if (val != -1) 21311 operands[2] = GEN_INT(val); 21312 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c); 21313 } 21314 else 21315 sprintf (pattern, "mov%%%c\t%%0, %%1", c); 21316 } 21317 else 21318 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c); 21319 output_asm_insn (pattern, operands); 21320 return ""; 21321 } 21322 21323 /* Output a Thumb-1 casesi dispatch sequence. 
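   The dispatch is a call to one of the __gnu_thumb1_case_* helpers in
   libgcc, selected by the mode and signedness of the following
   ADDR_DIFF_VEC (see the switch below); the helper is expected to locate
   the table via the return address, index it, and branch to the chosen
   case.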
*/ 21324 const char * 21325 thumb1_output_casesi (rtx *operands) 21326 { 21327 rtx diff_vec = PATTERN (next_real_insn (operands[0])); 21328 addr_diff_vec_flags flags; 21329 21330 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); 21331 21332 flags = ADDR_DIFF_VEC_FLAGS (diff_vec); 21333 21334 switch (GET_MODE(diff_vec)) 21335 { 21336 case QImode: 21337 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? 21338 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi"); 21339 case HImode: 21340 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? 21341 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi"); 21342 case SImode: 21343 return "bl\t%___gnu_thumb1_case_si"; 21344 default: 21345 gcc_unreachable (); 21346 } 21347 } 21348 21349 /* Output a Thumb-2 casesi instruction. */ 21350 const char * 21351 thumb2_output_casesi (rtx *operands) 21352 { 21353 rtx diff_vec = PATTERN (next_real_insn (operands[2])); 21354 21355 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); 21356 21357 output_asm_insn ("cmp\t%0, %1", operands); 21358 output_asm_insn ("bhi\t%l3", operands); 21359 switch (GET_MODE(diff_vec)) 21360 { 21361 case QImode: 21362 return "tbb\t[%|pc, %0]"; 21363 case HImode: 21364 return "tbh\t[%|pc, %0, lsl #1]"; 21365 case SImode: 21366 if (flag_pic) 21367 { 21368 output_asm_insn ("adr\t%4, %l2", operands); 21369 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands); 21370 output_asm_insn ("add\t%4, %4, %5", operands); 21371 return "bx\t%4"; 21372 } 21373 else 21374 { 21375 output_asm_insn ("adr\t%4, %l2", operands); 21376 return "ldr\t%|pc, [%4, %0, lsl #2]"; 21377 } 21378 default: 21379 gcc_unreachable (); 21380 } 21381 } 21382 21383 /* Most ARM cores are single issue, but some newer ones can dual issue. 21384 The scheduler descriptions rely on this being correct. */ 21385 static int 21386 arm_issue_rate (void) 21387 { 21388 switch (arm_tune) 21389 { 21390 case cortexr4: 21391 case cortexr4f: 21392 case cortexa8: 21393 case cortexa9: 21394 return 2; 21395 21396 default: 21397 return 1; 21398 } 21399 } 21400 21401 /* A table and a function to perform ARM-specific name mangling for 21402 NEON vector types in order to conform to the AAPCS (see "Procedure 21403 Call Standard for the ARM Architecture", Appendix A). To qualify 21404 for emission with the mangled names defined in that document, a 21405 vector type must not only be of the correct mode but also be 21406 composed of NEON vector element types (e.g. __builtin_neon_qi). */ 21407 typedef struct 21408 { 21409 enum machine_mode mode; 21410 const char *element_type_name; 21411 const char *aapcs_name; 21412 } arm_mangle_map_entry; 21413 21414 static arm_mangle_map_entry arm_mangle_map[] = { 21415 /* 64-bit containerized types. */ 21416 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" }, 21417 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" }, 21418 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" }, 21419 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" }, 21420 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" }, 21421 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" }, 21422 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" }, 21423 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" }, 21424 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" }, 21425 /* 128-bit containerized types. 
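   Note that each aapcs_name in this table is the AAPCS type name
   preceded by its length in characters, i.e. it is already a mangled
   <source-name> that arm_mangle_type can return verbatim.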
*/ 21426 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" }, 21427 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" }, 21428 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" }, 21429 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" }, 21430 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" }, 21431 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" }, 21432 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" }, 21433 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" }, 21434 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" }, 21435 { VOIDmode, NULL, NULL } 21436 }; 21437 21438 const char * 21439 arm_mangle_type (const_tree type) 21440 { 21441 arm_mangle_map_entry *pos = arm_mangle_map; 21442 21443 /* The ARM ABI documents (10th October 2008) say that "__va_list" 21444 has to be managled as if it is in the "std" namespace. */ 21445 if (TARGET_AAPCS_BASED 21446 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) 21447 { 21448 static bool warned; 21449 if (!warned && warn_psabi && !in_system_header) 21450 { 21451 warned = true; 21452 inform (input_location, 21453 "the mangling of %<va_list%> has changed in GCC 4.4"); 21454 } 21455 return "St9__va_list"; 21456 } 21457 21458 /* Half-precision float. */ 21459 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) 21460 return "Dh"; 21461 21462 if (TREE_CODE (type) != VECTOR_TYPE) 21463 return NULL; 21464 21465 /* Check the mode of the vector type, and the name of the vector 21466 element type, against the table. */ 21467 while (pos->mode != VOIDmode) 21468 { 21469 tree elt_type = TREE_TYPE (type); 21470 21471 if (pos->mode == TYPE_MODE (type) 21472 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL 21473 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))), 21474 pos->element_type_name)) 21475 return pos->aapcs_name; 21476 21477 pos++; 21478 } 21479 21480 /* Use the default mangling for unrecognized (possibly user-defined) 21481 vector types. */ 21482 return NULL; 21483 } 21484 21485 /* Order of allocation of core registers for Thumb: this allocation is 21486 written over the corresponding initial entries of the array 21487 initialized with REG_ALLOC_ORDER. We allocate all low registers 21488 first. Saving and restoring a low register is usually cheaper than 21489 using a call-clobbered high register. */ 21490 21491 static const int thumb_core_reg_alloc_order[] = 21492 { 21493 3, 2, 1, 0, 4, 5, 6, 7, 21494 14, 12, 8, 9, 10, 11, 13, 15 21495 }; 21496 21497 /* Adjust register allocation order when compiling for Thumb. */ 21498 21499 void 21500 arm_order_regs_for_local_alloc (void) 21501 { 21502 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER; 21503 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order)); 21504 if (TARGET_THUMB) 21505 memcpy (reg_alloc_order, thumb_core_reg_alloc_order, 21506 sizeof (thumb_core_reg_alloc_order)); 21507 } 21508 21509 /* Set default optimization options. */ 21510 void 21511 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED) 21512 { 21513 /* Enable section anchors by default at -O1 or higher. 21514 Use 2 to distinguish from an explicit -fsection-anchors 21515 given on the command line. */ 21516 if (level > 0) 21517 flag_section_anchors = 2; 21518 } 21519 21520 /* Implement TARGET_FRAME_POINTER_REQUIRED. 
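   A frame pointer is forced when the function contains nonlocal labels,
   when the subtarget demands one, or for ARM-state APCS frames in
   non-leaf functions; otherwise it can be eliminated.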
*/ 21521 21522 bool 21523 arm_frame_pointer_required (void) 21524 { 21525 return (cfun->has_nonlocal_label 21526 || SUBTARGET_FRAME_POINTER_REQUIRED 21527 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ())); 21528 } 21529 21530 /* Thumb-1 is the only variant without conditional execution, so return 21531 true unless the target is Thumb-1. */ 21532 static bool 21533 arm_have_conditional_execution (void) 21534 { 21535 return !TARGET_THUMB1; 21536 } 21537 21538 #include "gt-arm.h" 21539