1 /* Subroutines for insn-output.c for HPPA. 2 Copyright (C) 1992-2020 Free Software Foundation, Inc. 3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #define IN_TARGET_CODE 1 22 23 #include "config.h" 24 #include "system.h" 25 #include "coretypes.h" 26 #include "memmodel.h" 27 #include "backend.h" 28 #include "target.h" 29 #include "rtl.h" 30 #include "tree.h" 31 #include "df.h" 32 #include "tm_p.h" 33 #include "stringpool.h" 34 #include "attribs.h" 35 #include "optabs.h" 36 #include "regs.h" 37 #include "emit-rtl.h" 38 #include "recog.h" 39 #include "diagnostic-core.h" 40 #include "insn-attr.h" 41 #include "alias.h" 42 #include "fold-const.h" 43 #include "stor-layout.h" 44 #include "varasm.h" 45 #include "calls.h" 46 #include "output.h" 47 #include "except.h" 48 #include "explow.h" 49 #include "expr.h" 50 #include "reload.h" 51 #include "common/common-target.h" 52 #include "langhooks.h" 53 #include "cfgrtl.h" 54 #include "opts.h" 55 #include "builtins.h" 56 57 /* This file should be included last. */ 58 #include "target-def.h" 59 60 /* Return nonzero if there is a bypass for the output of 61 OUT_INSN and the fp store IN_INSN. */ 62 int 63 pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) 64 { 65 machine_mode store_mode; 66 machine_mode other_mode; 67 rtx set; 68 69 if (recog_memoized (in_insn) < 0 70 || (get_attr_type (in_insn) != TYPE_FPSTORE 71 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD) 72 || recog_memoized (out_insn) < 0) 73 return 0; 74 75 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn))); 76 77 set = single_set (out_insn); 78 if (!set) 79 return 0; 80 81 other_mode = GET_MODE (SET_SRC (set)); 82 83 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode)); 84 } 85 86 87 #ifndef DO_FRAME_NOTES 88 #ifdef INCOMING_RETURN_ADDR_RTX 89 #define DO_FRAME_NOTES 1 90 #else 91 #define DO_FRAME_NOTES 0 92 #endif 93 #endif 94 95 static void pa_option_override (void); 96 static void copy_reg_pointer (rtx, rtx); 97 static void fix_range (const char *); 98 static int hppa_register_move_cost (machine_mode mode, reg_class_t, 99 reg_class_t); 100 static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool); 101 static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool); 102 static inline rtx force_mode (machine_mode, rtx); 103 static void pa_reorg (void); 104 static void pa_combine_instructions (void); 105 static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx, 106 rtx, rtx); 107 static bool forward_branch_p (rtx_insn *); 108 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *); 109 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *); 110 static int compute_cpymem_length (rtx_insn *); 111 static int compute_clrmem_length (rtx_insn *); 112 static bool pa_assemble_integer (rtx, unsigned int, int); 113 static void remove_useless_addtr_insns (int); 114 static void store_reg (int, HOST_WIDE_INT, int); 115 static void store_reg_modify (int, int, HOST_WIDE_INT); 116 static void load_reg (int, HOST_WIDE_INT, int); 117 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int); 118 static rtx pa_function_value (const_tree, const_tree, bool); 119 static rtx pa_libcall_value (machine_mode, const_rtx); 120 static bool pa_function_value_regno_p (const unsigned int); 121 static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED; 122 static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED; 123 static void update_total_code_bytes (unsigned int); 124 static void pa_output_function_epilogue (FILE *); 125 static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int); 126 static int pa_issue_rate (void); 127 static int pa_reloc_rw_mask (void); 128 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED; 129 static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED; 130 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT) 131 ATTRIBUTE_UNUSED; 132 static void pa_encode_section_info (tree, rtx, int); 133 static const char *pa_strip_name_encoding (const char *); 134 static bool pa_function_ok_for_sibcall (tree, tree); 135 static void pa_globalize_label (FILE *, const char *) 136 ATTRIBUTE_UNUSED; 137 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, 138 HOST_WIDE_INT, tree); 139 #if !defined(USE_COLLECT2) 140 static void pa_asm_out_constructor (rtx, int); 141 static void pa_asm_out_destructor (rtx, int); 142 #endif 143 static void pa_init_builtins (void); 144 static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int); 145 static rtx hppa_builtin_saveregs (void); 146 static void hppa_va_start (tree, rtx); 147 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); 148 static bool pa_scalar_mode_supported_p (scalar_mode); 149 static bool pa_commutative_p (const_rtx x, int outer_code); 150 static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED; 151 static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED; 152 static rtx hppa_legitimize_address (rtx, rtx, machine_mode); 153 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED; 154 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED; 155 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED; 156 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED; 157 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED; 158 static void pa_som_file_start (void) ATTRIBUTE_UNUSED; 159 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED; 160 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED; 161 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED; 162 static void output_deferred_plabels (void); 163 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED; 164 static void pa_file_end (void); 165 static void pa_init_libfuncs (void); 166 static rtx pa_struct_value_rtx (tree, int); 167 static bool pa_pass_by_reference (cumulative_args_t, 168 const function_arg_info &); 169 static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &); 170 static void pa_function_arg_advance (cumulative_args_t, 171 const function_arg_info &); 172 static rtx pa_function_arg (cumulative_args_t, const function_arg_info &); 173 static pad_direction pa_function_arg_padding (machine_mode, const_tree); 174 static unsigned int pa_function_arg_boundary (machine_mode, const_tree); 175 static struct machine_function * pa_init_machine_status (void); 176 static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t, 177 machine_mode, 178 secondary_reload_info *); 179 static bool pa_secondary_memory_needed (machine_mode, 180 reg_class_t, reg_class_t); 181 static void pa_extra_live_on_entry (bitmap); 182 static machine_mode pa_promote_function_mode (const_tree, 183 machine_mode, int *, 184 const_tree, int); 185 186 static void pa_asm_trampoline_template (FILE *); 187 static void pa_trampoline_init (rtx, tree, rtx); 188 static rtx pa_trampoline_adjust_address (rtx); 189 static rtx pa_delegitimize_address (rtx); 190 static bool pa_print_operand_punct_valid_p (unsigned char); 191 static rtx pa_internal_arg_pointer (void); 192 static bool pa_can_eliminate (const int, const int); 193 static void pa_conditional_register_usage (void); 194 static machine_mode pa_c_mode_for_suffix (char); 195 static section *pa_function_section (tree, enum node_frequency, bool, bool); 196 static bool pa_cannot_force_const_mem (machine_mode, rtx); 197 static bool pa_legitimate_constant_p (machine_mode, rtx); 198 static unsigned int pa_section_type_flags (tree, const char *, int); 199 static bool pa_legitimate_address_p (machine_mode, rtx, bool); 200 static bool pa_callee_copies (cumulative_args_t, const function_arg_info &); 201 static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode); 202 static bool pa_hard_regno_mode_ok (unsigned int, machine_mode); 203 static bool pa_modes_tieable_p (machine_mode, machine_mode); 204 static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t); 205 static HOST_WIDE_INT pa_starting_frame_offset (void); 206 static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED; 207 208 /* The following extra sections are only used for SOM. */ 209 static GTY(()) section *som_readonly_data_section; 210 static GTY(()) section *som_one_only_readonly_data_section; 211 static GTY(()) section *som_one_only_data_section; 212 static GTY(()) section *som_tm_clone_table_section; 213 214 /* Counts for the number of callee-saved general and floating point 215 registers which were saved by the current function's prologue. */ 216 static int gr_saved, fr_saved; 217 218 /* Boolean indicating whether the return pointer was saved by the 219 current function's prologue. */ 220 static bool rp_saved; 221 222 static rtx find_addr_reg (rtx); 223 224 /* Keep track of the number of bytes we have output in the CODE subspace 225 during this compilation so we'll know when to emit inline long-calls. */ 226 unsigned long total_code_bytes; 227 228 /* The last address of the previous function plus the number of bytes in 229 associated thunks that have been output. This is used to determine if 230 a thunk can use an IA-relative branch to reach its target function. */ 231 static unsigned int last_address; 232 233 /* Variables to handle plabels that we discover are necessary at assembly 234 output time. They are output after the current function. */ 235 struct GTY(()) deferred_plabel 236 { 237 rtx internal_label; 238 rtx symbol; 239 }; 240 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel * 241 deferred_plabels; 242 static size_t n_deferred_plabels = 0; 243 244 /* Initialize the GCC target structure. */ 245 246 #undef TARGET_OPTION_OVERRIDE 247 #define TARGET_OPTION_OVERRIDE pa_option_override 248 249 #undef TARGET_ASM_ALIGNED_HI_OP 250 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" 251 #undef TARGET_ASM_ALIGNED_SI_OP 252 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" 253 #undef TARGET_ASM_ALIGNED_DI_OP 254 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" 255 #undef TARGET_ASM_UNALIGNED_HI_OP 256 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 257 #undef TARGET_ASM_UNALIGNED_SI_OP 258 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 259 #undef TARGET_ASM_UNALIGNED_DI_OP 260 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 261 #undef TARGET_ASM_INTEGER 262 #define TARGET_ASM_INTEGER pa_assemble_integer 263 264 #undef TARGET_ASM_FUNCTION_EPILOGUE 265 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue 266 267 #undef TARGET_FUNCTION_VALUE 268 #define TARGET_FUNCTION_VALUE pa_function_value 269 #undef TARGET_LIBCALL_VALUE 270 #define TARGET_LIBCALL_VALUE pa_libcall_value 271 #undef TARGET_FUNCTION_VALUE_REGNO_P 272 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p 273 274 #undef TARGET_LEGITIMIZE_ADDRESS 275 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address 276 277 #undef TARGET_SCHED_ADJUST_COST 278 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost 279 #undef TARGET_SCHED_ISSUE_RATE 280 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate 281 282 #undef TARGET_ENCODE_SECTION_INFO 283 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info 284 #undef TARGET_STRIP_NAME_ENCODING 285 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding 286 287 #undef TARGET_FUNCTION_OK_FOR_SIBCALL 288 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall 289 290 #undef TARGET_COMMUTATIVE_P 291 #define TARGET_COMMUTATIVE_P pa_commutative_p 292 293 #undef TARGET_ASM_OUTPUT_MI_THUNK 294 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk 295 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 296 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true 297 298 #undef TARGET_ASM_FILE_END 299 #define TARGET_ASM_FILE_END pa_file_end 300 301 #undef TARGET_ASM_RELOC_RW_MASK 302 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask 303 304 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P 305 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p 306 307 #if !defined(USE_COLLECT2) 308 #undef TARGET_ASM_CONSTRUCTOR 309 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor 310 #undef TARGET_ASM_DESTRUCTOR 311 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor 312 #endif 313 314 #undef TARGET_INIT_BUILTINS 315 #define TARGET_INIT_BUILTINS pa_init_builtins 316 317 #undef TARGET_EXPAND_BUILTIN 318 #define TARGET_EXPAND_BUILTIN pa_expand_builtin 319 320 #undef TARGET_REGISTER_MOVE_COST 321 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost 322 #undef TARGET_RTX_COSTS 323 #define TARGET_RTX_COSTS hppa_rtx_costs 324 #undef TARGET_ADDRESS_COST 325 #define TARGET_ADDRESS_COST hppa_address_cost 326 327 #undef TARGET_MACHINE_DEPENDENT_REORG 328 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg 329 330 #undef TARGET_INIT_LIBFUNCS 331 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs 332 333 #undef TARGET_PROMOTE_FUNCTION_MODE 334 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode 335 #undef TARGET_PROMOTE_PROTOTYPES 336 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true 337 338 #undef TARGET_STRUCT_VALUE_RTX 339 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx 340 #undef TARGET_RETURN_IN_MEMORY 341 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory 342 #undef TARGET_MUST_PASS_IN_STACK 343 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size 344 #undef TARGET_PASS_BY_REFERENCE 345 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference 346 #undef TARGET_CALLEE_COPIES 347 #define TARGET_CALLEE_COPIES pa_callee_copies 348 #undef TARGET_ARG_PARTIAL_BYTES 349 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes 350 #undef TARGET_FUNCTION_ARG 351 #define TARGET_FUNCTION_ARG pa_function_arg 352 #undef TARGET_FUNCTION_ARG_ADVANCE 353 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance 354 #undef TARGET_FUNCTION_ARG_PADDING 355 #define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding 356 #undef TARGET_FUNCTION_ARG_BOUNDARY 357 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary 358 359 #undef TARGET_EXPAND_BUILTIN_SAVEREGS 360 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs 361 #undef TARGET_EXPAND_BUILTIN_VA_START 362 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start 363 #undef TARGET_GIMPLIFY_VA_ARG_EXPR 364 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr 365 366 #undef TARGET_SCALAR_MODE_SUPPORTED_P 367 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p 368 369 #undef TARGET_CANNOT_FORCE_CONST_MEM 370 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem 371 372 #undef TARGET_SECONDARY_RELOAD 373 #define TARGET_SECONDARY_RELOAD pa_secondary_reload 374 #undef TARGET_SECONDARY_MEMORY_NEEDED 375 #define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed 376 377 #undef TARGET_EXTRA_LIVE_ON_ENTRY 378 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry 379 380 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE 381 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template 382 #undef TARGET_TRAMPOLINE_INIT 383 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init 384 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS 385 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address 386 #undef TARGET_DELEGITIMIZE_ADDRESS 387 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address 388 #undef TARGET_INTERNAL_ARG_POINTER 389 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer 390 #undef TARGET_CAN_ELIMINATE 391 #define TARGET_CAN_ELIMINATE pa_can_eliminate 392 #undef TARGET_CONDITIONAL_REGISTER_USAGE 393 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage 394 #undef TARGET_C_MODE_FOR_SUFFIX 395 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix 396 #undef TARGET_ASM_FUNCTION_SECTION 397 #define TARGET_ASM_FUNCTION_SECTION pa_function_section 398 399 #undef TARGET_LEGITIMATE_CONSTANT_P 400 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p 401 #undef TARGET_SECTION_TYPE_FLAGS 402 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags 403 #undef TARGET_LEGITIMATE_ADDRESS_P 404 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p 405 406 #undef TARGET_LRA_P 407 #define TARGET_LRA_P hook_bool_void_false 408 409 #undef TARGET_HARD_REGNO_NREGS 410 #define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs 411 #undef TARGET_HARD_REGNO_MODE_OK 412 #define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok 413 #undef TARGET_MODES_TIEABLE_P 414 #define TARGET_MODES_TIEABLE_P pa_modes_tieable_p 415 416 #undef TARGET_CAN_CHANGE_MODE_CLASS 417 #define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class 418 419 #undef TARGET_CONSTANT_ALIGNMENT 420 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings 421 422 #undef TARGET_STARTING_FRAME_OFFSET 423 #define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset 424 425 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE 426 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed 427 428 struct gcc_target targetm = TARGET_INITIALIZER; 429 430 /* Parse the -mfixed-range= option string. */ 431 432 static void 433 fix_range (const char *const_str) 434 { 435 int i, first, last; 436 char *str, *dash, *comma; 437 438 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and 439 REG2 are either register names or register numbers. The effect 440 of this option is to mark the registers in the range from REG1 to 441 REG2 as ``fixed'' so they won't be used by the compiler. This is 442 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */ 443 444 i = strlen (const_str); 445 str = (char *) alloca (i + 1); 446 memcpy (str, const_str, i + 1); 447 448 while (1) 449 { 450 dash = strchr (str, '-'); 451 if (!dash) 452 { 453 warning (0, "value of %<-mfixed-range%> must have form REG1-REG2"); 454 return; 455 } 456 *dash = '\0'; 457 458 comma = strchr (dash + 1, ','); 459 if (comma) 460 *comma = '\0'; 461 462 first = decode_reg_name (str); 463 if (first < 0) 464 { 465 warning (0, "unknown register name: %s", str); 466 return; 467 } 468 469 last = decode_reg_name (dash + 1); 470 if (last < 0) 471 { 472 warning (0, "unknown register name: %s", dash + 1); 473 return; 474 } 475 476 *dash = '-'; 477 478 if (first > last) 479 { 480 warning (0, "%s-%s is an empty range", str, dash + 1); 481 return; 482 } 483 484 for (i = first; i <= last; ++i) 485 fixed_regs[i] = call_used_regs[i] = 1; 486 487 if (!comma) 488 break; 489 490 *comma = ','; 491 str = comma + 1; 492 } 493 494 /* Check if all floating point registers have been fixed. */ 495 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++) 496 if (!fixed_regs[i]) 497 break; 498 499 if (i > FP_REG_LAST) 500 target_flags |= MASK_DISABLE_FPREGS; 501 } 502 503 /* Implement the TARGET_OPTION_OVERRIDE hook. */ 504 505 static void 506 pa_option_override (void) 507 { 508 unsigned int i; 509 cl_deferred_option *opt; 510 vec<cl_deferred_option> *v 511 = (vec<cl_deferred_option> *) pa_deferred_options; 512 513 if (v) 514 FOR_EACH_VEC_ELT (*v, i, opt) 515 { 516 switch (opt->opt_index) 517 { 518 case OPT_mfixed_range_: 519 fix_range (opt->arg); 520 break; 521 522 default: 523 gcc_unreachable (); 524 } 525 } 526 527 if (flag_pic && TARGET_PORTABLE_RUNTIME) 528 { 529 warning (0, "PIC code generation is not supported in the portable runtime model"); 530 } 531 532 if (flag_pic && TARGET_FAST_INDIRECT_CALLS) 533 { 534 warning (0, "PIC code generation is not compatible with fast indirect calls"); 535 } 536 537 if (! TARGET_GAS && write_symbols != NO_DEBUG) 538 { 539 warning (0, "%<-g%> is only supported when using GAS on this processor"); 540 warning (0, "%<-g%> option disabled"); 541 write_symbols = NO_DEBUG; 542 } 543 544 /* We only support the "big PIC" model now. And we always generate PIC 545 code when in 64bit mode. */ 546 if (flag_pic == 1 || TARGET_64BIT) 547 flag_pic = 2; 548 549 /* Disable -freorder-blocks-and-partition as we don't support hot and 550 cold partitioning. */ 551 if (flag_reorder_blocks_and_partition) 552 { 553 inform (input_location, 554 "%<-freorder-blocks-and-partition%> does not work " 555 "on this architecture"); 556 flag_reorder_blocks_and_partition = 0; 557 flag_reorder_blocks = 1; 558 } 559 560 /* We can't guarantee that .dword is available for 32-bit targets. */ 561 if (UNITS_PER_WORD == 4) 562 targetm.asm_out.aligned_op.di = NULL; 563 564 /* The unaligned ops are only available when using GAS. */ 565 if (!TARGET_GAS) 566 { 567 targetm.asm_out.unaligned_op.hi = NULL; 568 targetm.asm_out.unaligned_op.si = NULL; 569 targetm.asm_out.unaligned_op.di = NULL; 570 } 571 572 init_machine_status = pa_init_machine_status; 573 } 574 575 enum pa_builtins 576 { 577 PA_BUILTIN_COPYSIGNQ, 578 PA_BUILTIN_FABSQ, 579 PA_BUILTIN_INFQ, 580 PA_BUILTIN_HUGE_VALQ, 581 PA_BUILTIN_max 582 }; 583 584 static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max]; 585 586 static void 587 pa_init_builtins (void) 588 { 589 #ifdef DONT_HAVE_FPUTC_UNLOCKED 590 { 591 tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED); 592 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl, 593 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED)); 594 } 595 #endif 596 #if TARGET_HPUX_11 597 { 598 tree decl; 599 600 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE) 601 set_user_assembler_name (decl, "_Isfinite"); 602 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE) 603 set_user_assembler_name (decl, "_Isfinitef"); 604 } 605 #endif 606 607 if (HPUX_LONG_DOUBLE_LIBRARY) 608 { 609 tree decl, ftype; 610 611 /* Under HPUX, the __float128 type is a synonym for "long double". */ 612 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 613 "__float128"); 614 615 /* TFmode support builtins. */ 616 ftype = build_function_type_list (long_double_type_node, 617 long_double_type_node, 618 NULL_TREE); 619 decl = add_builtin_function ("__builtin_fabsq", ftype, 620 PA_BUILTIN_FABSQ, BUILT_IN_MD, 621 "_U_Qfabs", NULL_TREE); 622 TREE_READONLY (decl) = 1; 623 pa_builtins[PA_BUILTIN_FABSQ] = decl; 624 625 ftype = build_function_type_list (long_double_type_node, 626 long_double_type_node, 627 long_double_type_node, 628 NULL_TREE); 629 decl = add_builtin_function ("__builtin_copysignq", ftype, 630 PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD, 631 "_U_Qfcopysign", NULL_TREE); 632 TREE_READONLY (decl) = 1; 633 pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl; 634 635 ftype = build_function_type_list (long_double_type_node, NULL_TREE); 636 decl = add_builtin_function ("__builtin_infq", ftype, 637 PA_BUILTIN_INFQ, BUILT_IN_MD, 638 NULL, NULL_TREE); 639 pa_builtins[PA_BUILTIN_INFQ] = decl; 640 641 decl = add_builtin_function ("__builtin_huge_valq", ftype, 642 PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD, 643 NULL, NULL_TREE); 644 pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl; 645 } 646 } 647 648 static rtx 649 pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, 650 machine_mode mode ATTRIBUTE_UNUSED, 651 int ignore ATTRIBUTE_UNUSED) 652 { 653 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 654 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); 655 656 switch (fcode) 657 { 658 case PA_BUILTIN_FABSQ: 659 case PA_BUILTIN_COPYSIGNQ: 660 return expand_call (exp, target, ignore); 661 662 case PA_BUILTIN_INFQ: 663 case PA_BUILTIN_HUGE_VALQ: 664 { 665 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp)); 666 REAL_VALUE_TYPE inf; 667 rtx tmp; 668 669 real_inf (&inf); 670 tmp = const_double_from_real_value (inf, target_mode); 671 672 tmp = validize_mem (force_const_mem (target_mode, tmp)); 673 674 if (target == 0) 675 target = gen_reg_rtx (target_mode); 676 677 emit_move_insn (target, tmp); 678 return target; 679 } 680 681 default: 682 gcc_unreachable (); 683 } 684 685 return NULL_RTX; 686 } 687 688 /* Function to init struct machine_function. 689 This will be called, via a pointer variable, 690 from push_function_context. */ 691 692 static struct machine_function * 693 pa_init_machine_status (void) 694 { 695 return ggc_cleared_alloc<machine_function> (); 696 } 697 698 /* If FROM is a probable pointer register, mark TO as a probable 699 pointer register with the same pointer alignment as FROM. */ 700 701 static void 702 copy_reg_pointer (rtx to, rtx from) 703 { 704 if (REG_POINTER (from)) 705 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from))); 706 } 707 708 /* Return 1 if X contains a symbolic expression. We know these 709 expressions will have one of a few well defined forms, so 710 we need only check those forms. */ 711 int 712 pa_symbolic_expression_p (rtx x) 713 { 714 715 /* Strip off any HIGH. */ 716 if (GET_CODE (x) == HIGH) 717 x = XEXP (x, 0); 718 719 return symbolic_operand (x, VOIDmode); 720 } 721 722 /* Accept any constant that can be moved in one instruction into a 723 general register. */ 724 int 725 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival) 726 { 727 /* OK if ldo, ldil, or zdepi, can be used. */ 728 return (VAL_14_BITS_P (ival) 729 || pa_ldil_cint_p (ival) 730 || pa_zdepi_cint_p (ival)); 731 } 732 733 /* True iff ldil can be used to load this CONST_INT. The least 734 significant 11 bits of the value must be zero and the value must 735 not change sign when extended from 32 to 64 bits. */ 736 int 737 pa_ldil_cint_p (unsigned HOST_WIDE_INT ival) 738 { 739 unsigned HOST_WIDE_INT x; 740 741 x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff); 742 return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31); 743 } 744 745 /* True iff zdepi can be used to generate this CONST_INT. 746 zdepi first sign extends a 5-bit signed number to a given field 747 length, then places this field anywhere in a zero. */ 748 int 749 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x) 750 { 751 unsigned HOST_WIDE_INT lsb_mask, t; 752 753 /* This might not be obvious, but it's at least fast. 754 This function is critical; we don't have the time loops would take. */ 755 lsb_mask = x & -x; 756 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1); 757 /* Return true iff t is a power of two. */ 758 return ((t & (t - 1)) == 0); 759 } 760 761 /* True iff depi or extru can be used to compute (reg & mask). 762 Accept bit pattern like these: 763 0....01....1 764 1....10....0 765 1..10..01..1 */ 766 int 767 pa_and_mask_p (unsigned HOST_WIDE_INT mask) 768 { 769 mask = ~mask; 770 mask += mask & -mask; 771 return (mask & (mask - 1)) == 0; 772 } 773 774 /* True iff depi can be used to compute (reg | MASK). */ 775 int 776 pa_ior_mask_p (unsigned HOST_WIDE_INT mask) 777 { 778 mask += mask & -mask; 779 return (mask & (mask - 1)) == 0; 780 } 781 782 /* Legitimize PIC addresses. If the address is already 783 position-independent, we return ORIG. Newly generated 784 position-independent addresses go to REG. If we need more 785 than one register, we lose. */ 786 787 static rtx 788 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) 789 { 790 rtx pic_ref = orig; 791 792 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig)); 793 794 /* Labels need special handling. */ 795 if (pic_label_operand (orig, mode)) 796 { 797 rtx_insn *insn; 798 799 /* We do not want to go through the movXX expanders here since that 800 would create recursion. 801 802 Nor do we really want to call a generator for a named pattern 803 since that requires multiple patterns if we want to support 804 multiple word sizes. 805 806 So instead we just emit the raw set, which avoids the movXX 807 expanders completely. */ 808 mark_reg_pointer (reg, BITS_PER_UNIT); 809 insn = emit_insn (gen_rtx_SET (reg, orig)); 810 811 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */ 812 add_reg_note (insn, REG_EQUAL, orig); 813 814 /* During and after reload, we need to generate a REG_LABEL_OPERAND note 815 and update LABEL_NUSES because this is not done automatically. */ 816 if (reload_in_progress || reload_completed) 817 { 818 /* Extract LABEL_REF. */ 819 if (GET_CODE (orig) == CONST) 820 orig = XEXP (XEXP (orig, 0), 0); 821 /* Extract CODE_LABEL. */ 822 orig = XEXP (orig, 0); 823 add_reg_note (insn, REG_LABEL_OPERAND, orig); 824 /* Make sure we have label and not a note. */ 825 if (LABEL_P (orig)) 826 LABEL_NUSES (orig)++; 827 } 828 crtl->uses_pic_offset_table = 1; 829 return reg; 830 } 831 if (GET_CODE (orig) == SYMBOL_REF) 832 { 833 rtx_insn *insn; 834 rtx tmp_reg; 835 836 gcc_assert (reg); 837 838 /* Before reload, allocate a temporary register for the intermediate 839 result. This allows the sequence to be deleted when the final 840 result is unused and the insns are trivially dead. */ 841 tmp_reg = ((reload_in_progress || reload_completed) 842 ? reg : gen_reg_rtx (Pmode)); 843 844 if (function_label_operand (orig, VOIDmode)) 845 { 846 /* Force function label into memory in word mode. */ 847 orig = XEXP (force_const_mem (word_mode, orig), 0); 848 /* Load plabel address from DLT. */ 849 emit_move_insn (tmp_reg, 850 gen_rtx_PLUS (word_mode, pic_offset_table_rtx, 851 gen_rtx_HIGH (word_mode, orig))); 852 pic_ref 853 = gen_const_mem (Pmode, 854 gen_rtx_LO_SUM (Pmode, tmp_reg, 855 gen_rtx_UNSPEC (Pmode, 856 gen_rtvec (1, orig), 857 UNSPEC_DLTIND14R))); 858 emit_move_insn (reg, pic_ref); 859 /* Now load address of function descriptor. */ 860 pic_ref = gen_rtx_MEM (Pmode, reg); 861 } 862 else 863 { 864 /* Load symbol reference from DLT. */ 865 emit_move_insn (tmp_reg, 866 gen_rtx_PLUS (word_mode, pic_offset_table_rtx, 867 gen_rtx_HIGH (word_mode, orig))); 868 pic_ref 869 = gen_const_mem (Pmode, 870 gen_rtx_LO_SUM (Pmode, tmp_reg, 871 gen_rtx_UNSPEC (Pmode, 872 gen_rtvec (1, orig), 873 UNSPEC_DLTIND14R))); 874 } 875 876 crtl->uses_pic_offset_table = 1; 877 mark_reg_pointer (reg, BITS_PER_UNIT); 878 insn = emit_move_insn (reg, pic_ref); 879 880 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */ 881 set_unique_reg_note (insn, REG_EQUAL, orig); 882 883 return reg; 884 } 885 else if (GET_CODE (orig) == CONST) 886 { 887 rtx base; 888 889 if (GET_CODE (XEXP (orig, 0)) == PLUS 890 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) 891 return orig; 892 893 gcc_assert (reg); 894 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); 895 896 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); 897 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, 898 base == reg ? 0 : reg); 899 900 if (GET_CODE (orig) == CONST_INT) 901 { 902 if (INT_14_BITS (orig)) 903 return plus_constant (Pmode, base, INTVAL (orig)); 904 orig = force_reg (Pmode, orig); 905 } 906 pic_ref = gen_rtx_PLUS (Pmode, base, orig); 907 /* Likewise, should we set special REG_NOTEs here? */ 908 } 909 910 return pic_ref; 911 } 912 913 static GTY(()) rtx gen_tls_tga; 914 915 static rtx 916 gen_tls_get_addr (void) 917 { 918 if (!gen_tls_tga) 919 gen_tls_tga = init_one_libfunc ("__tls_get_addr"); 920 return gen_tls_tga; 921 } 922 923 static rtx 924 hppa_tls_call (rtx arg) 925 { 926 rtx ret; 927 928 ret = gen_reg_rtx (Pmode); 929 emit_library_call_value (gen_tls_get_addr (), ret, 930 LCT_CONST, Pmode, arg, Pmode); 931 932 return ret; 933 } 934 935 static rtx 936 legitimize_tls_address (rtx addr) 937 { 938 rtx ret, tmp, t1, t2, tp; 939 rtx_insn *insn; 940 941 /* Currently, we can't handle anything but a SYMBOL_REF. */ 942 if (GET_CODE (addr) != SYMBOL_REF) 943 return addr; 944 945 switch (SYMBOL_REF_TLS_MODEL (addr)) 946 { 947 case TLS_MODEL_GLOBAL_DYNAMIC: 948 tmp = gen_reg_rtx (Pmode); 949 if (flag_pic) 950 emit_insn (gen_tgd_load_pic (tmp, addr)); 951 else 952 emit_insn (gen_tgd_load (tmp, addr)); 953 ret = hppa_tls_call (tmp); 954 break; 955 956 case TLS_MODEL_LOCAL_DYNAMIC: 957 ret = gen_reg_rtx (Pmode); 958 tmp = gen_reg_rtx (Pmode); 959 start_sequence (); 960 if (flag_pic) 961 emit_insn (gen_tld_load_pic (tmp, addr)); 962 else 963 emit_insn (gen_tld_load (tmp, addr)); 964 t1 = hppa_tls_call (tmp); 965 insn = get_insns (); 966 end_sequence (); 967 t2 = gen_reg_rtx (Pmode); 968 emit_libcall_block (insn, t2, t1, 969 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), 970 UNSPEC_TLSLDBASE)); 971 emit_insn (gen_tld_offset_load (ret, addr, t2)); 972 break; 973 974 case TLS_MODEL_INITIAL_EXEC: 975 tp = gen_reg_rtx (Pmode); 976 tmp = gen_reg_rtx (Pmode); 977 ret = gen_reg_rtx (Pmode); 978 emit_insn (gen_tp_load (tp)); 979 if (flag_pic) 980 emit_insn (gen_tie_load_pic (tmp, addr)); 981 else 982 emit_insn (gen_tie_load (tmp, addr)); 983 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp)); 984 break; 985 986 case TLS_MODEL_LOCAL_EXEC: 987 tp = gen_reg_rtx (Pmode); 988 ret = gen_reg_rtx (Pmode); 989 emit_insn (gen_tp_load (tp)); 990 emit_insn (gen_tle_load (ret, addr, tp)); 991 break; 992 993 default: 994 gcc_unreachable (); 995 } 996 997 return ret; 998 } 999 1000 /* Helper for hppa_legitimize_address. Given X, return true if it 1001 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8. 1002 1003 This respectively represent canonical shift-add rtxs or scaled 1004 memory addresses. */ 1005 static bool 1006 mem_shadd_or_shadd_rtx_p (rtx x) 1007 { 1008 return ((GET_CODE (x) == ASHIFT 1009 || GET_CODE (x) == MULT) 1010 && GET_CODE (XEXP (x, 1)) == CONST_INT 1011 && ((GET_CODE (x) == ASHIFT 1012 && pa_shadd_constant_p (INTVAL (XEXP (x, 1)))) 1013 || (GET_CODE (x) == MULT 1014 && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1)))))); 1015 } 1016 1017 /* Try machine-dependent ways of modifying an illegitimate address 1018 to be legitimate. If we find one, return the new, valid address. 1019 This macro is used in only one place: `memory_address' in explow.c. 1020 1021 OLDX is the address as it was before break_out_memory_refs was called. 1022 In some cases it is useful to look at this to decide what needs to be done. 1023 1024 It is always safe for this macro to do nothing. It exists to recognize 1025 opportunities to optimize the output. 1026 1027 For the PA, transform: 1028 1029 memory(X + <large int>) 1030 1031 into: 1032 1033 if (<large int> & mask) >= 16 1034 Y = (<large int> & ~mask) + mask + 1 Round up. 1035 else 1036 Y = (<large int> & ~mask) Round down. 1037 Z = X + Y 1038 memory (Z + (<large int> - Y)); 1039 1040 This is for CSE to find several similar references, and only use one Z. 1041 1042 X can either be a SYMBOL_REF or REG, but because combine cannot 1043 perform a 4->2 combination we do nothing for SYMBOL_REF + D where 1044 D will not fit in 14 bits. 1045 1046 MODE_FLOAT references allow displacements which fit in 5 bits, so use 1047 0x1f as the mask. 1048 1049 MODE_INT references allow displacements which fit in 14 bits, so use 1050 0x3fff as the mask. 1051 1052 This relies on the fact that most mode MODE_FLOAT references will use FP 1053 registers and most mode MODE_INT references will use integer registers. 1054 (In the rare case of an FP register used in an integer MODE, we depend 1055 on secondary reloads to clean things up.) 1056 1057 1058 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special 1059 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed 1060 addressing modes to be used). 1061 1062 Note that the addresses passed into hppa_legitimize_address always 1063 come from a MEM, so we only have to match the MULT form on incoming 1064 addresses. But to be future proof we also match the ASHIFT form. 1065 1066 However, this routine always places those shift-add sequences into 1067 registers, so we have to generate the ASHIFT form as our output. 1068 1069 Put X and Z into registers. Then put the entire expression into 1070 a register. */ 1071 1072 rtx 1073 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, 1074 machine_mode mode) 1075 { 1076 rtx orig = x; 1077 1078 /* We need to canonicalize the order of operands in unscaled indexed 1079 addresses since the code that checks if an address is valid doesn't 1080 always try both orders. */ 1081 if (!TARGET_NO_SPACE_REGS 1082 && GET_CODE (x) == PLUS 1083 && GET_MODE (x) == Pmode 1084 && REG_P (XEXP (x, 0)) 1085 && REG_P (XEXP (x, 1)) 1086 && REG_POINTER (XEXP (x, 0)) 1087 && !REG_POINTER (XEXP (x, 1))) 1088 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0)); 1089 1090 if (tls_referenced_p (x)) 1091 return legitimize_tls_address (x); 1092 else if (flag_pic) 1093 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode)); 1094 1095 /* Strip off CONST. */ 1096 if (GET_CODE (x) == CONST) 1097 x = XEXP (x, 0); 1098 1099 /* Special case. Get the SYMBOL_REF into a register and use indexing. 1100 That should always be safe. */ 1101 if (GET_CODE (x) == PLUS 1102 && GET_CODE (XEXP (x, 0)) == REG 1103 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF) 1104 { 1105 rtx reg = force_reg (Pmode, XEXP (x, 1)); 1106 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0))); 1107 } 1108 1109 /* Note we must reject symbols which represent function addresses 1110 since the assembler/linker can't handle arithmetic on plabels. */ 1111 if (GET_CODE (x) == PLUS 1112 && GET_CODE (XEXP (x, 1)) == CONST_INT 1113 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF 1114 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0))) 1115 || GET_CODE (XEXP (x, 0)) == REG)) 1116 { 1117 rtx int_part, ptr_reg; 1118 HOST_WIDE_INT newoffset; 1119 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1)); 1120 HOST_WIDE_INT mask; 1121 1122 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT 1123 && !INT14_OK_STRICT ? 0x1f : 0x3fff); 1124 1125 /* Choose which way to round the offset. Round up if we 1126 are >= halfway to the next boundary. */ 1127 if ((offset & mask) >= ((mask + 1) / 2)) 1128 newoffset = (offset & ~ mask) + mask + 1; 1129 else 1130 newoffset = (offset & ~ mask); 1131 1132 /* If the newoffset will not fit in 14 bits (ldo), then 1133 handling this would take 4 or 5 instructions (2 to load 1134 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to 1135 add the new offset and the SYMBOL_REF.) Combine cannot 1136 handle 4->2 or 5->2 combinations, so do not create 1137 them. */ 1138 if (! VAL_14_BITS_P (newoffset) 1139 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF) 1140 { 1141 rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset); 1142 rtx tmp_reg 1143 = force_reg (Pmode, 1144 gen_rtx_HIGH (Pmode, const_part)); 1145 ptr_reg 1146 = force_reg (Pmode, 1147 gen_rtx_LO_SUM (Pmode, 1148 tmp_reg, const_part)); 1149 } 1150 else 1151 { 1152 if (! VAL_14_BITS_P (newoffset)) 1153 int_part = force_reg (Pmode, GEN_INT (newoffset)); 1154 else 1155 int_part = GEN_INT (newoffset); 1156 1157 ptr_reg = force_reg (Pmode, 1158 gen_rtx_PLUS (Pmode, 1159 force_reg (Pmode, XEXP (x, 0)), 1160 int_part)); 1161 } 1162 return plus_constant (Pmode, ptr_reg, offset - newoffset); 1163 } 1164 1165 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */ 1166 1167 if (GET_CODE (x) == PLUS 1168 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0)) 1169 && (OBJECT_P (XEXP (x, 1)) 1170 || GET_CODE (XEXP (x, 1)) == SUBREG) 1171 && GET_CODE (XEXP (x, 1)) != CONST) 1172 { 1173 /* If we were given a MULT, we must fix the constant 1174 as we're going to create the ASHIFT form. */ 1175 HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1)); 1176 if (GET_CODE (XEXP (x, 0)) == MULT) 1177 shift_val = exact_log2 (shift_val); 1178 1179 rtx reg1, reg2; 1180 reg1 = XEXP (x, 1); 1181 if (GET_CODE (reg1) != REG) 1182 reg1 = force_reg (Pmode, force_operand (reg1, 0)); 1183 1184 reg2 = XEXP (XEXP (x, 0), 0); 1185 if (GET_CODE (reg2) != REG) 1186 reg2 = force_reg (Pmode, force_operand (reg2, 0)); 1187 1188 return force_reg (Pmode, 1189 gen_rtx_PLUS (Pmode, 1190 gen_rtx_ASHIFT (Pmode, reg2, 1191 GEN_INT (shift_val)), 1192 reg1)); 1193 } 1194 1195 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)). 1196 1197 Only do so for floating point modes since this is more speculative 1198 and we lose if it's an integer store. */ 1199 if (GET_CODE (x) == PLUS 1200 && GET_CODE (XEXP (x, 0)) == PLUS 1201 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0)) 1202 && (mode == SFmode || mode == DFmode)) 1203 { 1204 int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); 1205 1206 /* If we were given a MULT, we must fix the constant 1207 as we're going to create the ASHIFT form. */ 1208 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) 1209 shift_val = exact_log2 (shift_val); 1210 1211 /* Try and figure out what to use as a base register. */ 1212 rtx reg1, reg2, base, idx; 1213 1214 reg1 = XEXP (XEXP (x, 0), 1); 1215 reg2 = XEXP (x, 1); 1216 base = NULL_RTX; 1217 idx = NULL_RTX; 1218 1219 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const], 1220 then pa_emit_move_sequence will turn on REG_POINTER so we'll know 1221 it's a base register below. */ 1222 if (GET_CODE (reg1) != REG) 1223 reg1 = force_reg (Pmode, force_operand (reg1, 0)); 1224 1225 if (GET_CODE (reg2) != REG) 1226 reg2 = force_reg (Pmode, force_operand (reg2, 0)); 1227 1228 /* Figure out what the base and index are. */ 1229 1230 if (GET_CODE (reg1) == REG 1231 && REG_POINTER (reg1)) 1232 { 1233 base = reg1; 1234 idx = gen_rtx_PLUS (Pmode, 1235 gen_rtx_ASHIFT (Pmode, 1236 XEXP (XEXP (XEXP (x, 0), 0), 0), 1237 GEN_INT (shift_val)), 1238 XEXP (x, 1)); 1239 } 1240 else if (GET_CODE (reg2) == REG 1241 && REG_POINTER (reg2)) 1242 { 1243 base = reg2; 1244 idx = XEXP (x, 0); 1245 } 1246 1247 if (base == 0) 1248 return orig; 1249 1250 /* If the index adds a large constant, try to scale the 1251 constant so that it can be loaded with only one insn. */ 1252 if (GET_CODE (XEXP (idx, 1)) == CONST_INT 1253 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1)) 1254 / INTVAL (XEXP (XEXP (idx, 0), 1))) 1255 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0) 1256 { 1257 /* Divide the CONST_INT by the scale factor, then add it to A. */ 1258 HOST_WIDE_INT val = INTVAL (XEXP (idx, 1)); 1259 val /= (1 << shift_val); 1260 1261 reg1 = XEXP (XEXP (idx, 0), 0); 1262 if (GET_CODE (reg1) != REG) 1263 reg1 = force_reg (Pmode, force_operand (reg1, 0)); 1264 1265 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val))); 1266 1267 /* We can now generate a simple scaled indexed address. */ 1268 return 1269 force_reg 1270 (Pmode, gen_rtx_PLUS (Pmode, 1271 gen_rtx_ASHIFT (Pmode, reg1, 1272 GEN_INT (shift_val)), 1273 base)); 1274 } 1275 1276 /* If B + C is still a valid base register, then add them. */ 1277 if (GET_CODE (XEXP (idx, 1)) == CONST_INT 1278 && INTVAL (XEXP (idx, 1)) <= 4096 1279 && INTVAL (XEXP (idx, 1)) >= -4096) 1280 { 1281 rtx reg1, reg2; 1282 1283 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1))); 1284 1285 reg2 = XEXP (XEXP (idx, 0), 0); 1286 if (GET_CODE (reg2) != CONST_INT) 1287 reg2 = force_reg (Pmode, force_operand (reg2, 0)); 1288 1289 return force_reg (Pmode, 1290 gen_rtx_PLUS (Pmode, 1291 gen_rtx_ASHIFT (Pmode, reg2, 1292 GEN_INT (shift_val)), 1293 reg1)); 1294 } 1295 1296 /* Get the index into a register, then add the base + index and 1297 return a register holding the result. */ 1298 1299 /* First get A into a register. */ 1300 reg1 = XEXP (XEXP (idx, 0), 0); 1301 if (GET_CODE (reg1) != REG) 1302 reg1 = force_reg (Pmode, force_operand (reg1, 0)); 1303 1304 /* And get B into a register. */ 1305 reg2 = XEXP (idx, 1); 1306 if (GET_CODE (reg2) != REG) 1307 reg2 = force_reg (Pmode, force_operand (reg2, 0)); 1308 1309 reg1 = force_reg (Pmode, 1310 gen_rtx_PLUS (Pmode, 1311 gen_rtx_ASHIFT (Pmode, reg1, 1312 GEN_INT (shift_val)), 1313 reg2)); 1314 1315 /* Add the result to our base register and return. */ 1316 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1)); 1317 1318 } 1319 1320 /* Uh-oh. We might have an address for x[n-100000]. This needs 1321 special handling to avoid creating an indexed memory address 1322 with x-100000 as the base. 1323 1324 If the constant part is small enough, then it's still safe because 1325 there is a guard page at the beginning and end of the data segment. 1326 1327 Scaled references are common enough that we want to try and rearrange the 1328 terms so that we can use indexing for these addresses too. Only 1329 do the optimization for floatint point modes. */ 1330 1331 if (GET_CODE (x) == PLUS 1332 && pa_symbolic_expression_p (XEXP (x, 1))) 1333 { 1334 /* Ugly. We modify things here so that the address offset specified 1335 by the index expression is computed first, then added to x to form 1336 the entire address. */ 1337 1338 rtx regx1, regx2, regy1, regy2, y; 1339 1340 /* Strip off any CONST. */ 1341 y = XEXP (x, 1); 1342 if (GET_CODE (y) == CONST) 1343 y = XEXP (y, 0); 1344 1345 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS) 1346 { 1347 /* See if this looks like 1348 (plus (mult (reg) (mem_shadd_const)) 1349 (const (plus (symbol_ref) (const_int)))) 1350 1351 Where const_int is small. In that case the const 1352 expression is a valid pointer for indexing. 1353 1354 If const_int is big, but can be divided evenly by shadd_const 1355 and added to (reg). This allows more scaled indexed addresses. */ 1356 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF 1357 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0)) 1358 && GET_CODE (XEXP (y, 1)) == CONST_INT 1359 && INTVAL (XEXP (y, 1)) >= -4096 1360 && INTVAL (XEXP (y, 1)) <= 4095) 1361 { 1362 HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1)); 1363 1364 /* If we were given a MULT, we must fix the constant 1365 as we're going to create the ASHIFT form. */ 1366 if (GET_CODE (XEXP (x, 0)) == MULT) 1367 shift_val = exact_log2 (shift_val); 1368 1369 rtx reg1, reg2; 1370 1371 reg1 = XEXP (x, 1); 1372 if (GET_CODE (reg1) != REG) 1373 reg1 = force_reg (Pmode, force_operand (reg1, 0)); 1374 1375 reg2 = XEXP (XEXP (x, 0), 0); 1376 if (GET_CODE (reg2) != REG) 1377 reg2 = force_reg (Pmode, force_operand (reg2, 0)); 1378 1379 return 1380 force_reg (Pmode, 1381 gen_rtx_PLUS (Pmode, 1382 gen_rtx_ASHIFT (Pmode, 1383 reg2, 1384 GEN_INT (shift_val)), 1385 reg1)); 1386 } 1387 else if ((mode == DFmode || mode == SFmode) 1388 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF 1389 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0)) 1390 && GET_CODE (XEXP (y, 1)) == CONST_INT 1391 && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0) 1392 { 1393 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1)); 1394 1395 /* If we were given a MULT, we must fix the constant 1396 as we're going to create the ASHIFT form. */ 1397 if (GET_CODE (XEXP (x, 0)) == MULT) 1398 shift_val = exact_log2 (shift_val); 1399 1400 regx1 1401 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1)) 1402 / INTVAL (XEXP (XEXP (x, 0), 1)))); 1403 regx2 = XEXP (XEXP (x, 0), 0); 1404 if (GET_CODE (regx2) != REG) 1405 regx2 = force_reg (Pmode, force_operand (regx2, 0)); 1406 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode, 1407 regx2, regx1)); 1408 return 1409 force_reg (Pmode, 1410 gen_rtx_PLUS (Pmode, 1411 gen_rtx_ASHIFT (Pmode, regx2, 1412 GEN_INT (shift_val)), 1413 force_reg (Pmode, XEXP (y, 0)))); 1414 } 1415 else if (GET_CODE (XEXP (y, 1)) == CONST_INT 1416 && INTVAL (XEXP (y, 1)) >= -4096 1417 && INTVAL (XEXP (y, 1)) <= 4095) 1418 { 1419 /* This is safe because of the guard page at the 1420 beginning and end of the data space. Just 1421 return the original address. */ 1422 return orig; 1423 } 1424 else 1425 { 1426 /* Doesn't look like one we can optimize. */ 1427 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0)); 1428 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0)); 1429 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0)); 1430 regx1 = force_reg (Pmode, 1431 gen_rtx_fmt_ee (GET_CODE (y), Pmode, 1432 regx1, regy2)); 1433 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1)); 1434 } 1435 } 1436 } 1437 1438 return orig; 1439 } 1440 1441 /* Implement the TARGET_REGISTER_MOVE_COST hook. 1442 1443 Compute extra cost of moving data between one register class 1444 and another. 1445 1446 Make moves from SAR so expensive they should never happen. We used to 1447 have 0xffff here, but that generates overflow in rare cases. 1448 1449 Copies involving a FP register and a non-FP register are relatively 1450 expensive because they must go through memory. 1451 1452 Other copies are reasonably cheap. */ 1453 1454 static int 1455 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, 1456 reg_class_t from, reg_class_t to) 1457 { 1458 if (from == SHIFT_REGS) 1459 return 0x100; 1460 else if (to == SHIFT_REGS && FP_REG_CLASS_P (from)) 1461 return 18; 1462 else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to)) 1463 || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from))) 1464 return 16; 1465 else 1466 return 2; 1467 } 1468 1469 /* For the HPPA, REG and REG+CONST is cost 0 1470 and addresses involving symbolic constants are cost 2. 1471 1472 PIC addresses are very expensive. 1473 1474 It is no coincidence that this has the same structure 1475 as pa_legitimate_address_p. */ 1476 1477 static int 1478 hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED, 1479 addr_space_t as ATTRIBUTE_UNUSED, 1480 bool speed ATTRIBUTE_UNUSED) 1481 { 1482 switch (GET_CODE (X)) 1483 { 1484 case REG: 1485 case PLUS: 1486 case LO_SUM: 1487 return 1; 1488 case HIGH: 1489 return 2; 1490 default: 1491 return 4; 1492 } 1493 } 1494 1495 /* Return true if X represents a (possibly non-canonical) shNadd pattern. 1496 The machine mode of X is known to be SImode or DImode. */ 1497 1498 static bool 1499 hppa_rtx_costs_shadd_p (rtx x) 1500 { 1501 if (GET_CODE (x) != PLUS 1502 || !REG_P (XEXP (x, 1))) 1503 return false; 1504 rtx op0 = XEXP (x, 0); 1505 if (GET_CODE (op0) == ASHIFT 1506 && CONST_INT_P (XEXP (op0, 1)) 1507 && REG_P (XEXP (op0, 0))) 1508 { 1509 unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1)); 1510 return x == 1 || x == 2 || x == 3; 1511 } 1512 if (GET_CODE (op0) == MULT 1513 && CONST_INT_P (XEXP (op0, 1)) 1514 && REG_P (XEXP (op0, 0))) 1515 { 1516 unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1)); 1517 return x == 2 || x == 4 || x == 8; 1518 } 1519 return false; 1520 } 1521 1522 /* Compute a (partial) cost for rtx X. Return true if the complete 1523 cost has been computed, and false if subexpressions should be 1524 scanned. In either case, *TOTAL contains the cost result. */ 1525 1526 static bool 1527 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code, 1528 int opno ATTRIBUTE_UNUSED, 1529 int *total, bool speed) 1530 { 1531 int code = GET_CODE (x); 1532 1533 switch (code) 1534 { 1535 case CONST_INT: 1536 if (outer_code == SET) 1537 *total = COSTS_N_INSNS (1); 1538 else if (INTVAL (x) == 0) 1539 *total = 0; 1540 else if (INT_14_BITS (x)) 1541 *total = 1; 1542 else 1543 *total = 2; 1544 return true; 1545 1546 case HIGH: 1547 *total = 2; 1548 return true; 1549 1550 case CONST: 1551 case LABEL_REF: 1552 case SYMBOL_REF: 1553 *total = 4; 1554 return true; 1555 1556 case CONST_DOUBLE: 1557 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode)) 1558 && outer_code != SET) 1559 *total = 0; 1560 else 1561 *total = 8; 1562 return true; 1563 1564 case MULT: 1565 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 1566 { 1567 *total = COSTS_N_INSNS (3); 1568 } 1569 else if (mode == DImode) 1570 { 1571 if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT) 1572 *total = COSTS_N_INSNS (32); 1573 else 1574 *total = COSTS_N_INSNS (80); 1575 } 1576 else 1577 { 1578 if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT) 1579 *total = COSTS_N_INSNS (8); 1580 else 1581 *total = COSTS_N_INSNS (20); 1582 } 1583 return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)); 1584 1585 case DIV: 1586 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 1587 { 1588 *total = COSTS_N_INSNS (14); 1589 return false; 1590 } 1591 /* FALLTHRU */ 1592 1593 case UDIV: 1594 case MOD: 1595 case UMOD: 1596 /* A mode size N times larger than SImode needs O(N*N) more insns. */ 1597 if (mode == DImode) 1598 *total = COSTS_N_INSNS (240); 1599 else 1600 *total = COSTS_N_INSNS (60); 1601 return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)); 1602 1603 case PLUS: /* this includes shNadd insns */ 1604 case MINUS: 1605 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 1606 *total = COSTS_N_INSNS (3); 1607 else if (mode == DImode) 1608 { 1609 if (TARGET_64BIT) 1610 { 1611 *total = COSTS_N_INSNS (1); 1612 /* Handle shladd,l instructions. */ 1613 if (hppa_rtx_costs_shadd_p (x)) 1614 return true; 1615 } 1616 else 1617 *total = COSTS_N_INSNS (2); 1618 } 1619 else 1620 { 1621 *total = COSTS_N_INSNS (1); 1622 /* Handle shNadd instructions. */ 1623 if (hppa_rtx_costs_shadd_p (x)) 1624 return true; 1625 } 1626 return REG_P (XEXP (x, 0)) 1627 && (REG_P (XEXP (x, 1)) 1628 || CONST_INT_P (XEXP (x, 1))); 1629 1630 case ASHIFT: 1631 if (mode == DImode) 1632 { 1633 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))) 1634 { 1635 if (TARGET_64BIT) 1636 *total = COSTS_N_INSNS (1); 1637 else 1638 *total = COSTS_N_INSNS (2); 1639 return true; 1640 } 1641 else if (TARGET_64BIT) 1642 *total = COSTS_N_INSNS (3); 1643 else if (speed) 1644 *total = COSTS_N_INSNS (13); 1645 else 1646 *total = COSTS_N_INSNS (18); 1647 } 1648 else if (TARGET_64BIT) 1649 *total = COSTS_N_INSNS (4); 1650 else 1651 *total = COSTS_N_INSNS (2); 1652 return REG_P (XEXP (x, 0)) 1653 && (REG_P (XEXP (x, 1)) 1654 || CONST_INT_P (XEXP (x, 1))); 1655 1656 case ASHIFTRT: 1657 if (mode == DImode) 1658 { 1659 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))) 1660 { 1661 if (TARGET_64BIT) 1662 *total = COSTS_N_INSNS (1); 1663 else 1664 *total = COSTS_N_INSNS (2); 1665 return true; 1666 } 1667 else if (TARGET_64BIT) 1668 *total = COSTS_N_INSNS (3); 1669 else if (speed) 1670 *total = COSTS_N_INSNS (14); 1671 else 1672 *total = COSTS_N_INSNS (19); 1673 } 1674 else if (TARGET_64BIT) 1675 *total = COSTS_N_INSNS (4); 1676 else 1677 *total = COSTS_N_INSNS (2); 1678 return REG_P (XEXP (x, 0)) 1679 && (REG_P (XEXP (x, 1)) 1680 || CONST_INT_P (XEXP (x, 1))); 1681 1682 case LSHIFTRT: 1683 if (mode == DImode) 1684 { 1685 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))) 1686 { 1687 if (TARGET_64BIT) 1688 *total = COSTS_N_INSNS (1); 1689 else 1690 *total = COSTS_N_INSNS (2); 1691 return true; 1692 } 1693 else if (TARGET_64BIT) 1694 *total = COSTS_N_INSNS (2); 1695 else if (speed) 1696 *total = COSTS_N_INSNS (12); 1697 else 1698 *total = COSTS_N_INSNS (15); 1699 } 1700 else if (TARGET_64BIT) 1701 *total = COSTS_N_INSNS (3); 1702 else 1703 *total = COSTS_N_INSNS (2); 1704 return REG_P (XEXP (x, 0)) 1705 && (REG_P (XEXP (x, 1)) 1706 || CONST_INT_P (XEXP (x, 1))); 1707 1708 default: 1709 return false; 1710 } 1711 } 1712 1713 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a 1714 new rtx with the correct mode. */ 1715 static inline rtx 1716 force_mode (machine_mode mode, rtx orig) 1717 { 1718 if (mode == GET_MODE (orig)) 1719 return orig; 1720 1721 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER); 1722 1723 return gen_rtx_REG (mode, REGNO (orig)); 1724 } 1725 1726 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ 1727 1728 static bool 1729 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) 1730 { 1731 return tls_referenced_p (x); 1732 } 1733 1734 /* Emit insns to move operands[1] into operands[0]. 1735 1736 Return 1 if we have written out everything that needs to be done to 1737 do the move. Otherwise, return 0 and the caller will emit the move 1738 normally. 1739 1740 Note SCRATCH_REG may not be in the proper mode depending on how it 1741 will be used. This routine is responsible for creating a new copy 1742 of SCRATCH_REG in the proper mode. */ 1743 1744 int 1745 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg) 1746 { 1747 register rtx operand0 = operands[0]; 1748 register rtx operand1 = operands[1]; 1749 register rtx tem; 1750 1751 /* We can only handle indexed addresses in the destination operand 1752 of floating point stores. Thus, we need to break out indexed 1753 addresses from the destination operand. */ 1754 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0))) 1755 { 1756 gcc_assert (can_create_pseudo_p ()); 1757 1758 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0)); 1759 operand0 = replace_equiv_address (operand0, tem); 1760 } 1761 1762 /* On targets with non-equivalent space registers, break out unscaled 1763 indexed addresses from the source operand before the final CSE. 1764 We have to do this because the REG_POINTER flag is not correctly 1765 carried through various optimization passes and CSE may substitute 1766 a pseudo without the pointer set for one with the pointer set. As 1767 a result, we loose various opportunities to create insns with 1768 unscaled indexed addresses. */ 1769 if (!TARGET_NO_SPACE_REGS 1770 && !cse_not_expected 1771 && GET_CODE (operand1) == MEM 1772 && GET_CODE (XEXP (operand1, 0)) == PLUS 1773 && REG_P (XEXP (XEXP (operand1, 0), 0)) 1774 && REG_P (XEXP (XEXP (operand1, 0), 1))) 1775 operand1 1776 = replace_equiv_address (operand1, 1777 copy_to_mode_reg (Pmode, XEXP (operand1, 0))); 1778 1779 if (scratch_reg 1780 && reload_in_progress && GET_CODE (operand0) == REG 1781 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) 1782 operand0 = reg_equiv_mem (REGNO (operand0)); 1783 else if (scratch_reg 1784 && reload_in_progress && GET_CODE (operand0) == SUBREG 1785 && GET_CODE (SUBREG_REG (operand0)) == REG 1786 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER) 1787 { 1788 /* We must not alter SUBREG_BYTE (operand0) since that would confuse 1789 the code which tracks sets/uses for delete_output_reload. */ 1790 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0), 1791 reg_equiv_mem (REGNO (SUBREG_REG (operand0))), 1792 SUBREG_BYTE (operand0)); 1793 operand0 = alter_subreg (&temp, true); 1794 } 1795 1796 if (scratch_reg 1797 && reload_in_progress && GET_CODE (operand1) == REG 1798 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER) 1799 operand1 = reg_equiv_mem (REGNO (operand1)); 1800 else if (scratch_reg 1801 && reload_in_progress && GET_CODE (operand1) == SUBREG 1802 && GET_CODE (SUBREG_REG (operand1)) == REG 1803 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER) 1804 { 1805 /* We must not alter SUBREG_BYTE (operand0) since that would confuse 1806 the code which tracks sets/uses for delete_output_reload. */ 1807 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1), 1808 reg_equiv_mem (REGNO (SUBREG_REG (operand1))), 1809 SUBREG_BYTE (operand1)); 1810 operand1 = alter_subreg (&temp, true); 1811 } 1812 1813 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM 1814 && ((tem = find_replacement (&XEXP (operand0, 0))) 1815 != XEXP (operand0, 0))) 1816 operand0 = replace_equiv_address (operand0, tem); 1817 1818 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM 1819 && ((tem = find_replacement (&XEXP (operand1, 0))) 1820 != XEXP (operand1, 0))) 1821 operand1 = replace_equiv_address (operand1, tem); 1822 1823 /* Handle secondary reloads for loads/stores of FP registers from 1824 REG+D addresses where D does not fit in 5 or 14 bits, including 1825 (subreg (mem (addr))) cases, and reloads for other unsupported 1826 memory operands. */ 1827 if (scratch_reg 1828 && FP_REG_P (operand0) 1829 && (MEM_P (operand1) 1830 || (GET_CODE (operand1) == SUBREG 1831 && MEM_P (XEXP (operand1, 0))))) 1832 { 1833 rtx op1 = operand1; 1834 1835 if (GET_CODE (op1) == SUBREG) 1836 op1 = XEXP (op1, 0); 1837 1838 if (reg_plus_base_memory_operand (op1, GET_MODE (op1))) 1839 { 1840 if (!(TARGET_PA_20 1841 && !TARGET_ELF32 1842 && INT_14_BITS (XEXP (XEXP (op1, 0), 1))) 1843 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1))) 1844 { 1845 /* SCRATCH_REG will hold an address and maybe the actual data. 1846 We want it in WORD_MODE regardless of what mode it was 1847 originally given to us. */ 1848 scratch_reg = force_mode (word_mode, scratch_reg); 1849 1850 /* D might not fit in 14 bits either; for such cases load D 1851 into scratch reg. */ 1852 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1))) 1853 { 1854 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1)); 1855 emit_move_insn (scratch_reg, 1856 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)), 1857 Pmode, 1858 XEXP (XEXP (op1, 0), 0), 1859 scratch_reg)); 1860 } 1861 else 1862 emit_move_insn (scratch_reg, XEXP (op1, 0)); 1863 op1 = replace_equiv_address (op1, scratch_reg); 1864 } 1865 } 1866 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode)) 1867 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0)) 1868 || IS_INDEX_ADDR_P (XEXP (op1, 0))) 1869 { 1870 /* Load memory address into SCRATCH_REG. */ 1871 scratch_reg = force_mode (word_mode, scratch_reg); 1872 emit_move_insn (scratch_reg, XEXP (op1, 0)); 1873 op1 = replace_equiv_address (op1, scratch_reg); 1874 } 1875 emit_insn (gen_rtx_SET (operand0, op1)); 1876 return 1; 1877 } 1878 else if (scratch_reg 1879 && FP_REG_P (operand1) 1880 && (MEM_P (operand0) 1881 || (GET_CODE (operand0) == SUBREG 1882 && MEM_P (XEXP (operand0, 0))))) 1883 { 1884 rtx op0 = operand0; 1885 1886 if (GET_CODE (op0) == SUBREG) 1887 op0 = XEXP (op0, 0); 1888 1889 if (reg_plus_base_memory_operand (op0, GET_MODE (op0))) 1890 { 1891 if (!(TARGET_PA_20 1892 && !TARGET_ELF32 1893 && INT_14_BITS (XEXP (XEXP (op0, 0), 1))) 1894 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1))) 1895 { 1896 /* SCRATCH_REG will hold an address and maybe the actual data. 1897 We want it in WORD_MODE regardless of what mode it was 1898 originally given to us. */ 1899 scratch_reg = force_mode (word_mode, scratch_reg); 1900 1901 /* D might not fit in 14 bits either; for such cases load D 1902 into scratch reg. */ 1903 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1))) 1904 { 1905 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1)); 1906 emit_move_insn (scratch_reg, 1907 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)), 1908 Pmode, 1909 XEXP (XEXP (op0, 0), 0), 1910 scratch_reg)); 1911 } 1912 else 1913 emit_move_insn (scratch_reg, XEXP (op0, 0)); 1914 op0 = replace_equiv_address (op0, scratch_reg); 1915 } 1916 } 1917 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode)) 1918 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0)) 1919 || IS_INDEX_ADDR_P (XEXP (op0, 0))) 1920 { 1921 /* Load memory address into SCRATCH_REG. */ 1922 scratch_reg = force_mode (word_mode, scratch_reg); 1923 emit_move_insn (scratch_reg, XEXP (op0, 0)); 1924 op0 = replace_equiv_address (op0, scratch_reg); 1925 } 1926 emit_insn (gen_rtx_SET (op0, operand1)); 1927 return 1; 1928 } 1929 /* Handle secondary reloads for loads of FP registers from constant 1930 expressions by forcing the constant into memory. For the most part, 1931 this is only necessary for SImode and DImode. 1932 1933 Use scratch_reg to hold the address of the memory location. */ 1934 else if (scratch_reg 1935 && CONSTANT_P (operand1) 1936 && FP_REG_P (operand0)) 1937 { 1938 rtx const_mem, xoperands[2]; 1939 1940 if (operand1 == CONST0_RTX (mode)) 1941 { 1942 emit_insn (gen_rtx_SET (operand0, operand1)); 1943 return 1; 1944 } 1945 1946 /* SCRATCH_REG will hold an address and maybe the actual data. We want 1947 it in WORD_MODE regardless of what mode it was originally given 1948 to us. */ 1949 scratch_reg = force_mode (word_mode, scratch_reg); 1950 1951 /* Force the constant into memory and put the address of the 1952 memory location into scratch_reg. */ 1953 const_mem = force_const_mem (mode, operand1); 1954 xoperands[0] = scratch_reg; 1955 xoperands[1] = XEXP (const_mem, 0); 1956 pa_emit_move_sequence (xoperands, Pmode, 0); 1957 1958 /* Now load the destination register. */ 1959 emit_insn (gen_rtx_SET (operand0, 1960 replace_equiv_address (const_mem, scratch_reg))); 1961 return 1; 1962 } 1963 /* Handle secondary reloads for SAR. These occur when trying to load 1964 the SAR from memory or a constant. */ 1965 else if (scratch_reg 1966 && GET_CODE (operand0) == REG 1967 && REGNO (operand0) < FIRST_PSEUDO_REGISTER 1968 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS 1969 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT)) 1970 { 1971 /* D might not fit in 14 bits either; for such cases load D into 1972 scratch reg. */ 1973 if (GET_CODE (operand1) == MEM 1974 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0))) 1975 { 1976 /* We are reloading the address into the scratch register, so we 1977 want to make sure the scratch register is a full register. */ 1978 scratch_reg = force_mode (word_mode, scratch_reg); 1979 1980 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); 1981 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 1982 0)), 1983 Pmode, 1984 XEXP (XEXP (operand1, 0), 1985 0), 1986 scratch_reg)); 1987 1988 /* Now we are going to load the scratch register from memory, 1989 we want to load it in the same width as the original MEM, 1990 which must be the same as the width of the ultimate destination, 1991 OPERAND0. */ 1992 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg); 1993 1994 emit_move_insn (scratch_reg, 1995 replace_equiv_address (operand1, scratch_reg)); 1996 } 1997 else 1998 { 1999 /* We want to load the scratch register using the same mode as 2000 the ultimate destination. */ 2001 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg); 2002 2003 emit_move_insn (scratch_reg, operand1); 2004 } 2005 2006 /* And emit the insn to set the ultimate destination. We know that 2007 the scratch register has the same mode as the destination at this 2008 point. */ 2009 emit_move_insn (operand0, scratch_reg); 2010 return 1; 2011 } 2012 2013 /* Handle the most common case: storing into a register. */ 2014 if (register_operand (operand0, mode)) 2015 { 2016 /* Legitimize TLS symbol references. This happens for references 2017 that aren't a legitimate constant. */ 2018 if (PA_SYMBOL_REF_TLS_P (operand1)) 2019 operand1 = legitimize_tls_address (operand1); 2020 2021 if (register_operand (operand1, mode) 2022 || (GET_CODE (operand1) == CONST_INT 2023 && pa_cint_ok_for_move (UINTVAL (operand1))) 2024 || (operand1 == CONST0_RTX (mode)) 2025 || (GET_CODE (operand1) == HIGH 2026 && !symbolic_operand (XEXP (operand1, 0), VOIDmode)) 2027 /* Only `general_operands' can come here, so MEM is ok. */ 2028 || GET_CODE (operand1) == MEM) 2029 { 2030 /* Various sets are created during RTL generation which don't 2031 have the REG_POINTER flag correctly set. After the CSE pass, 2032 instruction recognition can fail if we don't consistently 2033 set this flag when performing register copies. This should 2034 also improve the opportunities for creating insns that use 2035 unscaled indexing. */ 2036 if (REG_P (operand0) && REG_P (operand1)) 2037 { 2038 if (REG_POINTER (operand1) 2039 && !REG_POINTER (operand0) 2040 && !HARD_REGISTER_P (operand0)) 2041 copy_reg_pointer (operand0, operand1); 2042 } 2043 2044 /* When MEMs are broken out, the REG_POINTER flag doesn't 2045 get set. In some cases, we can set the REG_POINTER flag 2046 from the declaration for the MEM. */ 2047 if (REG_P (operand0) 2048 && GET_CODE (operand1) == MEM 2049 && !REG_POINTER (operand0)) 2050 { 2051 tree decl = MEM_EXPR (operand1); 2052 2053 /* Set the register pointer flag and register alignment 2054 if the declaration for this memory reference is a 2055 pointer type. */ 2056 if (decl) 2057 { 2058 tree type; 2059 2060 /* If this is a COMPONENT_REF, use the FIELD_DECL from 2061 tree operand 1. */ 2062 if (TREE_CODE (decl) == COMPONENT_REF) 2063 decl = TREE_OPERAND (decl, 1); 2064 2065 type = TREE_TYPE (decl); 2066 type = strip_array_types (type); 2067 2068 if (POINTER_TYPE_P (type)) 2069 mark_reg_pointer (operand0, BITS_PER_UNIT); 2070 } 2071 } 2072 2073 emit_insn (gen_rtx_SET (operand0, operand1)); 2074 return 1; 2075 } 2076 } 2077 else if (GET_CODE (operand0) == MEM) 2078 { 2079 if (mode == DFmode && operand1 == CONST0_RTX (mode) 2080 && !(reload_in_progress || reload_completed)) 2081 { 2082 rtx temp = gen_reg_rtx (DFmode); 2083 2084 emit_insn (gen_rtx_SET (temp, operand1)); 2085 emit_insn (gen_rtx_SET (operand0, temp)); 2086 return 1; 2087 } 2088 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode)) 2089 { 2090 /* Run this case quickly. */ 2091 emit_insn (gen_rtx_SET (operand0, operand1)); 2092 return 1; 2093 } 2094 if (! (reload_in_progress || reload_completed)) 2095 { 2096 operands[0] = validize_mem (operand0); 2097 operands[1] = operand1 = force_reg (mode, operand1); 2098 } 2099 } 2100 2101 /* Simplify the source if we need to. 2102 Note we do have to handle function labels here, even though we do 2103 not consider them legitimate constants. Loop optimizations can 2104 call the emit_move_xxx with one as a source. */ 2105 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode)) 2106 || (GET_CODE (operand1) == HIGH 2107 && symbolic_operand (XEXP (operand1, 0), mode)) 2108 || function_label_operand (operand1, VOIDmode) 2109 || tls_referenced_p (operand1)) 2110 { 2111 int ishighonly = 0; 2112 2113 if (GET_CODE (operand1) == HIGH) 2114 { 2115 ishighonly = 1; 2116 operand1 = XEXP (operand1, 0); 2117 } 2118 if (symbolic_operand (operand1, mode)) 2119 { 2120 /* Argh. The assembler and linker can't handle arithmetic 2121 involving plabels. 2122 2123 So we force the plabel into memory, load operand0 from 2124 the memory location, then add in the constant part. */ 2125 if ((GET_CODE (operand1) == CONST 2126 && GET_CODE (XEXP (operand1, 0)) == PLUS 2127 && function_label_operand (XEXP (XEXP (operand1, 0), 0), 2128 VOIDmode)) 2129 || function_label_operand (operand1, VOIDmode)) 2130 { 2131 rtx temp, const_part; 2132 2133 /* Figure out what (if any) scratch register to use. */ 2134 if (reload_in_progress || reload_completed) 2135 { 2136 scratch_reg = scratch_reg ? scratch_reg : operand0; 2137 /* SCRATCH_REG will hold an address and maybe the actual 2138 data. We want it in WORD_MODE regardless of what mode it 2139 was originally given to us. */ 2140 scratch_reg = force_mode (word_mode, scratch_reg); 2141 } 2142 else if (flag_pic) 2143 scratch_reg = gen_reg_rtx (Pmode); 2144 2145 if (GET_CODE (operand1) == CONST) 2146 { 2147 /* Save away the constant part of the expression. */ 2148 const_part = XEXP (XEXP (operand1, 0), 1); 2149 gcc_assert (GET_CODE (const_part) == CONST_INT); 2150 2151 /* Force the function label into memory. */ 2152 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0)); 2153 } 2154 else 2155 { 2156 /* No constant part. */ 2157 const_part = NULL_RTX; 2158 2159 /* Force the function label into memory. */ 2160 temp = force_const_mem (mode, operand1); 2161 } 2162 2163 2164 /* Get the address of the memory location. PIC-ify it if 2165 necessary. */ 2166 temp = XEXP (temp, 0); 2167 if (flag_pic) 2168 temp = legitimize_pic_address (temp, mode, scratch_reg); 2169 2170 /* Put the address of the memory location into our destination 2171 register. */ 2172 operands[1] = temp; 2173 pa_emit_move_sequence (operands, mode, scratch_reg); 2174 2175 /* Now load from the memory location into our destination 2176 register. */ 2177 operands[1] = gen_rtx_MEM (Pmode, operands[0]); 2178 pa_emit_move_sequence (operands, mode, scratch_reg); 2179 2180 /* And add back in the constant part. */ 2181 if (const_part != NULL_RTX) 2182 expand_inc (operand0, const_part); 2183 2184 return 1; 2185 } 2186 2187 if (flag_pic) 2188 { 2189 rtx_insn *insn; 2190 rtx temp; 2191 2192 if (reload_in_progress || reload_completed) 2193 { 2194 temp = scratch_reg ? scratch_reg : operand0; 2195 /* TEMP will hold an address and maybe the actual 2196 data. We want it in WORD_MODE regardless of what mode it 2197 was originally given to us. */ 2198 temp = force_mode (word_mode, temp); 2199 } 2200 else 2201 temp = gen_reg_rtx (Pmode); 2202 2203 /* Force (const (plus (symbol) (const_int))) to memory 2204 if the const_int will not fit in 14 bits. Although 2205 this requires a relocation, the instruction sequence 2206 needed to load the value is shorter. */ 2207 if (GET_CODE (operand1) == CONST 2208 && GET_CODE (XEXP (operand1, 0)) == PLUS 2209 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT 2210 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))) 2211 { 2212 rtx x, m = force_const_mem (mode, operand1); 2213 2214 x = legitimize_pic_address (XEXP (m, 0), mode, temp); 2215 x = replace_equiv_address (m, x); 2216 insn = emit_move_insn (operand0, x); 2217 } 2218 else 2219 { 2220 operands[1] = legitimize_pic_address (operand1, mode, temp); 2221 if (REG_P (operand0) && REG_P (operands[1])) 2222 copy_reg_pointer (operand0, operands[1]); 2223 insn = emit_move_insn (operand0, operands[1]); 2224 } 2225 2226 /* Put a REG_EQUAL note on this insn. */ 2227 set_unique_reg_note (insn, REG_EQUAL, operand1); 2228 } 2229 /* On the HPPA, references to data space are supposed to use dp, 2230 register 27, but showing it in the RTL inhibits various cse 2231 and loop optimizations. */ 2232 else 2233 { 2234 rtx temp, set; 2235 2236 if (reload_in_progress || reload_completed) 2237 { 2238 temp = scratch_reg ? scratch_reg : operand0; 2239 /* TEMP will hold an address and maybe the actual 2240 data. We want it in WORD_MODE regardless of what mode it 2241 was originally given to us. */ 2242 temp = force_mode (word_mode, temp); 2243 } 2244 else 2245 temp = gen_reg_rtx (mode); 2246 2247 /* Loading a SYMBOL_REF into a register makes that register 2248 safe to be used as the base in an indexed address. 2249 2250 Don't mark hard registers though. That loses. */ 2251 if (GET_CODE (operand0) == REG 2252 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) 2253 mark_reg_pointer (operand0, BITS_PER_UNIT); 2254 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER) 2255 mark_reg_pointer (temp, BITS_PER_UNIT); 2256 2257 if (ishighonly) 2258 set = gen_rtx_SET (operand0, temp); 2259 else 2260 set = gen_rtx_SET (operand0, 2261 gen_rtx_LO_SUM (mode, temp, operand1)); 2262 2263 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1))); 2264 emit_insn (set); 2265 2266 } 2267 return 1; 2268 } 2269 else if (tls_referenced_p (operand1)) 2270 { 2271 rtx tmp = operand1; 2272 rtx addend = NULL; 2273 2274 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) 2275 { 2276 addend = XEXP (XEXP (tmp, 0), 1); 2277 tmp = XEXP (XEXP (tmp, 0), 0); 2278 } 2279 2280 gcc_assert (GET_CODE (tmp) == SYMBOL_REF); 2281 tmp = legitimize_tls_address (tmp); 2282 if (addend) 2283 { 2284 tmp = gen_rtx_PLUS (mode, tmp, addend); 2285 tmp = force_operand (tmp, operands[0]); 2286 } 2287 operands[1] = tmp; 2288 } 2289 else if (GET_CODE (operand1) != CONST_INT 2290 || !pa_cint_ok_for_move (UINTVAL (operand1))) 2291 { 2292 rtx temp; 2293 rtx_insn *insn; 2294 rtx op1 = operand1; 2295 HOST_WIDE_INT value = 0; 2296 HOST_WIDE_INT insv = 0; 2297 int insert = 0; 2298 2299 if (GET_CODE (operand1) == CONST_INT) 2300 value = INTVAL (operand1); 2301 2302 if (TARGET_64BIT 2303 && GET_CODE (operand1) == CONST_INT 2304 && HOST_BITS_PER_WIDE_INT > 32 2305 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32) 2306 { 2307 HOST_WIDE_INT nval; 2308 2309 /* Extract the low order 32 bits of the value and sign extend. 2310 If the new value is the same as the original value, we can 2311 can use the original value as-is. If the new value is 2312 different, we use it and insert the most-significant 32-bits 2313 of the original value into the final result. */ 2314 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1)) 2315 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31); 2316 if (value != nval) 2317 { 2318 #if HOST_BITS_PER_WIDE_INT > 32 2319 insv = value >= 0 ? value >> 32 : ~(~value >> 32); 2320 #endif 2321 insert = 1; 2322 value = nval; 2323 operand1 = GEN_INT (nval); 2324 } 2325 } 2326 2327 if (reload_in_progress || reload_completed) 2328 temp = scratch_reg ? scratch_reg : operand0; 2329 else 2330 temp = gen_reg_rtx (mode); 2331 2332 /* We don't directly split DImode constants on 32-bit targets 2333 because PLUS uses an 11-bit immediate and the insn sequence 2334 generated is not as efficient as the one using HIGH/LO_SUM. */ 2335 if (GET_CODE (operand1) == CONST_INT 2336 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD 2337 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT 2338 && !insert) 2339 { 2340 /* Directly break constant into high and low parts. This 2341 provides better optimization opportunities because various 2342 passes recognize constants split with PLUS but not LO_SUM. 2343 We use a 14-bit signed low part except when the addition 2344 of 0x4000 to the high part might change the sign of the 2345 high part. */ 2346 HOST_WIDE_INT low = value & 0x3fff; 2347 HOST_WIDE_INT high = value & ~ 0x3fff; 2348 2349 if (low >= 0x2000) 2350 { 2351 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000)) 2352 high += 0x2000; 2353 else 2354 high += 0x4000; 2355 } 2356 2357 low = value - high; 2358 2359 emit_insn (gen_rtx_SET (temp, GEN_INT (high))); 2360 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low)); 2361 } 2362 else 2363 { 2364 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1))); 2365 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1); 2366 } 2367 2368 insn = emit_move_insn (operands[0], operands[1]); 2369 2370 /* Now insert the most significant 32 bits of the value 2371 into the register. When we don't have a second register 2372 available, it could take up to nine instructions to load 2373 a 64-bit integer constant. Prior to reload, we force 2374 constants that would take more than three instructions 2375 to load to the constant pool. During and after reload, 2376 we have to handle all possible values. */ 2377 if (insert) 2378 { 2379 /* Use a HIGH/LO_SUM/INSV sequence if we have a second 2380 register and the value to be inserted is outside the 2381 range that can be loaded with three depdi instructions. */ 2382 if (temp != operand0 && (insv >= 16384 || insv < -16384)) 2383 { 2384 operand1 = GEN_INT (insv); 2385 2386 emit_insn (gen_rtx_SET (temp, 2387 gen_rtx_HIGH (mode, operand1))); 2388 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1)); 2389 if (mode == DImode) 2390 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32), 2391 const0_rtx, temp)); 2392 else 2393 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32), 2394 const0_rtx, temp)); 2395 } 2396 else 2397 { 2398 int len = 5, pos = 27; 2399 2400 /* Insert the bits using the depdi instruction. */ 2401 while (pos >= 0) 2402 { 2403 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16; 2404 HOST_WIDE_INT sign = v5 < 0; 2405 2406 /* Left extend the insertion. */ 2407 insv = (insv >= 0 ? insv >> len : ~(~insv >> len)); 2408 while (pos > 0 && (insv & 1) == sign) 2409 { 2410 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1)); 2411 len += 1; 2412 pos -= 1; 2413 } 2414 2415 if (mode == DImode) 2416 insn = emit_insn (gen_insvdi (operand0, 2417 GEN_INT (len), 2418 GEN_INT (pos), 2419 GEN_INT (v5))); 2420 else 2421 insn = emit_insn (gen_insvsi (operand0, 2422 GEN_INT (len), 2423 GEN_INT (pos), 2424 GEN_INT (v5))); 2425 2426 len = pos > 0 && pos < 5 ? pos : 5; 2427 pos -= len; 2428 } 2429 } 2430 } 2431 2432 set_unique_reg_note (insn, REG_EQUAL, op1); 2433 2434 return 1; 2435 } 2436 } 2437 /* Now have insn-emit do whatever it normally does. */ 2438 return 0; 2439 } 2440 2441 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning 2442 it will need a link/runtime reloc). */ 2443 2444 int 2445 pa_reloc_needed (tree exp) 2446 { 2447 int reloc = 0; 2448 2449 switch (TREE_CODE (exp)) 2450 { 2451 case ADDR_EXPR: 2452 return 1; 2453 2454 case POINTER_PLUS_EXPR: 2455 case PLUS_EXPR: 2456 case MINUS_EXPR: 2457 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0)); 2458 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1)); 2459 break; 2460 2461 CASE_CONVERT: 2462 case NON_LVALUE_EXPR: 2463 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0)); 2464 break; 2465 2466 case CONSTRUCTOR: 2467 { 2468 tree value; 2469 unsigned HOST_WIDE_INT ix; 2470 2471 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value) 2472 if (value) 2473 reloc |= pa_reloc_needed (value); 2474 } 2475 break; 2476 2477 case ERROR_MARK: 2478 break; 2479 2480 default: 2481 break; 2482 } 2483 return reloc; 2484 } 2485 2486 2487 /* Return the best assembler insn template 2488 for moving operands[1] into operands[0] as a fullword. */ 2489 const char * 2490 pa_singlemove_string (rtx *operands) 2491 { 2492 HOST_WIDE_INT intval; 2493 2494 if (GET_CODE (operands[0]) == MEM) 2495 return "stw %r1,%0"; 2496 if (GET_CODE (operands[1]) == MEM) 2497 return "ldw %1,%0"; 2498 if (GET_CODE (operands[1]) == CONST_DOUBLE) 2499 { 2500 long i; 2501 2502 gcc_assert (GET_MODE (operands[1]) == SFmode); 2503 2504 /* Translate the CONST_DOUBLE to a CONST_INT with the same target 2505 bit pattern. */ 2506 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i); 2507 2508 operands[1] = GEN_INT (i); 2509 /* Fall through to CONST_INT case. */ 2510 } 2511 if (GET_CODE (operands[1]) == CONST_INT) 2512 { 2513 intval = INTVAL (operands[1]); 2514 2515 if (VAL_14_BITS_P (intval)) 2516 return "ldi %1,%0"; 2517 else if ((intval & 0x7ff) == 0) 2518 return "ldil L'%1,%0"; 2519 else if (pa_zdepi_cint_p (intval)) 2520 return "{zdepi %Z1,%0|depwi,z %Z1,%0}"; 2521 else 2522 return "ldil L'%1,%0\n\tldo R'%1(%0),%0"; 2523 } 2524 return "copy %1,%0"; 2525 } 2526 2527 2528 /* Compute position (in OP[1]) and width (in OP[2]) 2529 useful for copying IMM to a register using the zdepi 2530 instructions. Store the immediate value to insert in OP[0]. */ 2531 static void 2532 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) 2533 { 2534 int lsb, len; 2535 2536 /* Find the least significant set bit in IMM. */ 2537 for (lsb = 0; lsb < 32; lsb++) 2538 { 2539 if ((imm & 1) != 0) 2540 break; 2541 imm >>= 1; 2542 } 2543 2544 /* Choose variants based on *sign* of the 5-bit field. */ 2545 if ((imm & 0x10) == 0) 2546 len = (lsb <= 28) ? 4 : 32 - lsb; 2547 else 2548 { 2549 /* Find the width of the bitstring in IMM. */ 2550 for (len = 5; len < 32 - lsb; len++) 2551 { 2552 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) 2553 break; 2554 } 2555 2556 /* Sign extend IMM as a 5-bit value. */ 2557 imm = (imm & 0xf) - 0x10; 2558 } 2559 2560 op[0] = imm; 2561 op[1] = 31 - lsb; 2562 op[2] = len; 2563 } 2564 2565 /* Compute position (in OP[1]) and width (in OP[2]) 2566 useful for copying IMM to a register using the depdi,z 2567 instructions. Store the immediate value to insert in OP[0]. */ 2568 2569 static void 2570 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) 2571 { 2572 int lsb, len, maxlen; 2573 2574 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64); 2575 2576 /* Find the least significant set bit in IMM. */ 2577 for (lsb = 0; lsb < maxlen; lsb++) 2578 { 2579 if ((imm & 1) != 0) 2580 break; 2581 imm >>= 1; 2582 } 2583 2584 /* Choose variants based on *sign* of the 5-bit field. */ 2585 if ((imm & 0x10) == 0) 2586 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb; 2587 else 2588 { 2589 /* Find the width of the bitstring in IMM. */ 2590 for (len = 5; len < maxlen - lsb; len++) 2591 { 2592 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) 2593 break; 2594 } 2595 2596 /* Extend length if host is narrow and IMM is negative. */ 2597 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb) 2598 len += 32; 2599 2600 /* Sign extend IMM as a 5-bit value. */ 2601 imm = (imm & 0xf) - 0x10; 2602 } 2603 2604 op[0] = imm; 2605 op[1] = 63 - lsb; 2606 op[2] = len; 2607 } 2608 2609 /* Output assembler code to perform a doubleword move insn 2610 with operands OPERANDS. */ 2611 2612 const char * 2613 pa_output_move_double (rtx *operands) 2614 { 2615 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1; 2616 rtx latehalf[2]; 2617 rtx addreg0 = 0, addreg1 = 0; 2618 int highonly = 0; 2619 2620 /* First classify both operands. */ 2621 2622 if (REG_P (operands[0])) 2623 optype0 = REGOP; 2624 else if (offsettable_memref_p (operands[0])) 2625 optype0 = OFFSOP; 2626 else if (GET_CODE (operands[0]) == MEM) 2627 optype0 = MEMOP; 2628 else 2629 optype0 = RNDOP; 2630 2631 if (REG_P (operands[1])) 2632 optype1 = REGOP; 2633 else if (CONSTANT_P (operands[1])) 2634 optype1 = CNSTOP; 2635 else if (offsettable_memref_p (operands[1])) 2636 optype1 = OFFSOP; 2637 else if (GET_CODE (operands[1]) == MEM) 2638 optype1 = MEMOP; 2639 else 2640 optype1 = RNDOP; 2641 2642 /* Check for the cases that the operand constraints are not 2643 supposed to allow to happen. */ 2644 gcc_assert (optype0 == REGOP || optype1 == REGOP); 2645 2646 /* Handle copies between general and floating registers. */ 2647 2648 if (optype0 == REGOP && optype1 == REGOP 2649 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1])) 2650 { 2651 if (FP_REG_P (operands[0])) 2652 { 2653 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands); 2654 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands); 2655 return "{fldds|fldd} -16(%%sp),%0"; 2656 } 2657 else 2658 { 2659 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands); 2660 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands); 2661 return "{ldws|ldw} -12(%%sp),%R0"; 2662 } 2663 } 2664 2665 /* Handle auto decrementing and incrementing loads and stores 2666 specifically, since the structure of the function doesn't work 2667 for them without major modification. Do it better when we learn 2668 this port about the general inc/dec addressing of PA. 2669 (This was written by tege. Chide him if it doesn't work.) */ 2670 2671 if (optype0 == MEMOP) 2672 { 2673 /* We have to output the address syntax ourselves, since print_operand 2674 doesn't deal with the addresses we want to use. Fix this later. */ 2675 2676 rtx addr = XEXP (operands[0], 0); 2677 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) 2678 { 2679 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); 2680 2681 operands[0] = XEXP (addr, 0); 2682 gcc_assert (GET_CODE (operands[1]) == REG 2683 && GET_CODE (operands[0]) == REG); 2684 2685 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); 2686 2687 /* No overlap between high target register and address 2688 register. (We do this in a non-obvious way to 2689 save a register file writeback) */ 2690 if (GET_CODE (addr) == POST_INC) 2691 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)"; 2692 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)"; 2693 } 2694 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 2695 { 2696 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); 2697 2698 operands[0] = XEXP (addr, 0); 2699 gcc_assert (GET_CODE (operands[1]) == REG 2700 && GET_CODE (operands[0]) == REG); 2701 2702 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); 2703 /* No overlap between high target register and address 2704 register. (We do this in a non-obvious way to save a 2705 register file writeback) */ 2706 if (GET_CODE (addr) == PRE_INC) 2707 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)"; 2708 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)"; 2709 } 2710 } 2711 if (optype1 == MEMOP) 2712 { 2713 /* We have to output the address syntax ourselves, since print_operand 2714 doesn't deal with the addresses we want to use. Fix this later. */ 2715 2716 rtx addr = XEXP (operands[1], 0); 2717 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) 2718 { 2719 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); 2720 2721 operands[1] = XEXP (addr, 0); 2722 gcc_assert (GET_CODE (operands[0]) == REG 2723 && GET_CODE (operands[1]) == REG); 2724 2725 if (!reg_overlap_mentioned_p (high_reg, addr)) 2726 { 2727 /* No overlap between high target register and address 2728 register. (We do this in a non-obvious way to 2729 save a register file writeback) */ 2730 if (GET_CODE (addr) == POST_INC) 2731 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0"; 2732 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0"; 2733 } 2734 else 2735 { 2736 /* This is an undefined situation. We should load into the 2737 address register *and* update that register. Probably 2738 we don't need to handle this at all. */ 2739 if (GET_CODE (addr) == POST_INC) 2740 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0"; 2741 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0"; 2742 } 2743 } 2744 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 2745 { 2746 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); 2747 2748 operands[1] = XEXP (addr, 0); 2749 gcc_assert (GET_CODE (operands[0]) == REG 2750 && GET_CODE (operands[1]) == REG); 2751 2752 if (!reg_overlap_mentioned_p (high_reg, addr)) 2753 { 2754 /* No overlap between high target register and address 2755 register. (We do this in a non-obvious way to 2756 save a register file writeback) */ 2757 if (GET_CODE (addr) == PRE_INC) 2758 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0"; 2759 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0"; 2760 } 2761 else 2762 { 2763 /* This is an undefined situation. We should load into the 2764 address register *and* update that register. Probably 2765 we don't need to handle this at all. */ 2766 if (GET_CODE (addr) == PRE_INC) 2767 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0"; 2768 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0"; 2769 } 2770 } 2771 else if (GET_CODE (addr) == PLUS 2772 && GET_CODE (XEXP (addr, 0)) == MULT) 2773 { 2774 rtx xoperands[4]; 2775 2776 /* Load address into left half of destination register. */ 2777 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); 2778 xoperands[1] = XEXP (addr, 1); 2779 xoperands[2] = XEXP (XEXP (addr, 0), 0); 2780 xoperands[3] = XEXP (XEXP (addr, 0), 1); 2781 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}", 2782 xoperands); 2783 return "ldw 4(%0),%R0\n\tldw 0(%0),%0"; 2784 } 2785 else if (GET_CODE (addr) == PLUS 2786 && REG_P (XEXP (addr, 0)) 2787 && REG_P (XEXP (addr, 1))) 2788 { 2789 rtx xoperands[3]; 2790 2791 /* Load address into left half of destination register. */ 2792 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); 2793 xoperands[1] = XEXP (addr, 0); 2794 xoperands[2] = XEXP (addr, 1); 2795 output_asm_insn ("{addl|add,l} %1,%2,%0", 2796 xoperands); 2797 return "ldw 4(%0),%R0\n\tldw 0(%0),%0"; 2798 } 2799 } 2800 2801 /* If an operand is an unoffsettable memory ref, find a register 2802 we can increment temporarily to make it refer to the second word. */ 2803 2804 if (optype0 == MEMOP) 2805 addreg0 = find_addr_reg (XEXP (operands[0], 0)); 2806 2807 if (optype1 == MEMOP) 2808 addreg1 = find_addr_reg (XEXP (operands[1], 0)); 2809 2810 /* Ok, we can do one word at a time. 2811 Normally we do the low-numbered word first. 2812 2813 In either case, set up in LATEHALF the operands to use 2814 for the high-numbered word and in some cases alter the 2815 operands in OPERANDS to be suitable for the low-numbered word. */ 2816 2817 if (optype0 == REGOP) 2818 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); 2819 else if (optype0 == OFFSOP) 2820 latehalf[0] = adjust_address_nv (operands[0], SImode, 4); 2821 else 2822 latehalf[0] = operands[0]; 2823 2824 if (optype1 == REGOP) 2825 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); 2826 else if (optype1 == OFFSOP) 2827 latehalf[1] = adjust_address_nv (operands[1], SImode, 4); 2828 else if (optype1 == CNSTOP) 2829 { 2830 if (GET_CODE (operands[1]) == HIGH) 2831 { 2832 operands[1] = XEXP (operands[1], 0); 2833 highonly = 1; 2834 } 2835 split_double (operands[1], &operands[1], &latehalf[1]); 2836 } 2837 else 2838 latehalf[1] = operands[1]; 2839 2840 /* If the first move would clobber the source of the second one, 2841 do them in the other order. 2842 2843 This can happen in two cases: 2844 2845 mem -> register where the first half of the destination register 2846 is the same register used in the memory's address. Reload 2847 can create such insns. 2848 2849 mem in this case will be either register indirect or register 2850 indirect plus a valid offset. 2851 2852 register -> register move where REGNO(dst) == REGNO(src + 1) 2853 someone (Tim/Tege?) claimed this can happen for parameter loads. 2854 2855 Handle mem -> register case first. */ 2856 if (optype0 == REGOP 2857 && (optype1 == MEMOP || optype1 == OFFSOP) 2858 && refers_to_regno_p (REGNO (operands[0]), operands[1])) 2859 { 2860 /* Do the late half first. */ 2861 if (addreg1) 2862 output_asm_insn ("ldo 4(%0),%0", &addreg1); 2863 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2864 2865 /* Then clobber. */ 2866 if (addreg1) 2867 output_asm_insn ("ldo -4(%0),%0", &addreg1); 2868 return pa_singlemove_string (operands); 2869 } 2870 2871 /* Now handle register -> register case. */ 2872 if (optype0 == REGOP && optype1 == REGOP 2873 && REGNO (operands[0]) == REGNO (operands[1]) + 1) 2874 { 2875 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2876 return pa_singlemove_string (operands); 2877 } 2878 2879 /* Normal case: do the two words, low-numbered first. */ 2880 2881 output_asm_insn (pa_singlemove_string (operands), operands); 2882 2883 /* Make any unoffsettable addresses point at high-numbered word. */ 2884 if (addreg0) 2885 output_asm_insn ("ldo 4(%0),%0", &addreg0); 2886 if (addreg1) 2887 output_asm_insn ("ldo 4(%0),%0", &addreg1); 2888 2889 /* Do high-numbered word. */ 2890 if (highonly) 2891 output_asm_insn ("ldil L'%1,%0", latehalf); 2892 else 2893 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2894 2895 /* Undo the adds we just did. */ 2896 if (addreg0) 2897 output_asm_insn ("ldo -4(%0),%0", &addreg0); 2898 if (addreg1) 2899 output_asm_insn ("ldo -4(%0),%0", &addreg1); 2900 2901 return ""; 2902 } 2903 2904 const char * 2905 pa_output_fp_move_double (rtx *operands) 2906 { 2907 if (FP_REG_P (operands[0])) 2908 { 2909 if (FP_REG_P (operands[1]) 2910 || operands[1] == CONST0_RTX (GET_MODE (operands[0]))) 2911 output_asm_insn ("fcpy,dbl %f1,%0", operands); 2912 else 2913 output_asm_insn ("fldd%F1 %1,%0", operands); 2914 } 2915 else if (FP_REG_P (operands[1])) 2916 { 2917 output_asm_insn ("fstd%F0 %1,%0", operands); 2918 } 2919 else 2920 { 2921 rtx xoperands[2]; 2922 2923 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0]))); 2924 2925 /* This is a pain. You have to be prepared to deal with an 2926 arbitrary address here including pre/post increment/decrement. 2927 2928 so avoid this in the MD. */ 2929 gcc_assert (GET_CODE (operands[0]) == REG); 2930 2931 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); 2932 xoperands[0] = operands[0]; 2933 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands); 2934 } 2935 return ""; 2936 } 2937 2938 /* Return a REG that occurs in ADDR with coefficient 1. 2939 ADDR can be effectively incremented by incrementing REG. */ 2940 2941 static rtx 2942 find_addr_reg (rtx addr) 2943 { 2944 while (GET_CODE (addr) == PLUS) 2945 { 2946 if (GET_CODE (XEXP (addr, 0)) == REG) 2947 addr = XEXP (addr, 0); 2948 else if (GET_CODE (XEXP (addr, 1)) == REG) 2949 addr = XEXP (addr, 1); 2950 else if (CONSTANT_P (XEXP (addr, 0))) 2951 addr = XEXP (addr, 1); 2952 else if (CONSTANT_P (XEXP (addr, 1))) 2953 addr = XEXP (addr, 0); 2954 else 2955 gcc_unreachable (); 2956 } 2957 gcc_assert (GET_CODE (addr) == REG); 2958 return addr; 2959 } 2960 2961 /* Emit code to perform a block move. 2962 2963 OPERANDS[0] is the destination pointer as a REG, clobbered. 2964 OPERANDS[1] is the source pointer as a REG, clobbered. 2965 OPERANDS[2] is a register for temporary storage. 2966 OPERANDS[3] is a register for temporary storage. 2967 OPERANDS[4] is the size as a CONST_INT 2968 OPERANDS[5] is the alignment safe to use, as a CONST_INT. 2969 OPERANDS[6] is another temporary register. */ 2970 2971 const char * 2972 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) 2973 { 2974 HOST_WIDE_INT align = INTVAL (operands[5]); 2975 unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]); 2976 2977 /* We can't move more than a word at a time because the PA 2978 has no longer integer move insns. (Could use fp mem ops?) */ 2979 if (align > (TARGET_64BIT ? 8 : 4)) 2980 align = (TARGET_64BIT ? 8 : 4); 2981 2982 /* Note that we know each loop below will execute at least twice 2983 (else we would have open-coded the copy). */ 2984 switch (align) 2985 { 2986 case 8: 2987 /* Pre-adjust the loop counter. */ 2988 operands[4] = GEN_INT (n_bytes - 16); 2989 output_asm_insn ("ldi %4,%2", operands); 2990 2991 /* Copying loop. */ 2992 output_asm_insn ("ldd,ma 8(%1),%3", operands); 2993 output_asm_insn ("ldd,ma 8(%1),%6", operands); 2994 output_asm_insn ("std,ma %3,8(%0)", operands); 2995 output_asm_insn ("addib,>= -16,%2,.-12", operands); 2996 output_asm_insn ("std,ma %6,8(%0)", operands); 2997 2998 /* Handle the residual. There could be up to 7 bytes of 2999 residual to copy! */ 3000 if (n_bytes % 16 != 0) 3001 { 3002 operands[4] = GEN_INT (n_bytes % 8); 3003 if (n_bytes % 16 >= 8) 3004 output_asm_insn ("ldd,ma 8(%1),%3", operands); 3005 if (n_bytes % 8 != 0) 3006 output_asm_insn ("ldd 0(%1),%6", operands); 3007 if (n_bytes % 16 >= 8) 3008 output_asm_insn ("std,ma %3,8(%0)", operands); 3009 if (n_bytes % 8 != 0) 3010 output_asm_insn ("stdby,e %6,%4(%0)", operands); 3011 } 3012 return ""; 3013 3014 case 4: 3015 /* Pre-adjust the loop counter. */ 3016 operands[4] = GEN_INT (n_bytes - 8); 3017 output_asm_insn ("ldi %4,%2", operands); 3018 3019 /* Copying loop. */ 3020 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); 3021 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands); 3022 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); 3023 output_asm_insn ("addib,>= -8,%2,.-12", operands); 3024 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands); 3025 3026 /* Handle the residual. There could be up to 7 bytes of 3027 residual to copy! */ 3028 if (n_bytes % 8 != 0) 3029 { 3030 operands[4] = GEN_INT (n_bytes % 4); 3031 if (n_bytes % 8 >= 4) 3032 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); 3033 if (n_bytes % 4 != 0) 3034 output_asm_insn ("ldw 0(%1),%6", operands); 3035 if (n_bytes % 8 >= 4) 3036 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); 3037 if (n_bytes % 4 != 0) 3038 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands); 3039 } 3040 return ""; 3041 3042 case 2: 3043 /* Pre-adjust the loop counter. */ 3044 operands[4] = GEN_INT (n_bytes - 4); 3045 output_asm_insn ("ldi %4,%2", operands); 3046 3047 /* Copying loop. */ 3048 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); 3049 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands); 3050 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); 3051 output_asm_insn ("addib,>= -4,%2,.-12", operands); 3052 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands); 3053 3054 /* Handle the residual. */ 3055 if (n_bytes % 4 != 0) 3056 { 3057 if (n_bytes % 4 >= 2) 3058 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); 3059 if (n_bytes % 2 != 0) 3060 output_asm_insn ("ldb 0(%1),%6", operands); 3061 if (n_bytes % 4 >= 2) 3062 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); 3063 if (n_bytes % 2 != 0) 3064 output_asm_insn ("stb %6,0(%0)", operands); 3065 } 3066 return ""; 3067 3068 case 1: 3069 /* Pre-adjust the loop counter. */ 3070 operands[4] = GEN_INT (n_bytes - 2); 3071 output_asm_insn ("ldi %4,%2", operands); 3072 3073 /* Copying loop. */ 3074 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands); 3075 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands); 3076 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands); 3077 output_asm_insn ("addib,>= -2,%2,.-12", operands); 3078 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands); 3079 3080 /* Handle the residual. */ 3081 if (n_bytes % 2 != 0) 3082 { 3083 output_asm_insn ("ldb 0(%1),%3", operands); 3084 output_asm_insn ("stb %3,0(%0)", operands); 3085 } 3086 return ""; 3087 3088 default: 3089 gcc_unreachable (); 3090 } 3091 } 3092 3093 /* Count the number of insns necessary to handle this block move. 3094 3095 Basic structure is the same as emit_block_move, except that we 3096 count insns rather than emit them. */ 3097 3098 static int 3099 compute_cpymem_length (rtx_insn *insn) 3100 { 3101 rtx pat = PATTERN (insn); 3102 unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0)); 3103 unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0)); 3104 unsigned int n_insns = 0; 3105 3106 /* We can't move more than four bytes at a time because the PA 3107 has no longer integer move insns. (Could use fp mem ops?) */ 3108 if (align > (TARGET_64BIT ? 8 : 4)) 3109 align = (TARGET_64BIT ? 8 : 4); 3110 3111 /* The basic copying loop. */ 3112 n_insns = 6; 3113 3114 /* Residuals. */ 3115 if (n_bytes % (2 * align) != 0) 3116 { 3117 if ((n_bytes % (2 * align)) >= align) 3118 n_insns += 2; 3119 3120 if ((n_bytes % align) != 0) 3121 n_insns += 2; 3122 } 3123 3124 /* Lengths are expressed in bytes now; each insn is 4 bytes. */ 3125 return n_insns * 4; 3126 } 3127 3128 /* Emit code to perform a block clear. 3129 3130 OPERANDS[0] is the destination pointer as a REG, clobbered. 3131 OPERANDS[1] is a register for temporary storage. 3132 OPERANDS[2] is the size as a CONST_INT 3133 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */ 3134 3135 const char * 3136 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) 3137 { 3138 HOST_WIDE_INT align = INTVAL (operands[3]); 3139 unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]); 3140 3141 /* We can't clear more than a word at a time because the PA 3142 has no longer integer move insns. */ 3143 if (align > (TARGET_64BIT ? 8 : 4)) 3144 align = (TARGET_64BIT ? 8 : 4); 3145 3146 /* Note that we know each loop below will execute at least twice 3147 (else we would have open-coded the copy). */ 3148 switch (align) 3149 { 3150 case 8: 3151 /* Pre-adjust the loop counter. */ 3152 operands[2] = GEN_INT (n_bytes - 16); 3153 output_asm_insn ("ldi %2,%1", operands); 3154 3155 /* Loop. */ 3156 output_asm_insn ("std,ma %%r0,8(%0)", operands); 3157 output_asm_insn ("addib,>= -16,%1,.-4", operands); 3158 output_asm_insn ("std,ma %%r0,8(%0)", operands); 3159 3160 /* Handle the residual. There could be up to 7 bytes of 3161 residual to copy! */ 3162 if (n_bytes % 16 != 0) 3163 { 3164 operands[2] = GEN_INT (n_bytes % 8); 3165 if (n_bytes % 16 >= 8) 3166 output_asm_insn ("std,ma %%r0,8(%0)", operands); 3167 if (n_bytes % 8 != 0) 3168 output_asm_insn ("stdby,e %%r0,%2(%0)", operands); 3169 } 3170 return ""; 3171 3172 case 4: 3173 /* Pre-adjust the loop counter. */ 3174 operands[2] = GEN_INT (n_bytes - 8); 3175 output_asm_insn ("ldi %2,%1", operands); 3176 3177 /* Loop. */ 3178 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 3179 output_asm_insn ("addib,>= -8,%1,.-4", operands); 3180 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 3181 3182 /* Handle the residual. There could be up to 7 bytes of 3183 residual to copy! */ 3184 if (n_bytes % 8 != 0) 3185 { 3186 operands[2] = GEN_INT (n_bytes % 4); 3187 if (n_bytes % 8 >= 4) 3188 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 3189 if (n_bytes % 4 != 0) 3190 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands); 3191 } 3192 return ""; 3193 3194 case 2: 3195 /* Pre-adjust the loop counter. */ 3196 operands[2] = GEN_INT (n_bytes - 4); 3197 output_asm_insn ("ldi %2,%1", operands); 3198 3199 /* Loop. */ 3200 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3201 output_asm_insn ("addib,>= -4,%1,.-4", operands); 3202 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3203 3204 /* Handle the residual. */ 3205 if (n_bytes % 4 != 0) 3206 { 3207 if (n_bytes % 4 >= 2) 3208 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3209 if (n_bytes % 2 != 0) 3210 output_asm_insn ("stb %%r0,0(%0)", operands); 3211 } 3212 return ""; 3213 3214 case 1: 3215 /* Pre-adjust the loop counter. */ 3216 operands[2] = GEN_INT (n_bytes - 2); 3217 output_asm_insn ("ldi %2,%1", operands); 3218 3219 /* Loop. */ 3220 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); 3221 output_asm_insn ("addib,>= -2,%1,.-4", operands); 3222 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); 3223 3224 /* Handle the residual. */ 3225 if (n_bytes % 2 != 0) 3226 output_asm_insn ("stb %%r0,0(%0)", operands); 3227 3228 return ""; 3229 3230 default: 3231 gcc_unreachable (); 3232 } 3233 } 3234 3235 /* Count the number of insns necessary to handle this block move. 3236 3237 Basic structure is the same as emit_block_move, except that we 3238 count insns rather than emit them. */ 3239 3240 static int 3241 compute_clrmem_length (rtx_insn *insn) 3242 { 3243 rtx pat = PATTERN (insn); 3244 unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0)); 3245 unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0)); 3246 unsigned int n_insns = 0; 3247 3248 /* We can't clear more than a word at a time because the PA 3249 has no longer integer move insns. */ 3250 if (align > (TARGET_64BIT ? 8 : 4)) 3251 align = (TARGET_64BIT ? 8 : 4); 3252 3253 /* The basic loop. */ 3254 n_insns = 4; 3255 3256 /* Residuals. */ 3257 if (n_bytes % (2 * align) != 0) 3258 { 3259 if ((n_bytes % (2 * align)) >= align) 3260 n_insns++; 3261 3262 if ((n_bytes % align) != 0) 3263 n_insns++; 3264 } 3265 3266 /* Lengths are expressed in bytes now; each insn is 4 bytes. */ 3267 return n_insns * 4; 3268 } 3269 3270 3271 const char * 3272 pa_output_and (rtx *operands) 3273 { 3274 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) 3275 { 3276 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3277 int ls0, ls1, ms0, p, len; 3278 3279 for (ls0 = 0; ls0 < 32; ls0++) 3280 if ((mask & (1 << ls0)) == 0) 3281 break; 3282 3283 for (ls1 = ls0; ls1 < 32; ls1++) 3284 if ((mask & (1 << ls1)) != 0) 3285 break; 3286 3287 for (ms0 = ls1; ms0 < 32; ms0++) 3288 if ((mask & (1 << ms0)) == 0) 3289 break; 3290 3291 gcc_assert (ms0 == 32); 3292 3293 if (ls1 == 32) 3294 { 3295 len = ls0; 3296 3297 gcc_assert (len); 3298 3299 operands[2] = GEN_INT (len); 3300 return "{extru|extrw,u} %1,31,%2,%0"; 3301 } 3302 else 3303 { 3304 /* We could use this `depi' for the case above as well, but `depi' 3305 requires one more register file access than an `extru'. */ 3306 3307 p = 31 - ls0; 3308 len = ls1 - ls0; 3309 3310 operands[2] = GEN_INT (p); 3311 operands[3] = GEN_INT (len); 3312 return "{depi|depwi} 0,%2,%3,%0"; 3313 } 3314 } 3315 else 3316 return "and %1,%2,%0"; 3317 } 3318 3319 /* Return a string to perform a bitwise-and of operands[1] with operands[2] 3320 storing the result in operands[0]. */ 3321 const char * 3322 pa_output_64bit_and (rtx *operands) 3323 { 3324 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) 3325 { 3326 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3327 int ls0, ls1, ms0, p, len; 3328 3329 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++) 3330 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0) 3331 break; 3332 3333 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++) 3334 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0) 3335 break; 3336 3337 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++) 3338 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0) 3339 break; 3340 3341 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT); 3342 3343 if (ls1 == HOST_BITS_PER_WIDE_INT) 3344 { 3345 len = ls0; 3346 3347 gcc_assert (len); 3348 3349 operands[2] = GEN_INT (len); 3350 return "extrd,u %1,63,%2,%0"; 3351 } 3352 else 3353 { 3354 /* We could use this `depi' for the case above as well, but `depi' 3355 requires one more register file access than an `extru'. */ 3356 3357 p = 63 - ls0; 3358 len = ls1 - ls0; 3359 3360 operands[2] = GEN_INT (p); 3361 operands[3] = GEN_INT (len); 3362 return "depdi 0,%2,%3,%0"; 3363 } 3364 } 3365 else 3366 return "and %1,%2,%0"; 3367 } 3368 3369 const char * 3370 pa_output_ior (rtx *operands) 3371 { 3372 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3373 int bs0, bs1, p, len; 3374 3375 if (INTVAL (operands[2]) == 0) 3376 return "copy %1,%0"; 3377 3378 for (bs0 = 0; bs0 < 32; bs0++) 3379 if ((mask & (1 << bs0)) != 0) 3380 break; 3381 3382 for (bs1 = bs0; bs1 < 32; bs1++) 3383 if ((mask & (1 << bs1)) == 0) 3384 break; 3385 3386 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); 3387 3388 p = 31 - bs0; 3389 len = bs1 - bs0; 3390 3391 operands[2] = GEN_INT (p); 3392 operands[3] = GEN_INT (len); 3393 return "{depi|depwi} -1,%2,%3,%0"; 3394 } 3395 3396 /* Return a string to perform a bitwise-and of operands[1] with operands[2] 3397 storing the result in operands[0]. */ 3398 const char * 3399 pa_output_64bit_ior (rtx *operands) 3400 { 3401 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3402 int bs0, bs1, p, len; 3403 3404 if (INTVAL (operands[2]) == 0) 3405 return "copy %1,%0"; 3406 3407 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++) 3408 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0) 3409 break; 3410 3411 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++) 3412 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0) 3413 break; 3414 3415 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT 3416 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); 3417 3418 p = 63 - bs0; 3419 len = bs1 - bs0; 3420 3421 operands[2] = GEN_INT (p); 3422 operands[3] = GEN_INT (len); 3423 return "depdi -1,%2,%3,%0"; 3424 } 3425 3426 /* Target hook for assembling integer objects. This code handles 3427 aligned SI and DI integers specially since function references 3428 must be preceded by P%. */ 3429 3430 static bool 3431 pa_assemble_integer (rtx x, unsigned int size, int aligned_p) 3432 { 3433 bool result; 3434 tree decl = NULL; 3435 3436 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call 3437 call assemble_external and set the SYMBOL_REF_DECL to NULL before 3438 calling output_addr_const. Otherwise, it may call assemble_external 3439 in the midst of outputing the assembler code for the SYMBOL_REF. 3440 We restore the SYMBOL_REF_DECL after the output is done. */ 3441 if (GET_CODE (x) == SYMBOL_REF) 3442 { 3443 decl = SYMBOL_REF_DECL (x); 3444 if (decl) 3445 { 3446 assemble_external (decl); 3447 SET_SYMBOL_REF_DECL (x, NULL); 3448 } 3449 } 3450 3451 if (size == UNITS_PER_WORD 3452 && aligned_p 3453 && function_label_operand (x, VOIDmode)) 3454 { 3455 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file); 3456 3457 /* We don't want an OPD when generating fast indirect calls. */ 3458 if (!TARGET_FAST_INDIRECT_CALLS) 3459 fputs ("P%", asm_out_file); 3460 3461 output_addr_const (asm_out_file, x); 3462 fputc ('\n', asm_out_file); 3463 result = true; 3464 } 3465 else 3466 result = default_assemble_integer (x, size, aligned_p); 3467 3468 if (decl) 3469 SET_SYMBOL_REF_DECL (x, decl); 3470 3471 return result; 3472 } 3473 3474 /* Output an ascii string. */ 3475 void 3476 pa_output_ascii (FILE *file, const char *p, int size) 3477 { 3478 int i; 3479 int chars_output; 3480 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */ 3481 3482 /* The HP assembler can only take strings of 256 characters at one 3483 time. This is a limitation on input line length, *not* the 3484 length of the string. Sigh. Even worse, it seems that the 3485 restriction is in number of input characters (see \xnn & 3486 \whatever). So we have to do this very carefully. */ 3487 3488 fputs ("\t.STRING \"", file); 3489 3490 chars_output = 0; 3491 for (i = 0; i < size; i += 4) 3492 { 3493 int co = 0; 3494 int io = 0; 3495 for (io = 0, co = 0; io < MIN (4, size - i); io++) 3496 { 3497 register unsigned int c = (unsigned char) p[i + io]; 3498 3499 if (c == '\"' || c == '\\') 3500 partial_output[co++] = '\\'; 3501 if (c >= ' ' && c < 0177) 3502 partial_output[co++] = c; 3503 else 3504 { 3505 unsigned int hexd; 3506 partial_output[co++] = '\\'; 3507 partial_output[co++] = 'x'; 3508 hexd = c / 16 - 0 + '0'; 3509 if (hexd > '9') 3510 hexd -= '9' - 'a' + 1; 3511 partial_output[co++] = hexd; 3512 hexd = c % 16 - 0 + '0'; 3513 if (hexd > '9') 3514 hexd -= '9' - 'a' + 1; 3515 partial_output[co++] = hexd; 3516 } 3517 } 3518 if (chars_output + co > 243) 3519 { 3520 fputs ("\"\n\t.STRING \"", file); 3521 chars_output = 0; 3522 } 3523 fwrite (partial_output, 1, (size_t) co, file); 3524 chars_output += co; 3525 co = 0; 3526 } 3527 fputs ("\"\n", file); 3528 } 3529 3530 /* Try to rewrite floating point comparisons & branches to avoid 3531 useless add,tr insns. 3532 3533 CHECK_NOTES is nonzero if we should examine REG_DEAD notes 3534 to see if FPCC is dead. CHECK_NOTES is nonzero for the 3535 first attempt to remove useless add,tr insns. It is zero 3536 for the second pass as reorg sometimes leaves bogus REG_DEAD 3537 notes lying around. 3538 3539 When CHECK_NOTES is zero we can only eliminate add,tr insns 3540 when there's a 1:1 correspondence between fcmp and ftest/fbranch 3541 instructions. */ 3542 static void 3543 remove_useless_addtr_insns (int check_notes) 3544 { 3545 rtx_insn *insn; 3546 static int pass = 0; 3547 3548 /* This is fairly cheap, so always run it when optimizing. */ 3549 if (optimize > 0) 3550 { 3551 int fcmp_count = 0; 3552 int fbranch_count = 0; 3553 3554 /* Walk all the insns in this function looking for fcmp & fbranch 3555 instructions. Keep track of how many of each we find. */ 3556 for (insn = get_insns (); insn; insn = next_insn (insn)) 3557 { 3558 rtx tmp; 3559 3560 /* Ignore anything that isn't an INSN or a JUMP_INSN. */ 3561 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn)) 3562 continue; 3563 3564 tmp = PATTERN (insn); 3565 3566 /* It must be a set. */ 3567 if (GET_CODE (tmp) != SET) 3568 continue; 3569 3570 /* If the destination is CCFP, then we've found an fcmp insn. */ 3571 tmp = SET_DEST (tmp); 3572 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0) 3573 { 3574 fcmp_count++; 3575 continue; 3576 } 3577 3578 tmp = PATTERN (insn); 3579 /* If this is an fbranch instruction, bump the fbranch counter. */ 3580 if (GET_CODE (tmp) == SET 3581 && SET_DEST (tmp) == pc_rtx 3582 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE 3583 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE 3584 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG 3585 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0) 3586 { 3587 fbranch_count++; 3588 continue; 3589 } 3590 } 3591 3592 3593 /* Find all floating point compare + branch insns. If possible, 3594 reverse the comparison & the branch to avoid add,tr insns. */ 3595 for (insn = get_insns (); insn; insn = next_insn (insn)) 3596 { 3597 rtx tmp; 3598 rtx_insn *next; 3599 3600 /* Ignore anything that isn't an INSN. */ 3601 if (! NONJUMP_INSN_P (insn)) 3602 continue; 3603 3604 tmp = PATTERN (insn); 3605 3606 /* It must be a set. */ 3607 if (GET_CODE (tmp) != SET) 3608 continue; 3609 3610 /* The destination must be CCFP, which is register zero. */ 3611 tmp = SET_DEST (tmp); 3612 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0) 3613 continue; 3614 3615 /* INSN should be a set of CCFP. 3616 3617 See if the result of this insn is used in a reversed FP 3618 conditional branch. If so, reverse our condition and 3619 the branch. Doing so avoids useless add,tr insns. */ 3620 next = next_insn (insn); 3621 while (next) 3622 { 3623 /* Jumps, calls and labels stop our search. */ 3624 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next)) 3625 break; 3626 3627 /* As does another fcmp insn. */ 3628 if (NONJUMP_INSN_P (next) 3629 && GET_CODE (PATTERN (next)) == SET 3630 && GET_CODE (SET_DEST (PATTERN (next))) == REG 3631 && REGNO (SET_DEST (PATTERN (next))) == 0) 3632 break; 3633 3634 next = next_insn (next); 3635 } 3636 3637 /* Is NEXT_INSN a branch? */ 3638 if (next && JUMP_P (next)) 3639 { 3640 rtx pattern = PATTERN (next); 3641 3642 /* If it a reversed fp conditional branch (e.g. uses add,tr) 3643 and CCFP dies, then reverse our conditional and the branch 3644 to avoid the add,tr. */ 3645 if (GET_CODE (pattern) == SET 3646 && SET_DEST (pattern) == pc_rtx 3647 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE 3648 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE 3649 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG 3650 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0 3651 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC 3652 && (fcmp_count == fbranch_count 3653 || (check_notes 3654 && find_regno_note (next, REG_DEAD, 0)))) 3655 { 3656 /* Reverse the branch. */ 3657 tmp = XEXP (SET_SRC (pattern), 1); 3658 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2); 3659 XEXP (SET_SRC (pattern), 2) = tmp; 3660 INSN_CODE (next) = -1; 3661 3662 /* Reverse our condition. */ 3663 tmp = PATTERN (insn); 3664 PUT_CODE (XEXP (tmp, 1), 3665 (reverse_condition_maybe_unordered 3666 (GET_CODE (XEXP (tmp, 1))))); 3667 } 3668 } 3669 } 3670 } 3671 3672 pass = !pass; 3673 3674 } 3675 3676 /* You may have trouble believing this, but this is the 32 bit HP-PA 3677 stack layout. Wow. 3678 3679 Offset Contents 3680 3681 Variable arguments (optional; any number may be allocated) 3682 3683 SP-(4*(N+9)) arg word N 3684 : : 3685 SP-56 arg word 5 3686 SP-52 arg word 4 3687 3688 Fixed arguments (must be allocated; may remain unused) 3689 3690 SP-48 arg word 3 3691 SP-44 arg word 2 3692 SP-40 arg word 1 3693 SP-36 arg word 0 3694 3695 Frame Marker 3696 3697 SP-32 External Data Pointer (DP) 3698 SP-28 External sr4 3699 SP-24 External/stub RP (RP') 3700 SP-20 Current RP 3701 SP-16 Static Link 3702 SP-12 Clean up 3703 SP-8 Calling Stub RP (RP'') 3704 SP-4 Previous SP 3705 3706 Top of Frame 3707 3708 SP-0 Stack Pointer (points to next available address) 3709 3710 */ 3711 3712 /* This function saves registers as follows. Registers marked with ' are 3713 this function's registers (as opposed to the previous function's). 3714 If a frame_pointer isn't needed, r4 is saved as a general register; 3715 the space for the frame pointer is still allocated, though, to keep 3716 things simple. 3717 3718 3719 Top of Frame 3720 3721 SP (FP') Previous FP 3722 SP + 4 Alignment filler (sigh) 3723 SP + 8 Space for locals reserved here. 3724 . 3725 . 3726 . 3727 SP + n All call saved register used. 3728 . 3729 . 3730 . 3731 SP + o All call saved fp registers used. 3732 . 3733 . 3734 . 3735 SP + p (SP') points to next available address. 3736 3737 */ 3738 3739 /* Global variables set by output_function_prologue(). */ 3740 /* Size of frame. Need to know this to emit return insns from 3741 leaf procedures. */ 3742 static HOST_WIDE_INT actual_fsize, local_fsize; 3743 static int save_fregs; 3744 3745 /* Emit RTL to store REG at the memory location specified by BASE+DISP. 3746 Handle case where DISP > 8k by using the add_high_const patterns. 3747 3748 Note in DISP > 8k case, we will leave the high part of the address 3749 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ 3750 3751 static void 3752 store_reg (int reg, HOST_WIDE_INT disp, int base) 3753 { 3754 rtx dest, src, basereg; 3755 rtx_insn *insn; 3756 3757 src = gen_rtx_REG (word_mode, reg); 3758 basereg = gen_rtx_REG (Pmode, base); 3759 if (VAL_14_BITS_P (disp)) 3760 { 3761 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp)); 3762 insn = emit_move_insn (dest, src); 3763 } 3764 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 3765 { 3766 rtx delta = GEN_INT (disp); 3767 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3768 3769 emit_move_insn (tmpreg, delta); 3770 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 3771 if (DO_FRAME_NOTES) 3772 { 3773 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3774 gen_rtx_SET (tmpreg, 3775 gen_rtx_PLUS (Pmode, basereg, delta))); 3776 RTX_FRAME_RELATED_P (insn) = 1; 3777 } 3778 dest = gen_rtx_MEM (word_mode, tmpreg); 3779 insn = emit_move_insn (dest, src); 3780 } 3781 else 3782 { 3783 rtx delta = GEN_INT (disp); 3784 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); 3785 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3786 3787 emit_move_insn (tmpreg, high); 3788 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 3789 insn = emit_move_insn (dest, src); 3790 if (DO_FRAME_NOTES) 3791 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3792 gen_rtx_SET (gen_rtx_MEM (word_mode, 3793 gen_rtx_PLUS (word_mode, 3794 basereg, 3795 delta)), 3796 src)); 3797 } 3798 3799 if (DO_FRAME_NOTES) 3800 RTX_FRAME_RELATED_P (insn) = 1; 3801 } 3802 3803 /* Emit RTL to store REG at the memory location specified by BASE and then 3804 add MOD to BASE. MOD must be <= 8k. */ 3805 3806 static void 3807 store_reg_modify (int base, int reg, HOST_WIDE_INT mod) 3808 { 3809 rtx basereg, srcreg, delta; 3810 rtx_insn *insn; 3811 3812 gcc_assert (VAL_14_BITS_P (mod)); 3813 3814 basereg = gen_rtx_REG (Pmode, base); 3815 srcreg = gen_rtx_REG (word_mode, reg); 3816 delta = GEN_INT (mod); 3817 3818 insn = emit_insn (gen_post_store (basereg, srcreg, delta)); 3819 if (DO_FRAME_NOTES) 3820 { 3821 RTX_FRAME_RELATED_P (insn) = 1; 3822 3823 /* RTX_FRAME_RELATED_P must be set on each frame related set 3824 in a parallel with more than one element. */ 3825 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1; 3826 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; 3827 } 3828 } 3829 3830 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case 3831 where DISP > 8k by using the add_high_const patterns. NOTE indicates 3832 whether to add a frame note or not. 3833 3834 In the DISP > 8k case, we leave the high part of the address in %r1. 3835 There is code in expand_hppa_{prologue,epilogue} that knows about this. */ 3836 3837 static void 3838 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note) 3839 { 3840 rtx_insn *insn; 3841 3842 if (VAL_14_BITS_P (disp)) 3843 { 3844 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3845 plus_constant (Pmode, 3846 gen_rtx_REG (Pmode, base), disp)); 3847 } 3848 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 3849 { 3850 rtx basereg = gen_rtx_REG (Pmode, base); 3851 rtx delta = GEN_INT (disp); 3852 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3853 3854 emit_move_insn (tmpreg, delta); 3855 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3856 gen_rtx_PLUS (Pmode, tmpreg, basereg)); 3857 if (DO_FRAME_NOTES) 3858 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3859 gen_rtx_SET (tmpreg, 3860 gen_rtx_PLUS (Pmode, basereg, delta))); 3861 } 3862 else 3863 { 3864 rtx basereg = gen_rtx_REG (Pmode, base); 3865 rtx delta = GEN_INT (disp); 3866 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3867 3868 emit_move_insn (tmpreg, 3869 gen_rtx_PLUS (Pmode, basereg, 3870 gen_rtx_HIGH (Pmode, delta))); 3871 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3872 gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 3873 } 3874 3875 if (DO_FRAME_NOTES && note) 3876 RTX_FRAME_RELATED_P (insn) = 1; 3877 } 3878 3879 HOST_WIDE_INT 3880 pa_compute_frame_size (poly_int64 size, int *fregs_live) 3881 { 3882 int freg_saved = 0; 3883 int i, j; 3884 3885 /* The code in pa_expand_prologue and pa_expand_epilogue must 3886 be consistent with the rounding and size calculation done here. 3887 Change them at the same time. */ 3888 3889 /* We do our own stack alignment. First, round the size of the 3890 stack locals up to a word boundary. */ 3891 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); 3892 3893 /* Space for previous frame pointer + filler. If any frame is 3894 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We 3895 waste some space here for the sake of HP compatibility. The 3896 first slot is only used when the frame pointer is needed. */ 3897 if (size || frame_pointer_needed) 3898 size += pa_starting_frame_offset (); 3899 3900 /* If the current function calls __builtin_eh_return, then we need 3901 to allocate stack space for registers that will hold data for 3902 the exception handler. */ 3903 if (DO_FRAME_NOTES && crtl->calls_eh_return) 3904 { 3905 unsigned int i; 3906 3907 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i) 3908 continue; 3909 size += i * UNITS_PER_WORD; 3910 } 3911 3912 /* Account for space used by the callee general register saves. */ 3913 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--) 3914 if (df_regs_ever_live_p (i)) 3915 size += UNITS_PER_WORD; 3916 3917 /* Account for space used by the callee floating point register saves. */ 3918 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 3919 if (df_regs_ever_live_p (i) 3920 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1))) 3921 { 3922 freg_saved = 1; 3923 3924 /* We always save both halves of the FP register, so always 3925 increment the frame size by 8 bytes. */ 3926 size += 8; 3927 } 3928 3929 /* If any of the floating registers are saved, account for the 3930 alignment needed for the floating point register save block. */ 3931 if (freg_saved) 3932 { 3933 size = (size + 7) & ~7; 3934 if (fregs_live) 3935 *fregs_live = 1; 3936 } 3937 3938 /* The various ABIs include space for the outgoing parameters in the 3939 size of the current function's stack frame. We don't need to align 3940 for the outgoing arguments as their alignment is set by the final 3941 rounding for the frame as a whole. */ 3942 size += crtl->outgoing_args_size; 3943 3944 /* Allocate space for the fixed frame marker. This space must be 3945 allocated for any function that makes calls or allocates 3946 stack space. */ 3947 if (!crtl->is_leaf || size) 3948 size += TARGET_64BIT ? 48 : 32; 3949 3950 /* Finally, round to the preferred stack boundary. */ 3951 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1) 3952 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)); 3953 } 3954 3955 /* Output function label, and associated .PROC and .CALLINFO statements. */ 3956 3957 void 3958 pa_output_function_label (FILE *file) 3959 { 3960 /* The function's label and associated .PROC must never be 3961 separated and must be output *after* any profiling declarations 3962 to avoid changing spaces/subspaces within a procedure. */ 3963 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); 3964 fputs ("\t.PROC\n", file); 3965 3966 /* pa_expand_prologue does the dirty work now. We just need 3967 to output the assembler directives which denote the start 3968 of a function. */ 3969 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize); 3970 if (crtl->is_leaf) 3971 fputs (",NO_CALLS", file); 3972 else 3973 fputs (",CALLS", file); 3974 if (rp_saved) 3975 fputs (",SAVE_RP", file); 3976 3977 /* The SAVE_SP flag is used to indicate that register %r3 is stored 3978 at the beginning of the frame and that it is used as the frame 3979 pointer for the frame. We do this because our current frame 3980 layout doesn't conform to that specified in the HP runtime 3981 documentation and we need a way to indicate to programs such as 3982 GDB where %r3 is saved. The SAVE_SP flag was chosen because it 3983 isn't used by HP compilers but is supported by the assembler. 3984 However, SAVE_SP is supposed to indicate that the previous stack 3985 pointer has been saved in the frame marker. */ 3986 if (frame_pointer_needed) 3987 fputs (",SAVE_SP", file); 3988 3989 /* Pass on information about the number of callee register saves 3990 performed in the prologue. 3991 3992 The compiler is supposed to pass the highest register number 3993 saved, the assembler then has to adjust that number before 3994 entering it into the unwind descriptor (to account for any 3995 caller saved registers with lower register numbers than the 3996 first callee saved register). */ 3997 if (gr_saved) 3998 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2); 3999 4000 if (fr_saved) 4001 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11); 4002 4003 fputs ("\n\t.ENTRY\n", file); 4004 } 4005 4006 /* Output function prologue. */ 4007 4008 static void 4009 pa_output_function_prologue (FILE *file) 4010 { 4011 pa_output_function_label (file); 4012 remove_useless_addtr_insns (0); 4013 } 4014 4015 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */ 4016 4017 static void 4018 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED) 4019 { 4020 remove_useless_addtr_insns (0); 4021 } 4022 4023 void 4024 pa_expand_prologue (void) 4025 { 4026 int merge_sp_adjust_with_store = 0; 4027 HOST_WIDE_INT size = get_frame_size (); 4028 HOST_WIDE_INT offset; 4029 int i; 4030 rtx tmpreg; 4031 rtx_insn *insn; 4032 4033 gr_saved = 0; 4034 fr_saved = 0; 4035 save_fregs = 0; 4036 4037 /* Compute total size for frame pointer, filler, locals and rounding to 4038 the next word boundary. Similar code appears in pa_compute_frame_size 4039 and must be changed in tandem with this code. */ 4040 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); 4041 if (local_fsize || frame_pointer_needed) 4042 local_fsize += pa_starting_frame_offset (); 4043 4044 actual_fsize = pa_compute_frame_size (size, &save_fregs); 4045 if (flag_stack_usage_info) 4046 current_function_static_stack_size = actual_fsize; 4047 4048 /* Compute a few things we will use often. */ 4049 tmpreg = gen_rtx_REG (word_mode, 1); 4050 4051 /* Save RP first. The calling conventions manual states RP will 4052 always be stored into the caller's frame at sp - 20 or sp - 16 4053 depending on which ABI is in use. */ 4054 if (df_regs_ever_live_p (2) || crtl->calls_eh_return) 4055 { 4056 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM); 4057 rp_saved = true; 4058 } 4059 else 4060 rp_saved = false; 4061 4062 /* Allocate the local frame and set up the frame pointer if needed. */ 4063 if (actual_fsize != 0) 4064 { 4065 if (frame_pointer_needed) 4066 { 4067 /* Copy the old frame pointer temporarily into %r1. Set up the 4068 new stack pointer, then store away the saved old frame pointer 4069 into the stack at sp and at the same time update the stack 4070 pointer by actual_fsize bytes. Two versions, first 4071 handles small (<8k) frames. The second handles large (>=8k) 4072 frames. */ 4073 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx); 4074 if (DO_FRAME_NOTES) 4075 RTX_FRAME_RELATED_P (insn) = 1; 4076 4077 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 4078 if (DO_FRAME_NOTES) 4079 RTX_FRAME_RELATED_P (insn) = 1; 4080 4081 if (VAL_14_BITS_P (actual_fsize)) 4082 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize); 4083 else 4084 { 4085 /* It is incorrect to store the saved frame pointer at *sp, 4086 then increment sp (writes beyond the current stack boundary). 4087 4088 So instead use stwm to store at *sp and post-increment the 4089 stack pointer as an atomic operation. Then increment sp to 4090 finish allocating the new frame. */ 4091 HOST_WIDE_INT adjust1 = 8192 - 64; 4092 HOST_WIDE_INT adjust2 = actual_fsize - adjust1; 4093 4094 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1); 4095 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4096 adjust2, 1); 4097 } 4098 4099 /* We set SAVE_SP in frames that need a frame pointer. Thus, 4100 we need to store the previous stack pointer (frame pointer) 4101 into the frame marker on targets that use the HP unwind 4102 library. This allows the HP unwind library to be used to 4103 unwind GCC frames. However, we are not fully compatible 4104 with the HP library because our frame layout differs from 4105 that specified in the HP runtime specification. 4106 4107 We don't want a frame note on this instruction as the frame 4108 marker moves during dynamic stack allocation. 4109 4110 This instruction also serves as a blockage to prevent 4111 register spills from being scheduled before the stack 4112 pointer is raised. This is necessary as we store 4113 registers using the frame pointer as a base register, 4114 and the frame pointer is set before sp is raised. */ 4115 if (TARGET_HPUX_UNWIND_LIBRARY) 4116 { 4117 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, 4118 GEN_INT (TARGET_64BIT ? -8 : -4)); 4119 4120 emit_move_insn (gen_rtx_MEM (word_mode, addr), 4121 hard_frame_pointer_rtx); 4122 } 4123 else 4124 emit_insn (gen_blockage ()); 4125 } 4126 /* no frame pointer needed. */ 4127 else 4128 { 4129 /* In some cases we can perform the first callee register save 4130 and allocating the stack frame at the same time. If so, just 4131 make a note of it and defer allocating the frame until saving 4132 the callee registers. */ 4133 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0) 4134 merge_sp_adjust_with_store = 1; 4135 /* Cannot optimize. Adjust the stack frame by actual_fsize 4136 bytes. */ 4137 else 4138 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4139 actual_fsize, 1); 4140 } 4141 } 4142 4143 /* Normal register save. 4144 4145 Do not save the frame pointer in the frame_pointer_needed case. It 4146 was done earlier. */ 4147 if (frame_pointer_needed) 4148 { 4149 offset = local_fsize; 4150 4151 /* Saving the EH return data registers in the frame is the simplest 4152 way to get the frame unwind information emitted. We put them 4153 just before the general registers. */ 4154 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4155 { 4156 unsigned int i, regno; 4157 4158 for (i = 0; ; ++i) 4159 { 4160 regno = EH_RETURN_DATA_REGNO (i); 4161 if (regno == INVALID_REGNUM) 4162 break; 4163 4164 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); 4165 offset += UNITS_PER_WORD; 4166 } 4167 } 4168 4169 for (i = 18; i >= 4; i--) 4170 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i)) 4171 { 4172 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM); 4173 offset += UNITS_PER_WORD; 4174 gr_saved++; 4175 } 4176 /* Account for %r3 which is saved in a special place. */ 4177 gr_saved++; 4178 } 4179 /* No frame pointer needed. */ 4180 else 4181 { 4182 offset = local_fsize - actual_fsize; 4183 4184 /* Saving the EH return data registers in the frame is the simplest 4185 way to get the frame unwind information emitted. */ 4186 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4187 { 4188 unsigned int i, regno; 4189 4190 for (i = 0; ; ++i) 4191 { 4192 regno = EH_RETURN_DATA_REGNO (i); 4193 if (regno == INVALID_REGNUM) 4194 break; 4195 4196 /* If merge_sp_adjust_with_store is nonzero, then we can 4197 optimize the first save. */ 4198 if (merge_sp_adjust_with_store) 4199 { 4200 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset); 4201 merge_sp_adjust_with_store = 0; 4202 } 4203 else 4204 store_reg (regno, offset, STACK_POINTER_REGNUM); 4205 offset += UNITS_PER_WORD; 4206 } 4207 } 4208 4209 for (i = 18; i >= 3; i--) 4210 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i)) 4211 { 4212 /* If merge_sp_adjust_with_store is nonzero, then we can 4213 optimize the first GR save. */ 4214 if (merge_sp_adjust_with_store) 4215 { 4216 store_reg_modify (STACK_POINTER_REGNUM, i, -offset); 4217 merge_sp_adjust_with_store = 0; 4218 } 4219 else 4220 store_reg (i, offset, STACK_POINTER_REGNUM); 4221 offset += UNITS_PER_WORD; 4222 gr_saved++; 4223 } 4224 4225 /* If we wanted to merge the SP adjustment with a GR save, but we never 4226 did any GR saves, then just emit the adjustment here. */ 4227 if (merge_sp_adjust_with_store) 4228 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4229 actual_fsize, 1); 4230 } 4231 4232 /* The hppa calling conventions say that %r19, the pic offset 4233 register, is saved at sp - 32 (in this function's frame) 4234 when generating PIC code. FIXME: What is the correct thing 4235 to do for functions which make no calls and allocate no 4236 frame? Do we need to allocate a frame, or can we just omit 4237 the save? For now we'll just omit the save. 4238 4239 We don't want a note on this insn as the frame marker can 4240 move if there is a dynamic stack allocation. */ 4241 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT) 4242 { 4243 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32)); 4244 4245 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx); 4246 4247 } 4248 4249 /* Align pointer properly (doubleword boundary). */ 4250 offset = (offset + 7) & ~7; 4251 4252 /* Floating point register store. */ 4253 if (save_fregs) 4254 { 4255 rtx base; 4256 4257 /* First get the frame or stack pointer to the start of the FP register 4258 save area. */ 4259 if (frame_pointer_needed) 4260 { 4261 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); 4262 base = hard_frame_pointer_rtx; 4263 } 4264 else 4265 { 4266 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); 4267 base = stack_pointer_rtx; 4268 } 4269 4270 /* Now actually save the FP registers. */ 4271 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 4272 { 4273 if (df_regs_ever_live_p (i) 4274 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1))) 4275 { 4276 rtx addr, reg; 4277 rtx_insn *insn; 4278 addr = gen_rtx_MEM (DFmode, 4279 gen_rtx_POST_INC (word_mode, tmpreg)); 4280 reg = gen_rtx_REG (DFmode, i); 4281 insn = emit_move_insn (addr, reg); 4282 if (DO_FRAME_NOTES) 4283 { 4284 RTX_FRAME_RELATED_P (insn) = 1; 4285 if (TARGET_64BIT) 4286 { 4287 rtx mem = gen_rtx_MEM (DFmode, 4288 plus_constant (Pmode, base, 4289 offset)); 4290 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 4291 gen_rtx_SET (mem, reg)); 4292 } 4293 else 4294 { 4295 rtx meml = gen_rtx_MEM (SFmode, 4296 plus_constant (Pmode, base, 4297 offset)); 4298 rtx memr = gen_rtx_MEM (SFmode, 4299 plus_constant (Pmode, base, 4300 offset + 4)); 4301 rtx regl = gen_rtx_REG (SFmode, i); 4302 rtx regr = gen_rtx_REG (SFmode, i + 1); 4303 rtx setl = gen_rtx_SET (meml, regl); 4304 rtx setr = gen_rtx_SET (memr, regr); 4305 rtvec vec; 4306 4307 RTX_FRAME_RELATED_P (setl) = 1; 4308 RTX_FRAME_RELATED_P (setr) = 1; 4309 vec = gen_rtvec (2, setl, setr); 4310 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 4311 gen_rtx_SEQUENCE (VOIDmode, vec)); 4312 } 4313 } 4314 offset += GET_MODE_SIZE (DFmode); 4315 fr_saved++; 4316 } 4317 } 4318 } 4319 } 4320 4321 /* Emit RTL to load REG from the memory location specified by BASE+DISP. 4322 Handle case where DISP > 8k by using the add_high_const patterns. */ 4323 4324 static void 4325 load_reg (int reg, HOST_WIDE_INT disp, int base) 4326 { 4327 rtx dest = gen_rtx_REG (word_mode, reg); 4328 rtx basereg = gen_rtx_REG (Pmode, base); 4329 rtx src; 4330 4331 if (VAL_14_BITS_P (disp)) 4332 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp)); 4333 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 4334 { 4335 rtx delta = GEN_INT (disp); 4336 rtx tmpreg = gen_rtx_REG (Pmode, 1); 4337 4338 emit_move_insn (tmpreg, delta); 4339 if (TARGET_DISABLE_INDEXING) 4340 { 4341 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 4342 src = gen_rtx_MEM (word_mode, tmpreg); 4343 } 4344 else 4345 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 4346 } 4347 else 4348 { 4349 rtx delta = GEN_INT (disp); 4350 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); 4351 rtx tmpreg = gen_rtx_REG (Pmode, 1); 4352 4353 emit_move_insn (tmpreg, high); 4354 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 4355 } 4356 4357 emit_move_insn (dest, src); 4358 } 4359 4360 /* Update the total code bytes output to the text section. */ 4361 4362 static void 4363 update_total_code_bytes (unsigned int nbytes) 4364 { 4365 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM) 4366 && !IN_NAMED_SECTION_P (cfun->decl)) 4367 { 4368 unsigned int old_total = total_code_bytes; 4369 4370 total_code_bytes += nbytes; 4371 4372 /* Be prepared to handle overflows. */ 4373 if (old_total > total_code_bytes) 4374 total_code_bytes = UINT_MAX; 4375 } 4376 } 4377 4378 /* This function generates the assembly code for function exit. 4379 Args are as for output_function_prologue (). 4380 4381 The function epilogue should not depend on the current stack 4382 pointer! It should use the frame pointer only. This is mandatory 4383 because of alloca; we also take advantage of it to omit stack 4384 adjustments before returning. */ 4385 4386 static void 4387 pa_output_function_epilogue (FILE *file) 4388 { 4389 rtx_insn *insn = get_last_insn (); 4390 bool extra_nop; 4391 4392 /* pa_expand_epilogue does the dirty work now. We just need 4393 to output the assembler directives which denote the end 4394 of a function. 4395 4396 To make debuggers happy, emit a nop if the epilogue was completely 4397 eliminated due to a volatile call as the last insn in the 4398 current function. That way the return address (in %r2) will 4399 always point to a valid instruction in the current function. */ 4400 4401 /* Get the last real insn. */ 4402 if (NOTE_P (insn)) 4403 insn = prev_real_insn (insn); 4404 4405 /* If it is a sequence, then look inside. */ 4406 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE) 4407 insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0); 4408 4409 /* If insn is a CALL_INSN, then it must be a call to a volatile 4410 function (otherwise there would be epilogue insns). */ 4411 if (insn && CALL_P (insn)) 4412 { 4413 fputs ("\tnop\n", file); 4414 extra_nop = true; 4415 } 4416 else 4417 extra_nop = false; 4418 4419 fputs ("\t.EXIT\n\t.PROCEND\n", file); 4420 4421 if (TARGET_SOM && TARGET_GAS) 4422 { 4423 /* We are done with this subspace except possibly for some additional 4424 debug information. Forget that we are in this subspace to ensure 4425 that the next function is output in its own subspace. */ 4426 in_section = NULL; 4427 cfun->machine->in_nsubspa = 2; 4428 } 4429 4430 /* Thunks do their own insn accounting. */ 4431 if (cfun->is_thunk) 4432 return; 4433 4434 if (INSN_ADDRESSES_SET_P ()) 4435 { 4436 last_address = extra_nop ? 4 : 0; 4437 insn = get_last_nonnote_insn (); 4438 if (insn) 4439 { 4440 last_address += INSN_ADDRESSES (INSN_UID (insn)); 4441 if (INSN_P (insn)) 4442 last_address += insn_default_length (insn); 4443 } 4444 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) 4445 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); 4446 } 4447 else 4448 last_address = UINT_MAX; 4449 4450 /* Finally, update the total number of code bytes output so far. */ 4451 update_total_code_bytes (last_address); 4452 } 4453 4454 void 4455 pa_expand_epilogue (void) 4456 { 4457 rtx tmpreg; 4458 HOST_WIDE_INT offset; 4459 HOST_WIDE_INT ret_off = 0; 4460 int i; 4461 int merge_sp_adjust_with_load = 0; 4462 4463 /* We will use this often. */ 4464 tmpreg = gen_rtx_REG (word_mode, 1); 4465 4466 /* Try to restore RP early to avoid load/use interlocks when 4467 RP gets used in the return (bv) instruction. This appears to still 4468 be necessary even when we schedule the prologue and epilogue. */ 4469 if (rp_saved) 4470 { 4471 ret_off = TARGET_64BIT ? -16 : -20; 4472 if (frame_pointer_needed) 4473 { 4474 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM); 4475 ret_off = 0; 4476 } 4477 else 4478 { 4479 /* No frame pointer, and stack is smaller than 8k. */ 4480 if (VAL_14_BITS_P (ret_off - actual_fsize)) 4481 { 4482 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM); 4483 ret_off = 0; 4484 } 4485 } 4486 } 4487 4488 /* General register restores. */ 4489 if (frame_pointer_needed) 4490 { 4491 offset = local_fsize; 4492 4493 /* If the current function calls __builtin_eh_return, then we need 4494 to restore the saved EH data registers. */ 4495 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4496 { 4497 unsigned int i, regno; 4498 4499 for (i = 0; ; ++i) 4500 { 4501 regno = EH_RETURN_DATA_REGNO (i); 4502 if (regno == INVALID_REGNUM) 4503 break; 4504 4505 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); 4506 offset += UNITS_PER_WORD; 4507 } 4508 } 4509 4510 for (i = 18; i >= 4; i--) 4511 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i)) 4512 { 4513 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM); 4514 offset += UNITS_PER_WORD; 4515 } 4516 } 4517 else 4518 { 4519 offset = local_fsize - actual_fsize; 4520 4521 /* If the current function calls __builtin_eh_return, then we need 4522 to restore the saved EH data registers. */ 4523 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4524 { 4525 unsigned int i, regno; 4526 4527 for (i = 0; ; ++i) 4528 { 4529 regno = EH_RETURN_DATA_REGNO (i); 4530 if (regno == INVALID_REGNUM) 4531 break; 4532 4533 /* Only for the first load. 4534 merge_sp_adjust_with_load holds the register load 4535 with which we will merge the sp adjustment. */ 4536 if (merge_sp_adjust_with_load == 0 4537 && local_fsize == 0 4538 && VAL_14_BITS_P (-actual_fsize)) 4539 merge_sp_adjust_with_load = regno; 4540 else 4541 load_reg (regno, offset, STACK_POINTER_REGNUM); 4542 offset += UNITS_PER_WORD; 4543 } 4544 } 4545 4546 for (i = 18; i >= 3; i--) 4547 { 4548 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i)) 4549 { 4550 /* Only for the first load. 4551 merge_sp_adjust_with_load holds the register load 4552 with which we will merge the sp adjustment. */ 4553 if (merge_sp_adjust_with_load == 0 4554 && local_fsize == 0 4555 && VAL_14_BITS_P (-actual_fsize)) 4556 merge_sp_adjust_with_load = i; 4557 else 4558 load_reg (i, offset, STACK_POINTER_REGNUM); 4559 offset += UNITS_PER_WORD; 4560 } 4561 } 4562 } 4563 4564 /* Align pointer properly (doubleword boundary). */ 4565 offset = (offset + 7) & ~7; 4566 4567 /* FP register restores. */ 4568 if (save_fregs) 4569 { 4570 /* Adjust the register to index off of. */ 4571 if (frame_pointer_needed) 4572 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); 4573 else 4574 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); 4575 4576 /* Actually do the restores now. */ 4577 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 4578 if (df_regs_ever_live_p (i) 4579 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1))) 4580 { 4581 rtx src = gen_rtx_MEM (DFmode, 4582 gen_rtx_POST_INC (word_mode, tmpreg)); 4583 rtx dest = gen_rtx_REG (DFmode, i); 4584 emit_move_insn (dest, src); 4585 } 4586 } 4587 4588 /* Emit a blockage insn here to keep these insns from being moved to 4589 an earlier spot in the epilogue, or into the main instruction stream. 4590 4591 This is necessary as we must not cut the stack back before all the 4592 restores are finished. */ 4593 emit_insn (gen_blockage ()); 4594 4595 /* Reset stack pointer (and possibly frame pointer). The stack 4596 pointer is initially set to fp + 64 to avoid a race condition. */ 4597 if (frame_pointer_needed) 4598 { 4599 rtx delta = GEN_INT (-64); 4600 4601 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0); 4602 emit_insn (gen_pre_load (hard_frame_pointer_rtx, 4603 stack_pointer_rtx, delta)); 4604 } 4605 /* If we were deferring a callee register restore, do it now. */ 4606 else if (merge_sp_adjust_with_load) 4607 { 4608 rtx delta = GEN_INT (-actual_fsize); 4609 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load); 4610 4611 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta)); 4612 } 4613 else if (actual_fsize != 0) 4614 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4615 - actual_fsize, 0); 4616 4617 /* If we haven't restored %r2 yet (no frame pointer, and a stack 4618 frame greater than 8k), do so now. */ 4619 if (ret_off != 0) 4620 load_reg (2, ret_off, STACK_POINTER_REGNUM); 4621 4622 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4623 { 4624 rtx sa = EH_RETURN_STACKADJ_RTX; 4625 4626 emit_insn (gen_blockage ()); 4627 emit_insn (TARGET_64BIT 4628 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa) 4629 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa)); 4630 } 4631 } 4632 4633 bool 4634 pa_can_use_return_insn (void) 4635 { 4636 if (!reload_completed) 4637 return false; 4638 4639 if (frame_pointer_needed) 4640 return false; 4641 4642 if (df_regs_ever_live_p (2)) 4643 return false; 4644 4645 if (crtl->profile) 4646 return false; 4647 4648 return pa_compute_frame_size (get_frame_size (), 0) == 0; 4649 } 4650 4651 rtx 4652 hppa_pic_save_rtx (void) 4653 { 4654 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM); 4655 } 4656 4657 #ifndef NO_DEFERRED_PROFILE_COUNTERS 4658 #define NO_DEFERRED_PROFILE_COUNTERS 0 4659 #endif 4660 4661 4662 /* Vector of funcdef numbers. */ 4663 static vec<int> funcdef_nos; 4664 4665 /* Output deferred profile counters. */ 4666 static void 4667 output_deferred_profile_counters (void) 4668 { 4669 unsigned int i; 4670 int align, n; 4671 4672 if (funcdef_nos.is_empty ()) 4673 return; 4674 4675 switch_to_section (data_section); 4676 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE); 4677 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); 4678 4679 for (i = 0; funcdef_nos.iterate (i, &n); i++) 4680 { 4681 targetm.asm_out.internal_label (asm_out_file, "LP", n); 4682 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1); 4683 } 4684 4685 funcdef_nos.release (); 4686 } 4687 4688 void 4689 hppa_profile_hook (int label_no) 4690 { 4691 rtx_code_label *label_rtx = gen_label_rtx (); 4692 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE); 4693 rtx arg_bytes, begin_label_rtx, mcount, sym; 4694 rtx_insn *call_insn; 4695 char begin_label_name[16]; 4696 bool use_mcount_pcrel_call; 4697 4698 /* Set up call destination. */ 4699 sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount"); 4700 pa_encode_label (sym); 4701 mcount = gen_rtx_MEM (Pmode, sym); 4702 4703 /* If we can reach _mcount with a pc-relative call, we can optimize 4704 loading the address of the current function. This requires linker 4705 long branch stub support. */ 4706 if (!TARGET_PORTABLE_RUNTIME 4707 && !TARGET_LONG_CALLS 4708 && (TARGET_SOM || flag_function_sections)) 4709 use_mcount_pcrel_call = TRUE; 4710 else 4711 use_mcount_pcrel_call = FALSE; 4712 4713 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL, 4714 label_no); 4715 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name)); 4716 4717 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2)); 4718 4719 if (!use_mcount_pcrel_call) 4720 { 4721 /* The address of the function is loaded into %r25 with an instruction- 4722 relative sequence that avoids the use of relocations. We use SImode 4723 for the address of the function in both 32 and 64-bit code to avoid 4724 having to provide DImode versions of the lcla2 pattern. */ 4725 if (TARGET_PA_20) 4726 emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx)); 4727 else 4728 emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx)); 4729 } 4730 4731 if (!NO_DEFERRED_PROFILE_COUNTERS) 4732 { 4733 rtx count_label_rtx, addr, r24; 4734 char count_label_name[16]; 4735 4736 funcdef_nos.safe_push (label_no); 4737 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no); 4738 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, 4739 ggc_strdup (count_label_name)); 4740 4741 addr = force_reg (Pmode, count_label_rtx); 4742 r24 = gen_rtx_REG (Pmode, 24); 4743 emit_move_insn (r24, addr); 4744 4745 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12); 4746 if (use_mcount_pcrel_call) 4747 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes, 4748 begin_label_rtx)); 4749 else 4750 call_insn = emit_call_insn (gen_call (mcount, arg_bytes)); 4751 4752 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24); 4753 } 4754 else 4755 { 4756 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8); 4757 if (use_mcount_pcrel_call) 4758 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes, 4759 begin_label_rtx)); 4760 else 4761 call_insn = emit_call_insn (gen_call (mcount, arg_bytes)); 4762 } 4763 4764 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25)); 4765 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26)); 4766 4767 /* Indicate the _mcount call cannot throw, nor will it execute a 4768 non-local goto. */ 4769 make_reg_eh_region_note_nothrow_nononlocal (call_insn); 4770 4771 /* Allocate space for fixed arguments. */ 4772 if (reg_parm_stack_space > crtl->outgoing_args_size) 4773 crtl->outgoing_args_size = reg_parm_stack_space; 4774 } 4775 4776 /* Fetch the return address for the frame COUNT steps up from 4777 the current frame, after the prologue. FRAMEADDR is the 4778 frame pointer of the COUNT frame. 4779 4780 We want to ignore any export stub remnants here. To handle this, 4781 we examine the code at the return address, and if it is an export 4782 stub, we return a memory rtx for the stub return address stored 4783 at frame-24. 4784 4785 The value returned is used in two different ways: 4786 4787 1. To find a function's caller. 4788 4789 2. To change the return address for a function. 4790 4791 This function handles most instances of case 1; however, it will 4792 fail if there are two levels of stubs to execute on the return 4793 path. The only way I believe that can happen is if the return value 4794 needs a parameter relocation, which never happens for C code. 4795 4796 This function handles most instances of case 2; however, it will 4797 fail if we did not originally have stub code on the return path 4798 but will need stub code on the new return path. This can happen if 4799 the caller & callee are both in the main program, but the new 4800 return location is in a shared library. */ 4801 4802 rtx 4803 pa_return_addr_rtx (int count, rtx frameaddr) 4804 { 4805 rtx label; 4806 rtx rp; 4807 rtx saved_rp; 4808 rtx ins; 4809 4810 /* The instruction stream at the return address of a PA1.X export stub is: 4811 4812 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp 4813 0x004010a1 | stub+12: ldsid (sr0,rp),r1 4814 0x00011820 | stub+16: mtsp r1,sr0 4815 0xe0400002 | stub+20: be,n 0(sr0,rp) 4816 4817 0xe0400002 must be specified as -532676606 so that it won't be 4818 rejected as an invalid immediate operand on 64-bit hosts. 4819 4820 The instruction stream at the return address of a PA2.0 export stub is: 4821 4822 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp 4823 0xe840d002 | stub+12: bve,n (rp) 4824 */ 4825 4826 HOST_WIDE_INT insns[4]; 4827 int i, len; 4828 4829 if (count != 0) 4830 return NULL_RTX; 4831 4832 rp = get_hard_reg_initial_val (Pmode, 2); 4833 4834 if (TARGET_64BIT || TARGET_NO_SPACE_REGS) 4835 return rp; 4836 4837 /* If there is no export stub then just use the value saved from 4838 the return pointer register. */ 4839 4840 saved_rp = gen_reg_rtx (Pmode); 4841 emit_move_insn (saved_rp, rp); 4842 4843 /* Get pointer to the instruction stream. We have to mask out the 4844 privilege level from the two low order bits of the return address 4845 pointer here so that ins will point to the start of the first 4846 instruction that would have been executed if we returned. */ 4847 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR)); 4848 label = gen_label_rtx (); 4849 4850 if (TARGET_PA_20) 4851 { 4852 insns[0] = 0x4bc23fd1; 4853 insns[1] = -398405630; 4854 len = 2; 4855 } 4856 else 4857 { 4858 insns[0] = 0x4bc23fd1; 4859 insns[1] = 0x004010a1; 4860 insns[2] = 0x00011820; 4861 insns[3] = -532676606; 4862 len = 4; 4863 } 4864 4865 /* Check the instruction stream at the normal return address for the 4866 export stub. If it is an export stub, than our return address is 4867 really in -24[frameaddr]. */ 4868 4869 for (i = 0; i < len; i++) 4870 { 4871 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4)); 4872 rtx op1 = GEN_INT (insns[i]); 4873 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label); 4874 } 4875 4876 /* Here we know that our return address points to an export 4877 stub. We don't want to return the address of the export stub, 4878 but rather the return address of the export stub. That return 4879 address is stored at -24[frameaddr]. */ 4880 4881 emit_move_insn (saved_rp, 4882 gen_rtx_MEM (Pmode, 4883 memory_address (Pmode, 4884 plus_constant (Pmode, frameaddr, 4885 -24)))); 4886 4887 emit_label (label); 4888 4889 return saved_rp; 4890 } 4891 4892 void 4893 pa_emit_bcond_fp (rtx operands[]) 4894 { 4895 enum rtx_code code = GET_CODE (operands[0]); 4896 rtx operand0 = operands[1]; 4897 rtx operand1 = operands[2]; 4898 rtx label = operands[3]; 4899 4900 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0), 4901 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1))); 4902 4903 emit_jump_insn (gen_rtx_SET (pc_rtx, 4904 gen_rtx_IF_THEN_ELSE (VOIDmode, 4905 gen_rtx_fmt_ee (NE, 4906 VOIDmode, 4907 gen_rtx_REG (CCFPmode, 0), 4908 const0_rtx), 4909 gen_rtx_LABEL_REF (VOIDmode, label), 4910 pc_rtx))); 4911 4912 } 4913 4914 /* Adjust the cost of a scheduling dependency. Return the new cost of 4915 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ 4916 4917 static int 4918 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, 4919 unsigned int) 4920 { 4921 enum attr_type attr_type; 4922 4923 /* Don't adjust costs for a pa8000 chip, also do not adjust any 4924 true dependencies as they are described with bypasses now. */ 4925 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0) 4926 return cost; 4927 4928 if (! recog_memoized (insn)) 4929 return 0; 4930 4931 attr_type = get_attr_type (insn); 4932 4933 switch (dep_type) 4934 { 4935 case REG_DEP_ANTI: 4936 /* Anti dependency; DEP_INSN reads a register that INSN writes some 4937 cycles later. */ 4938 4939 if (attr_type == TYPE_FPLOAD) 4940 { 4941 rtx pat = PATTERN (insn); 4942 rtx dep_pat = PATTERN (dep_insn); 4943 if (GET_CODE (pat) == PARALLEL) 4944 { 4945 /* This happens for the fldXs,mb patterns. */ 4946 pat = XVECEXP (pat, 0, 0); 4947 } 4948 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4949 /* If this happens, we have to extend this to schedule 4950 optimally. Return 0 for now. */ 4951 return 0; 4952 4953 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) 4954 { 4955 if (! recog_memoized (dep_insn)) 4956 return 0; 4957 switch (get_attr_type (dep_insn)) 4958 { 4959 case TYPE_FPALU: 4960 case TYPE_FPMULSGL: 4961 case TYPE_FPMULDBL: 4962 case TYPE_FPDIVSGL: 4963 case TYPE_FPDIVDBL: 4964 case TYPE_FPSQRTSGL: 4965 case TYPE_FPSQRTDBL: 4966 /* A fpload can't be issued until one cycle before a 4967 preceding arithmetic operation has finished if 4968 the target of the fpload is any of the sources 4969 (or destination) of the arithmetic operation. */ 4970 return insn_default_latency (dep_insn) - 1; 4971 4972 default: 4973 return 0; 4974 } 4975 } 4976 } 4977 else if (attr_type == TYPE_FPALU) 4978 { 4979 rtx pat = PATTERN (insn); 4980 rtx dep_pat = PATTERN (dep_insn); 4981 if (GET_CODE (pat) == PARALLEL) 4982 { 4983 /* This happens for the fldXs,mb patterns. */ 4984 pat = XVECEXP (pat, 0, 0); 4985 } 4986 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4987 /* If this happens, we have to extend this to schedule 4988 optimally. Return 0 for now. */ 4989 return 0; 4990 4991 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) 4992 { 4993 if (! recog_memoized (dep_insn)) 4994 return 0; 4995 switch (get_attr_type (dep_insn)) 4996 { 4997 case TYPE_FPDIVSGL: 4998 case TYPE_FPDIVDBL: 4999 case TYPE_FPSQRTSGL: 5000 case TYPE_FPSQRTDBL: 5001 /* An ALU flop can't be issued until two cycles before a 5002 preceding divide or sqrt operation has finished if 5003 the target of the ALU flop is any of the sources 5004 (or destination) of the divide or sqrt operation. */ 5005 return insn_default_latency (dep_insn) - 2; 5006 5007 default: 5008 return 0; 5009 } 5010 } 5011 } 5012 5013 /* For other anti dependencies, the cost is 0. */ 5014 return 0; 5015 5016 case REG_DEP_OUTPUT: 5017 /* Output dependency; DEP_INSN writes a register that INSN writes some 5018 cycles later. */ 5019 if (attr_type == TYPE_FPLOAD) 5020 { 5021 rtx pat = PATTERN (insn); 5022 rtx dep_pat = PATTERN (dep_insn); 5023 if (GET_CODE (pat) == PARALLEL) 5024 { 5025 /* This happens for the fldXs,mb patterns. */ 5026 pat = XVECEXP (pat, 0, 0); 5027 } 5028 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 5029 /* If this happens, we have to extend this to schedule 5030 optimally. Return 0 for now. */ 5031 return 0; 5032 5033 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) 5034 { 5035 if (! recog_memoized (dep_insn)) 5036 return 0; 5037 switch (get_attr_type (dep_insn)) 5038 { 5039 case TYPE_FPALU: 5040 case TYPE_FPMULSGL: 5041 case TYPE_FPMULDBL: 5042 case TYPE_FPDIVSGL: 5043 case TYPE_FPDIVDBL: 5044 case TYPE_FPSQRTSGL: 5045 case TYPE_FPSQRTDBL: 5046 /* A fpload can't be issued until one cycle before a 5047 preceding arithmetic operation has finished if 5048 the target of the fpload is the destination of the 5049 arithmetic operation. 5050 5051 Exception: For PA7100LC, PA7200 and PA7300, the cost 5052 is 3 cycles, unless they bundle together. We also 5053 pay the penalty if the second insn is a fpload. */ 5054 return insn_default_latency (dep_insn) - 1; 5055 5056 default: 5057 return 0; 5058 } 5059 } 5060 } 5061 else if (attr_type == TYPE_FPALU) 5062 { 5063 rtx pat = PATTERN (insn); 5064 rtx dep_pat = PATTERN (dep_insn); 5065 if (GET_CODE (pat) == PARALLEL) 5066 { 5067 /* This happens for the fldXs,mb patterns. */ 5068 pat = XVECEXP (pat, 0, 0); 5069 } 5070 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 5071 /* If this happens, we have to extend this to schedule 5072 optimally. Return 0 for now. */ 5073 return 0; 5074 5075 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) 5076 { 5077 if (! recog_memoized (dep_insn)) 5078 return 0; 5079 switch (get_attr_type (dep_insn)) 5080 { 5081 case TYPE_FPDIVSGL: 5082 case TYPE_FPDIVDBL: 5083 case TYPE_FPSQRTSGL: 5084 case TYPE_FPSQRTDBL: 5085 /* An ALU flop can't be issued until two cycles before a 5086 preceding divide or sqrt operation has finished if 5087 the target of the ALU flop is also the target of 5088 the divide or sqrt operation. */ 5089 return insn_default_latency (dep_insn) - 2; 5090 5091 default: 5092 return 0; 5093 } 5094 } 5095 } 5096 5097 /* For other output dependencies, the cost is 0. */ 5098 return 0; 5099 5100 default: 5101 gcc_unreachable (); 5102 } 5103 } 5104 5105 /* The 700 can only issue a single insn at a time. 5106 The 7XXX processors can issue two insns at a time. 5107 The 8000 can issue 4 insns at a time. */ 5108 static int 5109 pa_issue_rate (void) 5110 { 5111 switch (pa_cpu) 5112 { 5113 case PROCESSOR_700: return 1; 5114 case PROCESSOR_7100: return 2; 5115 case PROCESSOR_7100LC: return 2; 5116 case PROCESSOR_7200: return 2; 5117 case PROCESSOR_7300: return 2; 5118 case PROCESSOR_8000: return 4; 5119 5120 default: 5121 gcc_unreachable (); 5122 } 5123 } 5124 5125 5126 5127 /* Return any length plus adjustment needed by INSN which already has 5128 its length computed as LENGTH. Return LENGTH if no adjustment is 5129 necessary. 5130 5131 Also compute the length of an inline block move here as it is too 5132 complicated to express as a length attribute in pa.md. */ 5133 int 5134 pa_adjust_insn_length (rtx_insn *insn, int length) 5135 { 5136 rtx pat = PATTERN (insn); 5137 5138 /* If length is negative or undefined, provide initial length. */ 5139 if ((unsigned int) length >= INT_MAX) 5140 { 5141 if (GET_CODE (pat) == SEQUENCE) 5142 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0)); 5143 5144 switch (get_attr_type (insn)) 5145 { 5146 case TYPE_MILLI: 5147 length = pa_attr_length_millicode_call (insn); 5148 break; 5149 case TYPE_CALL: 5150 length = pa_attr_length_call (insn, 0); 5151 break; 5152 case TYPE_SIBCALL: 5153 length = pa_attr_length_call (insn, 1); 5154 break; 5155 case TYPE_DYNCALL: 5156 length = pa_attr_length_indirect_call (insn); 5157 break; 5158 case TYPE_SH_FUNC_ADRS: 5159 length = pa_attr_length_millicode_call (insn) + 20; 5160 break; 5161 default: 5162 gcc_unreachable (); 5163 } 5164 } 5165 5166 /* Block move pattern. */ 5167 if (NONJUMP_INSN_P (insn) 5168 && GET_CODE (pat) == PARALLEL 5169 && GET_CODE (XVECEXP (pat, 0, 0)) == SET 5170 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM 5171 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM 5172 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode 5173 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) 5174 length += compute_cpymem_length (insn) - 4; 5175 /* Block clear pattern. */ 5176 else if (NONJUMP_INSN_P (insn) 5177 && GET_CODE (pat) == PARALLEL 5178 && GET_CODE (XVECEXP (pat, 0, 0)) == SET 5179 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM 5180 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx 5181 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode) 5182 length += compute_clrmem_length (insn) - 4; 5183 /* Conditional branch with an unfilled delay slot. */ 5184 else if (JUMP_P (insn) && ! simplejump_p (insn)) 5185 { 5186 /* Adjust a short backwards conditional with an unfilled delay slot. */ 5187 if (GET_CODE (pat) == SET 5188 && length == 4 5189 && JUMP_LABEL (insn) != NULL_RTX 5190 && ! forward_branch_p (insn)) 5191 length += 4; 5192 else if (GET_CODE (pat) == PARALLEL 5193 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH 5194 && length == 4) 5195 length += 4; 5196 /* Adjust dbra insn with short backwards conditional branch with 5197 unfilled delay slot -- only for case where counter is in a 5198 general register register. */ 5199 else if (GET_CODE (pat) == PARALLEL 5200 && GET_CODE (XVECEXP (pat, 0, 1)) == SET 5201 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG 5202 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0)) 5203 && length == 4 5204 && ! forward_branch_p (insn)) 5205 length += 4; 5206 } 5207 return length; 5208 } 5209 5210 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */ 5211 5212 static bool 5213 pa_print_operand_punct_valid_p (unsigned char code) 5214 { 5215 if (code == '@' 5216 || code == '#' 5217 || code == '*' 5218 || code == '^') 5219 return true; 5220 5221 return false; 5222 } 5223 5224 /* Print operand X (an rtx) in assembler syntax to file FILE. 5225 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. 5226 For `%' followed by punctuation, CODE is the punctuation and X is null. */ 5227 5228 void 5229 pa_print_operand (FILE *file, rtx x, int code) 5230 { 5231 switch (code) 5232 { 5233 case '#': 5234 /* Output a 'nop' if there's nothing for the delay slot. */ 5235 if (dbr_sequence_length () == 0) 5236 fputs ("\n\tnop", file); 5237 return; 5238 case '*': 5239 /* Output a nullification completer if there's nothing for the */ 5240 /* delay slot or nullification is requested. */ 5241 if (dbr_sequence_length () == 0 || 5242 (final_sequence && 5243 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))) 5244 fputs (",n", file); 5245 return; 5246 case 'R': 5247 /* Print out the second register name of a register pair. 5248 I.e., R (6) => 7. */ 5249 fputs (reg_names[REGNO (x) + 1], file); 5250 return; 5251 case 'r': 5252 /* A register or zero. */ 5253 if (x == const0_rtx 5254 || (x == CONST0_RTX (DFmode)) 5255 || (x == CONST0_RTX (SFmode))) 5256 { 5257 fputs ("%r0", file); 5258 return; 5259 } 5260 else 5261 break; 5262 case 'f': 5263 /* A register or zero (floating point). */ 5264 if (x == const0_rtx 5265 || (x == CONST0_RTX (DFmode)) 5266 || (x == CONST0_RTX (SFmode))) 5267 { 5268 fputs ("%fr0", file); 5269 return; 5270 } 5271 else 5272 break; 5273 case 'A': 5274 { 5275 rtx xoperands[2]; 5276 5277 xoperands[0] = XEXP (XEXP (x, 0), 0); 5278 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0); 5279 pa_output_global_address (file, xoperands[1], 0); 5280 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]); 5281 return; 5282 } 5283 5284 case 'C': /* Plain (C)ondition */ 5285 case 'X': 5286 switch (GET_CODE (x)) 5287 { 5288 case EQ: 5289 fputs ("=", file); break; 5290 case NE: 5291 fputs ("<>", file); break; 5292 case GT: 5293 fputs (">", file); break; 5294 case GE: 5295 fputs (">=", file); break; 5296 case GEU: 5297 fputs (">>=", file); break; 5298 case GTU: 5299 fputs (">>", file); break; 5300 case LT: 5301 fputs ("<", file); break; 5302 case LE: 5303 fputs ("<=", file); break; 5304 case LEU: 5305 fputs ("<<=", file); break; 5306 case LTU: 5307 fputs ("<<", file); break; 5308 default: 5309 gcc_unreachable (); 5310 } 5311 return; 5312 case 'N': /* Condition, (N)egated */ 5313 switch (GET_CODE (x)) 5314 { 5315 case EQ: 5316 fputs ("<>", file); break; 5317 case NE: 5318 fputs ("=", file); break; 5319 case GT: 5320 fputs ("<=", file); break; 5321 case GE: 5322 fputs ("<", file); break; 5323 case GEU: 5324 fputs ("<<", file); break; 5325 case GTU: 5326 fputs ("<<=", file); break; 5327 case LT: 5328 fputs (">=", file); break; 5329 case LE: 5330 fputs (">", file); break; 5331 case LEU: 5332 fputs (">>", file); break; 5333 case LTU: 5334 fputs (">>=", file); break; 5335 default: 5336 gcc_unreachable (); 5337 } 5338 return; 5339 /* For floating point comparisons. Note that the output 5340 predicates are the complement of the desired mode. The 5341 conditions for GT, GE, LT, LE and LTGT cause an invalid 5342 operation exception if the result is unordered and this 5343 exception is enabled in the floating-point status register. */ 5344 case 'Y': 5345 switch (GET_CODE (x)) 5346 { 5347 case EQ: 5348 fputs ("!=", file); break; 5349 case NE: 5350 fputs ("=", file); break; 5351 case GT: 5352 fputs ("!>", file); break; 5353 case GE: 5354 fputs ("!>=", file); break; 5355 case LT: 5356 fputs ("!<", file); break; 5357 case LE: 5358 fputs ("!<=", file); break; 5359 case LTGT: 5360 fputs ("!<>", file); break; 5361 case UNLE: 5362 fputs ("!?<=", file); break; 5363 case UNLT: 5364 fputs ("!?<", file); break; 5365 case UNGE: 5366 fputs ("!?>=", file); break; 5367 case UNGT: 5368 fputs ("!?>", file); break; 5369 case UNEQ: 5370 fputs ("!?=", file); break; 5371 case UNORDERED: 5372 fputs ("!?", file); break; 5373 case ORDERED: 5374 fputs ("?", file); break; 5375 default: 5376 gcc_unreachable (); 5377 } 5378 return; 5379 case 'S': /* Condition, operands are (S)wapped. */ 5380 switch (GET_CODE (x)) 5381 { 5382 case EQ: 5383 fputs ("=", file); break; 5384 case NE: 5385 fputs ("<>", file); break; 5386 case GT: 5387 fputs ("<", file); break; 5388 case GE: 5389 fputs ("<=", file); break; 5390 case GEU: 5391 fputs ("<<=", file); break; 5392 case GTU: 5393 fputs ("<<", file); break; 5394 case LT: 5395 fputs (">", file); break; 5396 case LE: 5397 fputs (">=", file); break; 5398 case LEU: 5399 fputs (">>=", file); break; 5400 case LTU: 5401 fputs (">>", file); break; 5402 default: 5403 gcc_unreachable (); 5404 } 5405 return; 5406 case 'B': /* Condition, (B)oth swapped and negate. */ 5407 switch (GET_CODE (x)) 5408 { 5409 case EQ: 5410 fputs ("<>", file); break; 5411 case NE: 5412 fputs ("=", file); break; 5413 case GT: 5414 fputs (">=", file); break; 5415 case GE: 5416 fputs (">", file); break; 5417 case GEU: 5418 fputs (">>", file); break; 5419 case GTU: 5420 fputs (">>=", file); break; 5421 case LT: 5422 fputs ("<=", file); break; 5423 case LE: 5424 fputs ("<", file); break; 5425 case LEU: 5426 fputs ("<<", file); break; 5427 case LTU: 5428 fputs ("<<=", file); break; 5429 default: 5430 gcc_unreachable (); 5431 } 5432 return; 5433 case 'k': 5434 gcc_assert (GET_CODE (x) == CONST_INT); 5435 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x)); 5436 return; 5437 case 'Q': 5438 gcc_assert (GET_CODE (x) == CONST_INT); 5439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63)); 5440 return; 5441 case 'L': 5442 gcc_assert (GET_CODE (x) == CONST_INT); 5443 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31)); 5444 return; 5445 case 'o': 5446 gcc_assert (GET_CODE (x) == CONST_INT 5447 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3)); 5448 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 5449 return; 5450 case 'O': 5451 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0); 5452 fprintf (file, "%d", exact_log2 (INTVAL (x))); 5453 return; 5454 case 'p': 5455 gcc_assert (GET_CODE (x) == CONST_INT); 5456 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63)); 5457 return; 5458 case 'P': 5459 gcc_assert (GET_CODE (x) == CONST_INT); 5460 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31)); 5461 return; 5462 case 'I': 5463 if (GET_CODE (x) == CONST_INT) 5464 fputs ("i", file); 5465 return; 5466 case 'M': 5467 case 'F': 5468 switch (GET_CODE (XEXP (x, 0))) 5469 { 5470 case PRE_DEC: 5471 case PRE_INC: 5472 if (ASSEMBLER_DIALECT == 0) 5473 fputs ("s,mb", file); 5474 else 5475 fputs (",mb", file); 5476 break; 5477 case POST_DEC: 5478 case POST_INC: 5479 if (ASSEMBLER_DIALECT == 0) 5480 fputs ("s,ma", file); 5481 else 5482 fputs (",ma", file); 5483 break; 5484 case PLUS: 5485 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG 5486 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) 5487 { 5488 if (ASSEMBLER_DIALECT == 0) 5489 fputs ("x", file); 5490 } 5491 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 5492 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) 5493 { 5494 if (ASSEMBLER_DIALECT == 0) 5495 fputs ("x,s", file); 5496 else 5497 fputs (",s", file); 5498 } 5499 else if (code == 'F' && ASSEMBLER_DIALECT == 0) 5500 fputs ("s", file); 5501 break; 5502 default: 5503 if (code == 'F' && ASSEMBLER_DIALECT == 0) 5504 fputs ("s", file); 5505 break; 5506 } 5507 return; 5508 case 'G': 5509 pa_output_global_address (file, x, 0); 5510 return; 5511 case 'H': 5512 pa_output_global_address (file, x, 1); 5513 return; 5514 case 0: /* Don't do anything special */ 5515 break; 5516 case 'Z': 5517 { 5518 unsigned op[3]; 5519 compute_zdepwi_operands (INTVAL (x), op); 5520 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); 5521 return; 5522 } 5523 case 'z': 5524 { 5525 unsigned op[3]; 5526 compute_zdepdi_operands (INTVAL (x), op); 5527 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); 5528 return; 5529 } 5530 case 'c': 5531 /* We can get here from a .vtable_inherit due to our 5532 CONSTANT_ADDRESS_P rejecting perfectly good constant 5533 addresses. */ 5534 break; 5535 default: 5536 gcc_unreachable (); 5537 } 5538 if (GET_CODE (x) == REG) 5539 { 5540 fputs (reg_names [REGNO (x)], file); 5541 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4) 5542 { 5543 fputs ("R", file); 5544 return; 5545 } 5546 if (FP_REG_P (x) 5547 && GET_MODE_SIZE (GET_MODE (x)) <= 4 5548 && (REGNO (x) & 1) == 0) 5549 fputs ("L", file); 5550 } 5551 else if (GET_CODE (x) == MEM) 5552 { 5553 int size = GET_MODE_SIZE (GET_MODE (x)); 5554 rtx base = NULL_RTX; 5555 switch (GET_CODE (XEXP (x, 0))) 5556 { 5557 case PRE_DEC: 5558 case POST_DEC: 5559 base = XEXP (XEXP (x, 0), 0); 5560 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]); 5561 break; 5562 case PRE_INC: 5563 case POST_INC: 5564 base = XEXP (XEXP (x, 0), 0); 5565 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]); 5566 break; 5567 case PLUS: 5568 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) 5569 fprintf (file, "%s(%s)", 5570 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))], 5571 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]); 5572 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) 5573 fprintf (file, "%s(%s)", 5574 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))], 5575 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]); 5576 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG 5577 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) 5578 { 5579 /* Because the REG_POINTER flag can get lost during reload, 5580 pa_legitimate_address_p canonicalizes the order of the 5581 index and base registers in the combined move patterns. */ 5582 rtx base = XEXP (XEXP (x, 0), 1); 5583 rtx index = XEXP (XEXP (x, 0), 0); 5584 5585 fprintf (file, "%s(%s)", 5586 reg_names [REGNO (index)], reg_names [REGNO (base)]); 5587 } 5588 else 5589 output_address (GET_MODE (x), XEXP (x, 0)); 5590 break; 5591 default: 5592 output_address (GET_MODE (x), XEXP (x, 0)); 5593 break; 5594 } 5595 } 5596 else 5597 output_addr_const (file, x); 5598 } 5599 5600 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */ 5601 5602 void 5603 pa_output_global_address (FILE *file, rtx x, int round_constant) 5604 { 5605 5606 /* Imagine (high (const (plus ...))). */ 5607 if (GET_CODE (x) == HIGH) 5608 x = XEXP (x, 0); 5609 5610 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode)) 5611 output_addr_const (file, x); 5612 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic) 5613 { 5614 output_addr_const (file, x); 5615 fputs ("-$global$", file); 5616 } 5617 else if (GET_CODE (x) == CONST) 5618 { 5619 const char *sep = ""; 5620 int offset = 0; /* assembler wants -$global$ at end */ 5621 rtx base = NULL_RTX; 5622 5623 switch (GET_CODE (XEXP (XEXP (x, 0), 0))) 5624 { 5625 case LABEL_REF: 5626 case SYMBOL_REF: 5627 base = XEXP (XEXP (x, 0), 0); 5628 output_addr_const (file, base); 5629 break; 5630 case CONST_INT: 5631 offset = INTVAL (XEXP (XEXP (x, 0), 0)); 5632 break; 5633 default: 5634 gcc_unreachable (); 5635 } 5636 5637 switch (GET_CODE (XEXP (XEXP (x, 0), 1))) 5638 { 5639 case LABEL_REF: 5640 case SYMBOL_REF: 5641 base = XEXP (XEXP (x, 0), 1); 5642 output_addr_const (file, base); 5643 break; 5644 case CONST_INT: 5645 offset = INTVAL (XEXP (XEXP (x, 0), 1)); 5646 break; 5647 default: 5648 gcc_unreachable (); 5649 } 5650 5651 /* How bogus. The compiler is apparently responsible for 5652 rounding the constant if it uses an LR field selector. 5653 5654 The linker and/or assembler seem a better place since 5655 they have to do this kind of thing already. 5656 5657 If we fail to do this, HP's optimizing linker may eliminate 5658 an addil, but not update the ldw/stw/ldo instruction that 5659 uses the result of the addil. */ 5660 if (round_constant) 5661 offset = ((offset + 0x1000) & ~0x1fff); 5662 5663 switch (GET_CODE (XEXP (x, 0))) 5664 { 5665 case PLUS: 5666 if (offset < 0) 5667 { 5668 offset = -offset; 5669 sep = "-"; 5670 } 5671 else 5672 sep = "+"; 5673 break; 5674 5675 case MINUS: 5676 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF); 5677 sep = "-"; 5678 break; 5679 5680 default: 5681 gcc_unreachable (); 5682 } 5683 5684 if (!read_only_operand (base, VOIDmode) && !flag_pic) 5685 fputs ("-$global$", file); 5686 if (offset) 5687 fprintf (file, "%s%d", sep, offset); 5688 } 5689 else 5690 output_addr_const (file, x); 5691 } 5692 5693 /* Output boilerplate text to appear at the beginning of the file. 5694 There are several possible versions. */ 5695 #define aputs(x) fputs(x, asm_out_file) 5696 static inline void 5697 pa_file_start_level (void) 5698 { 5699 if (TARGET_64BIT) 5700 aputs ("\t.LEVEL 2.0w\n"); 5701 else if (TARGET_PA_20) 5702 aputs ("\t.LEVEL 2.0\n"); 5703 else if (TARGET_PA_11) 5704 aputs ("\t.LEVEL 1.1\n"); 5705 else 5706 aputs ("\t.LEVEL 1.0\n"); 5707 } 5708 5709 static inline void 5710 pa_file_start_space (int sortspace) 5711 { 5712 aputs ("\t.SPACE $PRIVATE$"); 5713 if (sortspace) 5714 aputs (",SORT=16"); 5715 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"); 5716 if (flag_tm) 5717 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31"); 5718 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82" 5719 "\n\t.SPACE $TEXT$"); 5720 if (sortspace) 5721 aputs (",SORT=8"); 5722 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44" 5723 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n"); 5724 } 5725 5726 static inline void 5727 pa_file_start_file (int want_version) 5728 { 5729 if (write_symbols != NO_DEBUG) 5730 { 5731 output_file_directive (asm_out_file, main_input_filename); 5732 if (want_version) 5733 aputs ("\t.version\t\"01.01\"\n"); 5734 } 5735 } 5736 5737 static inline void 5738 pa_file_start_mcount (const char *aswhat) 5739 { 5740 if (profile_flag) 5741 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat); 5742 } 5743 5744 static void 5745 pa_elf_file_start (void) 5746 { 5747 pa_file_start_level (); 5748 pa_file_start_mcount ("ENTRY"); 5749 pa_file_start_file (0); 5750 } 5751 5752 static void 5753 pa_som_file_start (void) 5754 { 5755 pa_file_start_level (); 5756 pa_file_start_space (0); 5757 aputs ("\t.IMPORT $global$,DATA\n" 5758 "\t.IMPORT $$dyncall,MILLICODE\n"); 5759 pa_file_start_mcount ("CODE"); 5760 pa_file_start_file (0); 5761 } 5762 5763 static void 5764 pa_linux_file_start (void) 5765 { 5766 pa_file_start_file (0); 5767 pa_file_start_level (); 5768 pa_file_start_mcount ("CODE"); 5769 } 5770 5771 static void 5772 pa_hpux64_gas_file_start (void) 5773 { 5774 pa_file_start_level (); 5775 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE 5776 if (profile_flag) 5777 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function"); 5778 #endif 5779 pa_file_start_file (1); 5780 } 5781 5782 static void 5783 pa_hpux64_hpas_file_start (void) 5784 { 5785 pa_file_start_level (); 5786 pa_file_start_space (1); 5787 pa_file_start_mcount ("CODE"); 5788 pa_file_start_file (0); 5789 } 5790 #undef aputs 5791 5792 /* Search the deferred plabel list for SYMBOL and return its internal 5793 label. If an entry for SYMBOL is not found, a new entry is created. */ 5794 5795 rtx 5796 pa_get_deferred_plabel (rtx symbol) 5797 { 5798 const char *fname = XSTR (symbol, 0); 5799 size_t i; 5800 5801 /* See if we have already put this function on the list of deferred 5802 plabels. This list is generally small, so a liner search is not 5803 too ugly. If it proves too slow replace it with something faster. */ 5804 for (i = 0; i < n_deferred_plabels; i++) 5805 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0) 5806 break; 5807 5808 /* If the deferred plabel list is empty, or this entry was not found 5809 on the list, create a new entry on the list. */ 5810 if (deferred_plabels == NULL || i == n_deferred_plabels) 5811 { 5812 tree id; 5813 5814 if (deferred_plabels == 0) 5815 deferred_plabels = ggc_alloc<deferred_plabel> (); 5816 else 5817 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel, 5818 deferred_plabels, 5819 n_deferred_plabels + 1); 5820 5821 i = n_deferred_plabels++; 5822 deferred_plabels[i].internal_label = gen_label_rtx (); 5823 deferred_plabels[i].symbol = symbol; 5824 5825 /* Gross. We have just implicitly taken the address of this 5826 function. Mark it in the same manner as assemble_name. */ 5827 id = maybe_get_identifier (targetm.strip_name_encoding (fname)); 5828 if (id) 5829 mark_referenced (id); 5830 } 5831 5832 return deferred_plabels[i].internal_label; 5833 } 5834 5835 static void 5836 output_deferred_plabels (void) 5837 { 5838 size_t i; 5839 5840 /* If we have some deferred plabels, then we need to switch into the 5841 data or readonly data section, and align it to a 4 byte boundary 5842 before outputting the deferred plabels. */ 5843 if (n_deferred_plabels) 5844 { 5845 switch_to_section (flag_pic ? data_section : readonly_data_section); 5846 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); 5847 } 5848 5849 /* Now output the deferred plabels. */ 5850 for (i = 0; i < n_deferred_plabels; i++) 5851 { 5852 targetm.asm_out.internal_label (asm_out_file, "L", 5853 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label)); 5854 assemble_integer (deferred_plabels[i].symbol, 5855 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1); 5856 } 5857 } 5858 5859 /* Initialize optabs to point to emulation routines. */ 5860 5861 static void 5862 pa_init_libfuncs (void) 5863 { 5864 if (HPUX_LONG_DOUBLE_LIBRARY) 5865 { 5866 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd"); 5867 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub"); 5868 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy"); 5869 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv"); 5870 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin"); 5871 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax"); 5872 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt"); 5873 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs"); 5874 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg"); 5875 5876 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq"); 5877 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne"); 5878 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt"); 5879 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge"); 5880 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt"); 5881 set_optab_libfunc (le_optab, TFmode, "_U_Qfle"); 5882 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord"); 5883 5884 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad"); 5885 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad"); 5886 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl"); 5887 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl"); 5888 5889 set_conv_libfunc (sfix_optab, SImode, TFmode, 5890 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl" 5891 : "_U_Qfcnvfxt_quad_to_sgl"); 5892 set_conv_libfunc (sfix_optab, DImode, TFmode, 5893 "_U_Qfcnvfxt_quad_to_dbl"); 5894 set_conv_libfunc (ufix_optab, SImode, TFmode, 5895 "_U_Qfcnvfxt_quad_to_usgl"); 5896 set_conv_libfunc (ufix_optab, DImode, TFmode, 5897 "_U_Qfcnvfxt_quad_to_udbl"); 5898 5899 set_conv_libfunc (sfloat_optab, TFmode, SImode, 5900 "_U_Qfcnvxf_sgl_to_quad"); 5901 set_conv_libfunc (sfloat_optab, TFmode, DImode, 5902 "_U_Qfcnvxf_dbl_to_quad"); 5903 set_conv_libfunc (ufloat_optab, TFmode, SImode, 5904 "_U_Qfcnvxf_usgl_to_quad"); 5905 set_conv_libfunc (ufloat_optab, TFmode, DImode, 5906 "_U_Qfcnvxf_udbl_to_quad"); 5907 } 5908 5909 if (TARGET_SYNC_LIBCALL) 5910 init_sync_libfuncs (8); 5911 } 5912 5913 /* HP's millicode routines mean something special to the assembler. 5914 Keep track of which ones we have used. */ 5915 5916 enum millicodes { remI, remU, divI, divU, mulI, end1000 }; 5917 static void import_milli (enum millicodes); 5918 static char imported[(int) end1000]; 5919 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"}; 5920 static const char import_string[] = ".IMPORT $$....,MILLICODE"; 5921 #define MILLI_START 10 5922 5923 static void 5924 import_milli (enum millicodes code) 5925 { 5926 char str[sizeof (import_string)]; 5927 5928 if (!imported[(int) code]) 5929 { 5930 imported[(int) code] = 1; 5931 strcpy (str, import_string); 5932 memcpy (str + MILLI_START, milli_names[(int) code], 4); 5933 output_asm_insn (str, 0); 5934 } 5935 } 5936 5937 /* The register constraints have put the operands and return value in 5938 the proper registers. */ 5939 5940 const char * 5941 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn) 5942 { 5943 import_milli (mulI); 5944 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI")); 5945 } 5946 5947 /* Emit the rtl for doing a division by a constant. */ 5948 5949 /* Do magic division millicodes exist for this value? */ 5950 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1}; 5951 5952 /* We'll use an array to keep track of the magic millicodes and 5953 whether or not we've used them already. [n][0] is signed, [n][1] is 5954 unsigned. */ 5955 5956 static int div_milli[16][2]; 5957 5958 int 5959 pa_emit_hpdiv_const (rtx *operands, int unsignedp) 5960 { 5961 if (GET_CODE (operands[2]) == CONST_INT 5962 && INTVAL (operands[2]) > 0 5963 && INTVAL (operands[2]) < 16 5964 && pa_magic_milli[INTVAL (operands[2])]) 5965 { 5966 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31); 5967 5968 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]); 5969 emit 5970 (gen_rtx_PARALLEL 5971 (VOIDmode, 5972 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29), 5973 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, 5974 SImode, 5975 gen_rtx_REG (SImode, 26), 5976 operands[2])), 5977 gen_rtx_CLOBBER (VOIDmode, operands[4]), 5978 gen_rtx_CLOBBER (VOIDmode, operands[3]), 5979 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)), 5980 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)), 5981 gen_rtx_CLOBBER (VOIDmode, ret)))); 5982 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29)); 5983 return 1; 5984 } 5985 return 0; 5986 } 5987 5988 const char * 5989 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn) 5990 { 5991 HOST_WIDE_INT divisor; 5992 5993 /* If the divisor is a constant, try to use one of the special 5994 opcodes .*/ 5995 if (GET_CODE (operands[0]) == CONST_INT) 5996 { 5997 static char buf[100]; 5998 divisor = INTVAL (operands[0]); 5999 if (!div_milli[divisor][unsignedp]) 6000 { 6001 div_milli[divisor][unsignedp] = 1; 6002 if (unsignedp) 6003 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands); 6004 else 6005 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands); 6006 } 6007 if (unsignedp) 6008 { 6009 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC, 6010 INTVAL (operands[0])); 6011 return pa_output_millicode_call (insn, 6012 gen_rtx_SYMBOL_REF (SImode, buf)); 6013 } 6014 else 6015 { 6016 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC, 6017 INTVAL (operands[0])); 6018 return pa_output_millicode_call (insn, 6019 gen_rtx_SYMBOL_REF (SImode, buf)); 6020 } 6021 } 6022 /* Divisor isn't a special constant. */ 6023 else 6024 { 6025 if (unsignedp) 6026 { 6027 import_milli (divU); 6028 return pa_output_millicode_call (insn, 6029 gen_rtx_SYMBOL_REF (SImode, "$$divU")); 6030 } 6031 else 6032 { 6033 import_milli (divI); 6034 return pa_output_millicode_call (insn, 6035 gen_rtx_SYMBOL_REF (SImode, "$$divI")); 6036 } 6037 } 6038 } 6039 6040 /* Output a $$rem millicode to do mod. */ 6041 6042 const char * 6043 pa_output_mod_insn (int unsignedp, rtx_insn *insn) 6044 { 6045 if (unsignedp) 6046 { 6047 import_milli (remU); 6048 return pa_output_millicode_call (insn, 6049 gen_rtx_SYMBOL_REF (SImode, "$$remU")); 6050 } 6051 else 6052 { 6053 import_milli (remI); 6054 return pa_output_millicode_call (insn, 6055 gen_rtx_SYMBOL_REF (SImode, "$$remI")); 6056 } 6057 } 6058 6059 void 6060 pa_output_arg_descriptor (rtx_insn *call_insn) 6061 { 6062 const char *arg_regs[4]; 6063 machine_mode arg_mode; 6064 rtx link; 6065 int i, output_flag = 0; 6066 int regno; 6067 6068 /* We neither need nor want argument location descriptors for the 6069 64bit runtime environment or the ELF32 environment. */ 6070 if (TARGET_64BIT || TARGET_ELF32) 6071 return; 6072 6073 for (i = 0; i < 4; i++) 6074 arg_regs[i] = 0; 6075 6076 /* Specify explicitly that no argument relocations should take place 6077 if using the portable runtime calling conventions. */ 6078 if (TARGET_PORTABLE_RUNTIME) 6079 { 6080 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n", 6081 asm_out_file); 6082 return; 6083 } 6084 6085 gcc_assert (CALL_P (call_insn)); 6086 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); 6087 link; link = XEXP (link, 1)) 6088 { 6089 rtx use = XEXP (link, 0); 6090 6091 if (! (GET_CODE (use) == USE 6092 && GET_CODE (XEXP (use, 0)) == REG 6093 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) 6094 continue; 6095 6096 arg_mode = GET_MODE (XEXP (use, 0)); 6097 regno = REGNO (XEXP (use, 0)); 6098 if (regno >= 23 && regno <= 26) 6099 { 6100 arg_regs[26 - regno] = "GR"; 6101 if (arg_mode == DImode) 6102 arg_regs[25 - regno] = "GR"; 6103 } 6104 else if (regno >= 32 && regno <= 39) 6105 { 6106 if (arg_mode == SFmode) 6107 arg_regs[(regno - 32) / 2] = "FR"; 6108 else 6109 { 6110 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED 6111 arg_regs[(regno - 34) / 2] = "FR"; 6112 arg_regs[(regno - 34) / 2 + 1] = "FU"; 6113 #else 6114 arg_regs[(regno - 34) / 2] = "FU"; 6115 arg_regs[(regno - 34) / 2 + 1] = "FR"; 6116 #endif 6117 } 6118 } 6119 } 6120 fputs ("\t.CALL ", asm_out_file); 6121 for (i = 0; i < 4; i++) 6122 { 6123 if (arg_regs[i]) 6124 { 6125 if (output_flag++) 6126 fputc (',', asm_out_file); 6127 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]); 6128 } 6129 } 6130 fputc ('\n', asm_out_file); 6131 } 6132 6133 /* Inform reload about cases where moving X with a mode MODE to or from 6134 a register in RCLASS requires an extra scratch or immediate register. 6135 Return the class needed for the immediate register. */ 6136 6137 static reg_class_t 6138 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, 6139 machine_mode mode, secondary_reload_info *sri) 6140 { 6141 int regno; 6142 enum reg_class rclass = (enum reg_class) rclass_i; 6143 6144 /* Handle the easy stuff first. */ 6145 if (rclass == R1_REGS) 6146 return NO_REGS; 6147 6148 if (REG_P (x)) 6149 { 6150 regno = REGNO (x); 6151 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER) 6152 return NO_REGS; 6153 } 6154 else 6155 regno = -1; 6156 6157 /* If we have something like (mem (mem (...)), we can safely assume the 6158 inner MEM will end up in a general register after reloading, so there's 6159 no need for a secondary reload. */ 6160 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM) 6161 return NO_REGS; 6162 6163 /* Trying to load a constant into a FP register during PIC code 6164 generation requires %r1 as a scratch register. For float modes, 6165 the only legitimate constant is CONST0_RTX. However, there are 6166 a few patterns that accept constant double operands. */ 6167 if (flag_pic 6168 && FP_REG_CLASS_P (rclass) 6169 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)) 6170 { 6171 switch (mode) 6172 { 6173 case E_SImode: 6174 sri->icode = CODE_FOR_reload_insi_r1; 6175 break; 6176 6177 case E_DImode: 6178 sri->icode = CODE_FOR_reload_indi_r1; 6179 break; 6180 6181 case E_SFmode: 6182 sri->icode = CODE_FOR_reload_insf_r1; 6183 break; 6184 6185 case E_DFmode: 6186 sri->icode = CODE_FOR_reload_indf_r1; 6187 break; 6188 6189 default: 6190 gcc_unreachable (); 6191 } 6192 return NO_REGS; 6193 } 6194 6195 /* Secondary reloads of symbolic expressions require %r1 as a scratch 6196 register when we're generating PIC code or when the operand isn't 6197 readonly. */ 6198 if (pa_symbolic_expression_p (x)) 6199 { 6200 if (GET_CODE (x) == HIGH) 6201 x = XEXP (x, 0); 6202 6203 if (flag_pic || !read_only_operand (x, VOIDmode)) 6204 { 6205 switch (mode) 6206 { 6207 case E_SImode: 6208 sri->icode = CODE_FOR_reload_insi_r1; 6209 break; 6210 6211 case E_DImode: 6212 sri->icode = CODE_FOR_reload_indi_r1; 6213 break; 6214 6215 default: 6216 gcc_unreachable (); 6217 } 6218 return NO_REGS; 6219 } 6220 } 6221 6222 /* Profiling showed the PA port spends about 1.3% of its compilation 6223 time in true_regnum from calls inside pa_secondary_reload_class. */ 6224 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG) 6225 regno = true_regnum (x); 6226 6227 /* Handle reloads for floating point loads and stores. */ 6228 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1) 6229 && FP_REG_CLASS_P (rclass)) 6230 { 6231 if (MEM_P (x)) 6232 { 6233 x = XEXP (x, 0); 6234 6235 /* We don't need a secondary reload for indexed memory addresses. 6236 6237 When INT14_OK_STRICT is true, it might appear that we could 6238 directly allow register indirect memory addresses. However, 6239 this doesn't work because we don't support SUBREGs in 6240 floating-point register copies and reload doesn't tell us 6241 when it's going to use a SUBREG. */ 6242 if (IS_INDEX_ADDR_P (x)) 6243 return NO_REGS; 6244 } 6245 6246 /* Request a secondary reload with a general scratch register 6247 for everything else. ??? Could symbolic operands be handled 6248 directly when generating non-pic PA 2.0 code? */ 6249 sri->icode = (in_p 6250 ? direct_optab_handler (reload_in_optab, mode) 6251 : direct_optab_handler (reload_out_optab, mode)); 6252 return NO_REGS; 6253 } 6254 6255 /* A SAR<->FP register copy requires an intermediate general register 6256 and secondary memory. We need a secondary reload with a general 6257 scratch register for spills. */ 6258 if (rclass == SHIFT_REGS) 6259 { 6260 /* Handle spill. */ 6261 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0) 6262 { 6263 sri->icode = (in_p 6264 ? direct_optab_handler (reload_in_optab, mode) 6265 : direct_optab_handler (reload_out_optab, mode)); 6266 return NO_REGS; 6267 } 6268 6269 /* Handle FP copy. */ 6270 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno))) 6271 return GENERAL_REGS; 6272 } 6273 6274 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER 6275 && REGNO_REG_CLASS (regno) == SHIFT_REGS 6276 && FP_REG_CLASS_P (rclass)) 6277 return GENERAL_REGS; 6278 6279 return NO_REGS; 6280 } 6281 6282 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */ 6283 6284 static bool 6285 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED, 6286 reg_class_t class1 ATTRIBUTE_UNUSED, 6287 reg_class_t class2 ATTRIBUTE_UNUSED) 6288 { 6289 #ifdef PA_SECONDARY_MEMORY_NEEDED 6290 return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2); 6291 #else 6292 return false; 6293 #endif 6294 } 6295 6296 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer 6297 is only marked as live on entry by df-scan when it is a fixed 6298 register. It isn't a fixed register in the 64-bit runtime, 6299 so we need to mark it here. */ 6300 6301 static void 6302 pa_extra_live_on_entry (bitmap regs) 6303 { 6304 if (TARGET_64BIT) 6305 bitmap_set_bit (regs, ARG_POINTER_REGNUM); 6306 } 6307 6308 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile 6309 to prevent it from being deleted. */ 6310 6311 rtx 6312 pa_eh_return_handler_rtx (void) 6313 { 6314 rtx tmp; 6315 6316 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx, 6317 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20)); 6318 tmp = gen_rtx_MEM (word_mode, tmp); 6319 tmp->volatil = 1; 6320 return tmp; 6321 } 6322 6323 /* In the 32-bit runtime, arguments larger than eight bytes are passed 6324 by invisible reference. As a GCC extension, we also pass anything 6325 with a zero or variable size by reference. 6326 6327 The 64-bit runtime does not describe passing any types by invisible 6328 reference. The internals of GCC can't currently handle passing 6329 empty structures, and zero or variable length arrays when they are 6330 not passed entirely on the stack or by reference. Thus, as a GCC 6331 extension, we pass these types by reference. The HP compiler doesn't 6332 support these types, so hopefully there shouldn't be any compatibility 6333 issues. This may have to be revisited when HP releases a C99 compiler 6334 or updates the ABI. */ 6335 6336 static bool 6337 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg) 6338 { 6339 HOST_WIDE_INT size = arg.type_size_in_bytes (); 6340 if (TARGET_64BIT) 6341 return size <= 0; 6342 else 6343 return size <= 0 || size > 8; 6344 } 6345 6346 /* Implement TARGET_FUNCTION_ARG_PADDING. */ 6347 6348 static pad_direction 6349 pa_function_arg_padding (machine_mode mode, const_tree type) 6350 { 6351 if (mode == BLKmode 6352 || (TARGET_64BIT 6353 && type 6354 && (AGGREGATE_TYPE_P (type) 6355 || TREE_CODE (type) == COMPLEX_TYPE 6356 || TREE_CODE (type) == VECTOR_TYPE))) 6357 { 6358 /* Return PAD_NONE if justification is not required. */ 6359 if (type 6360 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 6361 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0) 6362 return PAD_NONE; 6363 6364 /* The directions set here are ignored when a BLKmode argument larger 6365 than a word is placed in a register. Different code is used for 6366 the stack and registers. This makes it difficult to have a 6367 consistent data representation for both the stack and registers. 6368 For both runtimes, the justification and padding for arguments on 6369 the stack and in registers should be identical. */ 6370 if (TARGET_64BIT) 6371 /* The 64-bit runtime specifies left justification for aggregates. */ 6372 return PAD_UPWARD; 6373 else 6374 /* The 32-bit runtime architecture specifies right justification. 6375 When the argument is passed on the stack, the argument is padded 6376 with garbage on the left. The HP compiler pads with zeros. */ 6377 return PAD_DOWNWARD; 6378 } 6379 6380 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY) 6381 return PAD_DOWNWARD; 6382 else 6383 return PAD_NONE; 6384 } 6385 6386 6387 /* Do what is necessary for `va_start'. We look at the current function 6388 to determine if stdargs or varargs is used and fill in an initial 6389 va_list. A pointer to this constructor is returned. */ 6390 6391 static rtx 6392 hppa_builtin_saveregs (void) 6393 { 6394 rtx offset, dest; 6395 tree fntype = TREE_TYPE (current_function_decl); 6396 int argadj = ((!stdarg_p (fntype)) 6397 ? UNITS_PER_WORD : 0); 6398 6399 if (argadj) 6400 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj); 6401 else 6402 offset = crtl->args.arg_offset_rtx; 6403 6404 if (TARGET_64BIT) 6405 { 6406 int i, off; 6407 6408 /* Adjust for varargs/stdarg differences. */ 6409 if (argadj) 6410 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj); 6411 else 6412 offset = crtl->args.arg_offset_rtx; 6413 6414 /* We need to save %r26 .. %r19 inclusive starting at offset -64 6415 from the incoming arg pointer and growing to larger addresses. */ 6416 for (i = 26, off = -64; i >= 19; i--, off += 8) 6417 emit_move_insn (gen_rtx_MEM (word_mode, 6418 plus_constant (Pmode, 6419 arg_pointer_rtx, off)), 6420 gen_rtx_REG (word_mode, i)); 6421 6422 /* The incoming args pointer points just beyond the flushback area; 6423 normally this is not a serious concern. However, when we are doing 6424 varargs/stdargs we want to make the arg pointer point to the start 6425 of the incoming argument area. */ 6426 emit_move_insn (virtual_incoming_args_rtx, 6427 plus_constant (Pmode, arg_pointer_rtx, -64)); 6428 6429 /* Now return a pointer to the first anonymous argument. */ 6430 return copy_to_reg (expand_binop (Pmode, add_optab, 6431 virtual_incoming_args_rtx, 6432 offset, 0, 0, OPTAB_LIB_WIDEN)); 6433 } 6434 6435 /* Store general registers on the stack. */ 6436 dest = gen_rtx_MEM (BLKmode, 6437 plus_constant (Pmode, crtl->args.internal_arg_pointer, 6438 -16)); 6439 set_mem_alias_set (dest, get_varargs_alias_set ()); 6440 set_mem_align (dest, BITS_PER_WORD); 6441 move_block_from_reg (23, dest, 4); 6442 6443 /* move_block_from_reg will emit code to store the argument registers 6444 individually as scalar stores. 6445 6446 However, other insns may later load from the same addresses for 6447 a structure load (passing a struct to a varargs routine). 6448 6449 The alias code assumes that such aliasing can never happen, so we 6450 have to keep memory referencing insns from moving up beyond the 6451 last argument register store. So we emit a blockage insn here. */ 6452 emit_insn (gen_blockage ()); 6453 6454 return copy_to_reg (expand_binop (Pmode, add_optab, 6455 crtl->args.internal_arg_pointer, 6456 offset, 0, 0, OPTAB_LIB_WIDEN)); 6457 } 6458 6459 static void 6460 hppa_va_start (tree valist, rtx nextarg) 6461 { 6462 nextarg = expand_builtin_saveregs (); 6463 std_expand_builtin_va_start (valist, nextarg); 6464 } 6465 6466 static tree 6467 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, 6468 gimple_seq *post_p) 6469 { 6470 if (TARGET_64BIT) 6471 { 6472 /* Args grow upward. We can use the generic routines. */ 6473 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 6474 } 6475 else /* !TARGET_64BIT */ 6476 { 6477 tree ptr = build_pointer_type (type); 6478 tree valist_type; 6479 tree t, u; 6480 unsigned int size, ofs; 6481 bool indirect; 6482 6483 indirect = pass_va_arg_by_reference (type); 6484 if (indirect) 6485 { 6486 type = ptr; 6487 ptr = build_pointer_type (type); 6488 } 6489 size = int_size_in_bytes (type); 6490 valist_type = TREE_TYPE (valist); 6491 6492 /* Args grow down. Not handled by generic routines. */ 6493 6494 u = fold_convert (sizetype, size_in_bytes (type)); 6495 u = fold_build1 (NEGATE_EXPR, sizetype, u); 6496 t = fold_build_pointer_plus (valist, u); 6497 6498 /* Align to 4 or 8 byte boundary depending on argument size. */ 6499 6500 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4)); 6501 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u); 6502 t = fold_convert (valist_type, t); 6503 6504 t = build2 (MODIFY_EXPR, valist_type, valist, t); 6505 6506 ofs = (8 - size) % 4; 6507 if (ofs != 0) 6508 t = fold_build_pointer_plus_hwi (t, ofs); 6509 6510 t = fold_convert (ptr, t); 6511 t = build_va_arg_indirect_ref (t); 6512 6513 if (indirect) 6514 t = build_va_arg_indirect_ref (t); 6515 6516 return t; 6517 } 6518 } 6519 6520 /* True if MODE is valid for the target. By "valid", we mean able to 6521 be manipulated in non-trivial ways. In particular, this means all 6522 the arithmetic is supported. 6523 6524 Currently, TImode is not valid as the HP 64-bit runtime documentation 6525 doesn't document the alignment and calling conventions for this type. 6526 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and 6527 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */ 6528 6529 static bool 6530 pa_scalar_mode_supported_p (scalar_mode mode) 6531 { 6532 int precision = GET_MODE_PRECISION (mode); 6533 6534 switch (GET_MODE_CLASS (mode)) 6535 { 6536 case MODE_PARTIAL_INT: 6537 case MODE_INT: 6538 if (precision == CHAR_TYPE_SIZE) 6539 return true; 6540 if (precision == SHORT_TYPE_SIZE) 6541 return true; 6542 if (precision == INT_TYPE_SIZE) 6543 return true; 6544 if (precision == LONG_TYPE_SIZE) 6545 return true; 6546 if (precision == LONG_LONG_TYPE_SIZE) 6547 return true; 6548 return false; 6549 6550 case MODE_FLOAT: 6551 if (precision == FLOAT_TYPE_SIZE) 6552 return true; 6553 if (precision == DOUBLE_TYPE_SIZE) 6554 return true; 6555 if (precision == LONG_DOUBLE_TYPE_SIZE) 6556 return true; 6557 return false; 6558 6559 case MODE_DECIMAL_FLOAT: 6560 return false; 6561 6562 default: 6563 gcc_unreachable (); 6564 } 6565 } 6566 6567 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and 6568 it branches into the delay slot. Otherwise, return FALSE. */ 6569 6570 static bool 6571 branch_to_delay_slot_p (rtx_insn *insn) 6572 { 6573 rtx_insn *jump_insn; 6574 6575 if (dbr_sequence_length ()) 6576 return FALSE; 6577 6578 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn)); 6579 while (insn) 6580 { 6581 insn = next_active_insn (insn); 6582 if (jump_insn == insn) 6583 return TRUE; 6584 6585 /* We can't rely on the length of asms. So, we return FALSE when 6586 the branch is followed by an asm. */ 6587 if (!insn 6588 || GET_CODE (PATTERN (insn)) == ASM_INPUT 6589 || asm_noperands (PATTERN (insn)) >= 0 6590 || get_attr_length (insn) > 0) 6591 break; 6592 } 6593 6594 return FALSE; 6595 } 6596 6597 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot. 6598 6599 This occurs when INSN has an unfilled delay slot and is followed 6600 by an asm. Disaster can occur if the asm is empty and the jump 6601 branches into the delay slot. So, we add a nop in the delay slot 6602 when this occurs. */ 6603 6604 static bool 6605 branch_needs_nop_p (rtx_insn *insn) 6606 { 6607 rtx_insn *jump_insn; 6608 6609 if (dbr_sequence_length ()) 6610 return FALSE; 6611 6612 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn)); 6613 while (insn) 6614 { 6615 insn = next_active_insn (insn); 6616 if (!insn || jump_insn == insn) 6617 return TRUE; 6618 6619 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT 6620 || asm_noperands (PATTERN (insn)) >= 0) 6621 && get_attr_length (insn) > 0) 6622 break; 6623 } 6624 6625 return FALSE; 6626 } 6627 6628 /* Return TRUE if INSN, a forward jump insn, can use nullification 6629 to skip the following instruction. This avoids an extra cycle due 6630 to a mis-predicted branch when we fall through. */ 6631 6632 static bool 6633 use_skip_p (rtx_insn *insn) 6634 { 6635 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn)); 6636 6637 while (insn) 6638 { 6639 insn = next_active_insn (insn); 6640 6641 /* We can't rely on the length of asms, so we can't skip asms. */ 6642 if (!insn 6643 || GET_CODE (PATTERN (insn)) == ASM_INPUT 6644 || asm_noperands (PATTERN (insn)) >= 0) 6645 break; 6646 if (get_attr_length (insn) == 4 6647 && jump_insn == next_active_insn (insn)) 6648 return TRUE; 6649 if (get_attr_length (insn) > 0) 6650 break; 6651 } 6652 6653 return FALSE; 6654 } 6655 6656 /* This routine handles all the normal conditional branch sequences we 6657 might need to generate. It handles compare immediate vs compare 6658 register, nullification of delay slots, varying length branches, 6659 negated branches, and all combinations of the above. It returns the 6660 output appropriate to emit the branch corresponding to all given 6661 parameters. */ 6662 6663 const char * 6664 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn) 6665 { 6666 static char buf[100]; 6667 bool useskip; 6668 int nullify = INSN_ANNULLED_BRANCH_P (insn); 6669 int length = get_attr_length (insn); 6670 int xdelay; 6671 6672 /* A conditional branch to the following instruction (e.g. the delay slot) 6673 is asking for a disaster. This can happen when not optimizing and 6674 when jump optimization fails. 6675 6676 While it is usually safe to emit nothing, this can fail if the 6677 preceding instruction is a nullified branch with an empty delay 6678 slot and the same branch target as this branch. We could check 6679 for this but jump optimization should eliminate nop jumps. It 6680 is always safe to emit a nop. */ 6681 if (branch_to_delay_slot_p (insn)) 6682 return "nop"; 6683 6684 /* The doubleword form of the cmpib instruction doesn't have the LEU 6685 and GTU conditions while the cmpb instruction does. Since we accept 6686 zero for cmpb, we must ensure that we use cmpb for the comparison. */ 6687 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx) 6688 operands[2] = gen_rtx_REG (DImode, 0); 6689 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx) 6690 operands[1] = gen_rtx_REG (DImode, 0); 6691 6692 /* If this is a long branch with its delay slot unfilled, set `nullify' 6693 as it can nullify the delay slot and save a nop. */ 6694 if (length == 8 && dbr_sequence_length () == 0) 6695 nullify = 1; 6696 6697 /* If this is a short forward conditional branch which did not get 6698 its delay slot filled, the delay slot can still be nullified. */ 6699 if (! nullify && length == 4 && dbr_sequence_length () == 0) 6700 nullify = forward_branch_p (insn); 6701 6702 /* A forward branch over a single nullified insn can be done with a 6703 comclr instruction. This avoids a single cycle penalty due to 6704 mis-predicted branch if we fall through (branch not taken). */ 6705 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; 6706 6707 switch (length) 6708 { 6709 /* All short conditional branches except backwards with an unfilled 6710 delay slot. */ 6711 case 4: 6712 if (useskip) 6713 strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); 6714 else 6715 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6716 if (GET_MODE (operands[1]) == DImode) 6717 strcat (buf, "*"); 6718 if (negated) 6719 strcat (buf, "%B3"); 6720 else 6721 strcat (buf, "%S3"); 6722 if (useskip) 6723 strcat (buf, " %2,%r1,%%r0"); 6724 else if (nullify) 6725 { 6726 if (branch_needs_nop_p (insn)) 6727 strcat (buf, ",n %2,%r1,%0%#"); 6728 else 6729 strcat (buf, ",n %2,%r1,%0"); 6730 } 6731 else 6732 strcat (buf, " %2,%r1,%0"); 6733 break; 6734 6735 /* All long conditionals. Note a short backward branch with an 6736 unfilled delay slot is treated just like a long backward branch 6737 with an unfilled delay slot. */ 6738 case 8: 6739 /* Handle weird backwards branch with a filled delay slot 6740 which is nullified. */ 6741 if (dbr_sequence_length () != 0 6742 && ! forward_branch_p (insn) 6743 && nullify) 6744 { 6745 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6746 if (GET_MODE (operands[1]) == DImode) 6747 strcat (buf, "*"); 6748 if (negated) 6749 strcat (buf, "%S3"); 6750 else 6751 strcat (buf, "%B3"); 6752 strcat (buf, ",n %2,%r1,.+12\n\tb %0"); 6753 } 6754 /* Handle short backwards branch with an unfilled delay slot. 6755 Using a comb;nop rather than comiclr;bl saves 1 cycle for both 6756 taken and untaken branches. */ 6757 else if (dbr_sequence_length () == 0 6758 && ! forward_branch_p (insn) 6759 && INSN_ADDRESSES_SET_P () 6760 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 6761 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 6762 { 6763 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6764 if (GET_MODE (operands[1]) == DImode) 6765 strcat (buf, "*"); 6766 if (negated) 6767 strcat (buf, "%B3 %2,%r1,%0%#"); 6768 else 6769 strcat (buf, "%S3 %2,%r1,%0%#"); 6770 } 6771 else 6772 { 6773 strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); 6774 if (GET_MODE (operands[1]) == DImode) 6775 strcat (buf, "*"); 6776 if (negated) 6777 strcat (buf, "%S3"); 6778 else 6779 strcat (buf, "%B3"); 6780 if (nullify) 6781 strcat (buf, " %2,%r1,%%r0\n\tb,n %0"); 6782 else 6783 strcat (buf, " %2,%r1,%%r0\n\tb %0"); 6784 } 6785 break; 6786 6787 default: 6788 /* The reversed conditional branch must branch over one additional 6789 instruction if the delay slot is filled and needs to be extracted 6790 by pa_output_lbranch. If the delay slot is empty or this is a 6791 nullified forward branch, the instruction after the reversed 6792 condition branch must be nullified. */ 6793 if (dbr_sequence_length () == 0 6794 || (nullify && forward_branch_p (insn))) 6795 { 6796 nullify = 1; 6797 xdelay = 0; 6798 operands[4] = GEN_INT (length); 6799 } 6800 else 6801 { 6802 xdelay = 1; 6803 operands[4] = GEN_INT (length + 4); 6804 } 6805 6806 /* Create a reversed conditional branch which branches around 6807 the following insns. */ 6808 if (GET_MODE (operands[1]) != DImode) 6809 { 6810 if (nullify) 6811 { 6812 if (negated) 6813 strcpy (buf, 6814 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}"); 6815 else 6816 strcpy (buf, 6817 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}"); 6818 } 6819 else 6820 { 6821 if (negated) 6822 strcpy (buf, 6823 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}"); 6824 else 6825 strcpy (buf, 6826 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}"); 6827 } 6828 } 6829 else 6830 { 6831 if (nullify) 6832 { 6833 if (negated) 6834 strcpy (buf, 6835 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}"); 6836 else 6837 strcpy (buf, 6838 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}"); 6839 } 6840 else 6841 { 6842 if (negated) 6843 strcpy (buf, 6844 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}"); 6845 else 6846 strcpy (buf, 6847 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}"); 6848 } 6849 } 6850 6851 output_asm_insn (buf, operands); 6852 return pa_output_lbranch (operands[0], insn, xdelay); 6853 } 6854 return buf; 6855 } 6856 6857 /* Output a PIC pc-relative instruction sequence to load the address of 6858 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref 6859 or a code label. OPERANDS[1] specifies the register to use to load 6860 the program counter. OPERANDS[3] may be used for label generation 6861 The sequence is always three instructions in length. The program 6862 counter recorded for PA 1.X is eight bytes more than that for PA 2.0. 6863 Register %r1 is clobbered. */ 6864 6865 static void 6866 pa_output_pic_pcrel_sequence (rtx *operands) 6867 { 6868 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0])); 6869 if (TARGET_PA_20) 6870 { 6871 /* We can use mfia to determine the current program counter. */ 6872 if (TARGET_SOM || !TARGET_GAS) 6873 { 6874 operands[3] = gen_label_rtx (); 6875 targetm.asm_out.internal_label (asm_out_file, "L", 6876 CODE_LABEL_NUMBER (operands[3])); 6877 output_asm_insn ("mfia %1", operands); 6878 output_asm_insn ("addil L'%0-%l3,%1", operands); 6879 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands); 6880 } 6881 else 6882 { 6883 output_asm_insn ("mfia %1", operands); 6884 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands); 6885 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands); 6886 } 6887 } 6888 else 6889 { 6890 /* We need to use a branch to determine the current program counter. */ 6891 output_asm_insn ("{bl|b,l} .+8,%1", operands); 6892 if (TARGET_SOM || !TARGET_GAS) 6893 { 6894 operands[3] = gen_label_rtx (); 6895 output_asm_insn ("addil L'%0-%l3,%1", operands); 6896 targetm.asm_out.internal_label (asm_out_file, "L", 6897 CODE_LABEL_NUMBER (operands[3])); 6898 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands); 6899 } 6900 else 6901 { 6902 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands); 6903 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands); 6904 } 6905 } 6906 } 6907 6908 /* This routine handles output of long unconditional branches that 6909 exceed the maximum range of a simple branch instruction. Since 6910 we don't have a register available for the branch, we save register 6911 %r1 in the frame marker, load the branch destination DEST into %r1, 6912 execute the branch, and restore %r1 in the delay slot of the branch. 6913 6914 Since long branches may have an insn in the delay slot and the 6915 delay slot is used to restore %r1, we in general need to extract 6916 this insn and execute it before the branch. However, to facilitate 6917 use of this function by conditional branches, we also provide an 6918 option to not extract the delay insn so that it will be emitted 6919 after the long branch. So, if there is an insn in the delay slot, 6920 it is extracted if XDELAY is nonzero. 6921 6922 The lengths of the various long-branch sequences are 20, 16 and 24 6923 bytes for the portable runtime, non-PIC and PIC cases, respectively. */ 6924 6925 const char * 6926 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay) 6927 { 6928 rtx xoperands[4]; 6929 6930 xoperands[0] = dest; 6931 6932 /* First, free up the delay slot. */ 6933 if (xdelay && dbr_sequence_length () != 0) 6934 { 6935 /* We can't handle a jump in the delay slot. */ 6936 gcc_assert (! JUMP_P (NEXT_INSN (insn))); 6937 6938 final_scan_insn (NEXT_INSN (insn), asm_out_file, 6939 optimize, 0, NULL); 6940 6941 /* Now delete the delay insn. */ 6942 SET_INSN_DELETED (NEXT_INSN (insn)); 6943 } 6944 6945 /* Output an insn to save %r1. The runtime documentation doesn't 6946 specify whether the "Clean Up" slot in the callers frame can 6947 be clobbered by the callee. It isn't copied by HP's builtin 6948 alloca, so this suggests that it can be clobbered if necessary. 6949 The "Static Link" location is copied by HP builtin alloca, so 6950 we avoid using it. Using the cleanup slot might be a problem 6951 if we have to interoperate with languages that pass cleanup 6952 information. However, it should be possible to handle these 6953 situations with GCC's asm feature. 6954 6955 The "Current RP" slot is reserved for the called procedure, so 6956 we try to use it when we don't have a frame of our own. It's 6957 rather unlikely that we won't have a frame when we need to emit 6958 a very long branch. 6959 6960 Really the way to go long term is a register scavenger; goto 6961 the target of the jump and find a register which we can use 6962 as a scratch to hold the value in %r1. Then, we wouldn't have 6963 to free up the delay slot or clobber a slot that may be needed 6964 for other purposes. */ 6965 if (TARGET_64BIT) 6966 { 6967 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 6968 /* Use the return pointer slot in the frame marker. */ 6969 output_asm_insn ("std %%r1,-16(%%r30)", xoperands); 6970 else 6971 /* Use the slot at -40 in the frame marker since HP builtin 6972 alloca doesn't copy it. */ 6973 output_asm_insn ("std %%r1,-40(%%r30)", xoperands); 6974 } 6975 else 6976 { 6977 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 6978 /* Use the return pointer slot in the frame marker. */ 6979 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands); 6980 else 6981 /* Use the "Clean Up" slot in the frame marker. In GCC, 6982 the only other use of this location is for copying a 6983 floating point double argument from a floating-point 6984 register to two general registers. The copy is done 6985 as an "atomic" operation when outputting a call, so it 6986 won't interfere with our using the location here. */ 6987 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands); 6988 } 6989 6990 if (TARGET_PORTABLE_RUNTIME) 6991 { 6992 output_asm_insn ("ldil L'%0,%%r1", xoperands); 6993 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands); 6994 output_asm_insn ("bv %%r0(%%r1)", xoperands); 6995 } 6996 else if (flag_pic) 6997 { 6998 xoperands[1] = gen_rtx_REG (Pmode, 1); 6999 xoperands[2] = xoperands[1]; 7000 pa_output_pic_pcrel_sequence (xoperands); 7001 output_asm_insn ("bv %%r0(%%r1)", xoperands); 7002 } 7003 else 7004 /* Now output a very long branch to the original target. */ 7005 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands); 7006 7007 /* Now restore the value of %r1 in the delay slot. */ 7008 if (TARGET_64BIT) 7009 { 7010 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 7011 return "ldd -16(%%r30),%%r1"; 7012 else 7013 return "ldd -40(%%r30),%%r1"; 7014 } 7015 else 7016 { 7017 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 7018 return "ldw -20(%%r30),%%r1"; 7019 else 7020 return "ldw -12(%%r30),%%r1"; 7021 } 7022 } 7023 7024 /* This routine handles all the branch-on-bit conditional branch sequences we 7025 might need to generate. It handles nullification of delay slots, 7026 varying length branches, negated branches and all combinations of the 7027 above. it returns the appropriate output template to emit the branch. */ 7028 7029 const char * 7030 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which) 7031 { 7032 static char buf[100]; 7033 bool useskip; 7034 int nullify = INSN_ANNULLED_BRANCH_P (insn); 7035 int length = get_attr_length (insn); 7036 int xdelay; 7037 7038 /* A conditional branch to the following instruction (e.g. the delay slot) is 7039 asking for a disaster. I do not think this can happen as this pattern 7040 is only used when optimizing; jump optimization should eliminate the 7041 jump. But be prepared just in case. */ 7042 7043 if (branch_to_delay_slot_p (insn)) 7044 return "nop"; 7045 7046 /* If this is a long branch with its delay slot unfilled, set `nullify' 7047 as it can nullify the delay slot and save a nop. */ 7048 if (length == 8 && dbr_sequence_length () == 0) 7049 nullify = 1; 7050 7051 /* If this is a short forward conditional branch which did not get 7052 its delay slot filled, the delay slot can still be nullified. */ 7053 if (! nullify && length == 4 && dbr_sequence_length () == 0) 7054 nullify = forward_branch_p (insn); 7055 7056 /* A forward branch over a single nullified insn can be done with a 7057 extrs instruction. This avoids a single cycle penalty due to 7058 mis-predicted branch if we fall through (branch not taken). */ 7059 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; 7060 7061 switch (length) 7062 { 7063 7064 /* All short conditional branches except backwards with an unfilled 7065 delay slot. */ 7066 case 4: 7067 if (useskip) 7068 strcpy (buf, "{extrs,|extrw,s,}"); 7069 else 7070 strcpy (buf, "bb,"); 7071 if (useskip && GET_MODE (operands[0]) == DImode) 7072 strcpy (buf, "extrd,s,*"); 7073 else if (GET_MODE (operands[0]) == DImode) 7074 strcpy (buf, "bb,*"); 7075 if ((which == 0 && negated) 7076 || (which == 1 && ! negated)) 7077 strcat (buf, ">="); 7078 else 7079 strcat (buf, "<"); 7080 if (useskip) 7081 strcat (buf, " %0,%1,1,%%r0"); 7082 else if (nullify && negated) 7083 { 7084 if (branch_needs_nop_p (insn)) 7085 strcat (buf, ",n %0,%1,%3%#"); 7086 else 7087 strcat (buf, ",n %0,%1,%3"); 7088 } 7089 else if (nullify && ! negated) 7090 { 7091 if (branch_needs_nop_p (insn)) 7092 strcat (buf, ",n %0,%1,%2%#"); 7093 else 7094 strcat (buf, ",n %0,%1,%2"); 7095 } 7096 else if (! nullify && negated) 7097 strcat (buf, " %0,%1,%3"); 7098 else if (! nullify && ! negated) 7099 strcat (buf, " %0,%1,%2"); 7100 break; 7101 7102 /* All long conditionals. Note a short backward branch with an 7103 unfilled delay slot is treated just like a long backward branch 7104 with an unfilled delay slot. */ 7105 case 8: 7106 /* Handle weird backwards branch with a filled delay slot 7107 which is nullified. */ 7108 if (dbr_sequence_length () != 0 7109 && ! forward_branch_p (insn) 7110 && nullify) 7111 { 7112 strcpy (buf, "bb,"); 7113 if (GET_MODE (operands[0]) == DImode) 7114 strcat (buf, "*"); 7115 if ((which == 0 && negated) 7116 || (which == 1 && ! negated)) 7117 strcat (buf, "<"); 7118 else 7119 strcat (buf, ">="); 7120 if (negated) 7121 strcat (buf, ",n %0,%1,.+12\n\tb %3"); 7122 else 7123 strcat (buf, ",n %0,%1,.+12\n\tb %2"); 7124 } 7125 /* Handle short backwards branch with an unfilled delay slot. 7126 Using a bb;nop rather than extrs;bl saves 1 cycle for both 7127 taken and untaken branches. */ 7128 else if (dbr_sequence_length () == 0 7129 && ! forward_branch_p (insn) 7130 && INSN_ADDRESSES_SET_P () 7131 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7132 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7133 { 7134 strcpy (buf, "bb,"); 7135 if (GET_MODE (operands[0]) == DImode) 7136 strcat (buf, "*"); 7137 if ((which == 0 && negated) 7138 || (which == 1 && ! negated)) 7139 strcat (buf, ">="); 7140 else 7141 strcat (buf, "<"); 7142 if (negated) 7143 strcat (buf, " %0,%1,%3%#"); 7144 else 7145 strcat (buf, " %0,%1,%2%#"); 7146 } 7147 else 7148 { 7149 if (GET_MODE (operands[0]) == DImode) 7150 strcpy (buf, "extrd,s,*"); 7151 else 7152 strcpy (buf, "{extrs,|extrw,s,}"); 7153 if ((which == 0 && negated) 7154 || (which == 1 && ! negated)) 7155 strcat (buf, "<"); 7156 else 7157 strcat (buf, ">="); 7158 if (nullify && negated) 7159 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3"); 7160 else if (nullify && ! negated) 7161 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2"); 7162 else if (negated) 7163 strcat (buf, " %0,%1,1,%%r0\n\tb %3"); 7164 else 7165 strcat (buf, " %0,%1,1,%%r0\n\tb %2"); 7166 } 7167 break; 7168 7169 default: 7170 /* The reversed conditional branch must branch over one additional 7171 instruction if the delay slot is filled and needs to be extracted 7172 by pa_output_lbranch. If the delay slot is empty or this is a 7173 nullified forward branch, the instruction after the reversed 7174 condition branch must be nullified. */ 7175 if (dbr_sequence_length () == 0 7176 || (nullify && forward_branch_p (insn))) 7177 { 7178 nullify = 1; 7179 xdelay = 0; 7180 operands[4] = GEN_INT (length); 7181 } 7182 else 7183 { 7184 xdelay = 1; 7185 operands[4] = GEN_INT (length + 4); 7186 } 7187 7188 if (GET_MODE (operands[0]) == DImode) 7189 strcpy (buf, "bb,*"); 7190 else 7191 strcpy (buf, "bb,"); 7192 if ((which == 0 && negated) 7193 || (which == 1 && !negated)) 7194 strcat (buf, "<"); 7195 else 7196 strcat (buf, ">="); 7197 if (nullify) 7198 strcat (buf, ",n %0,%1,.+%4"); 7199 else 7200 strcat (buf, " %0,%1,.+%4"); 7201 output_asm_insn (buf, operands); 7202 return pa_output_lbranch (negated ? operands[3] : operands[2], 7203 insn, xdelay); 7204 } 7205 return buf; 7206 } 7207 7208 /* This routine handles all the branch-on-variable-bit conditional branch 7209 sequences we might need to generate. It handles nullification of delay 7210 slots, varying length branches, negated branches and all combinations 7211 of the above. it returns the appropriate output template to emit the 7212 branch. */ 7213 7214 const char * 7215 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, 7216 int which) 7217 { 7218 static char buf[100]; 7219 bool useskip; 7220 int nullify = INSN_ANNULLED_BRANCH_P (insn); 7221 int length = get_attr_length (insn); 7222 int xdelay; 7223 7224 /* A conditional branch to the following instruction (e.g. the delay slot) is 7225 asking for a disaster. I do not think this can happen as this pattern 7226 is only used when optimizing; jump optimization should eliminate the 7227 jump. But be prepared just in case. */ 7228 7229 if (branch_to_delay_slot_p (insn)) 7230 return "nop"; 7231 7232 /* If this is a long branch with its delay slot unfilled, set `nullify' 7233 as it can nullify the delay slot and save a nop. */ 7234 if (length == 8 && dbr_sequence_length () == 0) 7235 nullify = 1; 7236 7237 /* If this is a short forward conditional branch which did not get 7238 its delay slot filled, the delay slot can still be nullified. */ 7239 if (! nullify && length == 4 && dbr_sequence_length () == 0) 7240 nullify = forward_branch_p (insn); 7241 7242 /* A forward branch over a single nullified insn can be done with a 7243 extrs instruction. This avoids a single cycle penalty due to 7244 mis-predicted branch if we fall through (branch not taken). */ 7245 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; 7246 7247 switch (length) 7248 { 7249 7250 /* All short conditional branches except backwards with an unfilled 7251 delay slot. */ 7252 case 4: 7253 if (useskip) 7254 strcpy (buf, "{vextrs,|extrw,s,}"); 7255 else 7256 strcpy (buf, "{bvb,|bb,}"); 7257 if (useskip && GET_MODE (operands[0]) == DImode) 7258 strcpy (buf, "extrd,s,*"); 7259 else if (GET_MODE (operands[0]) == DImode) 7260 strcpy (buf, "bb,*"); 7261 if ((which == 0 && negated) 7262 || (which == 1 && ! negated)) 7263 strcat (buf, ">="); 7264 else 7265 strcat (buf, "<"); 7266 if (useskip) 7267 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}"); 7268 else if (nullify && negated) 7269 { 7270 if (branch_needs_nop_p (insn)) 7271 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}"); 7272 else 7273 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}"); 7274 } 7275 else if (nullify && ! negated) 7276 { 7277 if (branch_needs_nop_p (insn)) 7278 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}"); 7279 else 7280 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}"); 7281 } 7282 else if (! nullify && negated) 7283 strcat (buf, "{ %0,%3| %0,%%sar,%3}"); 7284 else if (! nullify && ! negated) 7285 strcat (buf, "{ %0,%2| %0,%%sar,%2}"); 7286 break; 7287 7288 /* All long conditionals. Note a short backward branch with an 7289 unfilled delay slot is treated just like a long backward branch 7290 with an unfilled delay slot. */ 7291 case 8: 7292 /* Handle weird backwards branch with a filled delay slot 7293 which is nullified. */ 7294 if (dbr_sequence_length () != 0 7295 && ! forward_branch_p (insn) 7296 && nullify) 7297 { 7298 strcpy (buf, "{bvb,|bb,}"); 7299 if (GET_MODE (operands[0]) == DImode) 7300 strcat (buf, "*"); 7301 if ((which == 0 && negated) 7302 || (which == 1 && ! negated)) 7303 strcat (buf, "<"); 7304 else 7305 strcat (buf, ">="); 7306 if (negated) 7307 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}"); 7308 else 7309 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}"); 7310 } 7311 /* Handle short backwards branch with an unfilled delay slot. 7312 Using a bb;nop rather than extrs;bl saves 1 cycle for both 7313 taken and untaken branches. */ 7314 else if (dbr_sequence_length () == 0 7315 && ! forward_branch_p (insn) 7316 && INSN_ADDRESSES_SET_P () 7317 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7318 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7319 { 7320 strcpy (buf, "{bvb,|bb,}"); 7321 if (GET_MODE (operands[0]) == DImode) 7322 strcat (buf, "*"); 7323 if ((which == 0 && negated) 7324 || (which == 1 && ! negated)) 7325 strcat (buf, ">="); 7326 else 7327 strcat (buf, "<"); 7328 if (negated) 7329 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}"); 7330 else 7331 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}"); 7332 } 7333 else 7334 { 7335 strcpy (buf, "{vextrs,|extrw,s,}"); 7336 if (GET_MODE (operands[0]) == DImode) 7337 strcpy (buf, "extrd,s,*"); 7338 if ((which == 0 && negated) 7339 || (which == 1 && ! negated)) 7340 strcat (buf, "<"); 7341 else 7342 strcat (buf, ">="); 7343 if (nullify && negated) 7344 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}"); 7345 else if (nullify && ! negated) 7346 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}"); 7347 else if (negated) 7348 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}"); 7349 else 7350 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}"); 7351 } 7352 break; 7353 7354 default: 7355 /* The reversed conditional branch must branch over one additional 7356 instruction if the delay slot is filled and needs to be extracted 7357 by pa_output_lbranch. If the delay slot is empty or this is a 7358 nullified forward branch, the instruction after the reversed 7359 condition branch must be nullified. */ 7360 if (dbr_sequence_length () == 0 7361 || (nullify && forward_branch_p (insn))) 7362 { 7363 nullify = 1; 7364 xdelay = 0; 7365 operands[4] = GEN_INT (length); 7366 } 7367 else 7368 { 7369 xdelay = 1; 7370 operands[4] = GEN_INT (length + 4); 7371 } 7372 7373 if (GET_MODE (operands[0]) == DImode) 7374 strcpy (buf, "bb,*"); 7375 else 7376 strcpy (buf, "{bvb,|bb,}"); 7377 if ((which == 0 && negated) 7378 || (which == 1 && !negated)) 7379 strcat (buf, "<"); 7380 else 7381 strcat (buf, ">="); 7382 if (nullify) 7383 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}"); 7384 else 7385 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}"); 7386 output_asm_insn (buf, operands); 7387 return pa_output_lbranch (negated ? operands[3] : operands[2], 7388 insn, xdelay); 7389 } 7390 return buf; 7391 } 7392 7393 /* Return the output template for emitting a dbra type insn. 7394 7395 Note it may perform some output operations on its own before 7396 returning the final output string. */ 7397 const char * 7398 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative) 7399 { 7400 int length = get_attr_length (insn); 7401 7402 /* A conditional branch to the following instruction (e.g. the delay slot) is 7403 asking for a disaster. Be prepared! */ 7404 7405 if (branch_to_delay_slot_p (insn)) 7406 { 7407 if (which_alternative == 0) 7408 return "ldo %1(%0),%0"; 7409 else if (which_alternative == 1) 7410 { 7411 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands); 7412 output_asm_insn ("ldw -16(%%r30),%4", operands); 7413 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands); 7414 return "{fldws|fldw} -16(%%r30),%0"; 7415 } 7416 else 7417 { 7418 output_asm_insn ("ldw %0,%4", operands); 7419 return "ldo %1(%4),%4\n\tstw %4,%0"; 7420 } 7421 } 7422 7423 if (which_alternative == 0) 7424 { 7425 int nullify = INSN_ANNULLED_BRANCH_P (insn); 7426 int xdelay; 7427 7428 /* If this is a long branch with its delay slot unfilled, set `nullify' 7429 as it can nullify the delay slot and save a nop. */ 7430 if (length == 8 && dbr_sequence_length () == 0) 7431 nullify = 1; 7432 7433 /* If this is a short forward conditional branch which did not get 7434 its delay slot filled, the delay slot can still be nullified. */ 7435 if (! nullify && length == 4 && dbr_sequence_length () == 0) 7436 nullify = forward_branch_p (insn); 7437 7438 switch (length) 7439 { 7440 case 4: 7441 if (nullify) 7442 { 7443 if (branch_needs_nop_p (insn)) 7444 return "addib,%C2,n %1,%0,%3%#"; 7445 else 7446 return "addib,%C2,n %1,%0,%3"; 7447 } 7448 else 7449 return "addib,%C2 %1,%0,%3"; 7450 7451 case 8: 7452 /* Handle weird backwards branch with a fulled delay slot 7453 which is nullified. */ 7454 if (dbr_sequence_length () != 0 7455 && ! forward_branch_p (insn) 7456 && nullify) 7457 return "addib,%N2,n %1,%0,.+12\n\tb %3"; 7458 /* Handle short backwards branch with an unfilled delay slot. 7459 Using a addb;nop rather than addi;bl saves 1 cycle for both 7460 taken and untaken branches. */ 7461 else if (dbr_sequence_length () == 0 7462 && ! forward_branch_p (insn) 7463 && INSN_ADDRESSES_SET_P () 7464 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7465 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7466 return "addib,%C2 %1,%0,%3%#"; 7467 7468 /* Handle normal cases. */ 7469 if (nullify) 7470 return "addi,%N2 %1,%0,%0\n\tb,n %3"; 7471 else 7472 return "addi,%N2 %1,%0,%0\n\tb %3"; 7473 7474 default: 7475 /* The reversed conditional branch must branch over one additional 7476 instruction if the delay slot is filled and needs to be extracted 7477 by pa_output_lbranch. If the delay slot is empty or this is a 7478 nullified forward branch, the instruction after the reversed 7479 condition branch must be nullified. */ 7480 if (dbr_sequence_length () == 0 7481 || (nullify && forward_branch_p (insn))) 7482 { 7483 nullify = 1; 7484 xdelay = 0; 7485 operands[4] = GEN_INT (length); 7486 } 7487 else 7488 { 7489 xdelay = 1; 7490 operands[4] = GEN_INT (length + 4); 7491 } 7492 7493 if (nullify) 7494 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands); 7495 else 7496 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands); 7497 7498 return pa_output_lbranch (operands[3], insn, xdelay); 7499 } 7500 7501 } 7502 /* Deal with gross reload from FP register case. */ 7503 else if (which_alternative == 1) 7504 { 7505 /* Move loop counter from FP register to MEM then into a GR, 7506 increment the GR, store the GR into MEM, and finally reload 7507 the FP register from MEM from within the branch's delay slot. */ 7508 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4", 7509 operands); 7510 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands); 7511 if (length == 24) 7512 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0"; 7513 else if (length == 28) 7514 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; 7515 else 7516 { 7517 operands[5] = GEN_INT (length - 16); 7518 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands); 7519 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); 7520 return pa_output_lbranch (operands[3], insn, 0); 7521 } 7522 } 7523 /* Deal with gross reload from memory case. */ 7524 else 7525 { 7526 /* Reload loop counter from memory, the store back to memory 7527 happens in the branch's delay slot. */ 7528 output_asm_insn ("ldw %0,%4", operands); 7529 if (length == 12) 7530 return "addib,%C2 %1,%4,%3\n\tstw %4,%0"; 7531 else if (length == 16) 7532 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0"; 7533 else 7534 { 7535 operands[5] = GEN_INT (length - 4); 7536 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands); 7537 return pa_output_lbranch (operands[3], insn, 0); 7538 } 7539 } 7540 } 7541 7542 /* Return the output template for emitting a movb type insn. 7543 7544 Note it may perform some output operations on its own before 7545 returning the final output string. */ 7546 const char * 7547 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative, 7548 int reverse_comparison) 7549 { 7550 int length = get_attr_length (insn); 7551 7552 /* A conditional branch to the following instruction (e.g. the delay slot) is 7553 asking for a disaster. Be prepared! */ 7554 7555 if (branch_to_delay_slot_p (insn)) 7556 { 7557 if (which_alternative == 0) 7558 return "copy %1,%0"; 7559 else if (which_alternative == 1) 7560 { 7561 output_asm_insn ("stw %1,-16(%%r30)", operands); 7562 return "{fldws|fldw} -16(%%r30),%0"; 7563 } 7564 else if (which_alternative == 2) 7565 return "stw %1,%0"; 7566 else 7567 return "mtsar %r1"; 7568 } 7569 7570 /* Support the second variant. */ 7571 if (reverse_comparison) 7572 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2]))); 7573 7574 if (which_alternative == 0) 7575 { 7576 int nullify = INSN_ANNULLED_BRANCH_P (insn); 7577 int xdelay; 7578 7579 /* If this is a long branch with its delay slot unfilled, set `nullify' 7580 as it can nullify the delay slot and save a nop. */ 7581 if (length == 8 && dbr_sequence_length () == 0) 7582 nullify = 1; 7583 7584 /* If this is a short forward conditional branch which did not get 7585 its delay slot filled, the delay slot can still be nullified. */ 7586 if (! nullify && length == 4 && dbr_sequence_length () == 0) 7587 nullify = forward_branch_p (insn); 7588 7589 switch (length) 7590 { 7591 case 4: 7592 if (nullify) 7593 { 7594 if (branch_needs_nop_p (insn)) 7595 return "movb,%C2,n %1,%0,%3%#"; 7596 else 7597 return "movb,%C2,n %1,%0,%3"; 7598 } 7599 else 7600 return "movb,%C2 %1,%0,%3"; 7601 7602 case 8: 7603 /* Handle weird backwards branch with a filled delay slot 7604 which is nullified. */ 7605 if (dbr_sequence_length () != 0 7606 && ! forward_branch_p (insn) 7607 && nullify) 7608 return "movb,%N2,n %1,%0,.+12\n\tb %3"; 7609 7610 /* Handle short backwards branch with an unfilled delay slot. 7611 Using a movb;nop rather than or;bl saves 1 cycle for both 7612 taken and untaken branches. */ 7613 else if (dbr_sequence_length () == 0 7614 && ! forward_branch_p (insn) 7615 && INSN_ADDRESSES_SET_P () 7616 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7617 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7618 return "movb,%C2 %1,%0,%3%#"; 7619 /* Handle normal cases. */ 7620 if (nullify) 7621 return "or,%N2 %1,%%r0,%0\n\tb,n %3"; 7622 else 7623 return "or,%N2 %1,%%r0,%0\n\tb %3"; 7624 7625 default: 7626 /* The reversed conditional branch must branch over one additional 7627 instruction if the delay slot is filled and needs to be extracted 7628 by pa_output_lbranch. If the delay slot is empty or this is a 7629 nullified forward branch, the instruction after the reversed 7630 condition branch must be nullified. */ 7631 if (dbr_sequence_length () == 0 7632 || (nullify && forward_branch_p (insn))) 7633 { 7634 nullify = 1; 7635 xdelay = 0; 7636 operands[4] = GEN_INT (length); 7637 } 7638 else 7639 { 7640 xdelay = 1; 7641 operands[4] = GEN_INT (length + 4); 7642 } 7643 7644 if (nullify) 7645 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands); 7646 else 7647 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands); 7648 7649 return pa_output_lbranch (operands[3], insn, xdelay); 7650 } 7651 } 7652 /* Deal with gross reload for FP destination register case. */ 7653 else if (which_alternative == 1) 7654 { 7655 /* Move source register to MEM, perform the branch test, then 7656 finally load the FP register from MEM from within the branch's 7657 delay slot. */ 7658 output_asm_insn ("stw %1,-16(%%r30)", operands); 7659 if (length == 12) 7660 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0"; 7661 else if (length == 16) 7662 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; 7663 else 7664 { 7665 operands[4] = GEN_INT (length - 4); 7666 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands); 7667 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); 7668 return pa_output_lbranch (operands[3], insn, 0); 7669 } 7670 } 7671 /* Deal with gross reload from memory case. */ 7672 else if (which_alternative == 2) 7673 { 7674 /* Reload loop counter from memory, the store back to memory 7675 happens in the branch's delay slot. */ 7676 if (length == 8) 7677 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0"; 7678 else if (length == 12) 7679 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0"; 7680 else 7681 { 7682 operands[4] = GEN_INT (length); 7683 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0", 7684 operands); 7685 return pa_output_lbranch (operands[3], insn, 0); 7686 } 7687 } 7688 /* Handle SAR as a destination. */ 7689 else 7690 { 7691 if (length == 8) 7692 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1"; 7693 else if (length == 12) 7694 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1"; 7695 else 7696 { 7697 operands[4] = GEN_INT (length); 7698 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1", 7699 operands); 7700 return pa_output_lbranch (operands[3], insn, 0); 7701 } 7702 } 7703 } 7704 7705 /* Copy any FP arguments in INSN into integer registers. */ 7706 static void 7707 copy_fp_args (rtx_insn *insn) 7708 { 7709 rtx link; 7710 rtx xoperands[2]; 7711 7712 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) 7713 { 7714 int arg_mode, regno; 7715 rtx use = XEXP (link, 0); 7716 7717 if (! (GET_CODE (use) == USE 7718 && GET_CODE (XEXP (use, 0)) == REG 7719 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) 7720 continue; 7721 7722 arg_mode = GET_MODE (XEXP (use, 0)); 7723 regno = REGNO (XEXP (use, 0)); 7724 7725 /* Is it a floating point register? */ 7726 if (regno >= 32 && regno <= 39) 7727 { 7728 /* Copy the FP register into an integer register via memory. */ 7729 if (arg_mode == SFmode) 7730 { 7731 xoperands[0] = XEXP (use, 0); 7732 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2); 7733 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands); 7734 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); 7735 } 7736 else 7737 { 7738 xoperands[0] = XEXP (use, 0); 7739 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2); 7740 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands); 7741 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands); 7742 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); 7743 } 7744 } 7745 } 7746 } 7747 7748 /* Compute length of the FP argument copy sequence for INSN. */ 7749 static int 7750 length_fp_args (rtx_insn *insn) 7751 { 7752 int length = 0; 7753 rtx link; 7754 7755 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) 7756 { 7757 int arg_mode, regno; 7758 rtx use = XEXP (link, 0); 7759 7760 if (! (GET_CODE (use) == USE 7761 && GET_CODE (XEXP (use, 0)) == REG 7762 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) 7763 continue; 7764 7765 arg_mode = GET_MODE (XEXP (use, 0)); 7766 regno = REGNO (XEXP (use, 0)); 7767 7768 /* Is it a floating point register? */ 7769 if (regno >= 32 && regno <= 39) 7770 { 7771 if (arg_mode == SFmode) 7772 length += 8; 7773 else 7774 length += 12; 7775 } 7776 } 7777 7778 return length; 7779 } 7780 7781 /* Return the attribute length for the millicode call instruction INSN. 7782 The length must match the code generated by pa_output_millicode_call. 7783 We include the delay slot in the returned length as it is better to 7784 over estimate the length than to under estimate it. */ 7785 7786 int 7787 pa_attr_length_millicode_call (rtx_insn *insn) 7788 { 7789 unsigned long distance = -1; 7790 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 7791 7792 if (INSN_ADDRESSES_SET_P ()) 7793 { 7794 distance = (total + insn_current_reference_address (insn)); 7795 if (distance < total) 7796 distance = -1; 7797 } 7798 7799 if (TARGET_64BIT) 7800 { 7801 if (!TARGET_LONG_CALLS && distance < 7600000) 7802 return 8; 7803 7804 return 20; 7805 } 7806 else if (TARGET_PORTABLE_RUNTIME) 7807 return 24; 7808 else 7809 { 7810 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET) 7811 return 8; 7812 7813 if (!flag_pic) 7814 return 12; 7815 7816 return 24; 7817 } 7818 } 7819 7820 /* INSN is a function call. 7821 7822 CALL_DEST is the routine we are calling. */ 7823 7824 const char * 7825 pa_output_millicode_call (rtx_insn *insn, rtx call_dest) 7826 { 7827 int attr_length = get_attr_length (insn); 7828 int seq_length = dbr_sequence_length (); 7829 rtx xoperands[4]; 7830 7831 xoperands[0] = call_dest; 7832 7833 /* Handle the common case where we are sure that the branch will 7834 reach the beginning of the $CODE$ subspace. The within reach 7835 form of the $$sh_func_adrs call has a length of 28. Because it 7836 has an attribute type of sh_func_adrs, it never has a nonzero 7837 sequence length (i.e., the delay slot is never filled). */ 7838 if (!TARGET_LONG_CALLS 7839 && (attr_length == 8 7840 || (attr_length == 28 7841 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS))) 7842 { 7843 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31); 7844 output_asm_insn ("{bl|b,l} %0,%1", xoperands); 7845 } 7846 else 7847 { 7848 if (TARGET_64BIT) 7849 { 7850 /* It might seem that one insn could be saved by accessing 7851 the millicode function using the linkage table. However, 7852 this doesn't work in shared libraries and other dynamically 7853 loaded objects. Using a pc-relative sequence also avoids 7854 problems related to the implicit use of the gp register. */ 7855 xoperands[1] = gen_rtx_REG (Pmode, 1); 7856 xoperands[2] = xoperands[1]; 7857 pa_output_pic_pcrel_sequence (xoperands); 7858 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 7859 } 7860 else if (TARGET_PORTABLE_RUNTIME) 7861 { 7862 /* Pure portable runtime doesn't allow be/ble; we also don't 7863 have PIC support in the assembler/linker, so this sequence 7864 is needed. */ 7865 7866 /* Get the address of our target into %r1. */ 7867 output_asm_insn ("ldil L'%0,%%r1", xoperands); 7868 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands); 7869 7870 /* Get our return address into %r31. */ 7871 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands); 7872 output_asm_insn ("addi 8,%%r31,%%r31", xoperands); 7873 7874 /* Jump to our target address in %r1. */ 7875 output_asm_insn ("bv %%r0(%%r1)", xoperands); 7876 } 7877 else if (!flag_pic) 7878 { 7879 output_asm_insn ("ldil L'%0,%%r1", xoperands); 7880 if (TARGET_PA_20) 7881 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands); 7882 else 7883 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands); 7884 } 7885 else 7886 { 7887 xoperands[1] = gen_rtx_REG (Pmode, 31); 7888 xoperands[2] = gen_rtx_REG (Pmode, 1); 7889 pa_output_pic_pcrel_sequence (xoperands); 7890 7891 /* Adjust return address. */ 7892 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands); 7893 7894 /* Jump to our target address in %r1. */ 7895 output_asm_insn ("bv %%r0(%%r1)", xoperands); 7896 } 7897 } 7898 7899 if (seq_length == 0) 7900 output_asm_insn ("nop", xoperands); 7901 7902 return ""; 7903 } 7904 7905 /* Return the attribute length of the call instruction INSN. The SIBCALL 7906 flag indicates whether INSN is a regular call or a sibling call. The 7907 length returned must be longer than the code actually generated by 7908 pa_output_call. Since branch shortening is done before delay branch 7909 sequencing, there is no way to determine whether or not the delay 7910 slot will be filled during branch shortening. Even when the delay 7911 slot is filled, we may have to add a nop if the delay slot contains 7912 a branch that can't reach its target. Thus, we always have to include 7913 the delay slot in the length estimate. This used to be done in 7914 pa_adjust_insn_length but we do it here now as some sequences always 7915 fill the delay slot and we can save four bytes in the estimate for 7916 these sequences. */ 7917 7918 int 7919 pa_attr_length_call (rtx_insn *insn, int sibcall) 7920 { 7921 int local_call; 7922 rtx call, call_dest; 7923 tree call_decl; 7924 int length = 0; 7925 rtx pat = PATTERN (insn); 7926 unsigned long distance = -1; 7927 7928 gcc_assert (CALL_P (insn)); 7929 7930 if (INSN_ADDRESSES_SET_P ()) 7931 { 7932 unsigned long total; 7933 7934 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 7935 distance = (total + insn_current_reference_address (insn)); 7936 if (distance < total) 7937 distance = -1; 7938 } 7939 7940 gcc_assert (GET_CODE (pat) == PARALLEL); 7941 7942 /* Get the call rtx. */ 7943 call = XVECEXP (pat, 0, 0); 7944 if (GET_CODE (call) == SET) 7945 call = SET_SRC (call); 7946 7947 gcc_assert (GET_CODE (call) == CALL); 7948 7949 /* Determine if this is a local call. */ 7950 call_dest = XEXP (XEXP (call, 0), 0); 7951 call_decl = SYMBOL_REF_DECL (call_dest); 7952 local_call = call_decl && targetm.binds_local_p (call_decl); 7953 7954 /* pc-relative branch. */ 7955 if (!TARGET_LONG_CALLS 7956 && ((TARGET_PA_20 && !sibcall && distance < 7600000) 7957 || distance < MAX_PCREL17F_OFFSET)) 7958 length += 8; 7959 7960 /* 64-bit plabel sequence. */ 7961 else if (TARGET_64BIT && !local_call) 7962 length += 24; 7963 7964 /* non-pic long absolute branch sequence. */ 7965 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) 7966 length += 12; 7967 7968 /* long pc-relative branch sequence. */ 7969 else if (TARGET_LONG_PIC_SDIFF_CALL 7970 || (TARGET_GAS && !TARGET_SOM && local_call)) 7971 { 7972 length += 20; 7973 7974 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 7975 length += 8; 7976 } 7977 7978 /* 32-bit plabel sequence. */ 7979 else 7980 { 7981 length += 32; 7982 7983 if (TARGET_SOM) 7984 length += length_fp_args (insn); 7985 7986 if (flag_pic) 7987 length += 4; 7988 7989 if (!TARGET_PA_20) 7990 { 7991 if (!sibcall) 7992 length += 8; 7993 7994 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 7995 length += 8; 7996 } 7997 } 7998 7999 return length; 8000 } 8001 8002 /* INSN is a function call. 8003 8004 CALL_DEST is the routine we are calling. */ 8005 8006 const char * 8007 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall) 8008 { 8009 int seq_length = dbr_sequence_length (); 8010 tree call_decl = SYMBOL_REF_DECL (call_dest); 8011 int local_call = call_decl && targetm.binds_local_p (call_decl); 8012 rtx xoperands[4]; 8013 8014 xoperands[0] = call_dest; 8015 8016 /* Handle the common case where we're sure that the branch will reach 8017 the beginning of the "$CODE$" subspace. This is the beginning of 8018 the current function if we are in a named section. */ 8019 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8) 8020 { 8021 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2); 8022 output_asm_insn ("{bl|b,l} %0,%1", xoperands); 8023 } 8024 else 8025 { 8026 if (TARGET_64BIT && !local_call) 8027 { 8028 /* ??? As far as I can tell, the HP linker doesn't support the 8029 long pc-relative sequence described in the 64-bit runtime 8030 architecture. So, we use a slightly longer indirect call. */ 8031 xoperands[0] = pa_get_deferred_plabel (call_dest); 8032 xoperands[1] = gen_label_rtx (); 8033 8034 /* Put the load of %r27 into the delay slot. We don't need to 8035 do anything when generating fast indirect calls. */ 8036 if (seq_length != 0) 8037 { 8038 final_scan_insn (NEXT_INSN (insn), asm_out_file, 8039 optimize, 0, NULL); 8040 8041 /* Now delete the delay insn. */ 8042 SET_INSN_DELETED (NEXT_INSN (insn)); 8043 } 8044 8045 output_asm_insn ("addil LT'%0,%%r27", xoperands); 8046 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands); 8047 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands); 8048 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands); 8049 output_asm_insn ("bve,l (%%r2),%%r2", xoperands); 8050 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); 8051 seq_length = 1; 8052 } 8053 else 8054 { 8055 int indirect_call = 0; 8056 8057 /* Emit a long call. There are several different sequences 8058 of increasing length and complexity. In most cases, 8059 they don't allow an instruction in the delay slot. */ 8060 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) 8061 && !TARGET_LONG_PIC_SDIFF_CALL 8062 && !(TARGET_GAS && !TARGET_SOM && local_call) 8063 && !TARGET_64BIT) 8064 indirect_call = 1; 8065 8066 if (seq_length != 0 8067 && !sibcall 8068 && (!TARGET_PA_20 8069 || indirect_call 8070 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic))) 8071 { 8072 /* A non-jump insn in the delay slot. By definition we can 8073 emit this insn before the call (and in fact before argument 8074 relocating. */ 8075 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 8076 NULL); 8077 8078 /* Now delete the delay insn. */ 8079 SET_INSN_DELETED (NEXT_INSN (insn)); 8080 seq_length = 0; 8081 } 8082 8083 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) 8084 { 8085 /* This is the best sequence for making long calls in 8086 non-pic code. Unfortunately, GNU ld doesn't provide 8087 the stub needed for external calls, and GAS's support 8088 for this with the SOM linker is buggy. It is safe 8089 to use this for local calls. */ 8090 output_asm_insn ("ldil L'%0,%%r1", xoperands); 8091 if (sibcall) 8092 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands); 8093 else 8094 { 8095 if (TARGET_PA_20) 8096 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", 8097 xoperands); 8098 else 8099 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands); 8100 8101 output_asm_insn ("copy %%r31,%%r2", xoperands); 8102 seq_length = 1; 8103 } 8104 } 8105 else 8106 { 8107 /* The HP assembler and linker can handle relocations for 8108 the difference of two symbols. The HP assembler 8109 recognizes the sequence as a pc-relative call and 8110 the linker provides stubs when needed. */ 8111 8112 /* GAS currently can't generate the relocations that 8113 are needed for the SOM linker under HP-UX using this 8114 sequence. The GNU linker doesn't generate the stubs 8115 that are needed for external calls on TARGET_ELF32 8116 with this sequence. For now, we have to use a longer 8117 plabel sequence when using GAS for non local calls. */ 8118 if (TARGET_LONG_PIC_SDIFF_CALL 8119 || (TARGET_GAS && !TARGET_SOM && local_call)) 8120 { 8121 xoperands[1] = gen_rtx_REG (Pmode, 1); 8122 xoperands[2] = xoperands[1]; 8123 pa_output_pic_pcrel_sequence (xoperands); 8124 } 8125 else 8126 { 8127 /* Emit a long plabel-based call sequence. This is 8128 essentially an inline implementation of $$dyncall. 8129 We don't actually try to call $$dyncall as this is 8130 as difficult as calling the function itself. */ 8131 xoperands[0] = pa_get_deferred_plabel (call_dest); 8132 xoperands[1] = gen_label_rtx (); 8133 8134 /* Since the call is indirect, FP arguments in registers 8135 need to be copied to the general registers. Then, the 8136 argument relocation stub will copy them back. */ 8137 if (TARGET_SOM) 8138 copy_fp_args (insn); 8139 8140 if (flag_pic) 8141 { 8142 output_asm_insn ("addil LT'%0,%%r19", xoperands); 8143 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands); 8144 output_asm_insn ("ldw 0(%%r1),%%r22", xoperands); 8145 } 8146 else 8147 { 8148 output_asm_insn ("addil LR'%0-$global$,%%r27", 8149 xoperands); 8150 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22", 8151 xoperands); 8152 } 8153 8154 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); 8155 output_asm_insn ("depi 0,31,2,%%r22", xoperands); 8156 /* Should this be an ordered load to ensure the target 8157 address is loaded before the global pointer? */ 8158 output_asm_insn ("ldw 0(%%r22),%%r1", xoperands); 8159 output_asm_insn ("ldw 4(%%r22),%%r19", xoperands); 8160 8161 if (!sibcall && !TARGET_PA_20) 8162 { 8163 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands); 8164 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 8165 output_asm_insn ("addi 8,%%r2,%%r2", xoperands); 8166 else 8167 output_asm_insn ("addi 16,%%r2,%%r2", xoperands); 8168 } 8169 } 8170 8171 if (TARGET_PA_20) 8172 { 8173 if (sibcall) 8174 output_asm_insn ("bve (%%r1)", xoperands); 8175 else 8176 { 8177 if (indirect_call) 8178 { 8179 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 8180 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands); 8181 seq_length = 1; 8182 } 8183 else 8184 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 8185 } 8186 } 8187 else 8188 { 8189 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 8190 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0", 8191 xoperands); 8192 8193 if (sibcall) 8194 { 8195 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 8196 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands); 8197 else 8198 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands); 8199 } 8200 else 8201 { 8202 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 8203 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands); 8204 else 8205 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands); 8206 8207 if (indirect_call) 8208 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands); 8209 else 8210 output_asm_insn ("copy %%r31,%%r2", xoperands); 8211 seq_length = 1; 8212 } 8213 } 8214 } 8215 } 8216 } 8217 8218 if (seq_length == 0) 8219 output_asm_insn ("nop", xoperands); 8220 8221 return ""; 8222 } 8223 8224 /* Return the attribute length of the indirect call instruction INSN. 8225 The length must match the code generated by output_indirect call. 8226 The returned length includes the delay slot. Currently, the delay 8227 slot of an indirect call sequence is not exposed and it is used by 8228 the sequence itself. */ 8229 8230 int 8231 pa_attr_length_indirect_call (rtx_insn *insn) 8232 { 8233 unsigned long distance = -1; 8234 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 8235 8236 if (INSN_ADDRESSES_SET_P ()) 8237 { 8238 distance = (total + insn_current_reference_address (insn)); 8239 if (distance < total) 8240 distance = -1; 8241 } 8242 8243 if (TARGET_64BIT) 8244 return 12; 8245 8246 if (TARGET_FAST_INDIRECT_CALLS) 8247 return 8; 8248 8249 if (TARGET_PORTABLE_RUNTIME) 8250 return 16; 8251 8252 if (!TARGET_LONG_CALLS 8253 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000) 8254 || distance < MAX_PCREL17F_OFFSET)) 8255 return 8; 8256 8257 /* Out of reach, can use ble. */ 8258 if (!flag_pic) 8259 return 12; 8260 8261 /* Inline versions of $$dyncall. */ 8262 if (!optimize_size) 8263 { 8264 if (TARGET_NO_SPACE_REGS) 8265 return 28; 8266 8267 if (TARGET_PA_20) 8268 return 32; 8269 } 8270 8271 /* Long PIC pc-relative call. */ 8272 return 20; 8273 } 8274 8275 const char * 8276 pa_output_indirect_call (rtx_insn *insn, rtx call_dest) 8277 { 8278 rtx xoperands[4]; 8279 int length; 8280 8281 if (TARGET_64BIT) 8282 { 8283 xoperands[0] = call_dest; 8284 output_asm_insn ("ldd 16(%0),%%r2\n\t" 8285 "bve,l (%%r2),%%r2\n\t" 8286 "ldd 24(%0),%%r27", xoperands); 8287 return ""; 8288 } 8289 8290 /* First the special case for kernels, level 0 systems, etc. */ 8291 if (TARGET_FAST_INDIRECT_CALLS) 8292 { 8293 pa_output_arg_descriptor (insn); 8294 if (TARGET_PA_20) 8295 return "bve,l,n (%%r22),%%r2\n\tnop"; 8296 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; 8297 } 8298 8299 if (TARGET_PORTABLE_RUNTIME) 8300 { 8301 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t" 8302 "ldo R'$$dyncall(%%r31),%%r31", xoperands); 8303 pa_output_arg_descriptor (insn); 8304 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)"; 8305 } 8306 8307 /* Now the normal case -- we can reach $$dyncall directly or 8308 we're sure that we can get there via a long-branch stub. 8309 8310 No need to check target flags as the length uniquely identifies 8311 the remaining cases. */ 8312 length = pa_attr_length_indirect_call (insn); 8313 if (length == 8) 8314 { 8315 pa_output_arg_descriptor (insn); 8316 8317 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to 8318 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit 8319 variant of the B,L instruction can't be used on the SOM target. */ 8320 if (TARGET_PA_20 && !TARGET_SOM) 8321 return "b,l,n $$dyncall,%%r2\n\tnop"; 8322 else 8323 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2"; 8324 } 8325 8326 /* Long millicode call, but we are not generating PIC or portable runtime 8327 code. */ 8328 if (length == 12) 8329 { 8330 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands); 8331 pa_output_arg_descriptor (insn); 8332 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2"; 8333 } 8334 8335 /* The long PIC pc-relative call sequence is five instructions. So, 8336 let's use an inline version of $$dyncall when the calling sequence 8337 has a roughly similar number of instructions and we are not optimizing 8338 for size. We need two instructions to load the return pointer plus 8339 the $$dyncall implementation. */ 8340 if (!optimize_size) 8341 { 8342 if (TARGET_NO_SPACE_REGS) 8343 { 8344 pa_output_arg_descriptor (insn); 8345 output_asm_insn ("bl .+8,%%r2\n\t" 8346 "ldo 20(%%r2),%%r2\n\t" 8347 "extru,<> %%r22,30,1,%%r0\n\t" 8348 "bv,n %%r0(%%r22)\n\t" 8349 "ldw -2(%%r22),%%r21\n\t" 8350 "bv %%r0(%%r21)\n\t" 8351 "ldw 2(%%r22),%%r19", xoperands); 8352 return ""; 8353 } 8354 if (TARGET_PA_20) 8355 { 8356 pa_output_arg_descriptor (insn); 8357 output_asm_insn ("bl .+8,%%r2\n\t" 8358 "ldo 24(%%r2),%%r2\n\t" 8359 "stw %%r2,-24(%%sp)\n\t" 8360 "extru,<> %r22,30,1,%%r0\n\t" 8361 "bve,n (%%r22)\n\t" 8362 "ldw -2(%%r22),%%r21\n\t" 8363 "bve (%%r21)\n\t" 8364 "ldw 2(%%r22),%%r19", xoperands); 8365 return ""; 8366 } 8367 } 8368 8369 /* We need a long PIC call to $$dyncall. */ 8370 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall"); 8371 xoperands[1] = gen_rtx_REG (Pmode, 2); 8372 xoperands[2] = gen_rtx_REG (Pmode, 1); 8373 pa_output_pic_pcrel_sequence (xoperands); 8374 pa_output_arg_descriptor (insn); 8375 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2"; 8376 } 8377 8378 /* In HPUX 8.0's shared library scheme, special relocations are needed 8379 for function labels if they might be passed to a function 8380 in a shared library (because shared libraries don't live in code 8381 space), and special magic is needed to construct their address. */ 8382 8383 void 8384 pa_encode_label (rtx sym) 8385 { 8386 const char *str = XSTR (sym, 0); 8387 int len = strlen (str) + 1; 8388 char *newstr, *p; 8389 8390 p = newstr = XALLOCAVEC (char, len + 1); 8391 *p++ = '@'; 8392 strcpy (p, str); 8393 8394 XSTR (sym, 0) = ggc_alloc_string (newstr, len); 8395 } 8396 8397 static void 8398 pa_encode_section_info (tree decl, rtx rtl, int first) 8399 { 8400 int old_referenced = 0; 8401 8402 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF) 8403 old_referenced 8404 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED; 8405 8406 default_encode_section_info (decl, rtl, first); 8407 8408 if (first && TEXT_SPACE_P (decl)) 8409 { 8410 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1; 8411 if (TREE_CODE (decl) == FUNCTION_DECL) 8412 pa_encode_label (XEXP (rtl, 0)); 8413 } 8414 else if (old_referenced) 8415 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced; 8416 } 8417 8418 /* This is sort of inverse to pa_encode_section_info. */ 8419 8420 static const char * 8421 pa_strip_name_encoding (const char *str) 8422 { 8423 str += (*str == '@'); 8424 str += (*str == '*'); 8425 return str; 8426 } 8427 8428 /* Returns 1 if OP is a function label involved in a simple addition 8429 with a constant. Used to keep certain patterns from matching 8430 during instruction combination. */ 8431 int 8432 pa_is_function_label_plus_const (rtx op) 8433 { 8434 /* Strip off any CONST. */ 8435 if (GET_CODE (op) == CONST) 8436 op = XEXP (op, 0); 8437 8438 return (GET_CODE (op) == PLUS 8439 && function_label_operand (XEXP (op, 0), VOIDmode) 8440 && GET_CODE (XEXP (op, 1)) == CONST_INT); 8441 } 8442 8443 /* Output the assembler code for a thunk function. THUNK_DECL is the 8444 declaration for the thunk function itself, FUNCTION is the decl for 8445 the target function. DELTA is an immediate constant offset to be 8446 added to THIS. If VCALL_OFFSET is nonzero, the word at 8447 *(*this + vcall_offset) should be added to THIS. */ 8448 8449 static void 8450 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta, 8451 HOST_WIDE_INT vcall_offset, tree function) 8452 { 8453 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); 8454 static unsigned int current_thunk_number; 8455 int val_14 = VAL_14_BITS_P (delta); 8456 unsigned int old_last_address = last_address, nbytes = 0; 8457 char label[17]; 8458 rtx xoperands[4]; 8459 8460 xoperands[0] = XEXP (DECL_RTL (function), 0); 8461 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0); 8462 xoperands[2] = GEN_INT (delta); 8463 8464 assemble_start_function (thunk_fndecl, fnname); 8465 final_start_function (emit_barrier (), file, 1); 8466 8467 if (!vcall_offset) 8468 { 8469 /* Output the thunk. We know that the function is in the same 8470 translation unit (i.e., the same space) as the thunk, and that 8471 thunks are output after their method. Thus, we don't need an 8472 external branch to reach the function. With SOM and GAS, 8473 functions and thunks are effectively in different sections. 8474 Thus, we can always use a IA-relative branch and the linker 8475 will add a long branch stub if necessary. 8476 8477 However, we have to be careful when generating PIC code on the 8478 SOM port to ensure that the sequence does not transfer to an 8479 import stub for the target function as this could clobber the 8480 return value saved at SP-24. This would also apply to the 8481 32-bit linux port if the multi-space model is implemented. */ 8482 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME 8483 && !(flag_pic && TREE_PUBLIC (function)) 8484 && (TARGET_GAS || last_address < 262132)) 8485 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME 8486 && ((targetm_common.have_named_sections 8487 && DECL_SECTION_NAME (thunk_fndecl) != NULL 8488 /* The GNU 64-bit linker has rather poor stub management. 8489 So, we use a long branch from thunks that aren't in 8490 the same section as the target function. */ 8491 && ((!TARGET_64BIT 8492 && (DECL_SECTION_NAME (thunk_fndecl) 8493 != DECL_SECTION_NAME (function))) 8494 || ((DECL_SECTION_NAME (thunk_fndecl) 8495 == DECL_SECTION_NAME (function)) 8496 && last_address < 262132))) 8497 /* In this case, we need to be able to reach the start of 8498 the stub table even though the function is likely closer 8499 and can be jumped to directly. */ 8500 || (targetm_common.have_named_sections 8501 && DECL_SECTION_NAME (thunk_fndecl) == NULL 8502 && DECL_SECTION_NAME (function) == NULL 8503 && total_code_bytes < MAX_PCREL17F_OFFSET) 8504 /* Likewise. */ 8505 || (!targetm_common.have_named_sections 8506 && total_code_bytes < MAX_PCREL17F_OFFSET)))) 8507 { 8508 if (!val_14) 8509 output_asm_insn ("addil L'%2,%%r26", xoperands); 8510 8511 output_asm_insn ("b %0", xoperands); 8512 8513 if (val_14) 8514 { 8515 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8516 nbytes += 8; 8517 } 8518 else 8519 { 8520 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8521 nbytes += 12; 8522 } 8523 } 8524 else if (TARGET_64BIT) 8525 { 8526 rtx xop[4]; 8527 8528 /* We only have one call-clobbered scratch register, so we can't 8529 make use of the delay slot if delta doesn't fit in 14 bits. */ 8530 if (!val_14) 8531 { 8532 output_asm_insn ("addil L'%2,%%r26", xoperands); 8533 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8534 } 8535 8536 /* Load function address into %r1. */ 8537 xop[0] = xoperands[0]; 8538 xop[1] = gen_rtx_REG (Pmode, 1); 8539 xop[2] = xop[1]; 8540 pa_output_pic_pcrel_sequence (xop); 8541 8542 if (val_14) 8543 { 8544 output_asm_insn ("bv %%r0(%%r1)", xoperands); 8545 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8546 nbytes += 20; 8547 } 8548 else 8549 { 8550 output_asm_insn ("bv,n %%r0(%%r1)", xoperands); 8551 nbytes += 24; 8552 } 8553 } 8554 else if (TARGET_PORTABLE_RUNTIME) 8555 { 8556 output_asm_insn ("ldil L'%0,%%r1", xoperands); 8557 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands); 8558 8559 if (!val_14) 8560 output_asm_insn ("ldil L'%2,%%r26", xoperands); 8561 8562 output_asm_insn ("bv %%r0(%%r22)", xoperands); 8563 8564 if (val_14) 8565 { 8566 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8567 nbytes += 16; 8568 } 8569 else 8570 { 8571 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands); 8572 nbytes += 20; 8573 } 8574 } 8575 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) 8576 { 8577 /* The function is accessible from outside this module. The only 8578 way to avoid an import stub between the thunk and function is to 8579 call the function directly with an indirect sequence similar to 8580 that used by $$dyncall. This is possible because $$dyncall acts 8581 as the import stub in an indirect call. */ 8582 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number); 8583 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label); 8584 output_asm_insn ("addil LT'%3,%%r19", xoperands); 8585 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands); 8586 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8587 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); 8588 output_asm_insn ("depi 0,31,2,%%r22", xoperands); 8589 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands); 8590 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8591 8592 if (!val_14) 8593 { 8594 output_asm_insn ("addil L'%2,%%r26", xoperands); 8595 nbytes += 4; 8596 } 8597 8598 if (TARGET_PA_20) 8599 { 8600 output_asm_insn ("bve (%%r22)", xoperands); 8601 nbytes += 36; 8602 } 8603 else if (TARGET_NO_SPACE_REGS) 8604 { 8605 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands); 8606 nbytes += 36; 8607 } 8608 else 8609 { 8610 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands); 8611 output_asm_insn ("mtsp %%r21,%%sr0", xoperands); 8612 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands); 8613 nbytes += 44; 8614 } 8615 8616 if (val_14) 8617 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8618 else 8619 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8620 } 8621 else if (flag_pic) 8622 { 8623 rtx xop[4]; 8624 8625 /* Load function address into %r22. */ 8626 xop[0] = xoperands[0]; 8627 xop[1] = gen_rtx_REG (Pmode, 1); 8628 xop[2] = gen_rtx_REG (Pmode, 22); 8629 pa_output_pic_pcrel_sequence (xop); 8630 8631 if (!val_14) 8632 output_asm_insn ("addil L'%2,%%r26", xoperands); 8633 8634 output_asm_insn ("bv %%r0(%%r22)", xoperands); 8635 8636 if (val_14) 8637 { 8638 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8639 nbytes += 20; 8640 } 8641 else 8642 { 8643 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8644 nbytes += 24; 8645 } 8646 } 8647 else 8648 { 8649 if (!val_14) 8650 output_asm_insn ("addil L'%2,%%r26", xoperands); 8651 8652 output_asm_insn ("ldil L'%0,%%r22", xoperands); 8653 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands); 8654 8655 if (val_14) 8656 { 8657 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8658 nbytes += 12; 8659 } 8660 else 8661 { 8662 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8663 nbytes += 16; 8664 } 8665 } 8666 } 8667 else 8668 { 8669 rtx xop[4]; 8670 8671 /* Add DELTA to THIS. */ 8672 if (val_14) 8673 { 8674 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8675 nbytes += 4; 8676 } 8677 else 8678 { 8679 output_asm_insn ("addil L'%2,%%r26", xoperands); 8680 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8681 nbytes += 8; 8682 } 8683 8684 if (TARGET_64BIT) 8685 { 8686 /* Load *(THIS + DELTA) to %r1. */ 8687 output_asm_insn ("ldd 0(%%r26),%%r1", xoperands); 8688 8689 val_14 = VAL_14_BITS_P (vcall_offset); 8690 xoperands[2] = GEN_INT (vcall_offset); 8691 8692 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */ 8693 if (val_14) 8694 { 8695 output_asm_insn ("ldd %2(%%r1),%%r1", xoperands); 8696 nbytes += 8; 8697 } 8698 else 8699 { 8700 output_asm_insn ("addil L'%2,%%r1", xoperands); 8701 output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands); 8702 nbytes += 12; 8703 } 8704 } 8705 else 8706 { 8707 /* Load *(THIS + DELTA) to %r1. */ 8708 output_asm_insn ("ldw 0(%%r26),%%r1", xoperands); 8709 8710 val_14 = VAL_14_BITS_P (vcall_offset); 8711 xoperands[2] = GEN_INT (vcall_offset); 8712 8713 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */ 8714 if (val_14) 8715 { 8716 output_asm_insn ("ldw %2(%%r1),%%r1", xoperands); 8717 nbytes += 8; 8718 } 8719 else 8720 { 8721 output_asm_insn ("addil L'%2,%%r1", xoperands); 8722 output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands); 8723 nbytes += 12; 8724 } 8725 } 8726 8727 /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible. */ 8728 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME 8729 && !(flag_pic && TREE_PUBLIC (function)) 8730 && (TARGET_GAS || last_address < 262132)) 8731 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME 8732 && ((targetm_common.have_named_sections 8733 && DECL_SECTION_NAME (thunk_fndecl) != NULL 8734 /* The GNU 64-bit linker has rather poor stub management. 8735 So, we use a long branch from thunks that aren't in 8736 the same section as the target function. */ 8737 && ((!TARGET_64BIT 8738 && (DECL_SECTION_NAME (thunk_fndecl) 8739 != DECL_SECTION_NAME (function))) 8740 || ((DECL_SECTION_NAME (thunk_fndecl) 8741 == DECL_SECTION_NAME (function)) 8742 && last_address < 262132))) 8743 /* In this case, we need to be able to reach the start of 8744 the stub table even though the function is likely closer 8745 and can be jumped to directly. */ 8746 || (targetm_common.have_named_sections 8747 && DECL_SECTION_NAME (thunk_fndecl) == NULL 8748 && DECL_SECTION_NAME (function) == NULL 8749 && total_code_bytes < MAX_PCREL17F_OFFSET) 8750 /* Likewise. */ 8751 || (!targetm_common.have_named_sections 8752 && total_code_bytes < MAX_PCREL17F_OFFSET)))) 8753 { 8754 nbytes += 4; 8755 output_asm_insn ("b %0", xoperands); 8756 8757 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */ 8758 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); 8759 } 8760 else if (TARGET_64BIT) 8761 { 8762 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */ 8763 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); 8764 8765 /* Load function address into %r1. */ 8766 nbytes += 16; 8767 xop[0] = xoperands[0]; 8768 xop[1] = gen_rtx_REG (Pmode, 1); 8769 xop[2] = xop[1]; 8770 pa_output_pic_pcrel_sequence (xop); 8771 8772 output_asm_insn ("bv,n %%r0(%%r1)", xoperands); 8773 } 8774 else if (TARGET_PORTABLE_RUNTIME) 8775 { 8776 /* Load function address into %r22. */ 8777 nbytes += 12; 8778 output_asm_insn ("ldil L'%0,%%r22", xoperands); 8779 output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands); 8780 8781 output_asm_insn ("bv %%r0(%%r22)", xoperands); 8782 8783 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */ 8784 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); 8785 } 8786 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) 8787 { 8788 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */ 8789 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); 8790 8791 /* The function is accessible from outside this module. The only 8792 way to avoid an import stub between the thunk and function is to 8793 call the function directly with an indirect sequence similar to 8794 that used by $$dyncall. This is possible because $$dyncall acts 8795 as the import stub in an indirect call. */ 8796 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number); 8797 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label); 8798 output_asm_insn ("addil LT'%3,%%r19", xoperands); 8799 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands); 8800 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8801 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); 8802 output_asm_insn ("depi 0,31,2,%%r22", xoperands); 8803 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands); 8804 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8805 8806 if (TARGET_PA_20) 8807 { 8808 output_asm_insn ("bve,n (%%r22)", xoperands); 8809 nbytes += 32; 8810 } 8811 else if (TARGET_NO_SPACE_REGS) 8812 { 8813 output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands); 8814 nbytes += 32; 8815 } 8816 else 8817 { 8818 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands); 8819 output_asm_insn ("mtsp %%r21,%%sr0", xoperands); 8820 output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands); 8821 nbytes += 40; 8822 } 8823 } 8824 else if (flag_pic) 8825 { 8826 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */ 8827 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); 8828 8829 /* Load function address into %r1. */ 8830 nbytes += 16; 8831 xop[0] = xoperands[0]; 8832 xop[1] = gen_rtx_REG (Pmode, 1); 8833 xop[2] = xop[1]; 8834 pa_output_pic_pcrel_sequence (xop); 8835 8836 output_asm_insn ("bv,n %%r0(%%r1)", xoperands); 8837 } 8838 else 8839 { 8840 /* Load function address into %r22. */ 8841 nbytes += 8; 8842 output_asm_insn ("ldil L'%0,%%r22", xoperands); 8843 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands); 8844 8845 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */ 8846 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands); 8847 } 8848 } 8849 8850 final_end_function (); 8851 8852 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) 8853 { 8854 switch_to_section (data_section); 8855 output_asm_insn (".align 4", xoperands); 8856 ASM_OUTPUT_LABEL (file, label); 8857 output_asm_insn (".word P'%0", xoperands); 8858 } 8859 8860 current_thunk_number++; 8861 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) 8862 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); 8863 last_address += nbytes; 8864 if (old_last_address > last_address) 8865 last_address = UINT_MAX; 8866 update_total_code_bytes (nbytes); 8867 assemble_end_function (thunk_fndecl, fnname); 8868 } 8869 8870 /* Only direct calls to static functions are allowed to be sibling (tail) 8871 call optimized. 8872 8873 This restriction is necessary because some linker generated stubs will 8874 store return pointers into rp' in some cases which might clobber a 8875 live value already in rp'. 8876 8877 In a sibcall the current function and the target function share stack 8878 space. Thus if the path to the current function and the path to the 8879 target function save a value in rp', they save the value into the 8880 same stack slot, which has undesirable consequences. 8881 8882 Because of the deferred binding nature of shared libraries any function 8883 with external scope could be in a different load module and thus require 8884 rp' to be saved when calling that function. So sibcall optimizations 8885 can only be safe for static function. 8886 8887 Note that GCC never needs return value relocations, so we don't have to 8888 worry about static calls with return value relocations (which require 8889 saving rp'). 8890 8891 It is safe to perform a sibcall optimization when the target function 8892 will never return. */ 8893 static bool 8894 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 8895 { 8896 /* Sibcalls are not ok because the arg pointer register is not a fixed 8897 register. This prevents the sibcall optimization from occurring. In 8898 addition, there are problems with stub placement using GNU ld. This 8899 is because a normal sibcall branch uses a 17-bit relocation while 8900 a regular call branch uses a 22-bit relocation. As a result, more 8901 care needs to be taken in the placement of long-branch stubs. */ 8902 if (TARGET_64BIT) 8903 return false; 8904 8905 if (TARGET_PORTABLE_RUNTIME) 8906 return false; 8907 8908 /* Sibcalls are only ok within a translation unit. */ 8909 return decl && targetm.binds_local_p (decl); 8910 } 8911 8912 /* ??? Addition is not commutative on the PA due to the weird implicit 8913 space register selection rules for memory addresses. Therefore, we 8914 don't consider a + b == b + a, as this might be inside a MEM. */ 8915 static bool 8916 pa_commutative_p (const_rtx x, int outer_code) 8917 { 8918 return (COMMUTATIVE_P (x) 8919 && (TARGET_NO_SPACE_REGS 8920 || (outer_code != UNKNOWN && outer_code != MEM) 8921 || GET_CODE (x) != PLUS)); 8922 } 8923 8924 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for 8925 use in fmpyadd instructions. */ 8926 int 8927 pa_fmpyaddoperands (rtx *operands) 8928 { 8929 machine_mode mode = GET_MODE (operands[0]); 8930 8931 /* Must be a floating point mode. */ 8932 if (mode != SFmode && mode != DFmode) 8933 return 0; 8934 8935 /* All modes must be the same. */ 8936 if (! (mode == GET_MODE (operands[1]) 8937 && mode == GET_MODE (operands[2]) 8938 && mode == GET_MODE (operands[3]) 8939 && mode == GET_MODE (operands[4]) 8940 && mode == GET_MODE (operands[5]))) 8941 return 0; 8942 8943 /* All operands must be registers. */ 8944 if (! (GET_CODE (operands[1]) == REG 8945 && GET_CODE (operands[2]) == REG 8946 && GET_CODE (operands[3]) == REG 8947 && GET_CODE (operands[4]) == REG 8948 && GET_CODE (operands[5]) == REG)) 8949 return 0; 8950 8951 /* Only 2 real operands to the addition. One of the input operands must 8952 be the same as the output operand. */ 8953 if (! rtx_equal_p (operands[3], operands[4]) 8954 && ! rtx_equal_p (operands[3], operands[5])) 8955 return 0; 8956 8957 /* Inout operand of add cannot conflict with any operands from multiply. */ 8958 if (rtx_equal_p (operands[3], operands[0]) 8959 || rtx_equal_p (operands[3], operands[1]) 8960 || rtx_equal_p (operands[3], operands[2])) 8961 return 0; 8962 8963 /* multiply cannot feed into addition operands. */ 8964 if (rtx_equal_p (operands[4], operands[0]) 8965 || rtx_equal_p (operands[5], operands[0])) 8966 return 0; 8967 8968 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ 8969 if (mode == SFmode 8970 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS 8971 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS 8972 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS 8973 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS 8974 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS 8975 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) 8976 return 0; 8977 8978 /* Passed. Operands are suitable for fmpyadd. */ 8979 return 1; 8980 } 8981 8982 #if !defined(USE_COLLECT2) 8983 static void 8984 pa_asm_out_constructor (rtx symbol, int priority) 8985 { 8986 if (!function_label_operand (symbol, VOIDmode)) 8987 pa_encode_label (symbol); 8988 8989 #ifdef CTORS_SECTION_ASM_OP 8990 default_ctor_section_asm_out_constructor (symbol, priority); 8991 #else 8992 # ifdef TARGET_ASM_NAMED_SECTION 8993 default_named_section_asm_out_constructor (symbol, priority); 8994 # else 8995 default_stabs_asm_out_constructor (symbol, priority); 8996 # endif 8997 #endif 8998 } 8999 9000 static void 9001 pa_asm_out_destructor (rtx symbol, int priority) 9002 { 9003 if (!function_label_operand (symbol, VOIDmode)) 9004 pa_encode_label (symbol); 9005 9006 #ifdef DTORS_SECTION_ASM_OP 9007 default_dtor_section_asm_out_destructor (symbol, priority); 9008 #else 9009 # ifdef TARGET_ASM_NAMED_SECTION 9010 default_named_section_asm_out_destructor (symbol, priority); 9011 # else 9012 default_stabs_asm_out_destructor (symbol, priority); 9013 # endif 9014 #endif 9015 } 9016 #endif 9017 9018 /* This function places uninitialized global data in the bss section. 9019 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this 9020 function on the SOM port to prevent uninitialized global data from 9021 being placed in the data section. */ 9022 9023 void 9024 pa_asm_output_aligned_bss (FILE *stream, 9025 const char *name, 9026 unsigned HOST_WIDE_INT size, 9027 unsigned int align) 9028 { 9029 switch_to_section (bss_section); 9030 9031 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE 9032 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); 9033 #endif 9034 9035 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE 9036 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); 9037 #endif 9038 9039 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 9040 ASM_OUTPUT_LABEL (stream, name); 9041 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); 9042 } 9043 9044 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive 9045 that doesn't allow the alignment of global common storage to be directly 9046 specified. The SOM linker aligns common storage based on the rounded 9047 value of the NUM_BYTES parameter in the .comm directive. It's not 9048 possible to use the .align directive as it doesn't affect the alignment 9049 of the label associated with a .comm directive. */ 9050 9051 void 9052 pa_asm_output_aligned_common (FILE *stream, 9053 const char *name, 9054 unsigned HOST_WIDE_INT size, 9055 unsigned int align) 9056 { 9057 unsigned int max_common_align; 9058 9059 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64); 9060 if (align > max_common_align) 9061 { 9062 warning (0, "alignment (%u) for %s exceeds maximum alignment " 9063 "for global common data. Using %u", 9064 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT); 9065 align = max_common_align; 9066 } 9067 9068 switch_to_section (bss_section); 9069 9070 assemble_name (stream, name); 9071 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n", 9072 MAX (size, align / BITS_PER_UNIT)); 9073 } 9074 9075 /* We can't use .comm for local common storage as the SOM linker effectively 9076 treats the symbol as universal and uses the same storage for local symbols 9077 with the same name in different object files. The .block directive 9078 reserves an uninitialized block of storage. However, it's not common 9079 storage. Fortunately, GCC never requests common storage with the same 9080 name in any given translation unit. */ 9081 9082 void 9083 pa_asm_output_aligned_local (FILE *stream, 9084 const char *name, 9085 unsigned HOST_WIDE_INT size, 9086 unsigned int align) 9087 { 9088 switch_to_section (bss_section); 9089 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 9090 9091 #ifdef LOCAL_ASM_OP 9092 fprintf (stream, "%s", LOCAL_ASM_OP); 9093 assemble_name (stream, name); 9094 fprintf (stream, "\n"); 9095 #endif 9096 9097 ASM_OUTPUT_LABEL (stream, name); 9098 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); 9099 } 9100 9101 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for 9102 use in fmpysub instructions. */ 9103 int 9104 pa_fmpysuboperands (rtx *operands) 9105 { 9106 machine_mode mode = GET_MODE (operands[0]); 9107 9108 /* Must be a floating point mode. */ 9109 if (mode != SFmode && mode != DFmode) 9110 return 0; 9111 9112 /* All modes must be the same. */ 9113 if (! (mode == GET_MODE (operands[1]) 9114 && mode == GET_MODE (operands[2]) 9115 && mode == GET_MODE (operands[3]) 9116 && mode == GET_MODE (operands[4]) 9117 && mode == GET_MODE (operands[5]))) 9118 return 0; 9119 9120 /* All operands must be registers. */ 9121 if (! (GET_CODE (operands[1]) == REG 9122 && GET_CODE (operands[2]) == REG 9123 && GET_CODE (operands[3]) == REG 9124 && GET_CODE (operands[4]) == REG 9125 && GET_CODE (operands[5]) == REG)) 9126 return 0; 9127 9128 /* Only 2 real operands to the subtraction. Subtraction is not a commutative 9129 operation, so operands[4] must be the same as operand[3]. */ 9130 if (! rtx_equal_p (operands[3], operands[4])) 9131 return 0; 9132 9133 /* multiply cannot feed into subtraction. */ 9134 if (rtx_equal_p (operands[5], operands[0])) 9135 return 0; 9136 9137 /* Inout operand of sub cannot conflict with any operands from multiply. */ 9138 if (rtx_equal_p (operands[3], operands[0]) 9139 || rtx_equal_p (operands[3], operands[1]) 9140 || rtx_equal_p (operands[3], operands[2])) 9141 return 0; 9142 9143 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ 9144 if (mode == SFmode 9145 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS 9146 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS 9147 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS 9148 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS 9149 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS 9150 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) 9151 return 0; 9152 9153 /* Passed. Operands are suitable for fmpysub. */ 9154 return 1; 9155 } 9156 9157 /* Return 1 if the given constant is 2, 4, or 8. These are the valid 9158 constants for a MULT embedded inside a memory address. */ 9159 int 9160 pa_mem_shadd_constant_p (int val) 9161 { 9162 if (val == 2 || val == 4 || val == 8) 9163 return 1; 9164 else 9165 return 0; 9166 } 9167 9168 /* Return 1 if the given constant is 1, 2, or 3. These are the valid 9169 constants for shadd instructions. */ 9170 int 9171 pa_shadd_constant_p (int val) 9172 { 9173 if (val == 1 || val == 2 || val == 3) 9174 return 1; 9175 else 9176 return 0; 9177 } 9178 9179 /* Return TRUE if INSN branches forward. */ 9180 9181 static bool 9182 forward_branch_p (rtx_insn *insn) 9183 { 9184 rtx lab = JUMP_LABEL (insn); 9185 9186 /* The INSN must have a jump label. */ 9187 gcc_assert (lab != NULL_RTX); 9188 9189 if (INSN_ADDRESSES_SET_P ()) 9190 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn)); 9191 9192 while (insn) 9193 { 9194 if (insn == lab) 9195 return true; 9196 else 9197 insn = NEXT_INSN (insn); 9198 } 9199 9200 return false; 9201 } 9202 9203 /* Output an unconditional move and branch insn. */ 9204 9205 const char * 9206 pa_output_parallel_movb (rtx *operands, rtx_insn *insn) 9207 { 9208 int length = get_attr_length (insn); 9209 9210 /* These are the cases in which we win. */ 9211 if (length == 4) 9212 return "mov%I1b,tr %1,%0,%2"; 9213 9214 /* None of the following cases win, but they don't lose either. */ 9215 if (length == 8) 9216 { 9217 if (dbr_sequence_length () == 0) 9218 { 9219 /* Nothing in the delay slot, fake it by putting the combined 9220 insn (the copy or add) in the delay slot of a bl. */ 9221 if (GET_CODE (operands[1]) == CONST_INT) 9222 return "b %2\n\tldi %1,%0"; 9223 else 9224 return "b %2\n\tcopy %1,%0"; 9225 } 9226 else 9227 { 9228 /* Something in the delay slot, but we've got a long branch. */ 9229 if (GET_CODE (operands[1]) == CONST_INT) 9230 return "ldi %1,%0\n\tb %2"; 9231 else 9232 return "copy %1,%0\n\tb %2"; 9233 } 9234 } 9235 9236 if (GET_CODE (operands[1]) == CONST_INT) 9237 output_asm_insn ("ldi %1,%0", operands); 9238 else 9239 output_asm_insn ("copy %1,%0", operands); 9240 return pa_output_lbranch (operands[2], insn, 1); 9241 } 9242 9243 /* Output an unconditional add and branch insn. */ 9244 9245 const char * 9246 pa_output_parallel_addb (rtx *operands, rtx_insn *insn) 9247 { 9248 int length = get_attr_length (insn); 9249 9250 /* To make life easy we want operand0 to be the shared input/output 9251 operand and operand1 to be the readonly operand. */ 9252 if (operands[0] == operands[1]) 9253 operands[1] = operands[2]; 9254 9255 /* These are the cases in which we win. */ 9256 if (length == 4) 9257 return "add%I1b,tr %1,%0,%3"; 9258 9259 /* None of the following cases win, but they don't lose either. */ 9260 if (length == 8) 9261 { 9262 if (dbr_sequence_length () == 0) 9263 /* Nothing in the delay slot, fake it by putting the combined 9264 insn (the copy or add) in the delay slot of a bl. */ 9265 return "b %3\n\tadd%I1 %1,%0,%0"; 9266 else 9267 /* Something in the delay slot, but we've got a long branch. */ 9268 return "add%I1 %1,%0,%0\n\tb %3"; 9269 } 9270 9271 output_asm_insn ("add%I1 %1,%0,%0", operands); 9272 return pa_output_lbranch (operands[3], insn, 1); 9273 } 9274 9275 /* We use this hook to perform a PA specific optimization which is difficult 9276 to do in earlier passes. */ 9277 9278 static void 9279 pa_reorg (void) 9280 { 9281 remove_useless_addtr_insns (1); 9282 9283 if (pa_cpu < PROCESSOR_8000) 9284 pa_combine_instructions (); 9285 } 9286 9287 /* The PA has a number of odd instructions which can perform multiple 9288 tasks at once. On first generation PA machines (PA1.0 and PA1.1) 9289 it may be profitable to combine two instructions into one instruction 9290 with two outputs. It's not profitable PA2.0 machines because the 9291 two outputs would take two slots in the reorder buffers. 9292 9293 This routine finds instructions which can be combined and combines 9294 them. We only support some of the potential combinations, and we 9295 only try common ways to find suitable instructions. 9296 9297 * addb can add two registers or a register and a small integer 9298 and jump to a nearby (+-8k) location. Normally the jump to the 9299 nearby location is conditional on the result of the add, but by 9300 using the "true" condition we can make the jump unconditional. 9301 Thus addb can perform two independent operations in one insn. 9302 9303 * movb is similar to addb in that it can perform a reg->reg 9304 or small immediate->reg copy and jump to a nearby (+-8k location). 9305 9306 * fmpyadd and fmpysub can perform a FP multiply and either an 9307 FP add or FP sub if the operands of the multiply and add/sub are 9308 independent (there are other minor restrictions). Note both 9309 the fmpy and fadd/fsub can in theory move to better spots according 9310 to data dependencies, but for now we require the fmpy stay at a 9311 fixed location. 9312 9313 * Many of the memory operations can perform pre & post updates 9314 of index registers. GCC's pre/post increment/decrement addressing 9315 is far too simple to take advantage of all the possibilities. This 9316 pass may not be suitable since those insns may not be independent. 9317 9318 * comclr can compare two ints or an int and a register, nullify 9319 the following instruction and zero some other register. This 9320 is more difficult to use as it's harder to find an insn which 9321 will generate a comclr than finding something like an unconditional 9322 branch. (conditional moves & long branches create comclr insns). 9323 9324 * Most arithmetic operations can conditionally skip the next 9325 instruction. They can be viewed as "perform this operation 9326 and conditionally jump to this nearby location" (where nearby 9327 is an insns away). These are difficult to use due to the 9328 branch length restrictions. */ 9329 9330 static void 9331 pa_combine_instructions (void) 9332 { 9333 rtx_insn *anchor; 9334 9335 /* This can get expensive since the basic algorithm is on the 9336 order of O(n^2) (or worse). Only do it for -O2 or higher 9337 levels of optimization. */ 9338 if (optimize < 2) 9339 return; 9340 9341 /* Walk down the list of insns looking for "anchor" insns which 9342 may be combined with "floating" insns. As the name implies, 9343 "anchor" instructions don't move, while "floating" insns may 9344 move around. */ 9345 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX)); 9346 rtx_insn *new_rtx = make_insn_raw (par); 9347 9348 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor)) 9349 { 9350 enum attr_pa_combine_type anchor_attr; 9351 enum attr_pa_combine_type floater_attr; 9352 9353 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs. 9354 Also ignore any special USE insns. */ 9355 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor)) 9356 || GET_CODE (PATTERN (anchor)) == USE 9357 || GET_CODE (PATTERN (anchor)) == CLOBBER) 9358 continue; 9359 9360 anchor_attr = get_attr_pa_combine_type (anchor); 9361 /* See if anchor is an insn suitable for combination. */ 9362 if (anchor_attr == PA_COMBINE_TYPE_FMPY 9363 || anchor_attr == PA_COMBINE_TYPE_FADDSUB 9364 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH 9365 && ! forward_branch_p (anchor))) 9366 { 9367 rtx_insn *floater; 9368 9369 for (floater = PREV_INSN (anchor); 9370 floater; 9371 floater = PREV_INSN (floater)) 9372 { 9373 if (NOTE_P (floater) 9374 || (NONJUMP_INSN_P (floater) 9375 && (GET_CODE (PATTERN (floater)) == USE 9376 || GET_CODE (PATTERN (floater)) == CLOBBER))) 9377 continue; 9378 9379 /* Anything except a regular INSN will stop our search. */ 9380 if (! NONJUMP_INSN_P (floater)) 9381 { 9382 floater = NULL; 9383 break; 9384 } 9385 9386 /* See if FLOATER is suitable for combination with the 9387 anchor. */ 9388 floater_attr = get_attr_pa_combine_type (floater); 9389 if ((anchor_attr == PA_COMBINE_TYPE_FMPY 9390 && floater_attr == PA_COMBINE_TYPE_FADDSUB) 9391 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9392 && floater_attr == PA_COMBINE_TYPE_FMPY)) 9393 { 9394 /* If ANCHOR and FLOATER can be combined, then we're 9395 done with this pass. */ 9396 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9397 SET_DEST (PATTERN (floater)), 9398 XEXP (SET_SRC (PATTERN (floater)), 0), 9399 XEXP (SET_SRC (PATTERN (floater)), 1))) 9400 break; 9401 } 9402 9403 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH 9404 && floater_attr == PA_COMBINE_TYPE_ADDMOVE) 9405 { 9406 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS) 9407 { 9408 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9409 SET_DEST (PATTERN (floater)), 9410 XEXP (SET_SRC (PATTERN (floater)), 0), 9411 XEXP (SET_SRC (PATTERN (floater)), 1))) 9412 break; 9413 } 9414 else 9415 { 9416 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9417 SET_DEST (PATTERN (floater)), 9418 SET_SRC (PATTERN (floater)), 9419 SET_SRC (PATTERN (floater)))) 9420 break; 9421 } 9422 } 9423 } 9424 9425 /* If we didn't find anything on the backwards scan try forwards. */ 9426 if (!floater 9427 && (anchor_attr == PA_COMBINE_TYPE_FMPY 9428 || anchor_attr == PA_COMBINE_TYPE_FADDSUB)) 9429 { 9430 for (floater = anchor; floater; floater = NEXT_INSN (floater)) 9431 { 9432 if (NOTE_P (floater) 9433 || (NONJUMP_INSN_P (floater) 9434 && (GET_CODE (PATTERN (floater)) == USE 9435 || GET_CODE (PATTERN (floater)) == CLOBBER))) 9436 9437 continue; 9438 9439 /* Anything except a regular INSN will stop our search. */ 9440 if (! NONJUMP_INSN_P (floater)) 9441 { 9442 floater = NULL; 9443 break; 9444 } 9445 9446 /* See if FLOATER is suitable for combination with the 9447 anchor. */ 9448 floater_attr = get_attr_pa_combine_type (floater); 9449 if ((anchor_attr == PA_COMBINE_TYPE_FMPY 9450 && floater_attr == PA_COMBINE_TYPE_FADDSUB) 9451 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9452 && floater_attr == PA_COMBINE_TYPE_FMPY)) 9453 { 9454 /* If ANCHOR and FLOATER can be combined, then we're 9455 done with this pass. */ 9456 if (pa_can_combine_p (new_rtx, anchor, floater, 1, 9457 SET_DEST (PATTERN (floater)), 9458 XEXP (SET_SRC (PATTERN (floater)), 9459 0), 9460 XEXP (SET_SRC (PATTERN (floater)), 9461 1))) 9462 break; 9463 } 9464 } 9465 } 9466 9467 /* FLOATER will be nonzero if we found a suitable floating 9468 insn for combination with ANCHOR. */ 9469 if (floater 9470 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9471 || anchor_attr == PA_COMBINE_TYPE_FMPY)) 9472 { 9473 /* Emit the new instruction and delete the old anchor. */ 9474 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)), 9475 copy_rtx (PATTERN (floater))); 9476 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp); 9477 emit_insn_before (temp, anchor); 9478 9479 SET_INSN_DELETED (anchor); 9480 9481 /* Emit a special USE insn for FLOATER, then delete 9482 the floating insn. */ 9483 temp = copy_rtx (PATTERN (floater)); 9484 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater); 9485 delete_insn (floater); 9486 9487 continue; 9488 } 9489 else if (floater 9490 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH) 9491 { 9492 /* Emit the new_jump instruction and delete the old anchor. */ 9493 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)), 9494 copy_rtx (PATTERN (floater))); 9495 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp); 9496 temp = emit_jump_insn_before (temp, anchor); 9497 9498 JUMP_LABEL (temp) = JUMP_LABEL (anchor); 9499 SET_INSN_DELETED (anchor); 9500 9501 /* Emit a special USE insn for FLOATER, then delete 9502 the floating insn. */ 9503 temp = copy_rtx (PATTERN (floater)); 9504 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater); 9505 delete_insn (floater); 9506 continue; 9507 } 9508 } 9509 } 9510 } 9511 9512 static int 9513 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater, 9514 int reversed, rtx dest, 9515 rtx src1, rtx src2) 9516 { 9517 int insn_code_number; 9518 rtx_insn *start, *end; 9519 9520 /* Create a PARALLEL with the patterns of ANCHOR and 9521 FLOATER, try to recognize it, then test constraints 9522 for the resulting pattern. 9523 9524 If the pattern doesn't match or the constraints 9525 aren't met keep searching for a suitable floater 9526 insn. */ 9527 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor); 9528 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater); 9529 INSN_CODE (new_rtx) = -1; 9530 insn_code_number = recog_memoized (new_rtx); 9531 basic_block bb = BLOCK_FOR_INSN (anchor); 9532 if (insn_code_number < 0 9533 || (extract_insn (new_rtx), 9534 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb)))) 9535 return 0; 9536 9537 if (reversed) 9538 { 9539 start = anchor; 9540 end = floater; 9541 } 9542 else 9543 { 9544 start = floater; 9545 end = anchor; 9546 } 9547 9548 /* There's up to three operands to consider. One 9549 output and two inputs. 9550 9551 The output must not be used between FLOATER & ANCHOR 9552 exclusive. The inputs must not be set between 9553 FLOATER and ANCHOR exclusive. */ 9554 9555 if (reg_used_between_p (dest, start, end)) 9556 return 0; 9557 9558 if (reg_set_between_p (src1, start, end)) 9559 return 0; 9560 9561 if (reg_set_between_p (src2, start, end)) 9562 return 0; 9563 9564 /* If we get here, then everything is good. */ 9565 return 1; 9566 } 9567 9568 /* Return nonzero if references for INSN are delayed. 9569 9570 Millicode insns are actually function calls with some special 9571 constraints on arguments and register usage. 9572 9573 Millicode calls always expect their arguments in the integer argument 9574 registers, and always return their result in %r29 (ret1). They 9575 are expected to clobber their arguments, %r1, %r29, and the return 9576 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else. 9577 9578 This function tells reorg that the references to arguments and 9579 millicode calls do not appear to happen until after the millicode call. 9580 This allows reorg to put insns which set the argument registers into the 9581 delay slot of the millicode call -- thus they act more like traditional 9582 CALL_INSNs. 9583 9584 Note we cannot consider side effects of the insn to be delayed because 9585 the branch and link insn will clobber the return pointer. If we happened 9586 to use the return pointer in the delay slot of the call, then we lose. 9587 9588 get_attr_type will try to recognize the given insn, so make sure to 9589 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns 9590 in particular. */ 9591 int 9592 pa_insn_refs_are_delayed (rtx_insn *insn) 9593 { 9594 return ((NONJUMP_INSN_P (insn) 9595 && GET_CODE (PATTERN (insn)) != SEQUENCE 9596 && GET_CODE (PATTERN (insn)) != USE 9597 && GET_CODE (PATTERN (insn)) != CLOBBER 9598 && get_attr_type (insn) == TYPE_MILLI)); 9599 } 9600 9601 /* Promote the return value, but not the arguments. */ 9602 9603 static machine_mode 9604 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, 9605 machine_mode mode, 9606 int *punsignedp ATTRIBUTE_UNUSED, 9607 const_tree fntype ATTRIBUTE_UNUSED, 9608 int for_return) 9609 { 9610 if (for_return == 0) 9611 return mode; 9612 return promote_mode (type, mode, punsignedp); 9613 } 9614 9615 /* On the HP-PA the value is found in register(s) 28(-29), unless 9616 the mode is SF or DF. Then the value is returned in fr4 (32). 9617 9618 This must perform the same promotions as PROMOTE_MODE, else promoting 9619 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly. 9620 9621 Small structures must be returned in a PARALLEL on PA64 in order 9622 to match the HP Compiler ABI. */ 9623 9624 static rtx 9625 pa_function_value (const_tree valtype, 9626 const_tree func ATTRIBUTE_UNUSED, 9627 bool outgoing ATTRIBUTE_UNUSED) 9628 { 9629 machine_mode valmode; 9630 9631 if (AGGREGATE_TYPE_P (valtype) 9632 || TREE_CODE (valtype) == COMPLEX_TYPE 9633 || TREE_CODE (valtype) == VECTOR_TYPE) 9634 { 9635 HOST_WIDE_INT valsize = int_size_in_bytes (valtype); 9636 9637 /* Handle aggregates that fit exactly in a word or double word. */ 9638 if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD) 9639 return gen_rtx_REG (TYPE_MODE (valtype), 28); 9640 9641 if (TARGET_64BIT) 9642 { 9643 /* Aggregates with a size less than or equal to 128 bits are 9644 returned in GR 28(-29). They are left justified. The pad 9645 bits are undefined. Larger aggregates are returned in 9646 memory. */ 9647 rtx loc[2]; 9648 int i, offset = 0; 9649 int ub = valsize <= UNITS_PER_WORD ? 1 : 2; 9650 9651 for (i = 0; i < ub; i++) 9652 { 9653 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 9654 gen_rtx_REG (DImode, 28 + i), 9655 GEN_INT (offset)); 9656 offset += 8; 9657 } 9658 9659 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc)); 9660 } 9661 else if (valsize > UNITS_PER_WORD) 9662 { 9663 /* Aggregates 5 to 8 bytes in size are returned in general 9664 registers r28-r29 in the same manner as other non 9665 floating-point objects. The data is right-justified and 9666 zero-extended to 64 bits. This is opposite to the normal 9667 justification used on big endian targets and requires 9668 special treatment. */ 9669 rtx loc = gen_rtx_EXPR_LIST (VOIDmode, 9670 gen_rtx_REG (DImode, 28), const0_rtx); 9671 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc)); 9672 } 9673 } 9674 9675 if ((INTEGRAL_TYPE_P (valtype) 9676 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD) 9677 || POINTER_TYPE_P (valtype)) 9678 valmode = word_mode; 9679 else 9680 valmode = TYPE_MODE (valtype); 9681 9682 if (TREE_CODE (valtype) == REAL_TYPE 9683 && !AGGREGATE_TYPE_P (valtype) 9684 && TYPE_MODE (valtype) != TFmode 9685 && !TARGET_SOFT_FLOAT) 9686 return gen_rtx_REG (valmode, 32); 9687 9688 return gen_rtx_REG (valmode, 28); 9689 } 9690 9691 /* Implement the TARGET_LIBCALL_VALUE hook. */ 9692 9693 static rtx 9694 pa_libcall_value (machine_mode mode, 9695 const_rtx fun ATTRIBUTE_UNUSED) 9696 { 9697 if (! TARGET_SOFT_FLOAT 9698 && (mode == SFmode || mode == DFmode)) 9699 return gen_rtx_REG (mode, 32); 9700 else 9701 return gen_rtx_REG (mode, 28); 9702 } 9703 9704 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */ 9705 9706 static bool 9707 pa_function_value_regno_p (const unsigned int regno) 9708 { 9709 if (regno == 28 9710 || (! TARGET_SOFT_FLOAT && regno == 32)) 9711 return true; 9712 9713 return false; 9714 } 9715 9716 /* Update the data in CUM to advance over argument ARG. */ 9717 9718 static void 9719 pa_function_arg_advance (cumulative_args_t cum_v, 9720 const function_arg_info &arg) 9721 { 9722 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 9723 int arg_size = pa_function_arg_size (arg.mode, arg.type); 9724 9725 cum->nargs_prototype--; 9726 cum->words += (arg_size 9727 + ((cum->words & 01) 9728 && arg.type != NULL_TREE 9729 && arg_size > 1)); 9730 } 9731 9732 /* Return the location of a parameter that is passed in a register or NULL 9733 if the parameter has any component that is passed in memory. 9734 9735 This is new code and will be pushed to into the net sources after 9736 further testing. 9737 9738 ??? We might want to restructure this so that it looks more like other 9739 ports. */ 9740 static rtx 9741 pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) 9742 { 9743 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 9744 tree type = arg.type; 9745 machine_mode mode = arg.mode; 9746 int max_arg_words = (TARGET_64BIT ? 8 : 4); 9747 int alignment = 0; 9748 int arg_size; 9749 int fpr_reg_base; 9750 int gpr_reg_base; 9751 rtx retval; 9752 9753 if (arg.end_marker_p ()) 9754 return NULL_RTX; 9755 9756 arg_size = pa_function_arg_size (mode, type); 9757 9758 /* If this arg would be passed partially or totally on the stack, then 9759 this routine should return zero. pa_arg_partial_bytes will 9760 handle arguments which are split between regs and stack slots if 9761 the ABI mandates split arguments. */ 9762 if (!TARGET_64BIT) 9763 { 9764 /* The 32-bit ABI does not split arguments. */ 9765 if (cum->words + arg_size > max_arg_words) 9766 return NULL_RTX; 9767 } 9768 else 9769 { 9770 if (arg_size > 1) 9771 alignment = cum->words & 1; 9772 if (cum->words + alignment >= max_arg_words) 9773 return NULL_RTX; 9774 } 9775 9776 /* The 32bit ABIs and the 64bit ABIs are rather different, 9777 particularly in their handling of FP registers. We might 9778 be able to cleverly share code between them, but I'm not 9779 going to bother in the hope that splitting them up results 9780 in code that is more easily understood. */ 9781 9782 if (TARGET_64BIT) 9783 { 9784 /* Advance the base registers to their current locations. 9785 9786 Remember, gprs grow towards smaller register numbers while 9787 fprs grow to higher register numbers. Also remember that 9788 although FP regs are 32-bit addressable, we pretend that 9789 the registers are 64-bits wide. */ 9790 gpr_reg_base = 26 - cum->words; 9791 fpr_reg_base = 32 + cum->words; 9792 9793 /* Arguments wider than one word and small aggregates need special 9794 treatment. */ 9795 if (arg_size > 1 9796 || mode == BLKmode 9797 || (type && (AGGREGATE_TYPE_P (type) 9798 || TREE_CODE (type) == COMPLEX_TYPE 9799 || TREE_CODE (type) == VECTOR_TYPE))) 9800 { 9801 /* Double-extended precision (80-bit), quad-precision (128-bit) 9802 and aggregates including complex numbers are aligned on 9803 128-bit boundaries. The first eight 64-bit argument slots 9804 are associated one-to-one, with general registers r26 9805 through r19, and also with floating-point registers fr4 9806 through fr11. Arguments larger than one word are always 9807 passed in general registers. 9808 9809 Using a PARALLEL with a word mode register results in left 9810 justified data on a big-endian target. */ 9811 9812 rtx loc[8]; 9813 int i, offset = 0, ub = arg_size; 9814 9815 /* Align the base register. */ 9816 gpr_reg_base -= alignment; 9817 9818 ub = MIN (ub, max_arg_words - cum->words - alignment); 9819 for (i = 0; i < ub; i++) 9820 { 9821 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 9822 gen_rtx_REG (DImode, gpr_reg_base), 9823 GEN_INT (offset)); 9824 gpr_reg_base -= 1; 9825 offset += 8; 9826 } 9827 9828 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc)); 9829 } 9830 } 9831 else 9832 { 9833 /* If the argument is larger than a word, then we know precisely 9834 which registers we must use. */ 9835 if (arg_size > 1) 9836 { 9837 if (cum->words) 9838 { 9839 gpr_reg_base = 23; 9840 fpr_reg_base = 38; 9841 } 9842 else 9843 { 9844 gpr_reg_base = 25; 9845 fpr_reg_base = 34; 9846 } 9847 9848 /* Structures 5 to 8 bytes in size are passed in the general 9849 registers in the same manner as other non floating-point 9850 objects. The data is right-justified and zero-extended 9851 to 64 bits. This is opposite to the normal justification 9852 used on big endian targets and requires special treatment. 9853 We now define BLOCK_REG_PADDING to pad these objects. 9854 Aggregates, complex and vector types are passed in the same 9855 manner as structures. */ 9856 if (mode == BLKmode 9857 || (type && (AGGREGATE_TYPE_P (type) 9858 || TREE_CODE (type) == COMPLEX_TYPE 9859 || TREE_CODE (type) == VECTOR_TYPE))) 9860 { 9861 rtx loc = gen_rtx_EXPR_LIST (VOIDmode, 9862 gen_rtx_REG (DImode, gpr_reg_base), 9863 const0_rtx); 9864 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc)); 9865 } 9866 } 9867 else 9868 { 9869 /* We have a single word (32 bits). A simple computation 9870 will get us the register #s we need. */ 9871 gpr_reg_base = 26 - cum->words; 9872 fpr_reg_base = 32 + 2 * cum->words; 9873 } 9874 } 9875 9876 /* Determine if the argument needs to be passed in both general and 9877 floating point registers. */ 9878 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32) 9879 /* If we are doing soft-float with portable runtime, then there 9880 is no need to worry about FP regs. */ 9881 && !TARGET_SOFT_FLOAT 9882 /* The parameter must be some kind of scalar float, else we just 9883 pass it in integer registers. */ 9884 && GET_MODE_CLASS (mode) == MODE_FLOAT 9885 /* The target function must not have a prototype. */ 9886 && cum->nargs_prototype <= 0 9887 /* libcalls do not need to pass items in both FP and general 9888 registers. */ 9889 && type != NULL_TREE 9890 /* All this hair applies to "outgoing" args only. This includes 9891 sibcall arguments setup with FUNCTION_INCOMING_ARG. */ 9892 && !cum->incoming) 9893 /* Also pass outgoing floating arguments in both registers in indirect 9894 calls with the 32 bit ABI and the HP assembler since there is no 9895 way to the specify argument locations in static functions. */ 9896 || (!TARGET_64BIT 9897 && !TARGET_GAS 9898 && !cum->incoming 9899 && cum->indirect 9900 && GET_MODE_CLASS (mode) == MODE_FLOAT)) 9901 { 9902 retval 9903 = gen_rtx_PARALLEL 9904 (mode, 9905 gen_rtvec (2, 9906 gen_rtx_EXPR_LIST (VOIDmode, 9907 gen_rtx_REG (mode, fpr_reg_base), 9908 const0_rtx), 9909 gen_rtx_EXPR_LIST (VOIDmode, 9910 gen_rtx_REG (mode, gpr_reg_base), 9911 const0_rtx))); 9912 } 9913 else 9914 { 9915 /* See if we should pass this parameter in a general register. */ 9916 if (TARGET_SOFT_FLOAT 9917 /* Indirect calls in the normal 32bit ABI require all arguments 9918 to be passed in general registers. */ 9919 || (!TARGET_PORTABLE_RUNTIME 9920 && !TARGET_64BIT 9921 && !TARGET_ELF32 9922 && cum->indirect) 9923 /* If the parameter is not a scalar floating-point parameter, 9924 then it belongs in GPRs. */ 9925 || GET_MODE_CLASS (mode) != MODE_FLOAT 9926 /* Structure with single SFmode field belongs in GPR. */ 9927 || (type && AGGREGATE_TYPE_P (type))) 9928 retval = gen_rtx_REG (mode, gpr_reg_base); 9929 else 9930 retval = gen_rtx_REG (mode, fpr_reg_base); 9931 } 9932 return retval; 9933 } 9934 9935 /* Arguments larger than one word are double word aligned. */ 9936 9937 static unsigned int 9938 pa_function_arg_boundary (machine_mode mode, const_tree type) 9939 { 9940 bool singleword = (type 9941 ? (integer_zerop (TYPE_SIZE (type)) 9942 || !TREE_CONSTANT (TYPE_SIZE (type)) 9943 || int_size_in_bytes (type) <= UNITS_PER_WORD) 9944 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD); 9945 9946 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY; 9947 } 9948 9949 /* If this arg would be passed totally in registers or totally on the stack, 9950 then this routine should return zero. */ 9951 9952 static int 9953 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg) 9954 { 9955 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 9956 unsigned int max_arg_words = 8; 9957 unsigned int offset = 0; 9958 9959 if (!TARGET_64BIT) 9960 return 0; 9961 9962 if (pa_function_arg_size (arg.mode, arg.type) > 1 && (cum->words & 1)) 9963 offset = 1; 9964 9965 if (cum->words + offset + pa_function_arg_size (arg.mode, arg.type) 9966 <= max_arg_words) 9967 /* Arg fits fully into registers. */ 9968 return 0; 9969 else if (cum->words + offset >= max_arg_words) 9970 /* Arg fully on the stack. */ 9971 return 0; 9972 else 9973 /* Arg is split. */ 9974 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD; 9975 } 9976 9977 9978 /* A get_unnamed_section callback for switching to the text section. 9979 9980 This function is only used with SOM. Because we don't support 9981 named subspaces, we can only create a new subspace or switch back 9982 to the default text subspace. */ 9983 9984 static void 9985 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED) 9986 { 9987 gcc_assert (TARGET_SOM); 9988 if (TARGET_GAS) 9989 { 9990 if (cfun && cfun->machine && !cfun->machine->in_nsubspa) 9991 { 9992 /* We only want to emit a .nsubspa directive once at the 9993 start of the function. */ 9994 cfun->machine->in_nsubspa = 1; 9995 9996 /* Create a new subspace for the text. This provides 9997 better stub placement and one-only functions. */ 9998 if (cfun->decl 9999 && DECL_ONE_ONLY (cfun->decl) 10000 && !DECL_WEAK (cfun->decl)) 10001 { 10002 output_section_asm_op ("\t.SPACE $TEXT$\n" 10003 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8," 10004 "ACCESS=44,SORT=24,COMDAT"); 10005 return; 10006 } 10007 } 10008 else 10009 { 10010 /* There isn't a current function or the body of the current 10011 function has been completed. So, we are changing to the 10012 text section to output debugging information. Thus, we 10013 need to forget that we are in the text section so that 10014 varasm.c will call us when text_section is selected again. */ 10015 gcc_assert (!cfun || !cfun->machine 10016 || cfun->machine->in_nsubspa == 2); 10017 in_section = NULL; 10018 } 10019 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$"); 10020 return; 10021 } 10022 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$"); 10023 } 10024 10025 /* A get_unnamed_section callback for switching to comdat data 10026 sections. This function is only used with SOM. */ 10027 10028 static void 10029 som_output_comdat_data_section_asm_op (const void *data) 10030 { 10031 in_section = NULL; 10032 output_section_asm_op (data); 10033 } 10034 10035 /* Implement TARGET_ASM_INIT_SECTIONS. */ 10036 10037 static void 10038 pa_som_asm_init_sections (void) 10039 { 10040 text_section 10041 = get_unnamed_section (0, som_output_text_section_asm_op, NULL); 10042 10043 /* SOM puts readonly data in the default $LIT$ subspace when PIC code 10044 is not being generated. */ 10045 som_readonly_data_section 10046 = get_unnamed_section (0, output_section_asm_op, 10047 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$"); 10048 10049 /* When secondary definitions are not supported, SOM makes readonly 10050 data one-only by creating a new $LIT$ subspace in $TEXT$ with 10051 the comdat flag. */ 10052 som_one_only_readonly_data_section 10053 = get_unnamed_section (0, som_output_comdat_data_section_asm_op, 10054 "\t.SPACE $TEXT$\n" 10055 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8," 10056 "ACCESS=0x2c,SORT=16,COMDAT"); 10057 10058 10059 /* When secondary definitions are not supported, SOM makes data one-only 10060 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */ 10061 som_one_only_data_section 10062 = get_unnamed_section (SECTION_WRITE, 10063 som_output_comdat_data_section_asm_op, 10064 "\t.SPACE $PRIVATE$\n" 10065 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8," 10066 "ACCESS=31,SORT=24,COMDAT"); 10067 10068 if (flag_tm) 10069 som_tm_clone_table_section 10070 = get_unnamed_section (0, output_section_asm_op, 10071 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$"); 10072 10073 /* HPUX ld generates incorrect GOT entries for "T" fixups which 10074 reference data within the $TEXT$ space (for example constant 10075 strings in the $LIT$ subspace). 10076 10077 The assemblers (GAS and HP as) both have problems with handling 10078 the difference of two symbols. This is the other correct way to 10079 reference constant data during PIC code generation. 10080 10081 Thus, we can't put constant data needing relocation in the $TEXT$ 10082 space during PIC generation. 10083 10084 Previously, we placed all constant data into the $DATA$ subspace 10085 when generating PIC code. This reduces sharing, but it works 10086 correctly. Now we rely on pa_reloc_rw_mask() for section selection. 10087 This puts constant data not needing relocation into the $TEXT$ space. */ 10088 readonly_data_section = som_readonly_data_section; 10089 10090 /* We must not have a reference to an external symbol defined in a 10091 shared library in a readonly section, else the SOM linker will 10092 complain. 10093 10094 So, we force exception information into the data section. */ 10095 exception_section = data_section; 10096 } 10097 10098 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */ 10099 10100 static section * 10101 pa_som_tm_clone_table_section (void) 10102 { 10103 return som_tm_clone_table_section; 10104 } 10105 10106 /* On hpux10, the linker will give an error if we have a reference 10107 in the read-only data section to a symbol defined in a shared 10108 library. Therefore, expressions that might require a reloc 10109 cannot be placed in the read-only data section. */ 10110 10111 static section * 10112 pa_select_section (tree exp, int reloc, 10113 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) 10114 { 10115 if (TREE_CODE (exp) == VAR_DECL 10116 && TREE_READONLY (exp) 10117 && !TREE_THIS_VOLATILE (exp) 10118 && DECL_INITIAL (exp) 10119 && (DECL_INITIAL (exp) == error_mark_node 10120 || TREE_CONSTANT (DECL_INITIAL (exp))) 10121 && !(reloc & pa_reloc_rw_mask ())) 10122 { 10123 if (TARGET_SOM 10124 && DECL_ONE_ONLY (exp) 10125 && !DECL_WEAK (exp)) 10126 return som_one_only_readonly_data_section; 10127 else 10128 return readonly_data_section; 10129 } 10130 else if (CONSTANT_CLASS_P (exp) 10131 && !(reloc & pa_reloc_rw_mask ())) 10132 return readonly_data_section; 10133 else if (TARGET_SOM 10134 && TREE_CODE (exp) == VAR_DECL 10135 && DECL_ONE_ONLY (exp) 10136 && !DECL_WEAK (exp)) 10137 return som_one_only_data_section; 10138 else 10139 return data_section; 10140 } 10141 10142 /* Implement pa_elf_select_rtx_section. If X is a function label operand 10143 and the function is in a COMDAT group, place the plabel reference in the 10144 .data.rel.ro.local section. The linker ignores references to symbols in 10145 discarded sections from this section. */ 10146 10147 static section * 10148 pa_elf_select_rtx_section (machine_mode mode, rtx x, 10149 unsigned HOST_WIDE_INT align) 10150 { 10151 if (function_label_operand (x, VOIDmode)) 10152 { 10153 tree decl = SYMBOL_REF_DECL (x); 10154 10155 if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl))) 10156 return get_named_section (NULL, ".data.rel.ro.local", 1); 10157 } 10158 10159 return default_elf_select_rtx_section (mode, x, align); 10160 } 10161 10162 /* Implement pa_reloc_rw_mask. */ 10163 10164 static int 10165 pa_reloc_rw_mask (void) 10166 { 10167 if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11)) 10168 return 3; 10169 10170 /* HP linker does not support global relocs in readonly memory. */ 10171 return TARGET_SOM ? 2 : 0; 10172 } 10173 10174 static void 10175 pa_globalize_label (FILE *stream, const char *name) 10176 { 10177 /* We only handle DATA objects here, functions are globalized in 10178 ASM_DECLARE_FUNCTION_NAME. */ 10179 if (! FUNCTION_NAME_P (name)) 10180 { 10181 fputs ("\t.EXPORT ", stream); 10182 assemble_name (stream, name); 10183 fputs (",DATA\n", stream); 10184 } 10185 } 10186 10187 /* Worker function for TARGET_STRUCT_VALUE_RTX. */ 10188 10189 static rtx 10190 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, 10191 int incoming ATTRIBUTE_UNUSED) 10192 { 10193 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM); 10194 } 10195 10196 /* Worker function for TARGET_RETURN_IN_MEMORY. */ 10197 10198 bool 10199 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 10200 { 10201 /* SOM ABI says that objects larger than 64 bits are returned in memory. 10202 PA64 ABI says that objects larger than 128 bits are returned in memory. 10203 Note, int_size_in_bytes can return -1 if the size of the object is 10204 variable or larger than the maximum value that can be expressed as 10205 a HOST_WIDE_INT. It can also return zero for an empty type. The 10206 simplest way to handle variable and empty types is to pass them in 10207 memory. This avoids problems in defining the boundaries of argument 10208 slots, allocating registers, etc. */ 10209 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8) 10210 || int_size_in_bytes (type) <= 0); 10211 } 10212 10213 /* Structure to hold declaration and name of external symbols that are 10214 emitted by GCC. We generate a vector of these symbols and output them 10215 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true. 10216 This avoids putting out names that are never really used. */ 10217 10218 typedef struct GTY(()) extern_symbol 10219 { 10220 tree decl; 10221 const char *name; 10222 } extern_symbol; 10223 10224 /* Define gc'd vector type for extern_symbol. */ 10225 10226 /* Vector of extern_symbol pointers. */ 10227 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols; 10228 10229 #ifdef ASM_OUTPUT_EXTERNAL_REAL 10230 /* Mark DECL (name NAME) as an external reference (assembler output 10231 file FILE). This saves the names to output at the end of the file 10232 if actually referenced. */ 10233 10234 void 10235 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name) 10236 { 10237 gcc_assert (file == asm_out_file); 10238 extern_symbol p = {decl, name}; 10239 vec_safe_push (extern_symbols, p); 10240 } 10241 #endif 10242 10243 /* Output text required at the end of an assembler file. 10244 This includes deferred plabels and .import directives for 10245 all external symbols that were actually referenced. */ 10246 10247 static void 10248 pa_file_end (void) 10249 { 10250 #ifdef ASM_OUTPUT_EXTERNAL_REAL 10251 unsigned int i; 10252 extern_symbol *p; 10253 10254 if (!NO_DEFERRED_PROFILE_COUNTERS) 10255 output_deferred_profile_counters (); 10256 #endif 10257 10258 output_deferred_plabels (); 10259 10260 #ifdef ASM_OUTPUT_EXTERNAL_REAL 10261 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++) 10262 { 10263 tree decl = p->decl; 10264 10265 if (!TREE_ASM_WRITTEN (decl) 10266 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0))) 10267 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name); 10268 } 10269 10270 vec_free (extern_symbols); 10271 #endif 10272 10273 if (NEED_INDICATE_EXEC_STACK) 10274 file_end_indicate_exec_stack (); 10275 } 10276 10277 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ 10278 10279 static bool 10280 pa_can_change_mode_class (machine_mode from, machine_mode to, 10281 reg_class_t rclass) 10282 { 10283 if (from == to) 10284 return true; 10285 10286 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)) 10287 return true; 10288 10289 /* Reject changes to/from modes with zero size. */ 10290 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to)) 10291 return false; 10292 10293 /* Reject changes to/from complex and vector modes. */ 10294 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from) 10295 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to)) 10296 return false; 10297 10298 /* There is no way to load QImode or HImode values directly from memory 10299 to a FP register. SImode loads to the FP registers are not zero 10300 extended. On the 64-bit target, this conflicts with the definition 10301 of LOAD_EXTEND_OP. Thus, we reject all mode changes in the FP registers 10302 except for DImode to SImode on the 64-bit target. It is handled by 10303 register renaming in pa_print_operand. */ 10304 if (MAYBE_FP_REG_CLASS_P (rclass)) 10305 return TARGET_64BIT && from == DImode && to == SImode; 10306 10307 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word 10308 in specific sets of registers. Thus, we cannot allow changing 10309 to a larger mode when it's larger than a word. */ 10310 if (GET_MODE_SIZE (to) > UNITS_PER_WORD 10311 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from)) 10312 return false; 10313 10314 return true; 10315 } 10316 10317 /* Implement TARGET_MODES_TIEABLE_P. 10318 10319 We should return FALSE for QImode and HImode because these modes 10320 are not ok in the floating-point registers. However, this prevents 10321 tieing these modes to SImode and DImode in the general registers. 10322 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and 10323 TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used 10324 in the floating-point registers. */ 10325 10326 static bool 10327 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2) 10328 { 10329 /* Don't tie modes in different classes. */ 10330 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2)) 10331 return false; 10332 10333 return true; 10334 } 10335 10336 10337 /* Length in units of the trampoline instruction code. */ 10338 10339 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48)) 10340 10341 10342 /* Output assembler code for a block containing the constant parts 10343 of a trampoline, leaving space for the variable parts.\ 10344 10345 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM 10346 and then branches to the specified routine. 10347 10348 This code template is copied from text segment to stack location 10349 and then patched with pa_trampoline_init to contain valid values, 10350 and then entered as a subroutine. 10351 10352 It is best to keep this as small as possible to avoid having to 10353 flush multiple lines in the cache. */ 10354 10355 static void 10356 pa_asm_trampoline_template (FILE *f) 10357 { 10358 if (!TARGET_64BIT) 10359 { 10360 if (TARGET_PA_20) 10361 { 10362 fputs ("\tmfia %r20\n", f); 10363 fputs ("\tldw 48(%r20),%r22\n", f); 10364 fputs ("\tcopy %r22,%r21\n", f); 10365 fputs ("\tbb,>=,n %r22,30,.+16\n", f); 10366 fputs ("\tdepwi 0,31,2,%r22\n", f); 10367 fputs ("\tldw 0(%r22),%r21\n", f); 10368 fputs ("\tldw 4(%r22),%r19\n", f); 10369 fputs ("\tbve (%r21)\n", f); 10370 fputs ("\tldw 52(%r1),%r29\n", f); 10371 fputs ("\t.word 0\n", f); 10372 fputs ("\t.word 0\n", f); 10373 fputs ("\t.word 0\n", f); 10374 } 10375 else 10376 { 10377 if (ASSEMBLER_DIALECT == 0) 10378 { 10379 fputs ("\tbl .+8,%r20\n", f); 10380 fputs ("\tdepi 0,31,2,%r20\n", f); 10381 } 10382 else 10383 { 10384 fputs ("\tb,l .+8,%r20\n", f); 10385 fputs ("\tdepwi 0,31,2,%r20\n", f); 10386 } 10387 fputs ("\tldw 40(%r20),%r22\n", f); 10388 fputs ("\tcopy %r22,%r21\n", f); 10389 fputs ("\tbb,>=,n %r22,30,.+16\n", f); 10390 if (ASSEMBLER_DIALECT == 0) 10391 fputs ("\tdepi 0,31,2,%r22\n", f); 10392 else 10393 fputs ("\tdepwi 0,31,2,%r22\n", f); 10394 fputs ("\tldw 0(%r22),%r21\n", f); 10395 fputs ("\tldw 4(%r22),%r19\n", f); 10396 fputs ("\tldsid (%r21),%r1\n", f); 10397 fputs ("\tmtsp %r1,%sr0\n", f); 10398 fputs ("\tbe 0(%sr0,%r21)\n", f); 10399 fputs ("\tldw 44(%r20),%r29\n", f); 10400 } 10401 fputs ("\t.word 0\n", f); 10402 fputs ("\t.word 0\n", f); 10403 fputs ("\t.word 0\n", f); 10404 fputs ("\t.word 0\n", f); 10405 } 10406 else 10407 { 10408 fputs ("\t.dword 0\n", f); 10409 fputs ("\t.dword 0\n", f); 10410 fputs ("\t.dword 0\n", f); 10411 fputs ("\t.dword 0\n", f); 10412 fputs ("\tmfia %r31\n", f); 10413 fputs ("\tldd 24(%r31),%r27\n", f); 10414 fputs ("\tldd 32(%r31),%r31\n", f); 10415 fputs ("\tldd 16(%r27),%r1\n", f); 10416 fputs ("\tbve (%r1)\n", f); 10417 fputs ("\tldd 24(%r27),%r27\n", f); 10418 fputs ("\t.dword 0 ; fptr\n", f); 10419 fputs ("\t.dword 0 ; static link\n", f); 10420 } 10421 } 10422 10423 /* Emit RTL insns to initialize the variable parts of a trampoline. 10424 FNADDR is an RTX for the address of the function's pure code. 10425 CXT is an RTX for the static chain value for the function. 10426 10427 Move the function address to the trampoline template at offset 48. 10428 Move the static chain value to trampoline template at offset 52. 10429 Move the trampoline address to trampoline template at offset 56. 10430 Move r19 to trampoline template at offset 60. The latter two 10431 words create a plabel for the indirect call to the trampoline. 10432 10433 A similar sequence is used for the 64-bit port but the plabel is 10434 at the beginning of the trampoline. 10435 10436 Finally, the cache entries for the trampoline code are flushed. 10437 This is necessary to ensure that the trampoline instruction sequence 10438 is written to memory prior to any attempts at prefetching the code 10439 sequence. */ 10440 10441 static void 10442 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) 10443 { 10444 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 10445 rtx start_addr = gen_reg_rtx (Pmode); 10446 rtx end_addr = gen_reg_rtx (Pmode); 10447 rtx line_length = gen_reg_rtx (Pmode); 10448 rtx r_tramp, tmp; 10449 10450 emit_block_move (m_tramp, assemble_trampoline_template (), 10451 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); 10452 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0)); 10453 10454 if (!TARGET_64BIT) 10455 { 10456 tmp = adjust_address (m_tramp, Pmode, 48); 10457 emit_move_insn (tmp, fnaddr); 10458 tmp = adjust_address (m_tramp, Pmode, 52); 10459 emit_move_insn (tmp, chain_value); 10460 10461 /* Create a fat pointer for the trampoline. */ 10462 tmp = adjust_address (m_tramp, Pmode, 56); 10463 emit_move_insn (tmp, r_tramp); 10464 tmp = adjust_address (m_tramp, Pmode, 60); 10465 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19)); 10466 10467 /* fdc and fic only use registers for the address to flush, 10468 they do not accept integer displacements. We align the 10469 start and end addresses to the beginning of their respective 10470 cache lines to minimize the number of lines flushed. */ 10471 emit_insn (gen_andsi3 (start_addr, r_tramp, 10472 GEN_INT (-MIN_CACHELINE_SIZE))); 10473 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 10474 TRAMPOLINE_CODE_SIZE-1)); 10475 emit_insn (gen_andsi3 (end_addr, tmp, 10476 GEN_INT (-MIN_CACHELINE_SIZE))); 10477 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); 10478 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length)); 10479 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length, 10480 gen_reg_rtx (Pmode), 10481 gen_reg_rtx (Pmode))); 10482 } 10483 else 10484 { 10485 tmp = adjust_address (m_tramp, Pmode, 56); 10486 emit_move_insn (tmp, fnaddr); 10487 tmp = adjust_address (m_tramp, Pmode, 64); 10488 emit_move_insn (tmp, chain_value); 10489 10490 /* Create a fat pointer for the trampoline. */ 10491 tmp = adjust_address (m_tramp, Pmode, 16); 10492 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode, 10493 r_tramp, 32))); 10494 tmp = adjust_address (m_tramp, Pmode, 24); 10495 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27)); 10496 10497 /* fdc and fic only use registers for the address to flush, 10498 they do not accept integer displacements. We align the 10499 start and end addresses to the beginning of their respective 10500 cache lines to minimize the number of lines flushed. */ 10501 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32)); 10502 emit_insn (gen_anddi3 (start_addr, tmp, 10503 GEN_INT (-MIN_CACHELINE_SIZE))); 10504 tmp = force_reg (Pmode, plus_constant (Pmode, tmp, 10505 TRAMPOLINE_CODE_SIZE - 1)); 10506 emit_insn (gen_anddi3 (end_addr, tmp, 10507 GEN_INT (-MIN_CACHELINE_SIZE))); 10508 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); 10509 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length)); 10510 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length, 10511 gen_reg_rtx (Pmode), 10512 gen_reg_rtx (Pmode))); 10513 } 10514 10515 #ifdef HAVE_ENABLE_EXECUTE_STACK 10516 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 10517 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); 10518 #endif 10519 } 10520 10521 /* Perform any machine-specific adjustment in the address of the trampoline. 10522 ADDR contains the address that was passed to pa_trampoline_init. 10523 Adjust the trampoline address to point to the plabel at offset 56. */ 10524 10525 static rtx 10526 pa_trampoline_adjust_address (rtx addr) 10527 { 10528 if (!TARGET_64BIT) 10529 addr = memory_address (Pmode, plus_constant (Pmode, addr, 58)); 10530 return addr; 10531 } 10532 10533 static rtx 10534 pa_delegitimize_address (rtx orig_x) 10535 { 10536 rtx x = delegitimize_mem_from_attrs (orig_x); 10537 10538 if (GET_CODE (x) == LO_SUM 10539 && GET_CODE (XEXP (x, 1)) == UNSPEC 10540 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R) 10541 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0)); 10542 return x; 10543 } 10544 10545 static rtx 10546 pa_internal_arg_pointer (void) 10547 { 10548 /* The argument pointer and the hard frame pointer are the same in 10549 the 32-bit runtime, so we don't need a copy. */ 10550 if (TARGET_64BIT) 10551 return copy_to_reg (virtual_incoming_args_rtx); 10552 else 10553 return virtual_incoming_args_rtx; 10554 } 10555 10556 /* Given FROM and TO register numbers, say whether this elimination is allowed. 10557 Frame pointer elimination is automatically handled. */ 10558 10559 static bool 10560 pa_can_eliminate (const int from, const int to) 10561 { 10562 /* The argument cannot be eliminated in the 64-bit runtime. */ 10563 if (TARGET_64BIT && from == ARG_POINTER_REGNUM) 10564 return false; 10565 10566 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM 10567 ? ! frame_pointer_needed 10568 : true); 10569 } 10570 10571 /* Define the offset between two registers, FROM to be eliminated and its 10572 replacement TO, at the start of a routine. */ 10573 HOST_WIDE_INT 10574 pa_initial_elimination_offset (int from, int to) 10575 { 10576 HOST_WIDE_INT offset; 10577 10578 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM) 10579 && to == STACK_POINTER_REGNUM) 10580 offset = -pa_compute_frame_size (get_frame_size (), 0); 10581 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 10582 offset = 0; 10583 else 10584 gcc_unreachable (); 10585 10586 return offset; 10587 } 10588 10589 static void 10590 pa_conditional_register_usage (void) 10591 { 10592 int i; 10593 10594 if (!TARGET_64BIT && !TARGET_PA_11) 10595 { 10596 for (i = 56; i <= FP_REG_LAST; i++) 10597 fixed_regs[i] = call_used_regs[i] = 1; 10598 for (i = 33; i < 56; i += 2) 10599 fixed_regs[i] = call_used_regs[i] = 1; 10600 } 10601 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT) 10602 { 10603 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++) 10604 fixed_regs[i] = call_used_regs[i] = 1; 10605 } 10606 if (flag_pic) 10607 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 10608 } 10609 10610 /* Target hook for c_mode_for_suffix. */ 10611 10612 static machine_mode 10613 pa_c_mode_for_suffix (char suffix) 10614 { 10615 if (HPUX_LONG_DOUBLE_LIBRARY) 10616 { 10617 if (suffix == 'q') 10618 return TFmode; 10619 } 10620 10621 return VOIDmode; 10622 } 10623 10624 /* Target hook for function_section. */ 10625 10626 static section * 10627 pa_function_section (tree decl, enum node_frequency freq, 10628 bool startup, bool exit) 10629 { 10630 /* Put functions in text section if target doesn't have named sections. */ 10631 if (!targetm_common.have_named_sections) 10632 return text_section; 10633 10634 /* Force nested functions into the same section as the containing 10635 function. */ 10636 if (decl 10637 && DECL_SECTION_NAME (decl) == NULL 10638 && DECL_CONTEXT (decl) != NULL_TREE 10639 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL 10640 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL) 10641 return function_section (DECL_CONTEXT (decl)); 10642 10643 /* Otherwise, use the default function section. */ 10644 return default_function_section (decl, freq, startup, exit); 10645 } 10646 10647 /* Implement TARGET_LEGITIMATE_CONSTANT_P. 10648 10649 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS 10650 that need more than three instructions to load prior to reload. This 10651 limit is somewhat arbitrary. It takes three instructions to load a 10652 CONST_INT from memory but two are memory accesses. It may be better 10653 to increase the allowed range for CONST_INTS. We may also be able 10654 to handle CONST_DOUBLES. */ 10655 10656 static bool 10657 pa_legitimate_constant_p (machine_mode mode, rtx x) 10658 { 10659 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode)) 10660 return false; 10661 10662 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF) 10663 return false; 10664 10665 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not 10666 legitimate constants. The other variants can't be handled by 10667 the move patterns after reload starts. */ 10668 if (tls_referenced_p (x)) 10669 return false; 10670 10671 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE) 10672 return false; 10673 10674 if (TARGET_64BIT 10675 && HOST_BITS_PER_WIDE_INT > 32 10676 && GET_CODE (x) == CONST_INT 10677 && !reload_in_progress 10678 && !reload_completed 10679 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x)) 10680 && !pa_cint_ok_for_move (UINTVAL (x))) 10681 return false; 10682 10683 if (function_label_operand (x, mode)) 10684 return false; 10685 10686 return true; 10687 } 10688 10689 /* Implement TARGET_SECTION_TYPE_FLAGS. */ 10690 10691 static unsigned int 10692 pa_section_type_flags (tree decl, const char *name, int reloc) 10693 { 10694 unsigned int flags; 10695 10696 flags = default_section_type_flags (decl, name, reloc); 10697 10698 /* Function labels are placed in the constant pool. This can 10699 cause a section conflict if decls are put in ".data.rel.ro" 10700 or ".data.rel.ro.local" using the __attribute__ construct. */ 10701 if (strcmp (name, ".data.rel.ro") == 0 10702 || strcmp (name, ".data.rel.ro.local") == 0) 10703 flags |= SECTION_WRITE | SECTION_RELRO; 10704 10705 return flags; 10706 } 10707 10708 /* pa_legitimate_address_p recognizes an RTL expression that is a 10709 valid memory address for an instruction. The MODE argument is the 10710 machine mode for the MEM expression that wants to use this address. 10711 10712 On HP PA-RISC, the legitimate address forms are REG+SMALLINT, 10713 REG+REG, and REG+(REG*SCALE). The indexed address forms are only 10714 available with floating point loads and stores, and integer loads. 10715 We get better code by allowing indexed addresses in the initial 10716 RTL generation. 10717 10718 The acceptance of indexed addresses as legitimate implies that we 10719 must provide patterns for doing indexed integer stores, or the move 10720 expanders must force the address of an indexed store to a register. 10721 We have adopted the latter approach. 10722 10723 Another function of pa_legitimate_address_p is to ensure that 10724 the base register is a valid pointer for indexed instructions. 10725 On targets that have non-equivalent space registers, we have to 10726 know at the time of assembler output which register in a REG+REG 10727 pair is the base register. The REG_POINTER flag is sometimes lost 10728 in reload and the following passes, so it can't be relied on during 10729 code generation. Thus, we either have to canonicalize the order 10730 of the registers in REG+REG indexed addresses, or treat REG+REG 10731 addresses separately and provide patterns for both permutations. 10732 10733 The latter approach requires several hundred additional lines of 10734 code in pa.md. The downside to canonicalizing is that a PLUS 10735 in the wrong order can't combine to form to make a scaled indexed 10736 memory operand. As we won't need to canonicalize the operands if 10737 the REG_POINTER lossage can be fixed, it seems better canonicalize. 10738 10739 We initially break out scaled indexed addresses in canonical order 10740 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes 10741 scaled indexed addresses during RTL generation. However, fold_rtx 10742 has its own opinion on how the operands of a PLUS should be ordered. 10743 If one of the operands is equivalent to a constant, it will make 10744 that operand the second operand. As the base register is likely to 10745 be equivalent to a SYMBOL_REF, we have made it the second operand. 10746 10747 pa_legitimate_address_p accepts REG+REG as legitimate when the 10748 operands are in the order INDEX+BASE on targets with non-equivalent 10749 space registers, and in any order on targets with equivalent space 10750 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing. 10751 10752 We treat a SYMBOL_REF as legitimate if it is part of the current 10753 function's constant-pool, because such addresses can actually be 10754 output as REG+SMALLINT. */ 10755 10756 static bool 10757 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict) 10758 { 10759 if ((REG_P (x) 10760 && (strict ? STRICT_REG_OK_FOR_BASE_P (x) 10761 : REG_OK_FOR_BASE_P (x))) 10762 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC 10763 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC) 10764 && REG_P (XEXP (x, 0)) 10765 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0)) 10766 : REG_OK_FOR_BASE_P (XEXP (x, 0))))) 10767 return true; 10768 10769 if (GET_CODE (x) == PLUS) 10770 { 10771 rtx base, index; 10772 10773 /* For REG+REG, the base register should be in XEXP (x, 1), 10774 so check it first. */ 10775 if (REG_P (XEXP (x, 1)) 10776 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1)) 10777 : REG_OK_FOR_BASE_P (XEXP (x, 1)))) 10778 base = XEXP (x, 1), index = XEXP (x, 0); 10779 else if (REG_P (XEXP (x, 0)) 10780 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0)) 10781 : REG_OK_FOR_BASE_P (XEXP (x, 0)))) 10782 base = XEXP (x, 0), index = XEXP (x, 1); 10783 else 10784 return false; 10785 10786 if (GET_CODE (index) == CONST_INT) 10787 { 10788 if (INT_5_BITS (index)) 10789 return true; 10790 10791 /* When INT14_OK_STRICT is false, a secondary reload is needed 10792 to adjust the displacement of SImode and DImode floating point 10793 instructions but this may fail when the register also needs 10794 reloading. So, we return false when STRICT is true. We 10795 also reject long displacements for float mode addresses since 10796 the majority of accesses will use floating point instructions 10797 that don't support 14-bit offsets. */ 10798 if (!INT14_OK_STRICT 10799 && (strict || !(reload_in_progress || reload_completed)) 10800 && mode != QImode 10801 && mode != HImode) 10802 return false; 10803 10804 return base14_operand (index, mode); 10805 } 10806 10807 if (!TARGET_DISABLE_INDEXING 10808 /* Only accept the "canonical" INDEX+BASE operand order 10809 on targets with non-equivalent space registers. */ 10810 && (TARGET_NO_SPACE_REGS 10811 ? REG_P (index) 10812 : (base == XEXP (x, 1) && REG_P (index) 10813 && (reload_completed 10814 || (reload_in_progress && HARD_REGISTER_P (base)) 10815 || REG_POINTER (base)) 10816 && (reload_completed 10817 || (reload_in_progress && HARD_REGISTER_P (index)) 10818 || !REG_POINTER (index)))) 10819 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode) 10820 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index) 10821 : REG_OK_FOR_INDEX_P (index)) 10822 && borx_reg_operand (base, Pmode) 10823 && borx_reg_operand (index, Pmode)) 10824 return true; 10825 10826 if (!TARGET_DISABLE_INDEXING 10827 && GET_CODE (index) == MULT 10828 /* Only accept base operands with the REG_POINTER flag prior to 10829 reload on targets with non-equivalent space registers. */ 10830 && (TARGET_NO_SPACE_REGS 10831 || (base == XEXP (x, 1) 10832 && (reload_completed 10833 || (reload_in_progress && HARD_REGISTER_P (base)) 10834 || REG_POINTER (base)))) 10835 && REG_P (XEXP (index, 0)) 10836 && GET_MODE (XEXP (index, 0)) == Pmode 10837 && MODE_OK_FOR_SCALED_INDEXING_P (mode) 10838 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0)) 10839 : REG_OK_FOR_INDEX_P (XEXP (index, 0))) 10840 && GET_CODE (XEXP (index, 1)) == CONST_INT 10841 && INTVAL (XEXP (index, 1)) 10842 == (HOST_WIDE_INT) GET_MODE_SIZE (mode) 10843 && borx_reg_operand (base, Pmode)) 10844 return true; 10845 10846 return false; 10847 } 10848 10849 if (GET_CODE (x) == LO_SUM) 10850 { 10851 rtx y = XEXP (x, 0); 10852 10853 if (GET_CODE (y) == SUBREG) 10854 y = SUBREG_REG (y); 10855 10856 if (REG_P (y) 10857 && (strict ? STRICT_REG_OK_FOR_BASE_P (y) 10858 : REG_OK_FOR_BASE_P (y))) 10859 { 10860 /* Needed for -fPIC */ 10861 if (mode == Pmode 10862 && GET_CODE (XEXP (x, 1)) == UNSPEC) 10863 return true; 10864 10865 if (!INT14_OK_STRICT 10866 && (strict || !(reload_in_progress || reload_completed)) 10867 && mode != QImode 10868 && mode != HImode) 10869 return false; 10870 10871 if (CONSTANT_P (XEXP (x, 1))) 10872 return true; 10873 } 10874 return false; 10875 } 10876 10877 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x)) 10878 return true; 10879 10880 return false; 10881 } 10882 10883 /* Look for machine dependent ways to make the invalid address AD a 10884 valid address. 10885 10886 For the PA, transform: 10887 10888 memory(X + <large int>) 10889 10890 into: 10891 10892 if (<large int> & mask) >= 16 10893 Y = (<large int> & ~mask) + mask + 1 Round up. 10894 else 10895 Y = (<large int> & ~mask) Round down. 10896 Z = X + Y 10897 memory (Z + (<large int> - Y)); 10898 10899 This makes reload inheritance and reload_cse work better since Z 10900 can be reused. 10901 10902 There may be more opportunities to improve code with this hook. */ 10903 10904 rtx 10905 pa_legitimize_reload_address (rtx ad, machine_mode mode, 10906 int opnum, int type, 10907 int ind_levels ATTRIBUTE_UNUSED) 10908 { 10909 long offset, newoffset, mask; 10910 rtx new_rtx, temp = NULL_RTX; 10911 10912 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT 10913 && !INT14_OK_STRICT ? 0x1f : 0x3fff); 10914 10915 if (optimize && GET_CODE (ad) == PLUS) 10916 temp = simplify_binary_operation (PLUS, Pmode, 10917 XEXP (ad, 0), XEXP (ad, 1)); 10918 10919 new_rtx = temp ? temp : ad; 10920 10921 if (optimize 10922 && GET_CODE (new_rtx) == PLUS 10923 && GET_CODE (XEXP (new_rtx, 0)) == REG 10924 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT) 10925 { 10926 offset = INTVAL (XEXP ((new_rtx), 1)); 10927 10928 /* Choose rounding direction. Round up if we are >= halfway. */ 10929 if ((offset & mask) >= ((mask + 1) / 2)) 10930 newoffset = (offset & ~mask) + mask + 1; 10931 else 10932 newoffset = offset & ~mask; 10933 10934 /* Ensure that long displacements are aligned. */ 10935 if (mask == 0x3fff 10936 && (GET_MODE_CLASS (mode) == MODE_FLOAT 10937 || (TARGET_64BIT && (mode) == DImode))) 10938 newoffset &= ~(GET_MODE_SIZE (mode) - 1); 10939 10940 if (newoffset != 0 && VAL_14_BITS_P (newoffset)) 10941 { 10942 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0), 10943 GEN_INT (newoffset)); 10944 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset)); 10945 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0, 10946 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, 10947 opnum, (enum reload_type) type); 10948 return ad; 10949 } 10950 } 10951 10952 return NULL_RTX; 10953 } 10954 10955 /* Output address vector. */ 10956 10957 void 10958 pa_output_addr_vec (rtx lab, rtx body) 10959 { 10960 int idx, vlen = XVECLEN (body, 0); 10961 10962 if (!TARGET_SOM) 10963 fputs ("\t.align 4\n", asm_out_file); 10964 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); 10965 if (TARGET_GAS) 10966 fputs ("\t.begin_brtab\n", asm_out_file); 10967 for (idx = 0; idx < vlen; idx++) 10968 { 10969 ASM_OUTPUT_ADDR_VEC_ELT 10970 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0))); 10971 } 10972 if (TARGET_GAS) 10973 fputs ("\t.end_brtab\n", asm_out_file); 10974 } 10975 10976 /* Output address difference vector. */ 10977 10978 void 10979 pa_output_addr_diff_vec (rtx lab, rtx body) 10980 { 10981 rtx base = XEXP (XEXP (body, 0), 0); 10982 int idx, vlen = XVECLEN (body, 1); 10983 10984 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); 10985 if (TARGET_GAS) 10986 fputs ("\t.begin_brtab\n", asm_out_file); 10987 for (idx = 0; idx < vlen; idx++) 10988 { 10989 ASM_OUTPUT_ADDR_DIFF_ELT 10990 (asm_out_file, 10991 body, 10992 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)), 10993 CODE_LABEL_NUMBER (base)); 10994 } 10995 if (TARGET_GAS) 10996 fputs ("\t.end_brtab\n", asm_out_file); 10997 } 10998 10999 /* This is a helper function for the other atomic operations. This function 11000 emits a loop that contains SEQ that iterates until a compare-and-swap 11001 operation at the end succeeds. MEM is the memory to be modified. SEQ is 11002 a set of instructions that takes a value from OLD_REG as an input and 11003 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be 11004 set to the current contents of MEM. After SEQ, a compare-and-swap will 11005 attempt to update MEM with NEW_REG. The function returns true when the 11006 loop was generated successfully. */ 11007 11008 static bool 11009 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq) 11010 { 11011 machine_mode mode = GET_MODE (mem); 11012 rtx_code_label *label; 11013 rtx cmp_reg, success, oldval; 11014 11015 /* The loop we want to generate looks like 11016 11017 cmp_reg = mem; 11018 label: 11019 old_reg = cmp_reg; 11020 seq; 11021 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg) 11022 if (success) 11023 goto label; 11024 11025 Note that we only do the plain load from memory once. Subsequent 11026 iterations use the value loaded by the compare-and-swap pattern. */ 11027 11028 label = gen_label_rtx (); 11029 cmp_reg = gen_reg_rtx (mode); 11030 11031 emit_move_insn (cmp_reg, mem); 11032 emit_label (label); 11033 emit_move_insn (old_reg, cmp_reg); 11034 if (seq) 11035 emit_insn (seq); 11036 11037 success = NULL_RTX; 11038 oldval = cmp_reg; 11039 if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg, 11040 new_reg, false, MEMMODEL_SYNC_SEQ_CST, 11041 MEMMODEL_RELAXED)) 11042 return false; 11043 11044 if (oldval != cmp_reg) 11045 emit_move_insn (cmp_reg, oldval); 11046 11047 /* Mark this jump predicted not taken. */ 11048 emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx, 11049 GET_MODE (success), 1, label, 11050 profile_probability::guessed_never ()); 11051 return true; 11052 } 11053 11054 /* This function tries to implement an atomic exchange operation using a 11055 compare_and_swap loop. VAL is written to *MEM. The previous contents of 11056 *MEM are returned, using TARGET if possible. No memory model is required 11057 since a compare_and_swap loop is seq-cst. */ 11058 11059 rtx 11060 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val) 11061 { 11062 machine_mode mode = GET_MODE (mem); 11063 11064 if (can_compare_and_swap_p (mode, true)) 11065 { 11066 if (!target || !register_operand (target, mode)) 11067 target = gen_reg_rtx (mode); 11068 if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX)) 11069 return target; 11070 } 11071 11072 return NULL_RTX; 11073 } 11074 11075 /* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying 11076 arguments passed by hidden reference in the 32-bit HP runtime. Users 11077 can override this behavior for better compatibility with openmp at the 11078 risk of library incompatibilities. Arguments are always passed by value 11079 in the 64-bit HP runtime. */ 11080 11081 static bool 11082 pa_callee_copies (cumulative_args_t, const function_arg_info &) 11083 { 11084 return !TARGET_CALLER_COPIES; 11085 } 11086 11087 /* Implement TARGET_HARD_REGNO_NREGS. */ 11088 11089 static unsigned int 11090 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode) 11091 { 11092 return PA_HARD_REGNO_NREGS (regno, mode); 11093 } 11094 11095 /* Implement TARGET_HARD_REGNO_MODE_OK. */ 11096 11097 static bool 11098 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode) 11099 { 11100 return PA_HARD_REGNO_MODE_OK (regno, mode); 11101 } 11102 11103 /* Implement TARGET_STARTING_FRAME_OFFSET. 11104 11105 On the 32-bit ports, we reserve one slot for the previous frame 11106 pointer and one fill slot. The fill slot is for compatibility 11107 with HP compiled programs. On the 64-bit ports, we reserve one 11108 slot for the previous frame pointer. */ 11109 11110 static HOST_WIDE_INT 11111 pa_starting_frame_offset (void) 11112 { 11113 return 8; 11114 } 11115 11116 /* Figure out the size in words of the function argument. The size 11117 returned by this function should always be greater than zero because 11118 we pass variable and zero sized objects by reference. */ 11119 11120 HOST_WIDE_INT 11121 pa_function_arg_size (machine_mode mode, const_tree type) 11122 { 11123 HOST_WIDE_INT size; 11124 11125 size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type); 11126 return CEIL (size, UNITS_PER_WORD); 11127 } 11128 11129 #include "gt-pa.h" 11130