/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2019 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
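
/* Illustrative note: a DFmode result feeding a DFmode floating-point
   store has matching operand sizes, so the bypass applies; an SFmode
   result feeding a DFmode store does not.  */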

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *,
                                       gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
                            const_tree, bool);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
                                        reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, machine_mode,
                              const_tree, bool);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section *pa_elf_select_rtx_section (machine_mode, rtx,
                                           unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
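
/* Usage sketch (illustrative): -mfixed-range=fr4-fr31 marks %fr4
   through %fr31 as fixed so the compiler never allocates them (the
   kernel-mode case mentioned above).  Several ranges may be given,
   separated by commas, e.g. -mfixed-range=fr4-fr31,r20-r23.  When
   every FP register ends up fixed, MASK_DISABLE_FPREGS is also set.  */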

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor,");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "%<-freorder-blocks-and-partition%> does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = const_double_from_real_value (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */

int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */

int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}
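
/* Worked examples (illustrative): 5000 fits in 14 signed bits, so a
   single ldo suffices; 0x12345000 has its low 11 bits clear, so ldil
   can materialize it; 0x1e000 is a shifted run of ones that zdepi can
   deposit; 0x12345 satisfies none of the tests and needs two insns.  */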

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */

int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */

int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
        0....01....1
        1....10....0
        1..10..01..1  */

int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */

int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
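
/* Worked example (illustrative): for pa_ior_mask_p, MASK = 0x00ff0000
   is a single run of ones; adding its lowest set bit (0x00010000)
   yields 0x01000000, a power of two, so the test succeeds.  A mask
   with two separate runs, say 0x00ff00ff, fails because the addition
   cannot collapse both runs.  */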

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          /* Make sure we have a label and not a note.  */
          if (LABEL_P (orig))
            LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (Pmode, base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, arg, Pmode);

  return ret;
}
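
/* Note (illustrative): both dynamic TLS models below funnel through
   this helper.  Global-dynamic calls __tls_get_addr once per symbol,
   while local-dynamic calls it once for the module base and then adds
   per-symbol offsets (see UNSPEC_TLSLDBASE).  */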

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */

static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
           || GET_CODE (x) == MULT)
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && ((GET_CODE (x) == ASHIFT
               && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
              || (GET_CODE (x) == MULT
                  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
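
/* Examples (illustrative): (ashift (reg) (const_int 2)) and
   (mult (reg) (const_int 4)) both satisfy this predicate; they are
   the shift-add and scaled-index spellings of the same multiply-by-4.  */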

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      HOST_WIDE_INT newoffset;
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~mask) + mask + 1;
      else
        newoffset = (offset & ~mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine cannot
         handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }
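
  /* Worked example (illustrative): for an SImode access to X + 100000,
     mask is 0x3fff; 100000 & 0x3fff = 1696, which is below the halfway
     point 0x2000, so Y rounds down to 98304.  Z = X + 98304 is computed
     once and the reference becomes memory (Z + 1696), whose 14-bit
     displacement needs no further fixup; CSE can then share Z among
     neighboring references such as X + 100004.  */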

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg2,
                                                      GEN_INT (shift_val)),
                                      reg1));
    }
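
  /* Example (illustrative): the address of a[i] in an int array arrives
     here as (plus (mult (reg i) (const_int 4)) (reg a)); the MULT is
     rewritten as (ashift (reg i) (const_int 2)) so the sum can match a
     sh2add pattern.  */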

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_ASHIFT (Pmode,
                                              XEXP (XEXP (XEXP (x, 0), 0), 0),
                                              GEN_INT (shift_val)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          HOST_WIDE_INT val = INTVAL (XEXP (idx, 1));
          val /= (1 << shift_val);

          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_ASHIFT (Pmode, reg1,
                                                    GEN_INT (shift_val)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode,
                            gen_rtx_PLUS (Pmode,
                                          gen_rtx_ASHIFT (Pmode, reg2,
                                                          GEN_INT (shift_val)),
                                          reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg1,
                                                      GEN_INT (shift_val)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and
     rearrange the terms so that we can use indexing for these addresses
     too.  Only do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (mem_shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big but can be divided evenly by
             shadd_const, it can be added to (reg) instead.  This
             allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095)
            {
              HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode,
                                                         reg2,
                                                         GEN_INT (shift_val)),
                                         reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode, regx2,
                                                         GEN_INT (shift_val)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
                   addr_space_t as ATTRIBUTE_UNUSED,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
                int opno ATTRIBUTE_UNUSED,
                int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = factor * factor * COSTS_N_INSNS (8);
      else
        *total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A size N times larger than UNITS_PER_WORD needs N times as
         many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor == 0)
        factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
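
/* Worked example (illustrative): a DImode integer multiply on a 32-bit
   target gives factor = 8/4 = 2, so *total becomes
   2 * 2 * COSTS_N_INSNS (8) when PA 1.1 FP multiplies are available,
   else 2 * 2 * COSTS_N_INSNS (20).  */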

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */

static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp, true);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);
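
  /* Note (illustrative): the FP load/store displacement constraints
     handled below are asymmetric.  For example, with a DFmode access at
     (reg + 64), the displacement 64 fits the 14-bit ldo/ldw range but
     not the 5-bit range of the pre-PA2.0 FP load/store forms, so the
     address must first be formed in SCRATCH_REG.  */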

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases, and reloads for other unsupported
     memory operands.  */
  if (scratch_reg
      && FP_REG_P (operand0)
      && (MEM_P (operand1)
          || (GET_CODE (operand1) == SUBREG
              && MEM_P (XEXP (operand1, 0)))))
    {
      rtx op1 = operand1;

      if (GET_CODE (op1) == SUBREG)
        op1 = XEXP (op1, 0);

      if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
        {
          if (!(TARGET_PA_20
                && !TARGET_ELF32
                && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
              && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
            {
              /* SCRATCH_REG will hold an address and maybe the actual data.
                 We want it in WORD_MODE regardless of what mode it was
                 originally given to us.  */
              scratch_reg = force_mode (word_mode, scratch_reg);

              /* D might not fit in 14 bits either; for such cases load D
                 into scratch reg.  */
              if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
                {
                  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
                  emit_move_insn (scratch_reg,
                                  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
                                                  Pmode,
                                                  XEXP (XEXP (op1, 0), 0),
                                                  scratch_reg));
                }
              else
                emit_move_insn (scratch_reg, XEXP (op1, 0));
              op1 = replace_equiv_address (op1, scratch_reg);
            }
        }
      else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
               || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
               || IS_INDEX_ADDR_P (XEXP (op1, 0)))
        {
          /* Load memory address into SCRATCH_REG.  */
*/ 1761 scratch_reg = force_mode (word_mode, scratch_reg); 1762 emit_move_insn (scratch_reg, XEXP (op1, 0)); 1763 op1 = replace_equiv_address (op1, scratch_reg); 1764 } 1765 emit_insn (gen_rtx_SET (operand0, op1)); 1766 return 1; 1767 } 1768 else if (scratch_reg 1769 && FP_REG_P (operand1) 1770 && (MEM_P (operand0) 1771 || (GET_CODE (operand0) == SUBREG 1772 && MEM_P (XEXP (operand0, 0))))) 1773 { 1774 rtx op0 = operand0; 1775 1776 if (GET_CODE (op0) == SUBREG) 1777 op0 = XEXP (op0, 0); 1778 1779 if (reg_plus_base_memory_operand (op0, GET_MODE (op0))) 1780 { 1781 if (!(TARGET_PA_20 1782 && !TARGET_ELF32 1783 && INT_14_BITS (XEXP (XEXP (op0, 0), 1))) 1784 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1))) 1785 { 1786 /* SCRATCH_REG will hold an address and maybe the actual data. 1787 We want it in WORD_MODE regardless of what mode it was 1788 originally given to us. */ 1789 scratch_reg = force_mode (word_mode, scratch_reg); 1790 1791 /* D might not fit in 14 bits either; for such cases load D 1792 into scratch reg. */ 1793 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1))) 1794 { 1795 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1)); 1796 emit_move_insn (scratch_reg, 1797 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)), 1798 Pmode, 1799 XEXP (XEXP (op0, 0), 0), 1800 scratch_reg)); 1801 } 1802 else 1803 emit_move_insn (scratch_reg, XEXP (op0, 0)); 1804 op0 = replace_equiv_address (op0, scratch_reg); 1805 } 1806 } 1807 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode)) 1808 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0)) 1809 || IS_INDEX_ADDR_P (XEXP (op0, 0))) 1810 { 1811 /* Load memory address into SCRATCH_REG. */ 1812 scratch_reg = force_mode (word_mode, scratch_reg); 1813 emit_move_insn (scratch_reg, XEXP (op0, 0)); 1814 op0 = replace_equiv_address (op0, scratch_reg); 1815 } 1816 emit_insn (gen_rtx_SET (op0, operand1)); 1817 return 1; 1818 } 1819 /* Handle secondary reloads for loads of FP registers from constant 1820 expressions by forcing the constant into memory. For the most part, 1821 this is only necessary for SImode and DImode. 1822 1823 Use scratch_reg to hold the address of the memory location. */ 1824 else if (scratch_reg 1825 && CONSTANT_P (operand1) 1826 && FP_REG_P (operand0)) 1827 { 1828 rtx const_mem, xoperands[2]; 1829 1830 if (operand1 == CONST0_RTX (mode)) 1831 { 1832 emit_insn (gen_rtx_SET (operand0, operand1)); 1833 return 1; 1834 } 1835 1836 /* SCRATCH_REG will hold an address and maybe the actual data. We want 1837 it in WORD_MODE regardless of what mode it was originally given 1838 to us. */ 1839 scratch_reg = force_mode (word_mode, scratch_reg); 1840 1841 /* Force the constant into memory and put the address of the 1842 memory location into scratch_reg. */ 1843 const_mem = force_const_mem (mode, operand1); 1844 xoperands[0] = scratch_reg; 1845 xoperands[1] = XEXP (const_mem, 0); 1846 pa_emit_move_sequence (xoperands, Pmode, 0); 1847 1848 /* Now load the destination register. */ 1849 emit_insn (gen_rtx_SET (operand0, 1850 replace_equiv_address (const_mem, scratch_reg))); 1851 return 1; 1852 } 1853 /* Handle secondary reloads for SAR. These occur when trying to load 1854 the SAR from memory or a constant. */ 1855 else if (scratch_reg 1856 && GET_CODE (operand0) == REG 1857 && REGNO (operand0) < FIRST_PSEUDO_REGISTER 1858 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS 1859 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT)) 1860 { 1861 /* D might not fit in 14 bits either; for such cases load D into 1862 scratch reg. 
*/ 1863 if (GET_CODE (operand1) == MEM 1864 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0))) 1865 { 1866 /* We are reloading the address into the scratch register, so we 1867 want to make sure the scratch register is a full register. */ 1868 scratch_reg = force_mode (word_mode, scratch_reg); 1869 1870 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); 1871 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 1872 0)), 1873 Pmode, 1874 XEXP (XEXP (operand1, 0), 1875 0), 1876 scratch_reg)); 1877 1878 /* Now we are going to load the scratch register from memory, 1879 we want to load it in the same width as the original MEM, 1880 which must be the same as the width of the ultimate destination, 1881 OPERAND0. */ 1882 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg); 1883 1884 emit_move_insn (scratch_reg, 1885 replace_equiv_address (operand1, scratch_reg)); 1886 } 1887 else 1888 { 1889 /* We want to load the scratch register using the same mode as 1890 the ultimate destination. */ 1891 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg); 1892 1893 emit_move_insn (scratch_reg, operand1); 1894 } 1895 1896 /* And emit the insn to set the ultimate destination. We know that 1897 the scratch register has the same mode as the destination at this 1898 point. */ 1899 emit_move_insn (operand0, scratch_reg); 1900 return 1; 1901 } 1902 1903 /* Handle the most common case: storing into a register. */ 1904 if (register_operand (operand0, mode)) 1905 { 1906 /* Legitimize TLS symbol references. This happens for references 1907 that aren't a legitimate constant. */ 1908 if (PA_SYMBOL_REF_TLS_P (operand1)) 1909 operand1 = legitimize_tls_address (operand1); 1910 1911 if (register_operand (operand1, mode) 1912 || (GET_CODE (operand1) == CONST_INT 1913 && pa_cint_ok_for_move (UINTVAL (operand1))) 1914 || (operand1 == CONST0_RTX (mode)) 1915 || (GET_CODE (operand1) == HIGH 1916 && !symbolic_operand (XEXP (operand1, 0), VOIDmode)) 1917 /* Only `general_operands' can come here, so MEM is ok. */ 1918 || GET_CODE (operand1) == MEM) 1919 { 1920 /* Various sets are created during RTL generation which don't 1921 have the REG_POINTER flag correctly set. After the CSE pass, 1922 instruction recognition can fail if we don't consistently 1923 set this flag when performing register copies. This should 1924 also improve the opportunities for creating insns that use 1925 unscaled indexing. */ 1926 if (REG_P (operand0) && REG_P (operand1)) 1927 { 1928 if (REG_POINTER (operand1) 1929 && !REG_POINTER (operand0) 1930 && !HARD_REGISTER_P (operand0)) 1931 copy_reg_pointer (operand0, operand1); 1932 } 1933 1934 /* When MEMs are broken out, the REG_POINTER flag doesn't 1935 get set. In some cases, we can set the REG_POINTER flag 1936 from the declaration for the MEM. */ 1937 if (REG_P (operand0) 1938 && GET_CODE (operand1) == MEM 1939 && !REG_POINTER (operand0)) 1940 { 1941 tree decl = MEM_EXPR (operand1); 1942 1943 /* Set the register pointer flag and register alignment 1944 if the declaration for this memory reference is a 1945 pointer type. */ 1946 if (decl) 1947 { 1948 tree type; 1949 1950 /* If this is a COMPONENT_REF, use the FIELD_DECL from 1951 tree operand 1. 
*/ 1952 if (TREE_CODE (decl) == COMPONENT_REF) 1953 decl = TREE_OPERAND (decl, 1); 1954 1955 type = TREE_TYPE (decl); 1956 type = strip_array_types (type); 1957 1958 if (POINTER_TYPE_P (type)) 1959 mark_reg_pointer (operand0, BITS_PER_UNIT); 1960 } 1961 } 1962 1963 emit_insn (gen_rtx_SET (operand0, operand1)); 1964 return 1; 1965 } 1966 } 1967 else if (GET_CODE (operand0) == MEM) 1968 { 1969 if (mode == DFmode && operand1 == CONST0_RTX (mode) 1970 && !(reload_in_progress || reload_completed)) 1971 { 1972 rtx temp = gen_reg_rtx (DFmode); 1973 1974 emit_insn (gen_rtx_SET (temp, operand1)); 1975 emit_insn (gen_rtx_SET (operand0, temp)); 1976 return 1; 1977 } 1978 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode)) 1979 { 1980 /* Run this case quickly. */ 1981 emit_insn (gen_rtx_SET (operand0, operand1)); 1982 return 1; 1983 } 1984 if (! (reload_in_progress || reload_completed)) 1985 { 1986 operands[0] = validize_mem (operand0); 1987 operands[1] = operand1 = force_reg (mode, operand1); 1988 } 1989 } 1990 1991 /* Simplify the source if we need to. 1992 Note we do have to handle function labels here, even though we do 1993 not consider them legitimate constants. Loop optimizations can 1994 call the emit_move_xxx with one as a source. */ 1995 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode)) 1996 || (GET_CODE (operand1) == HIGH 1997 && symbolic_operand (XEXP (operand1, 0), mode)) 1998 || function_label_operand (operand1, VOIDmode) 1999 || tls_referenced_p (operand1)) 2000 { 2001 int ishighonly = 0; 2002 2003 if (GET_CODE (operand1) == HIGH) 2004 { 2005 ishighonly = 1; 2006 operand1 = XEXP (operand1, 0); 2007 } 2008 if (symbolic_operand (operand1, mode)) 2009 { 2010 /* Argh. The assembler and linker can't handle arithmetic 2011 involving plabels. 2012 2013 So we force the plabel into memory, load operand0 from 2014 the memory location, then add in the constant part. */ 2015 if ((GET_CODE (operand1) == CONST 2016 && GET_CODE (XEXP (operand1, 0)) == PLUS 2017 && function_label_operand (XEXP (XEXP (operand1, 0), 0), 2018 VOIDmode)) 2019 || function_label_operand (operand1, VOIDmode)) 2020 { 2021 rtx temp, const_part; 2022 2023 /* Figure out what (if any) scratch register to use. */ 2024 if (reload_in_progress || reload_completed) 2025 { 2026 scratch_reg = scratch_reg ? scratch_reg : operand0; 2027 /* SCRATCH_REG will hold an address and maybe the actual 2028 data. We want it in WORD_MODE regardless of what mode it 2029 was originally given to us. */ 2030 scratch_reg = force_mode (word_mode, scratch_reg); 2031 } 2032 else if (flag_pic) 2033 scratch_reg = gen_reg_rtx (Pmode); 2034 2035 if (GET_CODE (operand1) == CONST) 2036 { 2037 /* Save away the constant part of the expression. */ 2038 const_part = XEXP (XEXP (operand1, 0), 1); 2039 gcc_assert (GET_CODE (const_part) == CONST_INT); 2040 2041 /* Force the function label into memory. */ 2042 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0)); 2043 } 2044 else 2045 { 2046 /* No constant part. */ 2047 const_part = NULL_RTX; 2048 2049 /* Force the function label into memory. */ 2050 temp = force_const_mem (mode, operand1); 2051 } 2052 2053 2054 /* Get the address of the memory location. PIC-ify it if 2055 necessary. */ 2056 temp = XEXP (temp, 0); 2057 if (flag_pic) 2058 temp = legitimize_pic_address (temp, mode, scratch_reg); 2059 2060 /* Put the address of the memory location into our destination 2061 register. 
*/ 2062 operands[1] = temp; 2063 pa_emit_move_sequence (operands, mode, scratch_reg); 2064 2065 /* Now load from the memory location into our destination 2066 register. */ 2067 operands[1] = gen_rtx_MEM (Pmode, operands[0]); 2068 pa_emit_move_sequence (operands, mode, scratch_reg); 2069 2070 /* And add back in the constant part. */ 2071 if (const_part != NULL_RTX) 2072 expand_inc (operand0, const_part); 2073 2074 return 1; 2075 } 2076 2077 if (flag_pic) 2078 { 2079 rtx_insn *insn; 2080 rtx temp; 2081 2082 if (reload_in_progress || reload_completed) 2083 { 2084 temp = scratch_reg ? scratch_reg : operand0; 2085 /* TEMP will hold an address and maybe the actual 2086 data. We want it in WORD_MODE regardless of what mode it 2087 was originally given to us. */ 2088 temp = force_mode (word_mode, temp); 2089 } 2090 else 2091 temp = gen_reg_rtx (Pmode); 2092 2093 /* Force (const (plus (symbol) (const_int))) to memory 2094 if the const_int will not fit in 14 bits. Although 2095 this requires a relocation, the instruction sequence 2096 needed to load the value is shorter. */ 2097 if (GET_CODE (operand1) == CONST 2098 && GET_CODE (XEXP (operand1, 0)) == PLUS 2099 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT 2100 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))) 2101 { 2102 rtx x, m = force_const_mem (mode, operand1); 2103 2104 x = legitimize_pic_address (XEXP (m, 0), mode, temp); 2105 x = replace_equiv_address (m, x); 2106 insn = emit_move_insn (operand0, x); 2107 } 2108 else 2109 { 2110 operands[1] = legitimize_pic_address (operand1, mode, temp); 2111 if (REG_P (operand0) && REG_P (operands[1])) 2112 copy_reg_pointer (operand0, operands[1]); 2113 insn = emit_move_insn (operand0, operands[1]); 2114 } 2115 2116 /* Put a REG_EQUAL note on this insn. */ 2117 set_unique_reg_note (insn, REG_EQUAL, operand1); 2118 } 2119 /* On the HPPA, references to data space are supposed to use dp, 2120 register 27, but showing it in the RTL inhibits various cse 2121 and loop optimizations. */ 2122 else 2123 { 2124 rtx temp, set; 2125 2126 if (reload_in_progress || reload_completed) 2127 { 2128 temp = scratch_reg ? scratch_reg : operand0; 2129 /* TEMP will hold an address and maybe the actual 2130 data. We want it in WORD_MODE regardless of what mode it 2131 was originally given to us. */ 2132 temp = force_mode (word_mode, temp); 2133 } 2134 else 2135 temp = gen_reg_rtx (mode); 2136 2137 /* Loading a SYMBOL_REF into a register makes that register 2138 safe to be used as the base in an indexed address. 2139 2140 Don't mark hard registers though. That loses. 
*/ 2141 if (GET_CODE (operand0) == REG 2142 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) 2143 mark_reg_pointer (operand0, BITS_PER_UNIT); 2144 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER) 2145 mark_reg_pointer (temp, BITS_PER_UNIT); 2146 2147 if (ishighonly) 2148 set = gen_rtx_SET (operand0, temp); 2149 else 2150 set = gen_rtx_SET (operand0, 2151 gen_rtx_LO_SUM (mode, temp, operand1)); 2152 2153 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1))); 2154 emit_insn (set); 2155 2156 } 2157 return 1; 2158 } 2159 else if (tls_referenced_p (operand1)) 2160 { 2161 rtx tmp = operand1; 2162 rtx addend = NULL; 2163 2164 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) 2165 { 2166 addend = XEXP (XEXP (tmp, 0), 1); 2167 tmp = XEXP (XEXP (tmp, 0), 0); 2168 } 2169 2170 gcc_assert (GET_CODE (tmp) == SYMBOL_REF); 2171 tmp = legitimize_tls_address (tmp); 2172 if (addend) 2173 { 2174 tmp = gen_rtx_PLUS (mode, tmp, addend); 2175 tmp = force_operand (tmp, operands[0]); 2176 } 2177 operands[1] = tmp; 2178 } 2179 else if (GET_CODE (operand1) != CONST_INT 2180 || !pa_cint_ok_for_move (UINTVAL (operand1))) 2181 { 2182 rtx temp; 2183 rtx_insn *insn; 2184 rtx op1 = operand1; 2185 HOST_WIDE_INT value = 0; 2186 HOST_WIDE_INT insv = 0; 2187 int insert = 0; 2188 2189 if (GET_CODE (operand1) == CONST_INT) 2190 value = INTVAL (operand1); 2191 2192 if (TARGET_64BIT 2193 && GET_CODE (operand1) == CONST_INT 2194 && HOST_BITS_PER_WIDE_INT > 32 2195 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32) 2196 { 2197 HOST_WIDE_INT nval; 2198 2199 /* Extract the low order 32 bits of the value and sign extend. 2200 If the new value is the same as the original value, we can 2201 use the original value as-is. If the new value is 2202 different, we use it and insert the most-significant 32-bits 2203 of the original value into the final result. */ 2204 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1)) 2205 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31); 2206 if (value != nval) 2207 { 2208 #if HOST_BITS_PER_WIDE_INT > 32 2209 insv = value >= 0 ? value >> 32 : ~(~value >> 32); 2210 #endif 2211 insert = 1; 2212 value = nval; 2213 operand1 = GEN_INT (nval); 2214 } 2215 } 2216 2217 if (reload_in_progress || reload_completed) 2218 temp = scratch_reg ? scratch_reg : operand0; 2219 else 2220 temp = gen_reg_rtx (mode); 2221 2222 /* We don't directly split DImode constants on 32-bit targets 2223 because PLUS uses an 11-bit immediate and the insn sequence 2224 generated is not as efficient as the one using HIGH/LO_SUM. */ 2225 if (GET_CODE (operand1) == CONST_INT 2226 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD 2227 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT 2228 && !insert) 2229 { 2230 /* Directly break constant into high and low parts. This 2231 provides better optimization opportunities because various 2232 passes recognize constants split with PLUS but not LO_SUM. 2233 We use a 14-bit signed low part except when the addition 2234 of 0x4000 to the high part might change the sign of the 2235 high part.
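For example, VALUE = 0x12347000 gives LOW = 0x3000, which is >= 0x2000, so HIGH is bumped from 0x12344000 to 0x12348000 and LOW becomes -0x1000; the constant is then materialized roughly as ldil L'0x12348000,%r1 followed by ldo -4096(%r1),%r1 (register choice illustrative).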
*/ 2236 HOST_WIDE_INT low = value & 0x3fff; 2237 HOST_WIDE_INT high = value & ~ 0x3fff; 2238 2239 if (low >= 0x2000) 2240 { 2241 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000)) 2242 high += 0x2000; 2243 else 2244 high += 0x4000; 2245 } 2246 2247 low = value - high; 2248 2249 emit_insn (gen_rtx_SET (temp, GEN_INT (high))); 2250 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low)); 2251 } 2252 else 2253 { 2254 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1))); 2255 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1); 2256 } 2257 2258 insn = emit_move_insn (operands[0], operands[1]); 2259 2260 /* Now insert the most significant 32 bits of the value 2261 into the register. When we don't have a second register 2262 available, it could take up to nine instructions to load 2263 a 64-bit integer constant. Prior to reload, we force 2264 constants that would take more than three instructions 2265 to load to the constant pool. During and after reload, 2266 we have to handle all possible values. */ 2267 if (insert) 2268 { 2269 /* Use a HIGH/LO_SUM/INSV sequence if we have a second 2270 register and the value to be inserted is outside the 2271 range that can be loaded with three depdi instructions. */ 2272 if (temp != operand0 && (insv >= 16384 || insv < -16384)) 2273 { 2274 operand1 = GEN_INT (insv); 2275 2276 emit_insn (gen_rtx_SET (temp, 2277 gen_rtx_HIGH (mode, operand1))); 2278 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1)); 2279 if (mode == DImode) 2280 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32), 2281 const0_rtx, temp)); 2282 else 2283 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32), 2284 const0_rtx, temp)); 2285 } 2286 else 2287 { 2288 int len = 5, pos = 27; 2289 2290 /* Insert the bits using the depdi instruction. */ 2291 while (pos >= 0) 2292 { 2293 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16; 2294 HOST_WIDE_INT sign = v5 < 0; 2295 2296 /* Left extend the insertion. */ 2297 insv = (insv >= 0 ? insv >> len : ~(~insv >> len)); 2298 while (pos > 0 && (insv & 1) == sign) 2299 { 2300 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1)); 2301 len += 1; 2302 pos -= 1; 2303 } 2304 2305 if (mode == DImode) 2306 insn = emit_insn (gen_insvdi (operand0, 2307 GEN_INT (len), 2308 GEN_INT (pos), 2309 GEN_INT (v5))); 2310 else 2311 insn = emit_insn (gen_insvsi (operand0, 2312 GEN_INT (len), 2313 GEN_INT (pos), 2314 GEN_INT (v5))); 2315 2316 len = pos > 0 && pos < 5 ? pos : 5; 2317 pos -= len; 2318 } 2319 } 2320 } 2321 2322 set_unique_reg_note (insn, REG_EQUAL, op1); 2323 2324 return 1; 2325 } 2326 } 2327 /* Now have insn-emit do whatever it normally does. */ 2328 return 0; 2329 } 2330 2331 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning 2332 it will need a link/runtime reloc). 
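For example, a static initializer such as `int *p = &x;' wraps `x' in an ADDR_EXPR and so needs a reloc, while a purely arithmetic initializer like `int i = 40 + 2;' does not.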
*/ 2333 2334 int 2335 pa_reloc_needed (tree exp) 2336 { 2337 int reloc = 0; 2338 2339 switch (TREE_CODE (exp)) 2340 { 2341 case ADDR_EXPR: 2342 return 1; 2343 2344 case POINTER_PLUS_EXPR: 2345 case PLUS_EXPR: 2346 case MINUS_EXPR: 2347 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0)); 2348 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1)); 2349 break; 2350 2351 CASE_CONVERT: 2352 case NON_LVALUE_EXPR: 2353 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0)); 2354 break; 2355 2356 case CONSTRUCTOR: 2357 { 2358 tree value; 2359 unsigned HOST_WIDE_INT ix; 2360 2361 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value) 2362 if (value) 2363 reloc |= pa_reloc_needed (value); 2364 } 2365 break; 2366 2367 case ERROR_MARK: 2368 break; 2369 2370 default: 2371 break; 2372 } 2373 return reloc; 2374 } 2375 2376 2377 /* Return the best assembler insn template 2378 for moving operands[1] into operands[0] as a fullword. */ 2379 const char * 2380 pa_singlemove_string (rtx *operands) 2381 { 2382 HOST_WIDE_INT intval; 2383 2384 if (GET_CODE (operands[0]) == MEM) 2385 return "stw %r1,%0"; 2386 if (GET_CODE (operands[1]) == MEM) 2387 return "ldw %1,%0"; 2388 if (GET_CODE (operands[1]) == CONST_DOUBLE) 2389 { 2390 long i; 2391 2392 gcc_assert (GET_MODE (operands[1]) == SFmode); 2393 2394 /* Translate the CONST_DOUBLE to a CONST_INT with the same target 2395 bit pattern. */ 2396 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i); 2397 2398 operands[1] = GEN_INT (i); 2399 /* Fall through to CONST_INT case. */ 2400 } 2401 if (GET_CODE (operands[1]) == CONST_INT) 2402 { 2403 intval = INTVAL (operands[1]); 2404 2405 if (VAL_14_BITS_P (intval)) 2406 return "ldi %1,%0"; 2407 else if ((intval & 0x7ff) == 0) 2408 return "ldil L'%1,%0"; 2409 else if (pa_zdepi_cint_p (intval)) 2410 return "{zdepi %Z1,%0|depwi,z %Z1,%0}"; 2411 else 2412 return "ldil L'%1,%0\n\tldo R'%1(%0),%0"; 2413 } 2414 return "copy %1,%0"; 2415 } 2416 2417 2418 /* Compute position (in OP[1]) and width (in OP[2]) 2419 useful for copying IMM to a register using the zdepi 2420 instructions. Store the immediate value to insert in OP[0]. */ 2421 static void 2422 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) 2423 { 2424 int lsb, len; 2425 2426 /* Find the least significant set bit in IMM. */ 2427 for (lsb = 0; lsb < 32; lsb++) 2428 { 2429 if ((imm & 1) != 0) 2430 break; 2431 imm >>= 1; 2432 } 2433 2434 /* Choose variants based on *sign* of the 5-bit field. */ 2435 if ((imm & 0x10) == 0) 2436 len = (lsb <= 28) ? 4 : 32 - lsb; 2437 else 2438 { 2439 /* Find the width of the bitstring in IMM. */ 2440 for (len = 5; len < 32 - lsb; len++) 2441 { 2442 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) 2443 break; 2444 } 2445 2446 /* Sign extend IMM as a 5-bit value. */ 2447 imm = (imm & 0xf) - 0x10; 2448 } 2449 2450 op[0] = imm; 2451 op[1] = 31 - lsb; 2452 op[2] = len; 2453 } 2454 2455 /* Compute position (in OP[1]) and width (in OP[2]) 2456 useful for copying IMM to a register using the depdi,z 2457 instructions. Store the immediate value to insert in OP[0]. */ 2458 2459 static void 2460 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) 2461 { 2462 int lsb, len, maxlen; 2463 2464 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64); 2465 2466 /* Find the least significant set bit in IMM. */ 2467 for (lsb = 0; lsb < maxlen; lsb++) 2468 { 2469 if ((imm & 1) != 0) 2470 break; 2471 imm >>= 1; 2472 } 2473 2474 /* Choose variants based on *sign* of the 5-bit field. 
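For example, IMM = 0xff0000 yields LSB = 16 with a negative 5-bit field (0x1f), so the width scan below extends LEN to 8, IMM sign-extends to -1, and OP becomes {-1, 47, 8}, i.e. a depdi,z -1,47,8 deposit.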
*/ 2475 if ((imm & 0x10) == 0) 2476 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb; 2477 else 2478 { 2479 /* Find the width of the bitstring in IMM. */ 2480 for (len = 5; len < maxlen - lsb; len++) 2481 { 2482 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) 2483 break; 2484 } 2485 2486 /* Extend length if host is narrow and IMM is negative. */ 2487 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb) 2488 len += 32; 2489 2490 /* Sign extend IMM as a 5-bit value. */ 2491 imm = (imm & 0xf) - 0x10; 2492 } 2493 2494 op[0] = imm; 2495 op[1] = 63 - lsb; 2496 op[2] = len; 2497 } 2498 2499 /* Output assembler code to perform a doubleword move insn 2500 with operands OPERANDS. */ 2501 2502 const char * 2503 pa_output_move_double (rtx *operands) 2504 { 2505 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1; 2506 rtx latehalf[2]; 2507 rtx addreg0 = 0, addreg1 = 0; 2508 int highonly = 0; 2509 2510 /* First classify both operands. */ 2511 2512 if (REG_P (operands[0])) 2513 optype0 = REGOP; 2514 else if (offsettable_memref_p (operands[0])) 2515 optype0 = OFFSOP; 2516 else if (GET_CODE (operands[0]) == MEM) 2517 optype0 = MEMOP; 2518 else 2519 optype0 = RNDOP; 2520 2521 if (REG_P (operands[1])) 2522 optype1 = REGOP; 2523 else if (CONSTANT_P (operands[1])) 2524 optype1 = CNSTOP; 2525 else if (offsettable_memref_p (operands[1])) 2526 optype1 = OFFSOP; 2527 else if (GET_CODE (operands[1]) == MEM) 2528 optype1 = MEMOP; 2529 else 2530 optype1 = RNDOP; 2531 2532 /* Check for the cases that the operand constraints are not 2533 supposed to allow to happen. */ 2534 gcc_assert (optype0 == REGOP || optype1 == REGOP); 2535 2536 /* Handle copies between general and floating registers. */ 2537 2538 if (optype0 == REGOP && optype1 == REGOP 2539 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1])) 2540 { 2541 if (FP_REG_P (operands[0])) 2542 { 2543 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands); 2544 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands); 2545 return "{fldds|fldd} -16(%%sp),%0"; 2546 } 2547 else 2548 { 2549 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands); 2550 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands); 2551 return "{ldws|ldw} -12(%%sp),%R0"; 2552 } 2553 } 2554 2555 /* Handle auto decrementing and incrementing loads and stores 2556 specifically, since the structure of the function doesn't work 2557 for them without major modification. Do it better when we learn 2558 this port about the general inc/dec addressing of PA. 2559 (This was written by tege. Chide him if it doesn't work.) */ 2560 2561 if (optype0 == MEMOP) 2562 { 2563 /* We have to output the address syntax ourselves, since print_operand 2564 doesn't deal with the addresses we want to use. Fix this later. */ 2565 2566 rtx addr = XEXP (operands[0], 0); 2567 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) 2568 { 2569 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); 2570 2571 operands[0] = XEXP (addr, 0); 2572 gcc_assert (GET_CODE (operands[1]) == REG 2573 && GET_CODE (operands[0]) == REG); 2574 2575 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); 2576 2577 /* No overlap between high target register and address 2578 register. 
(We do this in a non-obvious way to 2579 save a register file writeback) */ 2580 if (GET_CODE (addr) == POST_INC) 2581 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)"; 2582 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)"; 2583 } 2584 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 2585 { 2586 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); 2587 2588 operands[0] = XEXP (addr, 0); 2589 gcc_assert (GET_CODE (operands[1]) == REG 2590 && GET_CODE (operands[0]) == REG); 2591 2592 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); 2593 /* No overlap between high target register and address 2594 register. (We do this in a non-obvious way to save a 2595 register file writeback) */ 2596 if (GET_CODE (addr) == PRE_INC) 2597 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)"; 2598 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)"; 2599 } 2600 } 2601 if (optype1 == MEMOP) 2602 { 2603 /* We have to output the address syntax ourselves, since print_operand 2604 doesn't deal with the addresses we want to use. Fix this later. */ 2605 2606 rtx addr = XEXP (operands[1], 0); 2607 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) 2608 { 2609 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); 2610 2611 operands[1] = XEXP (addr, 0); 2612 gcc_assert (GET_CODE (operands[0]) == REG 2613 && GET_CODE (operands[1]) == REG); 2614 2615 if (!reg_overlap_mentioned_p (high_reg, addr)) 2616 { 2617 /* No overlap between high target register and address 2618 register. (We do this in a non-obvious way to 2619 save a register file writeback) */ 2620 if (GET_CODE (addr) == POST_INC) 2621 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0"; 2622 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0"; 2623 } 2624 else 2625 { 2626 /* This is an undefined situation. We should load into the 2627 address register *and* update that register. Probably 2628 we don't need to handle this at all. */ 2629 if (GET_CODE (addr) == POST_INC) 2630 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0"; 2631 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0"; 2632 } 2633 } 2634 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 2635 { 2636 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); 2637 2638 operands[1] = XEXP (addr, 0); 2639 gcc_assert (GET_CODE (operands[0]) == REG 2640 && GET_CODE (operands[1]) == REG); 2641 2642 if (!reg_overlap_mentioned_p (high_reg, addr)) 2643 { 2644 /* No overlap between high target register and address 2645 register. (We do this in a non-obvious way to 2646 save a register file writeback) */ 2647 if (GET_CODE (addr) == PRE_INC) 2648 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0"; 2649 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0"; 2650 } 2651 else 2652 { 2653 /* This is an undefined situation. We should load into the 2654 address register *and* update that register. Probably 2655 we don't need to handle this at all. */ 2656 if (GET_CODE (addr) == PRE_INC) 2657 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0"; 2658 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0"; 2659 } 2660 } 2661 else if (GET_CODE (addr) == PLUS 2662 && GET_CODE (XEXP (addr, 0)) == MULT) 2663 { 2664 rtx xoperands[4]; 2665 2666 /* Load address into left half of destination register. 
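For example, with ADDR = (plus (mult %r24 4) %r25) and %r28/%r29 as the destination pair, this emits sh2addl %r24,%r25,%r28 and then returns ldw 4(%r28),%r29 / ldw 0(%r28),%r28 (registers illustrative).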
*/ 2667 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); 2668 xoperands[1] = XEXP (addr, 1); 2669 xoperands[2] = XEXP (XEXP (addr, 0), 0); 2670 xoperands[3] = XEXP (XEXP (addr, 0), 1); 2671 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}", 2672 xoperands); 2673 return "ldw 4(%0),%R0\n\tldw 0(%0),%0"; 2674 } 2675 else if (GET_CODE (addr) == PLUS 2676 && REG_P (XEXP (addr, 0)) 2677 && REG_P (XEXP (addr, 1))) 2678 { 2679 rtx xoperands[3]; 2680 2681 /* Load address into left half of destination register. */ 2682 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); 2683 xoperands[1] = XEXP (addr, 0); 2684 xoperands[2] = XEXP (addr, 1); 2685 output_asm_insn ("{addl|add,l} %1,%2,%0", 2686 xoperands); 2687 return "ldw 4(%0),%R0\n\tldw 0(%0),%0"; 2688 } 2689 } 2690 2691 /* If an operand is an unoffsettable memory ref, find a register 2692 we can increment temporarily to make it refer to the second word. */ 2693 2694 if (optype0 == MEMOP) 2695 addreg0 = find_addr_reg (XEXP (operands[0], 0)); 2696 2697 if (optype1 == MEMOP) 2698 addreg1 = find_addr_reg (XEXP (operands[1], 0)); 2699 2700 /* Ok, we can do one word at a time. 2701 Normally we do the low-numbered word first. 2702 2703 In either case, set up in LATEHALF the operands to use 2704 for the high-numbered word and in some cases alter the 2705 operands in OPERANDS to be suitable for the low-numbered word. */ 2706 2707 if (optype0 == REGOP) 2708 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); 2709 else if (optype0 == OFFSOP) 2710 latehalf[0] = adjust_address_nv (operands[0], SImode, 4); 2711 else 2712 latehalf[0] = operands[0]; 2713 2714 if (optype1 == REGOP) 2715 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); 2716 else if (optype1 == OFFSOP) 2717 latehalf[1] = adjust_address_nv (operands[1], SImode, 4); 2718 else if (optype1 == CNSTOP) 2719 { 2720 if (GET_CODE (operands[1]) == HIGH) 2721 { 2722 operands[1] = XEXP (operands[1], 0); 2723 highonly = 1; 2724 } 2725 split_double (operands[1], &operands[1], &latehalf[1]); 2726 } 2727 else 2728 latehalf[1] = operands[1]; 2729 2730 /* If the first move would clobber the source of the second one, 2731 do them in the other order. 2732 2733 This can happen in two cases: 2734 2735 mem -> register where the first half of the destination register 2736 is the same register used in the memory's address. Reload 2737 can create such insns. 2738 2739 mem in this case will be either register indirect or register 2740 indirect plus a valid offset. 2741 2742 register -> register move where REGNO(dst) == REGNO(src + 1) 2743 someone (Tim/Tege?) claimed this can happen for parameter loads. 2744 2745 Handle mem -> register case first. */ 2746 if (optype0 == REGOP 2747 && (optype1 == MEMOP || optype1 == OFFSOP) 2748 && refers_to_regno_p (REGNO (operands[0]), operands[1])) 2749 { 2750 /* Do the late half first. */ 2751 if (addreg1) 2752 output_asm_insn ("ldo 4(%0),%0", &addreg1); 2753 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2754 2755 /* Then clobber. */ 2756 if (addreg1) 2757 output_asm_insn ("ldo -4(%0),%0", &addreg1); 2758 return pa_singlemove_string (operands); 2759 } 2760 2761 /* Now handle register -> register case. */ 2762 if (optype0 == REGOP && optype1 == REGOP 2763 && REGNO (operands[0]) == REGNO (operands[1]) + 1) 2764 { 2765 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2766 return pa_singlemove_string (operands); 2767 } 2768 2769 /* Normal case: do the two words, low-numbered first. 
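For example, copying the register pair %r4/%r5 to an offsettable MEM comes out as stw %r4,0(base) followed by stw %r5,4(base); an unoffsettable address is instead bumped by 4 with ldo for the second word and restored afterwards (registers illustrative).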
*/ 2770 2771 output_asm_insn (pa_singlemove_string (operands), operands); 2772 2773 /* Make any unoffsettable addresses point at high-numbered word. */ 2774 if (addreg0) 2775 output_asm_insn ("ldo 4(%0),%0", &addreg0); 2776 if (addreg1) 2777 output_asm_insn ("ldo 4(%0),%0", &addreg1); 2778 2779 /* Do high-numbered word. */ 2780 if (highonly) 2781 output_asm_insn ("ldil L'%1,%0", latehalf); 2782 else 2783 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2784 2785 /* Undo the adds we just did. */ 2786 if (addreg0) 2787 output_asm_insn ("ldo -4(%0),%0", &addreg0); 2788 if (addreg1) 2789 output_asm_insn ("ldo -4(%0),%0", &addreg1); 2790 2791 return ""; 2792 } 2793 2794 const char * 2795 pa_output_fp_move_double (rtx *operands) 2796 { 2797 if (FP_REG_P (operands[0])) 2798 { 2799 if (FP_REG_P (operands[1]) 2800 || operands[1] == CONST0_RTX (GET_MODE (operands[0]))) 2801 output_asm_insn ("fcpy,dbl %f1,%0", operands); 2802 else 2803 output_asm_insn ("fldd%F1 %1,%0", operands); 2804 } 2805 else if (FP_REG_P (operands[1])) 2806 { 2807 output_asm_insn ("fstd%F0 %1,%0", operands); 2808 } 2809 else 2810 { 2811 rtx xoperands[2]; 2812 2813 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0]))); 2814 2815 /* This is a pain. You have to be prepared to deal with an 2816 arbitrary address here including pre/post increment/decrement. 2817 2818 so avoid this in the MD. */ 2819 gcc_assert (GET_CODE (operands[0]) == REG); 2820 2821 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); 2822 xoperands[0] = operands[0]; 2823 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands); 2824 } 2825 return ""; 2826 } 2827 2828 /* Return a REG that occurs in ADDR with coefficient 1. 2829 ADDR can be effectively incremented by incrementing REG. */ 2830 2831 static rtx 2832 find_addr_reg (rtx addr) 2833 { 2834 while (GET_CODE (addr) == PLUS) 2835 { 2836 if (GET_CODE (XEXP (addr, 0)) == REG) 2837 addr = XEXP (addr, 0); 2838 else if (GET_CODE (XEXP (addr, 1)) == REG) 2839 addr = XEXP (addr, 1); 2840 else if (CONSTANT_P (XEXP (addr, 0))) 2841 addr = XEXP (addr, 1); 2842 else if (CONSTANT_P (XEXP (addr, 1))) 2843 addr = XEXP (addr, 0); 2844 else 2845 gcc_unreachable (); 2846 } 2847 gcc_assert (GET_CODE (addr) == REG); 2848 return addr; 2849 } 2850 2851 /* Emit code to perform a block move. 2852 2853 OPERANDS[0] is the destination pointer as a REG, clobbered. 2854 OPERANDS[1] is the source pointer as a REG, clobbered. 2855 OPERANDS[2] is a register for temporary storage. 2856 OPERANDS[3] is a register for temporary storage. 2857 OPERANDS[4] is the size as a CONST_INT 2858 OPERANDS[5] is the alignment safe to use, as a CONST_INT. 2859 OPERANDS[6] is another temporary register. */ 2860 2861 const char * 2862 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) 2863 { 2864 HOST_WIDE_INT align = INTVAL (operands[5]); 2865 unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]); 2866 2867 /* We can't move more than a word at a time because the PA 2868 has no longer integer move insns. (Could use fp mem ops?) */ 2869 if (align > (TARGET_64BIT ? 8 : 4)) 2870 align = (TARGET_64BIT ? 8 : 4); 2871 2872 /* Note that we know each loop below will execute at least twice 2873 (else we would have open-coded the copy). */ 2874 switch (align) 2875 { 2876 case 8: 2877 /* Pre-adjust the loop counter. */ 2878 operands[4] = GEN_INT (n_bytes - 16); 2879 output_asm_insn ("ldi %4,%2", operands); 2880 2881 /* Copying loop. 
*/ 2882 output_asm_insn ("ldd,ma 8(%1),%3", operands); 2883 output_asm_insn ("ldd,ma 8(%1),%6", operands); 2884 output_asm_insn ("std,ma %3,8(%0)", operands); 2885 output_asm_insn ("addib,>= -16,%2,.-12", operands); 2886 output_asm_insn ("std,ma %6,8(%0)", operands); 2887 2888 /* Handle the residual. There could be up to 7 bytes of 2889 residual to copy! */ 2890 if (n_bytes % 16 != 0) 2891 { 2892 operands[4] = GEN_INT (n_bytes % 8); 2893 if (n_bytes % 16 >= 8) 2894 output_asm_insn ("ldd,ma 8(%1),%3", operands); 2895 if (n_bytes % 8 != 0) 2896 output_asm_insn ("ldd 0(%1),%6", operands); 2897 if (n_bytes % 16 >= 8) 2898 output_asm_insn ("std,ma %3,8(%0)", operands); 2899 if (n_bytes % 8 != 0) 2900 output_asm_insn ("stdby,e %6,%4(%0)", operands); 2901 } 2902 return ""; 2903 2904 case 4: 2905 /* Pre-adjust the loop counter. */ 2906 operands[4] = GEN_INT (n_bytes - 8); 2907 output_asm_insn ("ldi %4,%2", operands); 2908 2909 /* Copying loop. */ 2910 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); 2911 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands); 2912 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); 2913 output_asm_insn ("addib,>= -8,%2,.-12", operands); 2914 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands); 2915 2916 /* Handle the residual. There could be up to 7 bytes of 2917 residual to copy! */ 2918 if (n_bytes % 8 != 0) 2919 { 2920 operands[4] = GEN_INT (n_bytes % 4); 2921 if (n_bytes % 8 >= 4) 2922 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); 2923 if (n_bytes % 4 != 0) 2924 output_asm_insn ("ldw 0(%1),%6", operands); 2925 if (n_bytes % 8 >= 4) 2926 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); 2927 if (n_bytes % 4 != 0) 2928 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands); 2929 } 2930 return ""; 2931 2932 case 2: 2933 /* Pre-adjust the loop counter. */ 2934 operands[4] = GEN_INT (n_bytes - 4); 2935 output_asm_insn ("ldi %4,%2", operands); 2936 2937 /* Copying loop. */ 2938 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); 2939 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands); 2940 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); 2941 output_asm_insn ("addib,>= -4,%2,.-12", operands); 2942 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands); 2943 2944 /* Handle the residual. */ 2945 if (n_bytes % 4 != 0) 2946 { 2947 if (n_bytes % 4 >= 2) 2948 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); 2949 if (n_bytes % 2 != 0) 2950 output_asm_insn ("ldb 0(%1),%6", operands); 2951 if (n_bytes % 4 >= 2) 2952 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); 2953 if (n_bytes % 2 != 0) 2954 output_asm_insn ("stb %6,0(%0)", operands); 2955 } 2956 return ""; 2957 2958 case 1: 2959 /* Pre-adjust the loop counter. */ 2960 operands[4] = GEN_INT (n_bytes - 2); 2961 output_asm_insn ("ldi %4,%2", operands); 2962 2963 /* Copying loop. */ 2964 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands); 2965 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands); 2966 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands); 2967 output_asm_insn ("addib,>= -2,%2,.-12", operands); 2968 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands); 2969 2970 /* Handle the residual. */ 2971 if (n_bytes % 2 != 0) 2972 { 2973 output_asm_insn ("ldb 0(%1),%3", operands); 2974 output_asm_insn ("stb %3,0(%0)", operands); 2975 } 2976 return ""; 2977 2978 default: 2979 gcc_unreachable (); 2980 } 2981 } 2982 2983 /* Count the number of insns necessary to handle this block move. 
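For example, a 23-byte copy with 4-byte alignment needs the 6 loop insns plus 2 insns for each residual test below, 10 insns in all, giving a length of 40 bytes.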
2984 2985 Basic structure is the same as emit_block_move, except that we 2986 count insns rather than emit them. */ 2987 2988 static int 2989 compute_movmem_length (rtx_insn *insn) 2990 { 2991 rtx pat = PATTERN (insn); 2992 unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0)); 2993 unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0)); 2994 unsigned int n_insns = 0; 2995 2996 /* We can't move more than a word at a time because the PA 2997 has no longer integer move insns. (Could use fp mem ops?) */ 2998 if (align > (TARGET_64BIT ? 8 : 4)) 2999 align = (TARGET_64BIT ? 8 : 4); 3000 3001 /* The basic copying loop. */ 3002 n_insns = 6; 3003 3004 /* Residuals. */ 3005 if (n_bytes % (2 * align) != 0) 3006 { 3007 if ((n_bytes % (2 * align)) >= align) 3008 n_insns += 2; 3009 3010 if ((n_bytes % align) != 0) 3011 n_insns += 2; 3012 } 3013 3014 /* Lengths are expressed in bytes now; each insn is 4 bytes. */ 3015 return n_insns * 4; 3016 } 3017 3018 /* Emit code to perform a block clear. 3019 3020 OPERANDS[0] is the destination pointer as a REG, clobbered. 3021 OPERANDS[1] is a register for temporary storage. 3022 OPERANDS[2] is the size as a CONST_INT 3023 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */ 3024 3025 const char * 3026 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) 3027 { 3028 HOST_WIDE_INT align = INTVAL (operands[3]); 3029 unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]); 3030 3031 /* We can't clear more than a word at a time because the PA 3032 has no longer integer move insns. */ 3033 if (align > (TARGET_64BIT ? 8 : 4)) 3034 align = (TARGET_64BIT ? 8 : 4); 3035 3036 /* Note that we know each loop below will execute at least twice 3037 (else we would have open-coded the copy). */ 3038 switch (align) 3039 { 3040 case 8: 3041 /* Pre-adjust the loop counter. */ 3042 operands[2] = GEN_INT (n_bytes - 16); 3043 output_asm_insn ("ldi %2,%1", operands); 3044 3045 /* Loop. */ 3046 output_asm_insn ("std,ma %%r0,8(%0)", operands); 3047 output_asm_insn ("addib,>= -16,%1,.-4", operands); 3048 output_asm_insn ("std,ma %%r0,8(%0)", operands); 3049 3050 /* Handle the residual. There could be up to 7 bytes of 3051 residual to copy! */ 3052 if (n_bytes % 16 != 0) 3053 { 3054 operands[2] = GEN_INT (n_bytes % 8); 3055 if (n_bytes % 16 >= 8) 3056 output_asm_insn ("std,ma %%r0,8(%0)", operands); 3057 if (n_bytes % 8 != 0) 3058 output_asm_insn ("stdby,e %%r0,%2(%0)", operands); 3059 } 3060 return ""; 3061 3062 case 4: 3063 /* Pre-adjust the loop counter. */ 3064 operands[2] = GEN_INT (n_bytes - 8); 3065 output_asm_insn ("ldi %2,%1", operands); 3066 3067 /* Loop. */ 3068 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 3069 output_asm_insn ("addib,>= -8,%1,.-4", operands); 3070 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 3071 3072 /* Handle the residual. There could be up to 7 bytes of 3073 residual to copy! */ 3074 if (n_bytes % 8 != 0) 3075 { 3076 operands[2] = GEN_INT (n_bytes % 4); 3077 if (n_bytes % 8 >= 4) 3078 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 3079 if (n_bytes % 4 != 0) 3080 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands); 3081 } 3082 return ""; 3083 3084 case 2: 3085 /* Pre-adjust the loop counter. */ 3086 operands[2] = GEN_INT (n_bytes - 4); 3087 output_asm_insn ("ldi %2,%1", operands); 3088 3089 /* Loop.
*/ 3090 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3091 output_asm_insn ("addib,>= -4,%1,.-4", operands); 3092 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3093 3094 /* Handle the residual. */ 3095 if (n_bytes % 4 != 0) 3096 { 3097 if (n_bytes % 4 >= 2) 3098 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3099 if (n_bytes % 2 != 0) 3100 output_asm_insn ("stb %%r0,0(%0)", operands); 3101 } 3102 return ""; 3103 3104 case 1: 3105 /* Pre-adjust the loop counter. */ 3106 operands[2] = GEN_INT (n_bytes - 2); 3107 output_asm_insn ("ldi %2,%1", operands); 3108 3109 /* Loop. */ 3110 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); 3111 output_asm_insn ("addib,>= -2,%1,.-4", operands); 3112 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); 3113 3114 /* Handle the residual. */ 3115 if (n_bytes % 2 != 0) 3116 output_asm_insn ("stb %%r0,0(%0)", operands); 3117 3118 return ""; 3119 3120 default: 3121 gcc_unreachable (); 3122 } 3123 } 3124 3125 /* Count the number of insns necessary to handle this block clear. 3126 3127 Basic structure is the same as emit_block_move, except that we 3128 count insns rather than emit them. */ 3129 3130 static int 3131 compute_clrmem_length (rtx_insn *insn) 3132 { 3133 rtx pat = PATTERN (insn); 3134 unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0)); 3135 unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0)); 3136 unsigned int n_insns = 0; 3137 3138 /* We can't clear more than a word at a time because the PA 3139 has no longer integer move insns. */ 3140 if (align > (TARGET_64BIT ? 8 : 4)) 3141 align = (TARGET_64BIT ? 8 : 4); 3142 3143 /* The basic loop. */ 3144 n_insns = 4; 3145 3146 /* Residuals. */ 3147 if (n_bytes % (2 * align) != 0) 3148 { 3149 if ((n_bytes % (2 * align)) >= align) 3150 n_insns++; 3151 3152 if ((n_bytes % align) != 0) 3153 n_insns++; 3154 } 3155 3156 /* Lengths are expressed in bytes now; each insn is 4 bytes. */ 3157 return n_insns * 4; 3158 } 3159 3160 3161 const char * 3162 pa_output_and (rtx *operands) 3163 { 3164 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) 3165 { 3166 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3167 int ls0, ls1, ms0, p, len; 3168 3169 for (ls0 = 0; ls0 < 32; ls0++) 3170 if ((mask & (1 << ls0)) == 0) 3171 break; 3172 3173 for (ls1 = ls0; ls1 < 32; ls1++) 3174 if ((mask & (1 << ls1)) != 0) 3175 break; 3176 3177 for (ms0 = ls1; ms0 < 32; ms0++) 3178 if ((mask & (1 << ms0)) == 0) 3179 break; 3180 3181 gcc_assert (ms0 == 32); 3182 3183 if (ls1 == 32) 3184 { 3185 len = ls0; 3186 3187 gcc_assert (len); 3188 3189 operands[2] = GEN_INT (len); 3190 return "{extru|extrw,u} %1,31,%2,%0"; 3191 } 3192 else 3193 { 3194 /* We could use this `depi' for the case above as well, but `depi' 3195 requires one more register file access than an `extru'. */ 3196 3197 p = 31 - ls0; 3198 len = ls1 - ls0; 3199 3200 operands[2] = GEN_INT (p); 3201 operands[3] = GEN_INT (len); 3202 return "{depi|depwi} 0,%2,%3,%0"; 3203 } 3204 } 3205 else 3206 return "and %1,%2,%0"; 3207 } 3208 3209 /* Return a string to perform a bitwise-and of operands[1] with operands[2] 3210 storing the result in operands[0].
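For example, a mask of 0x3f gives LS0 = 6 and LS1 = 64, so the low six bits are extracted with extrd,u %1,63,6,%0, while a mask of 0xffffffffffffff0f clears bits 4-7 with depdi 0,59,4,%0.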
*/ 3211 const char * 3212 pa_output_64bit_and (rtx *operands) 3213 { 3214 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) 3215 { 3216 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3217 int ls0, ls1, ms0, p, len; 3218 3219 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++) 3220 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0) 3221 break; 3222 3223 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++) 3224 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0) 3225 break; 3226 3227 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++) 3228 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0) 3229 break; 3230 3231 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT); 3232 3233 if (ls1 == HOST_BITS_PER_WIDE_INT) 3234 { 3235 len = ls0; 3236 3237 gcc_assert (len); 3238 3239 operands[2] = GEN_INT (len); 3240 return "extrd,u %1,63,%2,%0"; 3241 } 3242 else 3243 { 3244 /* We could use this `depdi' for the case above as well, but `depdi' 3245 requires one more register file access than an `extrd'. */ 3246 3247 p = 63 - ls0; 3248 len = ls1 - ls0; 3249 3250 operands[2] = GEN_INT (p); 3251 operands[3] = GEN_INT (len); 3252 return "depdi 0,%2,%3,%0"; 3253 } 3254 } 3255 else 3256 return "and %1,%2,%0"; 3257 } 3258 3259 const char * 3260 pa_output_ior (rtx *operands) 3261 { 3262 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3263 int bs0, bs1, p, len; 3264 3265 if (INTVAL (operands[2]) == 0) 3266 return "copy %1,%0"; 3267 3268 for (bs0 = 0; bs0 < 32; bs0++) 3269 if ((mask & (1 << bs0)) != 0) 3270 break; 3271 3272 for (bs1 = bs0; bs1 < 32; bs1++) 3273 if ((mask & (1 << bs1)) == 0) 3274 break; 3275 3276 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); 3277 3278 p = 31 - bs0; 3279 len = bs1 - bs0; 3280 3281 operands[2] = GEN_INT (p); 3282 operands[3] = GEN_INT (len); 3283 return "{depi|depwi} -1,%2,%3,%0"; 3284 } 3285 3286 /* Return a string to perform a bitwise inclusive-or of operands[1] with 3287 operands[2] storing the result in operands[0]. */ 3288 const char * 3289 pa_output_64bit_ior (rtx *operands) 3290 { 3291 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3292 int bs0, bs1, p, len; 3293 3294 if (INTVAL (operands[2]) == 0) 3295 return "copy %1,%0"; 3296 3297 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++) 3298 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0) 3299 break; 3300 3301 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++) 3302 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0) 3303 break; 3304 3305 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT 3306 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); 3307 3308 p = 63 - bs0; 3309 len = bs1 - bs0; 3310 3311 operands[2] = GEN_INT (p); 3312 operands[3] = GEN_INT (len); 3313 return "depdi -1,%2,%3,%0"; 3314 } 3315 3316 /* Target hook for assembling integer objects. This code handles 3317 aligned SI and DI integers specially since function references 3318 must be preceded by P%. */ 3319 3320 static bool 3321 pa_assemble_integer (rtx x, unsigned int size, int aligned_p) 3322 { 3323 bool result; 3324 tree decl = NULL; 3325 3326 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to 3327 call assemble_external and set the SYMBOL_REF_DECL to NULL before 3328 calling output_addr_const. Otherwise, it may call assemble_external 3329 in the midst of outputting the assembler code for the SYMBOL_REF. 3330 We restore the SYMBOL_REF_DECL after the output is done.
*/ 3331 if (GET_CODE (x) == SYMBOL_REF) 3332 { 3333 decl = SYMBOL_REF_DECL (x); 3334 if (decl) 3335 { 3336 assemble_external (decl); 3337 SET_SYMBOL_REF_DECL (x, NULL); 3338 } 3339 } 3340 3341 if (size == UNITS_PER_WORD 3342 && aligned_p 3343 && function_label_operand (x, VOIDmode)) 3344 { 3345 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file); 3346 3347 /* We don't want an OPD when generating fast indirect calls. */ 3348 if (!TARGET_FAST_INDIRECT_CALLS) 3349 fputs ("P%", asm_out_file); 3350 3351 output_addr_const (asm_out_file, x); 3352 fputc ('\n', asm_out_file); 3353 result = true; 3354 } 3355 else 3356 result = default_assemble_integer (x, size, aligned_p); 3357 3358 if (decl) 3359 SET_SYMBOL_REF_DECL (x, decl); 3360 3361 return result; 3362 } 3363 3364 /* Output an ascii string. */ 3365 void 3366 pa_output_ascii (FILE *file, const char *p, int size) 3367 { 3368 int i; 3369 int chars_output; 3370 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */ 3371 3372 /* The HP assembler can only take strings of 256 characters at one 3373 time. This is a limitation on input line length, *not* the 3374 length of the string. Sigh. Even worse, it seems that the 3375 restriction is in number of input characters (see \xnn & 3376 \whatever). So we have to do this very carefully. */ 3377 3378 fputs ("\t.STRING \"", file); 3379 3380 chars_output = 0; 3381 for (i = 0; i < size; i += 4) 3382 { 3383 int co = 0; 3384 int io = 0; 3385 for (io = 0, co = 0; io < MIN (4, size - i); io++) 3386 { 3387 register unsigned int c = (unsigned char) p[i + io]; 3388 3389 if (c == '\"' || c == '\\') 3390 partial_output[co++] = '\\'; 3391 if (c >= ' ' && c < 0177) 3392 partial_output[co++] = c; 3393 else 3394 { 3395 unsigned int hexd; 3396 partial_output[co++] = '\\'; 3397 partial_output[co++] = 'x'; 3398 hexd = c / 16 - 0 + '0'; 3399 if (hexd > '9') 3400 hexd -= '9' - 'a' + 1; 3401 partial_output[co++] = hexd; 3402 hexd = c % 16 - 0 + '0'; 3403 if (hexd > '9') 3404 hexd -= '9' - 'a' + 1; 3405 partial_output[co++] = hexd; 3406 } 3407 } 3408 if (chars_output + co > 243) 3409 { 3410 fputs ("\"\n\t.STRING \"", file); 3411 chars_output = 0; 3412 } 3413 fwrite (partial_output, 1, (size_t) co, file); 3414 chars_output += co; 3415 co = 0; 3416 } 3417 fputs ("\"\n", file); 3418 } 3419 3420 /* Try to rewrite floating point comparisons & branches to avoid 3421 useless add,tr insns. 3422 3423 CHECK_NOTES is nonzero if we should examine REG_DEAD notes 3424 to see if FPCC is dead. CHECK_NOTES is nonzero for the 3425 first attempt to remove useless add,tr insns. It is zero 3426 for the second pass as reorg sometimes leaves bogus REG_DEAD 3427 notes lying around. 3428 3429 When CHECK_NOTES is zero we can only eliminate add,tr insns 3430 when there's a 1:1 correspondence between fcmp and ftest/fbranch 3431 instructions. */ 3432 static void 3433 remove_useless_addtr_insns (int check_notes) 3434 { 3435 rtx_insn *insn; 3436 static int pass = 0; 3437 3438 /* This is fairly cheap, so always run it when optimizing. */ 3439 if (optimize > 0) 3440 { 3441 int fcmp_count = 0; 3442 int fbranch_count = 0; 3443 3444 /* Walk all the insns in this function looking for fcmp & fbranch 3445 instructions. Keep track of how many of each we find. */ 3446 for (insn = get_insns (); insn; insn = next_insn (insn)) 3447 { 3448 rtx tmp; 3449 3450 /* Ignore anything that isn't an INSN or a JUMP_INSN. */ 3451 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn)) 3452 continue; 3453 3454 tmp = PATTERN (insn); 3455 3456 /* It must be a set. 
*/ 3457 if (GET_CODE (tmp) != SET) 3458 continue; 3459 3460 /* If the destination is CCFP, then we've found an fcmp insn. */ 3461 tmp = SET_DEST (tmp); 3462 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0) 3463 { 3464 fcmp_count++; 3465 continue; 3466 } 3467 3468 tmp = PATTERN (insn); 3469 /* If this is an fbranch instruction, bump the fbranch counter. */ 3470 if (GET_CODE (tmp) == SET 3471 && SET_DEST (tmp) == pc_rtx 3472 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE 3473 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE 3474 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG 3475 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0) 3476 { 3477 fbranch_count++; 3478 continue; 3479 } 3480 } 3481 3482 3483 /* Find all floating point compare + branch insns. If possible, 3484 reverse the comparison & the branch to avoid add,tr insns. */ 3485 for (insn = get_insns (); insn; insn = next_insn (insn)) 3486 { 3487 rtx tmp; 3488 rtx_insn *next; 3489 3490 /* Ignore anything that isn't an INSN. */ 3491 if (! NONJUMP_INSN_P (insn)) 3492 continue; 3493 3494 tmp = PATTERN (insn); 3495 3496 /* It must be a set. */ 3497 if (GET_CODE (tmp) != SET) 3498 continue; 3499 3500 /* The destination must be CCFP, which is register zero. */ 3501 tmp = SET_DEST (tmp); 3502 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0) 3503 continue; 3504 3505 /* INSN should be a set of CCFP. 3506 3507 See if the result of this insn is used in a reversed FP 3508 conditional branch. If so, reverse our condition and 3509 the branch. Doing so avoids useless add,tr insns. */ 3510 next = next_insn (insn); 3511 while (next) 3512 { 3513 /* Jumps, calls and labels stop our search. */ 3514 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next)) 3515 break; 3516 3517 /* As does another fcmp insn. */ 3518 if (NONJUMP_INSN_P (next) 3519 && GET_CODE (PATTERN (next)) == SET 3520 && GET_CODE (SET_DEST (PATTERN (next))) == REG 3521 && REGNO (SET_DEST (PATTERN (next))) == 0) 3522 break; 3523 3524 next = next_insn (next); 3525 } 3526 3527 /* Is NEXT a branch? */ 3528 if (next && JUMP_P (next)) 3529 { 3530 rtx pattern = PATTERN (next); 3531 3532 /* If it is a reversed fp conditional branch (e.g. uses add,tr) 3533 and CCFP dies, then reverse our conditional and the branch 3534 to avoid the add,tr. */ 3535 if (GET_CODE (pattern) == SET 3536 && SET_DEST (pattern) == pc_rtx 3537 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE 3538 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE 3539 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG 3540 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0 3541 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC 3542 && (fcmp_count == fbranch_count 3543 || (check_notes 3544 && find_regno_note (next, REG_DEAD, 0)))) 3545 { 3546 /* Reverse the branch. */ 3547 tmp = XEXP (SET_SRC (pattern), 1); 3548 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2); 3549 XEXP (SET_SRC (pattern), 2) = tmp; 3550 INSN_CODE (next) = -1; 3551 3552 /* Reverse our condition. */ 3553 tmp = PATTERN (insn); 3554 PUT_CODE (XEXP (tmp, 1), 3555 (reverse_condition_maybe_unordered 3556 (GET_CODE (XEXP (tmp, 1))))); 3557 } 3558 } 3559 } 3560 } 3561 3562 pass = !pass; 3563 3564 } 3565 3566 /* You may have trouble believing this, but this is the 32-bit HP-PA 3567 stack layout. Wow.
3568 3569 Offset Contents 3570 3571 Variable arguments (optional; any number may be allocated) 3572 3573 SP-(4*(N+9)) arg word N 3574 : : 3575 SP-56 arg word 5 3576 SP-52 arg word 4 3577 3578 Fixed arguments (must be allocated; may remain unused) 3579 3580 SP-48 arg word 3 3581 SP-44 arg word 2 3582 SP-40 arg word 1 3583 SP-36 arg word 0 3584 3585 Frame Marker 3586 3587 SP-32 External Data Pointer (DP) 3588 SP-28 External sr4 3589 SP-24 External/stub RP (RP') 3590 SP-20 Current RP 3591 SP-16 Static Link 3592 SP-12 Clean up 3593 SP-8 Calling Stub RP (RP'') 3594 SP-4 Previous SP 3595 3596 Top of Frame 3597 3598 SP-0 Stack Pointer (points to next available address) 3599 3600 */ 3601 3602 /* This function saves registers as follows. Registers marked with ' are 3603 this function's registers (as opposed to the previous function's). 3604 If a frame_pointer isn't needed, r4 is saved as a general register; 3605 the space for the frame pointer is still allocated, though, to keep 3606 things simple. 3607 3608 3609 Top of Frame 3610 3611 SP (FP') Previous FP 3612 SP + 4 Alignment filler (sigh) 3613 SP + 8 Space for locals reserved here. 3614 . 3615 . 3616 . 3617 SP + n All call-saved registers used. 3618 . 3619 . 3620 . 3621 SP + o All call-saved FP registers used. 3622 . 3623 . 3624 . 3625 SP + p (SP') points to next available address. 3626 3627 */ 3628 3629 /* Global variables set by output_function_prologue(). */ 3630 /* Size of frame. Need to know this to emit return insns from 3631 leaf procedures. */ 3632 static HOST_WIDE_INT actual_fsize, local_fsize; 3633 static int save_fregs; 3634 3635 /* Emit RTL to store REG at the memory location specified by BASE+DISP. 3636 Handle case where DISP > 8k by using the add_high_const patterns. 3637 3638 Note in DISP > 8k case, we will leave the high part of the address 3639 in %r1. There is code in pa_expand_{prologue,epilogue} that knows this. */ 3640 3641 static void 3642 store_reg (int reg, HOST_WIDE_INT disp, int base) 3643 { 3644 rtx dest, src, basereg; 3645 rtx_insn *insn; 3646 3647 src = gen_rtx_REG (word_mode, reg); 3648 basereg = gen_rtx_REG (Pmode, base); 3649 if (VAL_14_BITS_P (disp)) 3650 { 3651 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp)); 3652 insn = emit_move_insn (dest, src); 3653 } 3654 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 3655 { 3656 rtx delta = GEN_INT (disp); 3657 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3658 3659 emit_move_insn (tmpreg, delta); 3660 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 3661 if (DO_FRAME_NOTES) 3662 { 3663 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3664 gen_rtx_SET (tmpreg, 3665 gen_rtx_PLUS (Pmode, basereg, delta))); 3666 RTX_FRAME_RELATED_P (insn) = 1; 3667 } 3668 dest = gen_rtx_MEM (word_mode, tmpreg); 3669 insn = emit_move_insn (dest, src); 3670 } 3671 else 3672 { 3673 rtx delta = GEN_INT (disp); 3674 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); 3675 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3676 3677 emit_move_insn (tmpreg, high); 3678 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 3679 insn = emit_move_insn (dest, src); 3680 if (DO_FRAME_NOTES) 3681 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3682 gen_rtx_SET (gen_rtx_MEM (word_mode, 3683 gen_rtx_PLUS (word_mode, 3684 basereg, 3685 delta)), 3686 src)); 3687 } 3688 3689 if (DO_FRAME_NOTES) 3690 RTX_FRAME_RELATED_P (insn) = 1; 3691 } 3692 3693 /* Emit RTL to store REG at the memory location specified by BASE and then 3694 add MOD to BASE. MOD must be <= 8k.
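For example, with BASE = %r30, REG = %r3 and MOD = 128, the post-store comes out roughly as stwm %r3,128(%r30) on 32-bit targets: %r3 is stored at 0(%r30), then %r30 is advanced by 128 (values illustrative).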
*/ 3695 3696 static void 3697 store_reg_modify (int base, int reg, HOST_WIDE_INT mod) 3698 { 3699 rtx basereg, srcreg, delta; 3700 rtx_insn *insn; 3701 3702 gcc_assert (VAL_14_BITS_P (mod)); 3703 3704 basereg = gen_rtx_REG (Pmode, base); 3705 srcreg = gen_rtx_REG (word_mode, reg); 3706 delta = GEN_INT (mod); 3707 3708 insn = emit_insn (gen_post_store (basereg, srcreg, delta)); 3709 if (DO_FRAME_NOTES) 3710 { 3711 RTX_FRAME_RELATED_P (insn) = 1; 3712 3713 /* RTX_FRAME_RELATED_P must be set on each frame related set 3714 in a parallel with more than one element. */ 3715 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1; 3716 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; 3717 } 3718 } 3719 3720 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case 3721 where DISP > 8k by using the add_high_const patterns. NOTE indicates 3722 whether to add a frame note or not. 3723 3724 In the DISP > 8k case, we leave the high part of the address in %r1. 3725 There is code in expand_hppa_{prologue,epilogue} that knows about this. */ 3726 3727 static void 3728 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note) 3729 { 3730 rtx_insn *insn; 3731 3732 if (VAL_14_BITS_P (disp)) 3733 { 3734 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3735 plus_constant (Pmode, 3736 gen_rtx_REG (Pmode, base), disp)); 3737 } 3738 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 3739 { 3740 rtx basereg = gen_rtx_REG (Pmode, base); 3741 rtx delta = GEN_INT (disp); 3742 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3743 3744 emit_move_insn (tmpreg, delta); 3745 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3746 gen_rtx_PLUS (Pmode, tmpreg, basereg)); 3747 if (DO_FRAME_NOTES) 3748 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3749 gen_rtx_SET (tmpreg, 3750 gen_rtx_PLUS (Pmode, basereg, delta))); 3751 } 3752 else 3753 { 3754 rtx basereg = gen_rtx_REG (Pmode, base); 3755 rtx delta = GEN_INT (disp); 3756 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3757 3758 emit_move_insn (tmpreg, 3759 gen_rtx_PLUS (Pmode, basereg, 3760 gen_rtx_HIGH (Pmode, delta))); 3761 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3762 gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 3763 } 3764 3765 if (DO_FRAME_NOTES && note) 3766 RTX_FRAME_RELATED_P (insn) = 1; 3767 } 3768 3769 HOST_WIDE_INT 3770 pa_compute_frame_size (poly_int64 size, int *fregs_live) 3771 { 3772 int freg_saved = 0; 3773 int i, j; 3774 3775 /* The code in pa_expand_prologue and pa_expand_epilogue must 3776 be consistent with the rounding and size calculation done here. 3777 Change them at the same time. */ 3778 3779 /* We do our own stack alignment. First, round the size of the 3780 stack locals up to a word boundary. */ 3781 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); 3782 3783 /* Space for previous frame pointer + filler. If any frame is 3784 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We 3785 waste some space here for the sake of HP compatibility. The 3786 first slot is only used when the frame pointer is needed. */ 3787 if (size || frame_pointer_needed) 3788 size += pa_starting_frame_offset (); 3789 3790 /* If the current function calls __builtin_eh_return, then we need 3791 to allocate stack space for registers that will hold data for 3792 the exception handler. 
*/ 3793 if (DO_FRAME_NOTES && crtl->calls_eh_return) 3794 { 3795 unsigned int i; 3796 3797 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i) 3798 continue; 3799 size += i * UNITS_PER_WORD; 3800 } 3801 3802 /* Account for space used by the callee general register saves. */ 3803 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--) 3804 if (df_regs_ever_live_p (i)) 3805 size += UNITS_PER_WORD; 3806 3807 /* Account for space used by the callee floating point register saves. */ 3808 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 3809 if (df_regs_ever_live_p (i) 3810 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1))) 3811 { 3812 freg_saved = 1; 3813 3814 /* We always save both halves of the FP register, so always 3815 increment the frame size by 8 bytes. */ 3816 size += 8; 3817 } 3818 3819 /* If any of the floating registers are saved, account for the 3820 alignment needed for the floating point register save block. */ 3821 if (freg_saved) 3822 { 3823 size = (size + 7) & ~7; 3824 if (fregs_live) 3825 *fregs_live = 1; 3826 } 3827 3828 /* The various ABIs include space for the outgoing parameters in the 3829 size of the current function's stack frame. We don't need to align 3830 for the outgoing arguments as their alignment is set by the final 3831 rounding for the frame as a whole. */ 3832 size += crtl->outgoing_args_size; 3833 3834 /* Allocate space for the fixed frame marker. This space must be 3835 allocated for any function that makes calls or allocates 3836 stack space. */ 3837 if (!crtl->is_leaf || size) 3838 size += TARGET_64BIT ? 48 : 32; 3839 3840 /* Finally, round to the preferred stack boundary. */ 3841 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1) 3842 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)); 3843 } 3844 3845 /* Output function label, and associated .PROC and .CALLINFO statements. */ 3846 3847 void 3848 pa_output_function_label (FILE *file) 3849 { 3850 /* The function's label and associated .PROC must never be 3851 separated and must be output *after* any profiling declarations 3852 to avoid changing spaces/subspaces within a procedure. */ 3853 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); 3854 fputs ("\t.PROC\n", file); 3855 3856 /* pa_expand_prologue does the dirty work now. We just need 3857 to output the assembler directives which denote the start 3858 of a function. */ 3859 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize); 3860 if (crtl->is_leaf) 3861 fputs (",NO_CALLS", file); 3862 else 3863 fputs (",CALLS", file); 3864 if (rp_saved) 3865 fputs (",SAVE_RP", file); 3866 3867 /* The SAVE_SP flag is used to indicate that register %r3 is stored 3868 at the beginning of the frame and that it is used as the frame 3869 pointer for the frame. We do this because our current frame 3870 layout doesn't conform to that specified in the HP runtime 3871 documentation and we need a way to indicate to programs such as 3872 GDB where %r3 is saved. The SAVE_SP flag was chosen because it 3873 isn't used by HP compilers but is supported by the assembler. 3874 However, SAVE_SP is supposed to indicate that the previous stack 3875 pointer has been saved in the frame marker. */ 3876 if (frame_pointer_needed) 3877 fputs (",SAVE_SP", file); 3878 3879 /* Pass on information about the number of callee register saves 3880 performed in the prologue. 
3881 3882 The compiler is supposed to pass the highest register number 3883 saved, the assembler then has to adjust that number before 3884 entering it into the unwind descriptor (to account for any 3885 caller saved registers with lower register numbers than the 3886 first callee saved register). */ 3887 if (gr_saved) 3888 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2); 3889 3890 if (fr_saved) 3891 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11); 3892 3893 fputs ("\n\t.ENTRY\n", file); 3894 } 3895 3896 /* Output function prologue. */ 3897 3898 static void 3899 pa_output_function_prologue (FILE *file) 3900 { 3901 pa_output_function_label (file); 3902 remove_useless_addtr_insns (0); 3903 } 3904 3905 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */ 3906 3907 static void 3908 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED) 3909 { 3910 remove_useless_addtr_insns (0); 3911 } 3912 3913 void 3914 pa_expand_prologue (void) 3915 { 3916 int merge_sp_adjust_with_store = 0; 3917 HOST_WIDE_INT size = get_frame_size (); 3918 HOST_WIDE_INT offset; 3919 int i; 3920 rtx tmpreg; 3921 rtx_insn *insn; 3922 3923 gr_saved = 0; 3924 fr_saved = 0; 3925 save_fregs = 0; 3926 3927 /* Compute total size for frame pointer, filler, locals and rounding to 3928 the next word boundary. Similar code appears in pa_compute_frame_size 3929 and must be changed in tandem with this code. */ 3930 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); 3931 if (local_fsize || frame_pointer_needed) 3932 local_fsize += pa_starting_frame_offset (); 3933 3934 actual_fsize = pa_compute_frame_size (size, &save_fregs); 3935 if (flag_stack_usage_info) 3936 current_function_static_stack_size = actual_fsize; 3937 3938 /* Compute a few things we will use often. */ 3939 tmpreg = gen_rtx_REG (word_mode, 1); 3940 3941 /* Save RP first. The calling conventions manual states RP will 3942 always be stored into the caller's frame at sp - 20 or sp - 16 3943 depending on which ABI is in use. */ 3944 if (df_regs_ever_live_p (2) || crtl->calls_eh_return) 3945 { 3946 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM); 3947 rp_saved = true; 3948 } 3949 else 3950 rp_saved = false; 3951 3952 /* Allocate the local frame and set up the frame pointer if needed. */ 3953 if (actual_fsize != 0) 3954 { 3955 if (frame_pointer_needed) 3956 { 3957 /* Copy the old frame pointer temporarily into %r1. Set up the 3958 new stack pointer, then store away the saved old frame pointer 3959 into the stack at sp and at the same time update the stack 3960 pointer by actual_fsize bytes. Two versions, first 3961 handles small (<8k) frames. The second handles large (>=8k) 3962 frames. */ 3963 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx); 3964 if (DO_FRAME_NOTES) 3965 RTX_FRAME_RELATED_P (insn) = 1; 3966 3967 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 3968 if (DO_FRAME_NOTES) 3969 RTX_FRAME_RELATED_P (insn) = 1; 3970 3971 if (VAL_14_BITS_P (actual_fsize)) 3972 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize); 3973 else 3974 { 3975 /* It is incorrect to store the saved frame pointer at *sp, 3976 then increment sp (writes beyond the current stack boundary). 3977 3978 So instead use stwm to store at *sp and post-increment the 3979 stack pointer as an atomic operation. Then increment sp to 3980 finish allocating the new frame. 
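	     As a worked example (figures purely illustrative): 8192
	     itself does not fit in a signed 14-bit displacement, so
	     adjust1 below is the largest 64-byte multiple that does,
	     8192 - 64 == 8128.  With an actual_fsize of 20000, adjust2
	     would then be 20000 - 8128 == 11872, added afterwards with
	     set_reg_plus_d.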
*/ 3981 HOST_WIDE_INT adjust1 = 8192 - 64; 3982 HOST_WIDE_INT adjust2 = actual_fsize - adjust1; 3983 3984 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1); 3985 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 3986 adjust2, 1); 3987 } 3988 3989 /* We set SAVE_SP in frames that need a frame pointer. Thus, 3990 we need to store the previous stack pointer (frame pointer) 3991 into the frame marker on targets that use the HP unwind 3992 library. This allows the HP unwind library to be used to 3993 unwind GCC frames. However, we are not fully compatible 3994 with the HP library because our frame layout differs from 3995 that specified in the HP runtime specification. 3996 3997 We don't want a frame note on this instruction as the frame 3998 marker moves during dynamic stack allocation. 3999 4000 This instruction also serves as a blockage to prevent 4001 register spills from being scheduled before the stack 4002 pointer is raised. This is necessary as we store 4003 registers using the frame pointer as a base register, 4004 and the frame pointer is set before sp is raised. */ 4005 if (TARGET_HPUX_UNWIND_LIBRARY) 4006 { 4007 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, 4008 GEN_INT (TARGET_64BIT ? -8 : -4)); 4009 4010 emit_move_insn (gen_rtx_MEM (word_mode, addr), 4011 hard_frame_pointer_rtx); 4012 } 4013 else 4014 emit_insn (gen_blockage ()); 4015 } 4016 /* no frame pointer needed. */ 4017 else 4018 { 4019 /* In some cases we can perform the first callee register save 4020 and allocating the stack frame at the same time. If so, just 4021 make a note of it and defer allocating the frame until saving 4022 the callee registers. */ 4023 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0) 4024 merge_sp_adjust_with_store = 1; 4025 /* Cannot optimize. Adjust the stack frame by actual_fsize 4026 bytes. */ 4027 else 4028 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4029 actual_fsize, 1); 4030 } 4031 } 4032 4033 /* Normal register save. 4034 4035 Do not save the frame pointer in the frame_pointer_needed case. It 4036 was done earlier. */ 4037 if (frame_pointer_needed) 4038 { 4039 offset = local_fsize; 4040 4041 /* Saving the EH return data registers in the frame is the simplest 4042 way to get the frame unwind information emitted. We put them 4043 just before the general registers. */ 4044 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4045 { 4046 unsigned int i, regno; 4047 4048 for (i = 0; ; ++i) 4049 { 4050 regno = EH_RETURN_DATA_REGNO (i); 4051 if (regno == INVALID_REGNUM) 4052 break; 4053 4054 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); 4055 offset += UNITS_PER_WORD; 4056 } 4057 } 4058 4059 for (i = 18; i >= 4; i--) 4060 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) 4061 { 4062 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM); 4063 offset += UNITS_PER_WORD; 4064 gr_saved++; 4065 } 4066 /* Account for %r3 which is saved in a special place. */ 4067 gr_saved++; 4068 } 4069 /* No frame pointer needed. */ 4070 else 4071 { 4072 offset = local_fsize - actual_fsize; 4073 4074 /* Saving the EH return data registers in the frame is the simplest 4075 way to get the frame unwind information emitted. */ 4076 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4077 { 4078 unsigned int i, regno; 4079 4080 for (i = 0; ; ++i) 4081 { 4082 regno = EH_RETURN_DATA_REGNO (i); 4083 if (regno == INVALID_REGNUM) 4084 break; 4085 4086 /* If merge_sp_adjust_with_store is nonzero, then we can 4087 optimize the first save. 
*/ 4088 if (merge_sp_adjust_with_store) 4089 { 4090 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset); 4091 merge_sp_adjust_with_store = 0; 4092 } 4093 else 4094 store_reg (regno, offset, STACK_POINTER_REGNUM); 4095 offset += UNITS_PER_WORD; 4096 } 4097 } 4098 4099 for (i = 18; i >= 3; i--) 4100 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) 4101 { 4102 /* If merge_sp_adjust_with_store is nonzero, then we can 4103 optimize the first GR save. */ 4104 if (merge_sp_adjust_with_store) 4105 { 4106 store_reg_modify (STACK_POINTER_REGNUM, i, -offset); 4107 merge_sp_adjust_with_store = 0; 4108 } 4109 else 4110 store_reg (i, offset, STACK_POINTER_REGNUM); 4111 offset += UNITS_PER_WORD; 4112 gr_saved++; 4113 } 4114 4115 /* If we wanted to merge the SP adjustment with a GR save, but we never 4116 did any GR saves, then just emit the adjustment here. */ 4117 if (merge_sp_adjust_with_store) 4118 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4119 actual_fsize, 1); 4120 } 4121 4122 /* The hppa calling conventions say that %r19, the pic offset 4123 register, is saved at sp - 32 (in this function's frame) 4124 when generating PIC code. FIXME: What is the correct thing 4125 to do for functions which make no calls and allocate no 4126 frame? Do we need to allocate a frame, or can we just omit 4127 the save? For now we'll just omit the save. 4128 4129 We don't want a note on this insn as the frame marker can 4130 move if there is a dynamic stack allocation. */ 4131 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT) 4132 { 4133 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32)); 4134 4135 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx); 4136 4137 } 4138 4139 /* Align pointer properly (doubleword boundary). */ 4140 offset = (offset + 7) & ~7; 4141 4142 /* Floating point register store. */ 4143 if (save_fregs) 4144 { 4145 rtx base; 4146 4147 /* First get the frame or stack pointer to the start of the FP register 4148 save area. */ 4149 if (frame_pointer_needed) 4150 { 4151 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); 4152 base = hard_frame_pointer_rtx; 4153 } 4154 else 4155 { 4156 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); 4157 base = stack_pointer_rtx; 4158 } 4159 4160 /* Now actually save the FP registers. */ 4161 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 4162 { 4163 if (df_regs_ever_live_p (i) 4164 || (! 
TARGET_64BIT && df_regs_ever_live_p (i + 1))) 4165 { 4166 rtx addr, reg; 4167 rtx_insn *insn; 4168 addr = gen_rtx_MEM (DFmode, 4169 gen_rtx_POST_INC (word_mode, tmpreg)); 4170 reg = gen_rtx_REG (DFmode, i); 4171 insn = emit_move_insn (addr, reg); 4172 if (DO_FRAME_NOTES) 4173 { 4174 RTX_FRAME_RELATED_P (insn) = 1; 4175 if (TARGET_64BIT) 4176 { 4177 rtx mem = gen_rtx_MEM (DFmode, 4178 plus_constant (Pmode, base, 4179 offset)); 4180 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 4181 gen_rtx_SET (mem, reg)); 4182 } 4183 else 4184 { 4185 rtx meml = gen_rtx_MEM (SFmode, 4186 plus_constant (Pmode, base, 4187 offset)); 4188 rtx memr = gen_rtx_MEM (SFmode, 4189 plus_constant (Pmode, base, 4190 offset + 4)); 4191 rtx regl = gen_rtx_REG (SFmode, i); 4192 rtx regr = gen_rtx_REG (SFmode, i + 1); 4193 rtx setl = gen_rtx_SET (meml, regl); 4194 rtx setr = gen_rtx_SET (memr, regr); 4195 rtvec vec; 4196 4197 RTX_FRAME_RELATED_P (setl) = 1; 4198 RTX_FRAME_RELATED_P (setr) = 1; 4199 vec = gen_rtvec (2, setl, setr); 4200 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 4201 gen_rtx_SEQUENCE (VOIDmode, vec)); 4202 } 4203 } 4204 offset += GET_MODE_SIZE (DFmode); 4205 fr_saved++; 4206 } 4207 } 4208 } 4209 } 4210 4211 /* Emit RTL to load REG from the memory location specified by BASE+DISP. 4212 Handle case where DISP > 8k by using the add_high_const patterns. */ 4213 4214 static void 4215 load_reg (int reg, HOST_WIDE_INT disp, int base) 4216 { 4217 rtx dest = gen_rtx_REG (word_mode, reg); 4218 rtx basereg = gen_rtx_REG (Pmode, base); 4219 rtx src; 4220 4221 if (VAL_14_BITS_P (disp)) 4222 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp)); 4223 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 4224 { 4225 rtx delta = GEN_INT (disp); 4226 rtx tmpreg = gen_rtx_REG (Pmode, 1); 4227 4228 emit_move_insn (tmpreg, delta); 4229 if (TARGET_DISABLE_INDEXING) 4230 { 4231 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 4232 src = gen_rtx_MEM (word_mode, tmpreg); 4233 } 4234 else 4235 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 4236 } 4237 else 4238 { 4239 rtx delta = GEN_INT (disp); 4240 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); 4241 rtx tmpreg = gen_rtx_REG (Pmode, 1); 4242 4243 emit_move_insn (tmpreg, high); 4244 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 4245 } 4246 4247 emit_move_insn (dest, src); 4248 } 4249 4250 /* Update the total code bytes output to the text section. */ 4251 4252 static void 4253 update_total_code_bytes (unsigned int nbytes) 4254 { 4255 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM) 4256 && !IN_NAMED_SECTION_P (cfun->decl)) 4257 { 4258 unsigned int old_total = total_code_bytes; 4259 4260 total_code_bytes += nbytes; 4261 4262 /* Be prepared to handle overflows. */ 4263 if (old_total > total_code_bytes) 4264 total_code_bytes = UINT_MAX; 4265 } 4266 } 4267 4268 /* This function generates the assembly code for function exit. 4269 Args are as for output_function_prologue (). 4270 4271 The function epilogue should not depend on the current stack 4272 pointer! It should use the frame pointer only. This is mandatory 4273 because of alloca; we also take advantage of it to omit stack 4274 adjustments before returning. */ 4275 4276 static void 4277 pa_output_function_epilogue (FILE *file) 4278 { 4279 rtx_insn *insn = get_last_insn (); 4280 bool extra_nop; 4281 4282 /* pa_expand_epilogue does the dirty work now. 
We just need 4283 to output the assembler directives which denote the end 4284 of a function. 4285 4286 To make debuggers happy, emit a nop if the epilogue was completely 4287 eliminated due to a volatile call as the last insn in the 4288 current function. That way the return address (in %r2) will 4289 always point to a valid instruction in the current function. */ 4290 4291 /* Get the last real insn. */ 4292 if (NOTE_P (insn)) 4293 insn = prev_real_insn (insn); 4294 4295 /* If it is a sequence, then look inside. */ 4296 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE) 4297 insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0); 4298 4299 /* If insn is a CALL_INSN, then it must be a call to a volatile 4300 function (otherwise there would be epilogue insns). */ 4301 if (insn && CALL_P (insn)) 4302 { 4303 fputs ("\tnop\n", file); 4304 extra_nop = true; 4305 } 4306 else 4307 extra_nop = false; 4308 4309 fputs ("\t.EXIT\n\t.PROCEND\n", file); 4310 4311 if (TARGET_SOM && TARGET_GAS) 4312 { 4313 /* We are done with this subspace except possibly for some additional 4314 debug information. Forget that we are in this subspace to ensure 4315 that the next function is output in its own subspace. */ 4316 in_section = NULL; 4317 cfun->machine->in_nsubspa = 2; 4318 } 4319 4320 /* Thunks do their own insn accounting. */ 4321 if (cfun->is_thunk) 4322 return; 4323 4324 if (INSN_ADDRESSES_SET_P ()) 4325 { 4326 last_address = extra_nop ? 4 : 0; 4327 insn = get_last_nonnote_insn (); 4328 if (insn) 4329 { 4330 last_address += INSN_ADDRESSES (INSN_UID (insn)); 4331 if (INSN_P (insn)) 4332 last_address += insn_default_length (insn); 4333 } 4334 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) 4335 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); 4336 } 4337 else 4338 last_address = UINT_MAX; 4339 4340 /* Finally, update the total number of code bytes output so far. */ 4341 update_total_code_bytes (last_address); 4342 } 4343 4344 void 4345 pa_expand_epilogue (void) 4346 { 4347 rtx tmpreg; 4348 HOST_WIDE_INT offset; 4349 HOST_WIDE_INT ret_off = 0; 4350 int i; 4351 int merge_sp_adjust_with_load = 0; 4352 4353 /* We will use this often. */ 4354 tmpreg = gen_rtx_REG (word_mode, 1); 4355 4356 /* Try to restore RP early to avoid load/use interlocks when 4357 RP gets used in the return (bv) instruction. This appears to still 4358 be necessary even when we schedule the prologue and epilogue. */ 4359 if (rp_saved) 4360 { 4361 ret_off = TARGET_64BIT ? -16 : -20; 4362 if (frame_pointer_needed) 4363 { 4364 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM); 4365 ret_off = 0; 4366 } 4367 else 4368 { 4369 /* No frame pointer, and stack is smaller than 8k. */ 4370 if (VAL_14_BITS_P (ret_off - actual_fsize)) 4371 { 4372 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM); 4373 ret_off = 0; 4374 } 4375 } 4376 } 4377 4378 /* General register restores. */ 4379 if (frame_pointer_needed) 4380 { 4381 offset = local_fsize; 4382 4383 /* If the current function calls __builtin_eh_return, then we need 4384 to restore the saved EH data registers. */ 4385 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4386 { 4387 unsigned int i, regno; 4388 4389 for (i = 0; ; ++i) 4390 { 4391 regno = EH_RETURN_DATA_REGNO (i); 4392 if (regno == INVALID_REGNUM) 4393 break; 4394 4395 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); 4396 offset += UNITS_PER_WORD; 4397 } 4398 } 4399 4400 for (i = 18; i >= 4; i--) 4401 if (df_regs_ever_live_p (i) && ! 
call_used_regs[i]) 4402 { 4403 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM); 4404 offset += UNITS_PER_WORD; 4405 } 4406 } 4407 else 4408 { 4409 offset = local_fsize - actual_fsize; 4410 4411 /* If the current function calls __builtin_eh_return, then we need 4412 to restore the saved EH data registers. */ 4413 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4414 { 4415 unsigned int i, regno; 4416 4417 for (i = 0; ; ++i) 4418 { 4419 regno = EH_RETURN_DATA_REGNO (i); 4420 if (regno == INVALID_REGNUM) 4421 break; 4422 4423 /* Only for the first load. 4424 merge_sp_adjust_with_load holds the register load 4425 with which we will merge the sp adjustment. */ 4426 if (merge_sp_adjust_with_load == 0 4427 && local_fsize == 0 4428 && VAL_14_BITS_P (-actual_fsize)) 4429 merge_sp_adjust_with_load = regno; 4430 else 4431 load_reg (regno, offset, STACK_POINTER_REGNUM); 4432 offset += UNITS_PER_WORD; 4433 } 4434 } 4435 4436 for (i = 18; i >= 3; i--) 4437 { 4438 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) 4439 { 4440 /* Only for the first load. 4441 merge_sp_adjust_with_load holds the register load 4442 with which we will merge the sp adjustment. */ 4443 if (merge_sp_adjust_with_load == 0 4444 && local_fsize == 0 4445 && VAL_14_BITS_P (-actual_fsize)) 4446 merge_sp_adjust_with_load = i; 4447 else 4448 load_reg (i, offset, STACK_POINTER_REGNUM); 4449 offset += UNITS_PER_WORD; 4450 } 4451 } 4452 } 4453 4454 /* Align pointer properly (doubleword boundary). */ 4455 offset = (offset + 7) & ~7; 4456 4457 /* FP register restores. */ 4458 if (save_fregs) 4459 { 4460 /* Adjust the register to index off of. */ 4461 if (frame_pointer_needed) 4462 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); 4463 else 4464 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); 4465 4466 /* Actually do the restores now. */ 4467 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 4468 if (df_regs_ever_live_p (i) 4469 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1))) 4470 { 4471 rtx src = gen_rtx_MEM (DFmode, 4472 gen_rtx_POST_INC (word_mode, tmpreg)); 4473 rtx dest = gen_rtx_REG (DFmode, i); 4474 emit_move_insn (dest, src); 4475 } 4476 } 4477 4478 /* Emit a blockage insn here to keep these insns from being moved to 4479 an earlier spot in the epilogue, or into the main instruction stream. 4480 4481 This is necessary as we must not cut the stack back before all the 4482 restores are finished. */ 4483 emit_insn (gen_blockage ()); 4484 4485 /* Reset stack pointer (and possibly frame pointer). The stack 4486 pointer is initially set to fp + 64 to avoid a race condition. */ 4487 if (frame_pointer_needed) 4488 { 4489 rtx delta = GEN_INT (-64); 4490 4491 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0); 4492 emit_insn (gen_pre_load (hard_frame_pointer_rtx, 4493 stack_pointer_rtx, delta)); 4494 } 4495 /* If we were deferring a callee register restore, do it now. */ 4496 else if (merge_sp_adjust_with_load) 4497 { 4498 rtx delta = GEN_INT (-actual_fsize); 4499 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load); 4500 4501 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta)); 4502 } 4503 else if (actual_fsize != 0) 4504 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4505 - actual_fsize, 0); 4506 4507 /* If we haven't restored %r2 yet (no frame pointer, and a stack 4508 frame greater than 8k), do so now. 
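     (By this point the stack pointer has been cut back, so the fixed
     -20 or -16 displacement is in range again even though ret_off -
     actual_fsize was too large for a 14-bit field earlier.)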
*/ 4509 if (ret_off != 0) 4510 load_reg (2, ret_off, STACK_POINTER_REGNUM); 4511 4512 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4513 { 4514 rtx sa = EH_RETURN_STACKADJ_RTX; 4515 4516 emit_insn (gen_blockage ()); 4517 emit_insn (TARGET_64BIT 4518 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa) 4519 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa)); 4520 } 4521 } 4522 4523 bool 4524 pa_can_use_return_insn (void) 4525 { 4526 if (!reload_completed) 4527 return false; 4528 4529 if (frame_pointer_needed) 4530 return false; 4531 4532 if (df_regs_ever_live_p (2)) 4533 return false; 4534 4535 if (crtl->profile) 4536 return false; 4537 4538 return pa_compute_frame_size (get_frame_size (), 0) == 0; 4539 } 4540 4541 rtx 4542 hppa_pic_save_rtx (void) 4543 { 4544 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM); 4545 } 4546 4547 #ifndef NO_DEFERRED_PROFILE_COUNTERS 4548 #define NO_DEFERRED_PROFILE_COUNTERS 0 4549 #endif 4550 4551 4552 /* Vector of funcdef numbers. */ 4553 static vec<int> funcdef_nos; 4554 4555 /* Output deferred profile counters. */ 4556 static void 4557 output_deferred_profile_counters (void) 4558 { 4559 unsigned int i; 4560 int align, n; 4561 4562 if (funcdef_nos.is_empty ()) 4563 return; 4564 4565 switch_to_section (data_section); 4566 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE); 4567 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); 4568 4569 for (i = 0; funcdef_nos.iterate (i, &n); i++) 4570 { 4571 targetm.asm_out.internal_label (asm_out_file, "LP", n); 4572 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1); 4573 } 4574 4575 funcdef_nos.release (); 4576 } 4577 4578 void 4579 hppa_profile_hook (int label_no) 4580 { 4581 rtx_code_label *label_rtx = gen_label_rtx (); 4582 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE); 4583 rtx arg_bytes, begin_label_rtx, mcount, sym; 4584 rtx_insn *call_insn; 4585 char begin_label_name[16]; 4586 bool use_mcount_pcrel_call; 4587 4588 /* Set up call destination. */ 4589 sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount"); 4590 pa_encode_label (sym); 4591 mcount = gen_rtx_MEM (Pmode, sym); 4592 4593 /* If we can reach _mcount with a pc-relative call, we can optimize 4594 loading the address of the current function. This requires linker 4595 long branch stub support. */ 4596 if (!TARGET_PORTABLE_RUNTIME 4597 && !TARGET_LONG_CALLS 4598 && (TARGET_SOM || flag_function_sections)) 4599 use_mcount_pcrel_call = TRUE; 4600 else 4601 use_mcount_pcrel_call = FALSE; 4602 4603 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL, 4604 label_no); 4605 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name)); 4606 4607 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2)); 4608 4609 if (!use_mcount_pcrel_call) 4610 { 4611 /* The address of the function is loaded into %r25 with an instruction- 4612 relative sequence that avoids the use of relocations. We use SImode 4613 for the address of the function in both 32 and 64-bit code to avoid 4614 having to provide DImode versions of the lcla2 pattern. 
*/ 4615 if (TARGET_PA_20) 4616 emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx)); 4617 else 4618 emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx)); 4619 } 4620 4621 if (!NO_DEFERRED_PROFILE_COUNTERS) 4622 { 4623 rtx count_label_rtx, addr, r24; 4624 char count_label_name[16]; 4625 4626 funcdef_nos.safe_push (label_no); 4627 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no); 4628 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, 4629 ggc_strdup (count_label_name)); 4630 4631 addr = force_reg (Pmode, count_label_rtx); 4632 r24 = gen_rtx_REG (Pmode, 24); 4633 emit_move_insn (r24, addr); 4634 4635 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12); 4636 if (use_mcount_pcrel_call) 4637 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes, 4638 begin_label_rtx)); 4639 else 4640 call_insn = emit_call_insn (gen_call (mcount, arg_bytes)); 4641 4642 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24); 4643 } 4644 else 4645 { 4646 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8); 4647 if (use_mcount_pcrel_call) 4648 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes, 4649 begin_label_rtx)); 4650 else 4651 call_insn = emit_call_insn (gen_call (mcount, arg_bytes)); 4652 } 4653 4654 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25)); 4655 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26)); 4656 4657 /* Indicate the _mcount call cannot throw, nor will it execute a 4658 non-local goto. */ 4659 make_reg_eh_region_note_nothrow_nononlocal (call_insn); 4660 4661 /* Allocate space for fixed arguments. */ 4662 if (reg_parm_stack_space > crtl->outgoing_args_size) 4663 crtl->outgoing_args_size = reg_parm_stack_space; 4664 } 4665 4666 /* Fetch the return address for the frame COUNT steps up from 4667 the current frame, after the prologue. FRAMEADDR is the 4668 frame pointer of the COUNT frame. 4669 4670 We want to ignore any export stub remnants here. To handle this, 4671 we examine the code at the return address, and if it is an export 4672 stub, we return a memory rtx for the stub return address stored 4673 at frame-24. 4674 4675 The value returned is used in two different ways: 4676 4677 1. To find a function's caller. 4678 4679 2. To change the return address for a function. 4680 4681 This function handles most instances of case 1; however, it will 4682 fail if there are two levels of stubs to execute on the return 4683 path. The only way I believe that can happen is if the return value 4684 needs a parameter relocation, which never happens for C code. 4685 4686 This function handles most instances of case 2; however, it will 4687 fail if we did not originally have stub code on the return path 4688 but will need stub code on the new return path. This can happen if 4689 the caller & callee are both in the main program, but the new 4690 return location is in a shared library. */ 4691 4692 rtx 4693 pa_return_addr_rtx (int count, rtx frameaddr) 4694 { 4695 rtx label; 4696 rtx rp; 4697 rtx saved_rp; 4698 rtx ins; 4699 4700 /* The instruction stream at the return address of a PA1.X export stub is: 4701 4702 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp 4703 0x004010a1 | stub+12: ldsid (sr0,rp),r1 4704 0x00011820 | stub+16: mtsp r1,sr0 4705 0xe0400002 | stub+20: be,n 0(sr0,rp) 4706 4707 0xe0400002 must be specified as -532676606 so that it won't be 4708 rejected as an invalid immediate operand on 64-bit hosts. 
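   (The PA2.0 stub below plays the same game: 0xe840d002 is written as
   -398405630.)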
4709
4710    The instruction stream at the return address of a PA2.0 export stub is:
4711
4712    0x4bc23fd1 | stub+8:  ldw -18(sr0,sp),rp
4713    0xe840d002 | stub+12: bve,n (rp)
4714 */
4715
4716   HOST_WIDE_INT insns[4];
4717   int i, len;
4718
4719   if (count != 0)
4720     return NULL_RTX;
4721
4722   rp = get_hard_reg_initial_val (Pmode, 2);
4723
4724   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4725     return rp;
4726
4727   /* If there is no export stub then just use the value saved from
4728      the return pointer register. */
4729
4730   saved_rp = gen_reg_rtx (Pmode);
4731   emit_move_insn (saved_rp, rp);
4732
4733   /* Get pointer to the instruction stream. We have to mask out the
4734      privilege level from the two low order bits of the return address
4735      pointer here so that ins will point to the start of the first
4736      instruction that would have been executed if we returned. */
4737   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4738   label = gen_label_rtx ();
4739
4740   if (TARGET_PA_20)
4741     {
4742       insns[0] = 0x4bc23fd1;
4743       insns[1] = -398405630;
4744       len = 2;
4745     }
4746   else
4747     {
4748       insns[0] = 0x4bc23fd1;
4749       insns[1] = 0x004010a1;
4750       insns[2] = 0x00011820;
4751       insns[3] = -532676606;
4752       len = 4;
4753     }
4754
4755   /* Check the instruction stream at the normal return address for the
4756      export stub. If it is an export stub, then our return address is
4757      really in -24[frameaddr]. */
4758
4759   for (i = 0; i < len; i++)
4760     {
4761       rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4762       rtx op1 = GEN_INT (insns[i]);
4763       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4764     }
4765
4766   /* Here we know that our return address points to an export
4767      stub. We don't want to return the address of the export stub,
4768      but rather the return address of the export stub. That return
4769      address is stored at -24[frameaddr]. */
4770
4771   emit_move_insn (saved_rp,
4772 		  gen_rtx_MEM (Pmode,
4773 			       memory_address (Pmode,
4774 					       plus_constant (Pmode, frameaddr,
4775 							      -24))));
4776
4777   emit_label (label);
4778
4779   return saved_rp;
4780 }
4781
4782 void
4783 pa_emit_bcond_fp (rtx operands[])
4784 {
4785   enum rtx_code code = GET_CODE (operands[0]);
4786   rtx operand0 = operands[1];
4787   rtx operand1 = operands[2];
4788   rtx label = operands[3];
4789
4790   emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4791 			  gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4792
4793   emit_jump_insn (gen_rtx_SET (pc_rtx,
4794 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4795 						     gen_rtx_fmt_ee (NE,
4796 								     VOIDmode,
4797 								     gen_rtx_REG (CCFPmode, 0),
4798 								     const0_rtx),
4799 						     gen_rtx_LABEL_REF (VOIDmode, label),
4800 						     pc_rtx)));
4801
4802 }
4803
4804 /* Adjust the cost of a scheduling dependency. Return the new cost of
4805    a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4806
4807 static int
4808 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4809 		unsigned int)
4810 {
4811   enum attr_type attr_type;
4812
4813   /* Don't adjust costs for a pa8000 chip, also do not adjust any
4814      true dependencies as they are described with bypasses now. */
4815   if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4816     return cost;
4817
4818   if (! recog_memoized (insn))
4819     return 0;
4820
4821   attr_type = get_attr_type (insn);
4822
4823   switch (dep_type)
4824     {
4825     case REG_DEP_ANTI:
4826       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4827 	 cycles later.
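	 An illustrative, made-up example: "fadd,dbl %fr4,%fr5,%fr6"
	 followed by "fldd 0(%r1),%fr4" -- the load (INSN) overwrites
	 %fr4, which the earlier arithmetic insn (DEP_INSN) still reads.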
*/ 4828 4829 if (attr_type == TYPE_FPLOAD) 4830 { 4831 rtx pat = PATTERN (insn); 4832 rtx dep_pat = PATTERN (dep_insn); 4833 if (GET_CODE (pat) == PARALLEL) 4834 { 4835 /* This happens for the fldXs,mb patterns. */ 4836 pat = XVECEXP (pat, 0, 0); 4837 } 4838 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4839 /* If this happens, we have to extend this to schedule 4840 optimally. Return 0 for now. */ 4841 return 0; 4842 4843 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) 4844 { 4845 if (! recog_memoized (dep_insn)) 4846 return 0; 4847 switch (get_attr_type (dep_insn)) 4848 { 4849 case TYPE_FPALU: 4850 case TYPE_FPMULSGL: 4851 case TYPE_FPMULDBL: 4852 case TYPE_FPDIVSGL: 4853 case TYPE_FPDIVDBL: 4854 case TYPE_FPSQRTSGL: 4855 case TYPE_FPSQRTDBL: 4856 /* A fpload can't be issued until one cycle before a 4857 preceding arithmetic operation has finished if 4858 the target of the fpload is any of the sources 4859 (or destination) of the arithmetic operation. */ 4860 return insn_default_latency (dep_insn) - 1; 4861 4862 default: 4863 return 0; 4864 } 4865 } 4866 } 4867 else if (attr_type == TYPE_FPALU) 4868 { 4869 rtx pat = PATTERN (insn); 4870 rtx dep_pat = PATTERN (dep_insn); 4871 if (GET_CODE (pat) == PARALLEL) 4872 { 4873 /* This happens for the fldXs,mb patterns. */ 4874 pat = XVECEXP (pat, 0, 0); 4875 } 4876 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4877 /* If this happens, we have to extend this to schedule 4878 optimally. Return 0 for now. */ 4879 return 0; 4880 4881 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) 4882 { 4883 if (! recog_memoized (dep_insn)) 4884 return 0; 4885 switch (get_attr_type (dep_insn)) 4886 { 4887 case TYPE_FPDIVSGL: 4888 case TYPE_FPDIVDBL: 4889 case TYPE_FPSQRTSGL: 4890 case TYPE_FPSQRTDBL: 4891 /* An ALU flop can't be issued until two cycles before a 4892 preceding divide or sqrt operation has finished if 4893 the target of the ALU flop is any of the sources 4894 (or destination) of the divide or sqrt operation. */ 4895 return insn_default_latency (dep_insn) - 2; 4896 4897 default: 4898 return 0; 4899 } 4900 } 4901 } 4902 4903 /* For other anti dependencies, the cost is 0. */ 4904 return 0; 4905 4906 case REG_DEP_OUTPUT: 4907 /* Output dependency; DEP_INSN writes a register that INSN writes some 4908 cycles later. */ 4909 if (attr_type == TYPE_FPLOAD) 4910 { 4911 rtx pat = PATTERN (insn); 4912 rtx dep_pat = PATTERN (dep_insn); 4913 if (GET_CODE (pat) == PARALLEL) 4914 { 4915 /* This happens for the fldXs,mb patterns. */ 4916 pat = XVECEXP (pat, 0, 0); 4917 } 4918 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4919 /* If this happens, we have to extend this to schedule 4920 optimally. Return 0 for now. */ 4921 return 0; 4922 4923 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) 4924 { 4925 if (! recog_memoized (dep_insn)) 4926 return 0; 4927 switch (get_attr_type (dep_insn)) 4928 { 4929 case TYPE_FPALU: 4930 case TYPE_FPMULSGL: 4931 case TYPE_FPMULDBL: 4932 case TYPE_FPDIVSGL: 4933 case TYPE_FPDIVDBL: 4934 case TYPE_FPSQRTSGL: 4935 case TYPE_FPSQRTDBL: 4936 /* A fpload can't be issued until one cycle before a 4937 preceding arithmetic operation has finished if 4938 the target of the fpload is the destination of the 4939 arithmetic operation. 4940 4941 Exception: For PA7100LC, PA7200 and PA7300, the cost 4942 is 3 cycles, unless they bundle together. We also 4943 pay the penalty if the second insn is a fpload. 
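		 So if the producing flop has a default latency of,
		 say, three cycles, the adjustment below returns a cost
		 of 2, letting the fpload issue one cycle earlier.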
*/ 4944 return insn_default_latency (dep_insn) - 1; 4945 4946 default: 4947 return 0; 4948 } 4949 } 4950 } 4951 else if (attr_type == TYPE_FPALU) 4952 { 4953 rtx pat = PATTERN (insn); 4954 rtx dep_pat = PATTERN (dep_insn); 4955 if (GET_CODE (pat) == PARALLEL) 4956 { 4957 /* This happens for the fldXs,mb patterns. */ 4958 pat = XVECEXP (pat, 0, 0); 4959 } 4960 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4961 /* If this happens, we have to extend this to schedule 4962 optimally. Return 0 for now. */ 4963 return 0; 4964 4965 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) 4966 { 4967 if (! recog_memoized (dep_insn)) 4968 return 0; 4969 switch (get_attr_type (dep_insn)) 4970 { 4971 case TYPE_FPDIVSGL: 4972 case TYPE_FPDIVDBL: 4973 case TYPE_FPSQRTSGL: 4974 case TYPE_FPSQRTDBL: 4975 /* An ALU flop can't be issued until two cycles before a 4976 preceding divide or sqrt operation has finished if 4977 the target of the ALU flop is also the target of 4978 the divide or sqrt operation. */ 4979 return insn_default_latency (dep_insn) - 2; 4980 4981 default: 4982 return 0; 4983 } 4984 } 4985 } 4986 4987 /* For other output dependencies, the cost is 0. */ 4988 return 0; 4989 4990 default: 4991 gcc_unreachable (); 4992 } 4993 } 4994 4995 /* The 700 can only issue a single insn at a time. 4996 The 7XXX processors can issue two insns at a time. 4997 The 8000 can issue 4 insns at a time. */ 4998 static int 4999 pa_issue_rate (void) 5000 { 5001 switch (pa_cpu) 5002 { 5003 case PROCESSOR_700: return 1; 5004 case PROCESSOR_7100: return 2; 5005 case PROCESSOR_7100LC: return 2; 5006 case PROCESSOR_7200: return 2; 5007 case PROCESSOR_7300: return 2; 5008 case PROCESSOR_8000: return 4; 5009 5010 default: 5011 gcc_unreachable (); 5012 } 5013 } 5014 5015 5016 5017 /* Return any length plus adjustment needed by INSN which already has 5018 its length computed as LENGTH. Return LENGTH if no adjustment is 5019 necessary. 5020 5021 Also compute the length of an inline block move here as it is too 5022 complicated to express as a length attribute in pa.md. */ 5023 int 5024 pa_adjust_insn_length (rtx_insn *insn, int length) 5025 { 5026 rtx pat = PATTERN (insn); 5027 5028 /* If length is negative or undefined, provide initial length. */ 5029 if ((unsigned int) length >= INT_MAX) 5030 { 5031 if (GET_CODE (pat) == SEQUENCE) 5032 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0)); 5033 5034 switch (get_attr_type (insn)) 5035 { 5036 case TYPE_MILLI: 5037 length = pa_attr_length_millicode_call (insn); 5038 break; 5039 case TYPE_CALL: 5040 length = pa_attr_length_call (insn, 0); 5041 break; 5042 case TYPE_SIBCALL: 5043 length = pa_attr_length_call (insn, 1); 5044 break; 5045 case TYPE_DYNCALL: 5046 length = pa_attr_length_indirect_call (insn); 5047 break; 5048 case TYPE_SH_FUNC_ADRS: 5049 length = pa_attr_length_millicode_call (insn) + 20; 5050 break; 5051 default: 5052 gcc_unreachable (); 5053 } 5054 } 5055 5056 /* Block move pattern. */ 5057 if (NONJUMP_INSN_P (insn) 5058 && GET_CODE (pat) == PARALLEL 5059 && GET_CODE (XVECEXP (pat, 0, 0)) == SET 5060 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM 5061 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM 5062 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode 5063 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) 5064 length += compute_movmem_length (insn) - 4; 5065 /* Block clear pattern. 
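      That is, a PARALLEL whose first element sets a BLKmode MEM to
      const0_rtx, exactly the shape tested below.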
*/
5066   else if (NONJUMP_INSN_P (insn)
5067 	   && GET_CODE (pat) == PARALLEL
5068 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5069 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5070 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5071 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5072     length += compute_clrmem_length (insn) - 4;
5073   /* Conditional branch with an unfilled delay slot. */
5074   else if (JUMP_P (insn) && ! simplejump_p (insn))
5075     {
5076       /* Adjust a short backwards conditional with an unfilled delay slot. */
5077       if (GET_CODE (pat) == SET
5078 	  && length == 4
5079 	  && JUMP_LABEL (insn) != NULL_RTX
5080 	  && ! forward_branch_p (insn))
5081 	length += 4;
5082       else if (GET_CODE (pat) == PARALLEL
5083 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5084 	       && length == 4)
5085 	length += 4;
5086       /* Adjust dbra insn with short backwards conditional branch with
5087 	 unfilled delay slot -- only for case where counter is in a
5088 	 general register. */
5089       else if (GET_CODE (pat) == PARALLEL
5090 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5091 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5092 	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5093 	       && length == 4
5094 	       && ! forward_branch_p (insn))
5095 	length += 4;
5096     }
5097   return length;
5098 }
5099
5100 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5101
5102 static bool
5103 pa_print_operand_punct_valid_p (unsigned char code)
5104 {
5105   if (code == '@'
5106       || code == '#'
5107       || code == '*'
5108       || code == '^')
5109     return true;
5110
5111   return false;
5112 }
5113
5114 /* Print operand X (an rtx) in assembler syntax to file FILE.
5115    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5116    For `%' followed by punctuation, CODE is the punctuation and X is null. */
5117
5118 void
5119 pa_print_operand (FILE *file, rtx x, int code)
5120 {
5121   switch (code)
5122     {
5123     case '#':
5124       /* Output a 'nop' if there's nothing for the delay slot. */
5125       if (dbr_sequence_length () == 0)
5126 	fputs ("\n\tnop", file);
5127       return;
5128     case '*':
5129       /* Output a nullification completer if there's nothing for the
5130 	 delay slot or nullification is requested. */
5131       if (dbr_sequence_length () == 0 ||
5132 	  (final_sequence &&
5133 	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5134 	fputs (",n", file);
5135       return;
5136     case 'R':
5137       /* Print out the second register name of a register pair.
5138 	 I.e., R (6) => 7. */
5139       fputs (reg_names[REGNO (x) + 1], file);
5140       return;
5141     case 'r':
5142       /* A register or zero. */
5143       if (x == const0_rtx
5144 	  || (x == CONST0_RTX (DFmode))
5145 	  || (x == CONST0_RTX (SFmode)))
5146 	{
5147 	  fputs ("%r0", file);
5148 	  return;
5149 	}
5150       else
5151 	break;
5152     case 'f':
5153       /* A register or zero (floating point).
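	 %fr0 can stand in for zero here because, as this port relies
	 on it, PA-RISC floating-point register 0 reads as zero when
	 used as a source operand.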
*/ 5154 if (x == const0_rtx 5155 || (x == CONST0_RTX (DFmode)) 5156 || (x == CONST0_RTX (SFmode))) 5157 { 5158 fputs ("%fr0", file); 5159 return; 5160 } 5161 else 5162 break; 5163 case 'A': 5164 { 5165 rtx xoperands[2]; 5166 5167 xoperands[0] = XEXP (XEXP (x, 0), 0); 5168 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0); 5169 pa_output_global_address (file, xoperands[1], 0); 5170 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]); 5171 return; 5172 } 5173 5174 case 'C': /* Plain (C)ondition */ 5175 case 'X': 5176 switch (GET_CODE (x)) 5177 { 5178 case EQ: 5179 fputs ("=", file); break; 5180 case NE: 5181 fputs ("<>", file); break; 5182 case GT: 5183 fputs (">", file); break; 5184 case GE: 5185 fputs (">=", file); break; 5186 case GEU: 5187 fputs (">>=", file); break; 5188 case GTU: 5189 fputs (">>", file); break; 5190 case LT: 5191 fputs ("<", file); break; 5192 case LE: 5193 fputs ("<=", file); break; 5194 case LEU: 5195 fputs ("<<=", file); break; 5196 case LTU: 5197 fputs ("<<", file); break; 5198 default: 5199 gcc_unreachable (); 5200 } 5201 return; 5202 case 'N': /* Condition, (N)egated */ 5203 switch (GET_CODE (x)) 5204 { 5205 case EQ: 5206 fputs ("<>", file); break; 5207 case NE: 5208 fputs ("=", file); break; 5209 case GT: 5210 fputs ("<=", file); break; 5211 case GE: 5212 fputs ("<", file); break; 5213 case GEU: 5214 fputs ("<<", file); break; 5215 case GTU: 5216 fputs ("<<=", file); break; 5217 case LT: 5218 fputs (">=", file); break; 5219 case LE: 5220 fputs (">", file); break; 5221 case LEU: 5222 fputs (">>", file); break; 5223 case LTU: 5224 fputs (">>=", file); break; 5225 default: 5226 gcc_unreachable (); 5227 } 5228 return; 5229 /* For floating point comparisons. Note that the output 5230 predicates are the complement of the desired mode. The 5231 conditions for GT, GE, LT, LE and LTGT cause an invalid 5232 operation exception if the result is unordered and this 5233 exception is enabled in the floating-point status register. */ 5234 case 'Y': 5235 switch (GET_CODE (x)) 5236 { 5237 case EQ: 5238 fputs ("!=", file); break; 5239 case NE: 5240 fputs ("=", file); break; 5241 case GT: 5242 fputs ("!>", file); break; 5243 case GE: 5244 fputs ("!>=", file); break; 5245 case LT: 5246 fputs ("!<", file); break; 5247 case LE: 5248 fputs ("!<=", file); break; 5249 case LTGT: 5250 fputs ("!<>", file); break; 5251 case UNLE: 5252 fputs ("!?<=", file); break; 5253 case UNLT: 5254 fputs ("!?<", file); break; 5255 case UNGE: 5256 fputs ("!?>=", file); break; 5257 case UNGT: 5258 fputs ("!?>", file); break; 5259 case UNEQ: 5260 fputs ("!?=", file); break; 5261 case UNORDERED: 5262 fputs ("!?", file); break; 5263 case ORDERED: 5264 fputs ("?", file); break; 5265 default: 5266 gcc_unreachable (); 5267 } 5268 return; 5269 case 'S': /* Condition, operands are (S)wapped. */ 5270 switch (GET_CODE (x)) 5271 { 5272 case EQ: 5273 fputs ("=", file); break; 5274 case NE: 5275 fputs ("<>", file); break; 5276 case GT: 5277 fputs ("<", file); break; 5278 case GE: 5279 fputs ("<=", file); break; 5280 case GEU: 5281 fputs ("<<=", file); break; 5282 case GTU: 5283 fputs ("<<", file); break; 5284 case LT: 5285 fputs (">", file); break; 5286 case LE: 5287 fputs (">=", file); break; 5288 case LEU: 5289 fputs (">>=", file); break; 5290 case LTU: 5291 fputs (">>", file); break; 5292 default: 5293 gcc_unreachable (); 5294 } 5295 return; 5296 case 'B': /* Condition, (B)oth swapped and negate. 
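		  For example, LT prints as "<=" in this table:
		  swapping the operands turns LT into GT, and negating
		  GT yields LE.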
*/ 5297 switch (GET_CODE (x)) 5298 { 5299 case EQ: 5300 fputs ("<>", file); break; 5301 case NE: 5302 fputs ("=", file); break; 5303 case GT: 5304 fputs (">=", file); break; 5305 case GE: 5306 fputs (">", file); break; 5307 case GEU: 5308 fputs (">>", file); break; 5309 case GTU: 5310 fputs (">>=", file); break; 5311 case LT: 5312 fputs ("<=", file); break; 5313 case LE: 5314 fputs ("<", file); break; 5315 case LEU: 5316 fputs ("<<", file); break; 5317 case LTU: 5318 fputs ("<<=", file); break; 5319 default: 5320 gcc_unreachable (); 5321 } 5322 return; 5323 case 'k': 5324 gcc_assert (GET_CODE (x) == CONST_INT); 5325 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x)); 5326 return; 5327 case 'Q': 5328 gcc_assert (GET_CODE (x) == CONST_INT); 5329 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63)); 5330 return; 5331 case 'L': 5332 gcc_assert (GET_CODE (x) == CONST_INT); 5333 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31)); 5334 return; 5335 case 'o': 5336 gcc_assert (GET_CODE (x) == CONST_INT 5337 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3)); 5338 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 5339 return; 5340 case 'O': 5341 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0); 5342 fprintf (file, "%d", exact_log2 (INTVAL (x))); 5343 return; 5344 case 'p': 5345 gcc_assert (GET_CODE (x) == CONST_INT); 5346 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63)); 5347 return; 5348 case 'P': 5349 gcc_assert (GET_CODE (x) == CONST_INT); 5350 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31)); 5351 return; 5352 case 'I': 5353 if (GET_CODE (x) == CONST_INT) 5354 fputs ("i", file); 5355 return; 5356 case 'M': 5357 case 'F': 5358 switch (GET_CODE (XEXP (x, 0))) 5359 { 5360 case PRE_DEC: 5361 case PRE_INC: 5362 if (ASSEMBLER_DIALECT == 0) 5363 fputs ("s,mb", file); 5364 else 5365 fputs (",mb", file); 5366 break; 5367 case POST_DEC: 5368 case POST_INC: 5369 if (ASSEMBLER_DIALECT == 0) 5370 fputs ("s,ma", file); 5371 else 5372 fputs (",ma", file); 5373 break; 5374 case PLUS: 5375 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG 5376 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) 5377 { 5378 if (ASSEMBLER_DIALECT == 0) 5379 fputs ("x", file); 5380 } 5381 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 5382 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) 5383 { 5384 if (ASSEMBLER_DIALECT == 0) 5385 fputs ("x,s", file); 5386 else 5387 fputs (",s", file); 5388 } 5389 else if (code == 'F' && ASSEMBLER_DIALECT == 0) 5390 fputs ("s", file); 5391 break; 5392 default: 5393 if (code == 'F' && ASSEMBLER_DIALECT == 0) 5394 fputs ("s", file); 5395 break; 5396 } 5397 return; 5398 case 'G': 5399 pa_output_global_address (file, x, 0); 5400 return; 5401 case 'H': 5402 pa_output_global_address (file, x, 1); 5403 return; 5404 case 0: /* Don't do anything special */ 5405 break; 5406 case 'Z': 5407 { 5408 unsigned op[3]; 5409 compute_zdepwi_operands (INTVAL (x), op); 5410 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); 5411 return; 5412 } 5413 case 'z': 5414 { 5415 unsigned op[3]; 5416 compute_zdepdi_operands (INTVAL (x), op); 5417 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); 5418 return; 5419 } 5420 case 'c': 5421 /* We can get here from a .vtable_inherit due to our 5422 CONSTANT_ADDRESS_P rejecting perfectly good constant 5423 addresses. 
*/ 5424 break; 5425 default: 5426 gcc_unreachable (); 5427 } 5428 if (GET_CODE (x) == REG) 5429 { 5430 fputs (reg_names [REGNO (x)], file); 5431 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4) 5432 { 5433 fputs ("R", file); 5434 return; 5435 } 5436 if (FP_REG_P (x) 5437 && GET_MODE_SIZE (GET_MODE (x)) <= 4 5438 && (REGNO (x) & 1) == 0) 5439 fputs ("L", file); 5440 } 5441 else if (GET_CODE (x) == MEM) 5442 { 5443 int size = GET_MODE_SIZE (GET_MODE (x)); 5444 rtx base = NULL_RTX; 5445 switch (GET_CODE (XEXP (x, 0))) 5446 { 5447 case PRE_DEC: 5448 case POST_DEC: 5449 base = XEXP (XEXP (x, 0), 0); 5450 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]); 5451 break; 5452 case PRE_INC: 5453 case POST_INC: 5454 base = XEXP (XEXP (x, 0), 0); 5455 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]); 5456 break; 5457 case PLUS: 5458 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) 5459 fprintf (file, "%s(%s)", 5460 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))], 5461 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]); 5462 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) 5463 fprintf (file, "%s(%s)", 5464 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))], 5465 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]); 5466 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG 5467 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) 5468 { 5469 /* Because the REG_POINTER flag can get lost during reload, 5470 pa_legitimate_address_p canonicalizes the order of the 5471 index and base registers in the combined move patterns. */ 5472 rtx base = XEXP (XEXP (x, 0), 1); 5473 rtx index = XEXP (XEXP (x, 0), 0); 5474 5475 fprintf (file, "%s(%s)", 5476 reg_names [REGNO (index)], reg_names [REGNO (base)]); 5477 } 5478 else 5479 output_address (GET_MODE (x), XEXP (x, 0)); 5480 break; 5481 default: 5482 output_address (GET_MODE (x), XEXP (x, 0)); 5483 break; 5484 } 5485 } 5486 else 5487 output_addr_const (file, x); 5488 } 5489 5490 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */ 5491 5492 void 5493 pa_output_global_address (FILE *file, rtx x, int round_constant) 5494 { 5495 5496 /* Imagine (high (const (plus ...))). */ 5497 if (GET_CODE (x) == HIGH) 5498 x = XEXP (x, 0); 5499 5500 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode)) 5501 output_addr_const (file, x); 5502 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic) 5503 { 5504 output_addr_const (file, x); 5505 fputs ("-$global$", file); 5506 } 5507 else if (GET_CODE (x) == CONST) 5508 { 5509 const char *sep = ""; 5510 int offset = 0; /* assembler wants -$global$ at end */ 5511 rtx base = NULL_RTX; 5512 5513 switch (GET_CODE (XEXP (XEXP (x, 0), 0))) 5514 { 5515 case LABEL_REF: 5516 case SYMBOL_REF: 5517 base = XEXP (XEXP (x, 0), 0); 5518 output_addr_const (file, base); 5519 break; 5520 case CONST_INT: 5521 offset = INTVAL (XEXP (XEXP (x, 0), 0)); 5522 break; 5523 default: 5524 gcc_unreachable (); 5525 } 5526 5527 switch (GET_CODE (XEXP (XEXP (x, 0), 1))) 5528 { 5529 case LABEL_REF: 5530 case SYMBOL_REF: 5531 base = XEXP (XEXP (x, 0), 1); 5532 output_addr_const (file, base); 5533 break; 5534 case CONST_INT: 5535 offset = INTVAL (XEXP (XEXP (x, 0), 1)); 5536 break; 5537 default: 5538 gcc_unreachable (); 5539 } 5540 5541 /* How bogus. The compiler is apparently responsible for 5542 rounding the constant if it uses an LR field selector. 5543 5544 The linker and/or assembler seem a better place since 5545 they have to do this kind of thing already. 
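      (The rounding below snaps the offset to the nearest 8k boundary:
      e.g. an offset of 0x2345 becomes 0x2000, while 0x3345 becomes
      0x4000.)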
5546 5547 If we fail to do this, HP's optimizing linker may eliminate 5548 an addil, but not update the ldw/stw/ldo instruction that 5549 uses the result of the addil. */ 5550 if (round_constant) 5551 offset = ((offset + 0x1000) & ~0x1fff); 5552 5553 switch (GET_CODE (XEXP (x, 0))) 5554 { 5555 case PLUS: 5556 if (offset < 0) 5557 { 5558 offset = -offset; 5559 sep = "-"; 5560 } 5561 else 5562 sep = "+"; 5563 break; 5564 5565 case MINUS: 5566 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF); 5567 sep = "-"; 5568 break; 5569 5570 default: 5571 gcc_unreachable (); 5572 } 5573 5574 if (!read_only_operand (base, VOIDmode) && !flag_pic) 5575 fputs ("-$global$", file); 5576 if (offset) 5577 fprintf (file, "%s%d", sep, offset); 5578 } 5579 else 5580 output_addr_const (file, x); 5581 } 5582 5583 /* Output boilerplate text to appear at the beginning of the file. 5584 There are several possible versions. */ 5585 #define aputs(x) fputs(x, asm_out_file) 5586 static inline void 5587 pa_file_start_level (void) 5588 { 5589 if (TARGET_64BIT) 5590 aputs ("\t.LEVEL 2.0w\n"); 5591 else if (TARGET_PA_20) 5592 aputs ("\t.LEVEL 2.0\n"); 5593 else if (TARGET_PA_11) 5594 aputs ("\t.LEVEL 1.1\n"); 5595 else 5596 aputs ("\t.LEVEL 1.0\n"); 5597 } 5598 5599 static inline void 5600 pa_file_start_space (int sortspace) 5601 { 5602 aputs ("\t.SPACE $PRIVATE$"); 5603 if (sortspace) 5604 aputs (",SORT=16"); 5605 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"); 5606 if (flag_tm) 5607 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31"); 5608 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82" 5609 "\n\t.SPACE $TEXT$"); 5610 if (sortspace) 5611 aputs (",SORT=8"); 5612 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44" 5613 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n"); 5614 } 5615 5616 static inline void 5617 pa_file_start_file (int want_version) 5618 { 5619 if (write_symbols != NO_DEBUG) 5620 { 5621 output_file_directive (asm_out_file, main_input_filename); 5622 if (want_version) 5623 aputs ("\t.version\t\"01.01\"\n"); 5624 } 5625 } 5626 5627 static inline void 5628 pa_file_start_mcount (const char *aswhat) 5629 { 5630 if (profile_flag) 5631 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat); 5632 } 5633 5634 static void 5635 pa_elf_file_start (void) 5636 { 5637 pa_file_start_level (); 5638 pa_file_start_mcount ("ENTRY"); 5639 pa_file_start_file (0); 5640 } 5641 5642 static void 5643 pa_som_file_start (void) 5644 { 5645 pa_file_start_level (); 5646 pa_file_start_space (0); 5647 aputs ("\t.IMPORT $global$,DATA\n" 5648 "\t.IMPORT $$dyncall,MILLICODE\n"); 5649 pa_file_start_mcount ("CODE"); 5650 pa_file_start_file (0); 5651 } 5652 5653 static void 5654 pa_linux_file_start (void) 5655 { 5656 pa_file_start_file (0); 5657 pa_file_start_level (); 5658 pa_file_start_mcount ("CODE"); 5659 } 5660 5661 static void 5662 pa_hpux64_gas_file_start (void) 5663 { 5664 pa_file_start_level (); 5665 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE 5666 if (profile_flag) 5667 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function"); 5668 #endif 5669 pa_file_start_file (1); 5670 } 5671 5672 static void 5673 pa_hpux64_hpas_file_start (void) 5674 { 5675 pa_file_start_level (); 5676 pa_file_start_space (1); 5677 pa_file_start_mcount ("CODE"); 5678 pa_file_start_file (0); 5679 } 5680 #undef aputs 5681 5682 /* Search the deferred plabel list for SYMBOL and return its internal 5683 label. If an entry for SYMBOL is not found, a new entry is created. 
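   (A "plabel" here is the procedure label descriptor through which
   indirect calls are made on this target; the internal label returned
   below marks the word that will hold SYMBOL's address.)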
*/ 5684 5685 rtx 5686 pa_get_deferred_plabel (rtx symbol) 5687 { 5688 const char *fname = XSTR (symbol, 0); 5689 size_t i; 5690 5691 /* See if we have already put this function on the list of deferred 5692 plabels. This list is generally small, so a linear search is not 5693 too ugly. If it proves too slow, replace it with something faster. */ 5694 for (i = 0; i < n_deferred_plabels; i++) 5695 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0) 5696 break; 5697 5698 /* If the deferred plabel list is empty, or this entry was not found 5699 on the list, create a new entry on the list. */ 5700 if (deferred_plabels == NULL || i == n_deferred_plabels) 5701 { 5702 tree id; 5703 5704 if (deferred_plabels == 0) 5705 deferred_plabels = ggc_alloc<deferred_plabel> (); 5706 else 5707 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel, 5708 deferred_plabels, 5709 n_deferred_plabels + 1); 5710 5711 i = n_deferred_plabels++; 5712 deferred_plabels[i].internal_label = gen_label_rtx (); 5713 deferred_plabels[i].symbol = symbol; 5714 5715 /* Gross. We have just implicitly taken the address of this 5716 function. Mark it in the same manner as assemble_name. */ 5717 id = maybe_get_identifier (targetm.strip_name_encoding (fname)); 5718 if (id) 5719 mark_referenced (id); 5720 } 5721 5722 return deferred_plabels[i].internal_label; 5723 } 5724 5725 static void 5726 output_deferred_plabels (void) 5727 { 5728 size_t i; 5729 5730 /* If we have some deferred plabels, then we need to switch into the 5731 data or readonly data section, and align it to a 4 byte (8 byte in 5732 the 64-bit runtime) boundary before outputting the deferred plabels. */ 5733 if (n_deferred_plabels) 5734 { 5735 switch_to_section (flag_pic ? data_section : readonly_data_section); 5736 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); 5737 } 5738 5739 /* Now output the deferred plabels. */ 5740 for (i = 0; i < n_deferred_plabels; i++) 5741 { 5742 targetm.asm_out.internal_label (asm_out_file, "L", 5743 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label)); 5744 assemble_integer (deferred_plabels[i].symbol, 5745 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1); 5746 } 5747 } 5748 5749 /* Initialize optabs to point to emulation routines.
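   For example, when HPUX_LONG_DOUBLE_LIBRARY is true, a TFmode addition is not expanded inline but becomes a libcall to _U_Qfadd, and similarly for the other quad-precision operations registered below.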
*/ 5750 5751 static void 5752 pa_init_libfuncs (void) 5753 { 5754 if (HPUX_LONG_DOUBLE_LIBRARY) 5755 { 5756 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd"); 5757 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub"); 5758 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy"); 5759 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv"); 5760 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin"); 5761 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax"); 5762 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt"); 5763 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs"); 5764 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg"); 5765 5766 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq"); 5767 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne"); 5768 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt"); 5769 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge"); 5770 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt"); 5771 set_optab_libfunc (le_optab, TFmode, "_U_Qfle"); 5772 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord"); 5773 5774 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad"); 5775 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad"); 5776 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl"); 5777 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl"); 5778 5779 set_conv_libfunc (sfix_optab, SImode, TFmode, 5780 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl" 5781 : "_U_Qfcnvfxt_quad_to_sgl"); 5782 set_conv_libfunc (sfix_optab, DImode, TFmode, 5783 "_U_Qfcnvfxt_quad_to_dbl"); 5784 set_conv_libfunc (ufix_optab, SImode, TFmode, 5785 "_U_Qfcnvfxt_quad_to_usgl"); 5786 set_conv_libfunc (ufix_optab, DImode, TFmode, 5787 "_U_Qfcnvfxt_quad_to_udbl"); 5788 5789 set_conv_libfunc (sfloat_optab, TFmode, SImode, 5790 "_U_Qfcnvxf_sgl_to_quad"); 5791 set_conv_libfunc (sfloat_optab, TFmode, DImode, 5792 "_U_Qfcnvxf_dbl_to_quad"); 5793 set_conv_libfunc (ufloat_optab, TFmode, SImode, 5794 "_U_Qfcnvxf_usgl_to_quad"); 5795 set_conv_libfunc (ufloat_optab, TFmode, DImode, 5796 "_U_Qfcnvxf_udbl_to_quad"); 5797 } 5798 5799 if (TARGET_SYNC_LIBCALL) 5800 init_sync_libfuncs (8); 5801 } 5802 5803 /* HP's millicode routines mean something special to the assembler. 5804 Keep track of which ones we have used. */ 5805 5806 enum millicodes { remI, remU, divI, divU, mulI, end1000 }; 5807 static void import_milli (enum millicodes); 5808 static char imported[(int) end1000]; 5809 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"}; 5810 static const char import_string[] = ".IMPORT $$....,MILLICODE"; 5811 #define MILLI_START 10 5812 5813 static void 5814 import_milli (enum millicodes code) 5815 { 5816 char str[sizeof (import_string)]; 5817 5818 if (!imported[(int) code]) 5819 { 5820 imported[(int) code] = 1; 5821 strcpy (str, import_string); 5822 strncpy (str + MILLI_START, milli_names[(int) code], 4); 5823 output_asm_insn (str, 0); 5824 } 5825 } 5826 5827 /* The register constraints have put the operands and return value in 5828 the proper registers. */ 5829 5830 const char * 5831 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn) 5832 { 5833 import_milli (mulI); 5834 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI")); 5835 } 5836 5837 /* Emit the rtl for doing a division by a constant. */ 5838 5839 /* Do magic division millicodes exist for this value? 
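   A nonzero entry in the table below marks a divisor with its own $$divI_<n>/$$divU_<n> millicode entry point (divisors 3, 5, 6, 7, 9, 10, 12, 14 and 15); powers of two are zero here, presumably because those divisions are done with shifts and never need the millicode path.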
*/ 5840 const int pa_magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1}; 5841 5842 /* We'll use an array to keep track of the magic millicodes and 5843 whether or not we've used them already. [n][0] is signed, [n][1] is 5844 unsigned. */ 5845 5846 static int div_milli[16][2]; 5847 5848 int 5849 pa_emit_hpdiv_const (rtx *operands, int unsignedp) 5850 { 5851 if (GET_CODE (operands[2]) == CONST_INT 5852 && INTVAL (operands[2]) > 0 5853 && INTVAL (operands[2]) < 16 5854 && pa_magic_milli[INTVAL (operands[2])]) 5855 { 5856 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31); 5857 5858 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]); 5859 emit 5860 (gen_rtx_PARALLEL 5861 (VOIDmode, 5862 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29), 5863 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, 5864 SImode, 5865 gen_rtx_REG (SImode, 26), 5866 operands[2])), 5867 gen_rtx_CLOBBER (VOIDmode, operands[4]), 5868 gen_rtx_CLOBBER (VOIDmode, operands[3]), 5869 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)), 5870 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)), 5871 gen_rtx_CLOBBER (VOIDmode, ret)))); 5872 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29)); 5873 return 1; 5874 } 5875 return 0; 5876 } 5877 5878 const char * 5879 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn) 5880 { 5881 HOST_WIDE_INT divisor; 5882 5883 /* If the divisor is a constant, try to use one of the special 5884 opcodes. */ 5885 if (GET_CODE (operands[0]) == CONST_INT) 5886 { 5887 static char buf[100]; 5888 divisor = INTVAL (operands[0]); 5889 if (!div_milli[divisor][unsignedp]) 5890 { 5891 div_milli[divisor][unsignedp] = 1; 5892 if (unsignedp) 5893 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands); 5894 else 5895 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands); 5896 } 5897 if (unsignedp) 5898 { 5899 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC, 5900 INTVAL (operands[0])); 5901 return pa_output_millicode_call (insn, 5902 gen_rtx_SYMBOL_REF (SImode, buf)); 5903 } 5904 else 5905 { 5906 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC, 5907 INTVAL (operands[0])); 5908 return pa_output_millicode_call (insn, 5909 gen_rtx_SYMBOL_REF (SImode, buf)); 5910 } 5911 } 5912 /* Divisor isn't a special constant. */ 5913 else 5914 { 5915 if (unsignedp) 5916 { 5917 import_milli (divU); 5918 return pa_output_millicode_call (insn, 5919 gen_rtx_SYMBOL_REF (SImode, "$$divU")); 5920 } 5921 else 5922 { 5923 import_milli (divI); 5924 return pa_output_millicode_call (insn, 5925 gen_rtx_SYMBOL_REF (SImode, "$$divI")); 5926 } 5927 } 5928 } 5929 5930 /* Output a $$rem millicode to do mod. */ 5931 5932 const char * 5933 pa_output_mod_insn (int unsignedp, rtx_insn *insn) 5934 { 5935 if (unsignedp) 5936 { 5937 import_milli (remU); 5938 return pa_output_millicode_call (insn, 5939 gen_rtx_SYMBOL_REF (SImode, "$$remU")); 5940 } 5941 else 5942 { 5943 import_milli (remI); 5944 return pa_output_millicode_call (insn, 5945 gen_rtx_SYMBOL_REF (SImode, "$$remI")); 5946 } 5947 } 5948 5949 void 5950 pa_output_arg_descriptor (rtx_insn *call_insn) 5951 { 5952 const char *arg_regs[4]; 5953 machine_mode arg_mode; 5954 rtx link; 5955 int i, output_flag = 0; 5956 int regno; 5957 5958 /* We neither need nor want argument location descriptors for the 5959 64-bit runtime environment or the ELF32 environment.
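   For the 32-bit SOM runtime, a call passing two integer arguments in %r26 and %r25 would be annotated `.CALL ARGW0=GR,ARGW1=GR' (an illustrative example of the output built below), which lets the linker insert argument relocation stubs when caller and callee disagree about argument locations.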
*/ 5960 if (TARGET_64BIT || TARGET_ELF32) 5961 return; 5962 5963 for (i = 0; i < 4; i++) 5964 arg_regs[i] = 0; 5965 5966 /* Specify explicitly that no argument relocations should take place 5967 if using the portable runtime calling conventions. */ 5968 if (TARGET_PORTABLE_RUNTIME) 5969 { 5970 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n", 5971 asm_out_file); 5972 return; 5973 } 5974 5975 gcc_assert (CALL_P (call_insn)); 5976 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); 5977 link; link = XEXP (link, 1)) 5978 { 5979 rtx use = XEXP (link, 0); 5980 5981 if (! (GET_CODE (use) == USE 5982 && GET_CODE (XEXP (use, 0)) == REG 5983 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) 5984 continue; 5985 5986 arg_mode = GET_MODE (XEXP (use, 0)); 5987 regno = REGNO (XEXP (use, 0)); 5988 if (regno >= 23 && regno <= 26) 5989 { 5990 arg_regs[26 - regno] = "GR"; 5991 if (arg_mode == DImode) 5992 arg_regs[25 - regno] = "GR"; 5993 } 5994 else if (regno >= 32 && regno <= 39) 5995 { 5996 if (arg_mode == SFmode) 5997 arg_regs[(regno - 32) / 2] = "FR"; 5998 else 5999 { 6000 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED 6001 arg_regs[(regno - 34) / 2] = "FR"; 6002 arg_regs[(regno - 34) / 2 + 1] = "FU"; 6003 #else 6004 arg_regs[(regno - 34) / 2] = "FU"; 6005 arg_regs[(regno - 34) / 2 + 1] = "FR"; 6006 #endif 6007 } 6008 } 6009 } 6010 fputs ("\t.CALL ", asm_out_file); 6011 for (i = 0; i < 4; i++) 6012 { 6013 if (arg_regs[i]) 6014 { 6015 if (output_flag++) 6016 fputc (',', asm_out_file); 6017 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]); 6018 } 6019 } 6020 fputc ('\n', asm_out_file); 6021 } 6022 6023 /* Inform reload about cases where moving X with a mode MODE to or from 6024 a register in RCLASS requires an extra scratch or immediate register. 6025 Return the class needed for the immediate register. */ 6026 6027 static reg_class_t 6028 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, 6029 machine_mode mode, secondary_reload_info *sri) 6030 { 6031 int regno; 6032 enum reg_class rclass = (enum reg_class) rclass_i; 6033 6034 /* Handle the easy stuff first. */ 6035 if (rclass == R1_REGS) 6036 return NO_REGS; 6037 6038 if (REG_P (x)) 6039 { 6040 regno = REGNO (x); 6041 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER) 6042 return NO_REGS; 6043 } 6044 else 6045 regno = -1; 6046 6047 /* If we have something like (mem (mem (...))), we can safely assume the 6048 inner MEM will end up in a general register after reloading, so there's 6049 no need for a secondary reload. */ 6050 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM) 6051 return NO_REGS; 6052 6053 /* Trying to load a constant into an FP register during PIC code 6054 generation requires %r1 as a scratch register. For float modes, 6055 the only legitimate constant is CONST0_RTX. However, there are 6056 a few patterns that accept constant double operands.
*/ 6057 if (flag_pic 6058 && FP_REG_CLASS_P (rclass) 6059 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)) 6060 { 6061 switch (mode) 6062 { 6063 case E_SImode: 6064 sri->icode = CODE_FOR_reload_insi_r1; 6065 break; 6066 6067 case E_DImode: 6068 sri->icode = CODE_FOR_reload_indi_r1; 6069 break; 6070 6071 case E_SFmode: 6072 sri->icode = CODE_FOR_reload_insf_r1; 6073 break; 6074 6075 case E_DFmode: 6076 sri->icode = CODE_FOR_reload_indf_r1; 6077 break; 6078 6079 default: 6080 gcc_unreachable (); 6081 } 6082 return NO_REGS; 6083 } 6084 6085 /* Secondary reloads of symbolic expressions require %r1 as a scratch 6086 register when we're generating PIC code or when the operand isn't 6087 readonly. */ 6088 if (pa_symbolic_expression_p (x)) 6089 { 6090 if (GET_CODE (x) == HIGH) 6091 x = XEXP (x, 0); 6092 6093 if (flag_pic || !read_only_operand (x, VOIDmode)) 6094 { 6095 switch (mode) 6096 { 6097 case E_SImode: 6098 sri->icode = CODE_FOR_reload_insi_r1; 6099 break; 6100 6101 case E_DImode: 6102 sri->icode = CODE_FOR_reload_indi_r1; 6103 break; 6104 6105 default: 6106 gcc_unreachable (); 6107 } 6108 return NO_REGS; 6109 } 6110 } 6111 6112 /* Profiling showed the PA port spends about 1.3% of its compilation 6113 time in true_regnum from calls inside pa_secondary_reload_class. */ 6114 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG) 6115 regno = true_regnum (x); 6116 6117 /* Handle reloads for floating point loads and stores. */ 6118 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1) 6119 && FP_REG_CLASS_P (rclass)) 6120 { 6121 if (MEM_P (x)) 6122 { 6123 x = XEXP (x, 0); 6124 6125 /* We don't need a secondary reload for indexed memory addresses. 6126 6127 When INT14_OK_STRICT is true, it might appear that we could 6128 directly allow register indirect memory addresses. However, 6129 this doesn't work because we don't support SUBREGs in 6130 floating-point register copies and reload doesn't tell us 6131 when it's going to use a SUBREG. */ 6132 if (IS_INDEX_ADDR_P (x)) 6133 return NO_REGS; 6134 } 6135 6136 /* Request a secondary reload with a general scratch register 6137 for everything else. ??? Could symbolic operands be handled 6138 directly when generating non-pic PA 2.0 code? */ 6139 sri->icode = (in_p 6140 ? direct_optab_handler (reload_in_optab, mode) 6141 : direct_optab_handler (reload_out_optab, mode)); 6142 return NO_REGS; 6143 } 6144 6145 /* A SAR<->FP register copy requires an intermediate general register 6146 and secondary memory. We need a secondary reload with a general 6147 scratch register for spills. */ 6148 if (rclass == SHIFT_REGS) 6149 { 6150 /* Handle spill. */ 6151 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0) 6152 { 6153 sri->icode = (in_p 6154 ? direct_optab_handler (reload_in_optab, mode) 6155 : direct_optab_handler (reload_out_optab, mode)); 6156 return NO_REGS; 6157 } 6158 6159 /* Handle FP copy. */ 6160 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno))) 6161 return GENERAL_REGS; 6162 } 6163 6164 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER 6165 && REGNO_REG_CLASS (regno) == SHIFT_REGS 6166 && FP_REG_CLASS_P (rclass)) 6167 return GENERAL_REGS; 6168 6169 return NO_REGS; 6170 } 6171 6172 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. 
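   This simply defers to the target's PA_SECONDARY_MEMORY_NEEDED macro when one is defined, and answers false otherwise; presumably the macro is how configurations force copies between register classes with no direct move path to go through memory.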
*/ 6173 6174 static bool 6175 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED, 6176 reg_class_t class1 ATTRIBUTE_UNUSED, 6177 reg_class_t class2 ATTRIBUTE_UNUSED) 6178 { 6179 #ifdef PA_SECONDARY_MEMORY_NEEDED 6180 return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2); 6181 #else 6182 return false; 6183 #endif 6184 } 6185 6186 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer 6187 is only marked as live on entry by df-scan when it is a fixed 6188 register. It isn't a fixed register in the 64-bit runtime, 6189 so we need to mark it here. */ 6190 6191 static void 6192 pa_extra_live_on_entry (bitmap regs) 6193 { 6194 if (TARGET_64BIT) 6195 bitmap_set_bit (regs, ARG_POINTER_REGNUM); 6196 } 6197 6198 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile 6199 to prevent it from being deleted. */ 6200 6201 rtx 6202 pa_eh_return_handler_rtx (void) 6203 { 6204 rtx tmp; 6205 6206 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx, 6207 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20)); 6208 tmp = gen_rtx_MEM (word_mode, tmp); 6209 tmp->volatil = 1; 6210 return tmp; 6211 } 6212 6213 /* In the 32-bit runtime, arguments larger than eight bytes are passed 6214 by invisible reference. As a GCC extension, we also pass anything 6215 with a zero or variable size by reference. 6216 6217 The 64-bit runtime does not describe passing any types by invisible 6218 reference. The internals of GCC can't currently handle passing 6219 empty structures, and zero or variable length arrays when they are 6220 not passed entirely on the stack or by reference. Thus, as a GCC 6221 extension, we pass these types by reference. The HP compiler doesn't 6222 support these types, so hopefully there shouldn't be any compatibility 6223 issues. This may have to be revisited when HP releases a C99 compiler 6224 or updates the ABI. */ 6225 6226 static bool 6227 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, 6228 machine_mode mode, const_tree type, 6229 bool named ATTRIBUTE_UNUSED) 6230 { 6231 HOST_WIDE_INT size; 6232 6233 if (type) 6234 size = int_size_in_bytes (type); 6235 else 6236 size = GET_MODE_SIZE (mode); 6237 6238 if (TARGET_64BIT) 6239 return size <= 0; 6240 else 6241 return size <= 0 || size > 8; 6242 } 6243 6244 /* Implement TARGET_FUNCTION_ARG_PADDING. */ 6245 6246 static pad_direction 6247 pa_function_arg_padding (machine_mode mode, const_tree type) 6248 { 6249 if (mode == BLKmode 6250 || (TARGET_64BIT 6251 && type 6252 && (AGGREGATE_TYPE_P (type) 6253 || TREE_CODE (type) == COMPLEX_TYPE 6254 || TREE_CODE (type) == VECTOR_TYPE))) 6255 { 6256 /* Return PAD_NONE if justification is not required. */ 6257 if (type 6258 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 6259 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0) 6260 return PAD_NONE; 6261 6262 /* The directions set here are ignored when a BLKmode argument larger 6263 than a word is placed in a register. Different code is used for 6264 the stack and registers. This makes it difficult to have a 6265 consistent data representation for both the stack and registers. 6266 For both runtimes, the justification and padding for arguments on 6267 the stack and in registers should be identical. */ 6268 if (TARGET_64BIT) 6269 /* The 64-bit runtime specifies left justification for aggregates. */ 6270 return PAD_UPWARD; 6271 else 6272 /* The 32-bit runtime architecture specifies right justification. 6273 When the argument is passed on the stack, the argument is padded 6274 with garbage on the left. 
The HP compiler pads with zeros. */ 6275 return PAD_DOWNWARD; 6276 } 6277 6278 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY) 6279 return PAD_DOWNWARD; 6280 else 6281 return PAD_NONE; 6282 } 6283 6284 6285 /* Do what is necessary for `va_start'. We look at the current function 6286 to determine if stdargs or varargs is used and fill in an initial 6287 va_list. A pointer to this constructor is returned. */ 6288 6289 static rtx 6290 hppa_builtin_saveregs (void) 6291 { 6292 rtx offset, dest; 6293 tree fntype = TREE_TYPE (current_function_decl); 6294 int argadj = ((!stdarg_p (fntype)) 6295 ? UNITS_PER_WORD : 0); 6296 6297 if (argadj) 6298 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj); 6299 else 6300 offset = crtl->args.arg_offset_rtx; 6301 6302 if (TARGET_64BIT) 6303 { 6304 int i, off; 6305 6306 /* Adjust for varargs/stdarg differences. */ 6307 if (argadj) 6308 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj); 6309 else 6310 offset = crtl->args.arg_offset_rtx; 6311 6312 /* We need to save %r26 .. %r19 inclusive starting at offset -64 6313 from the incoming arg pointer and growing to larger addresses. */ 6314 for (i = 26, off = -64; i >= 19; i--, off += 8) 6315 emit_move_insn (gen_rtx_MEM (word_mode, 6316 plus_constant (Pmode, 6317 arg_pointer_rtx, off)), 6318 gen_rtx_REG (word_mode, i)); 6319 6320 /* The incoming args pointer points just beyond the flushback area; 6321 normally this is not a serious concern. However, when we are doing 6322 varargs/stdargs we want to make the arg pointer point to the start 6323 of the incoming argument area. */ 6324 emit_move_insn (virtual_incoming_args_rtx, 6325 plus_constant (Pmode, arg_pointer_rtx, -64)); 6326 6327 /* Now return a pointer to the first anonymous argument. */ 6328 return copy_to_reg (expand_binop (Pmode, add_optab, 6329 virtual_incoming_args_rtx, 6330 offset, 0, 0, OPTAB_LIB_WIDEN)); 6331 } 6332 6333 /* Store general registers on the stack. */ 6334 dest = gen_rtx_MEM (BLKmode, 6335 plus_constant (Pmode, crtl->args.internal_arg_pointer, 6336 -16)); 6337 set_mem_alias_set (dest, get_varargs_alias_set ()); 6338 set_mem_align (dest, BITS_PER_WORD); 6339 move_block_from_reg (23, dest, 4); 6340 6341 /* move_block_from_reg will emit code to store the argument registers 6342 individually as scalar stores. 6343 6344 However, other insns may later load from the same addresses for 6345 a structure load (passing a struct to a varargs routine). 6346 6347 The alias code assumes that such aliasing can never happen, so we 6348 have to keep memory referencing insns from moving up beyond the 6349 last argument register store. So we emit a blockage insn here. */ 6350 emit_insn (gen_blockage ()); 6351 6352 return copy_to_reg (expand_binop (Pmode, add_optab, 6353 crtl->args.internal_arg_pointer, 6354 offset, 0, 0, OPTAB_LIB_WIDEN)); 6355 } 6356 6357 static void 6358 hppa_va_start (tree valist, rtx nextarg) 6359 { 6360 nextarg = expand_builtin_saveregs (); 6361 std_expand_builtin_va_start (valist, nextarg); 6362 } 6363 6364 static tree 6365 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, 6366 gimple_seq *post_p) 6367 { 6368 if (TARGET_64BIT) 6369 { 6370 /* Args grow upward. We can use the generic routines. 
*/ 6371 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 6372 } 6373 else /* !TARGET_64BIT */ 6374 { 6375 tree ptr = build_pointer_type (type); 6376 tree valist_type; 6377 tree t, u; 6378 unsigned int size, ofs; 6379 bool indirect; 6380 6381 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0); 6382 if (indirect) 6383 { 6384 type = ptr; 6385 ptr = build_pointer_type (type); 6386 } 6387 size = int_size_in_bytes (type); 6388 valist_type = TREE_TYPE (valist); 6389 6390 /* Args grow down. Not handled by generic routines. */ 6391 6392 u = fold_convert (sizetype, size_in_bytes (type)); 6393 u = fold_build1 (NEGATE_EXPR, sizetype, u); 6394 t = fold_build_pointer_plus (valist, u); 6395 6396 /* Align to 4 or 8 byte boundary depending on argument size. */ 6397 6398 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4)); 6399 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u); 6400 t = fold_convert (valist_type, t); 6401 6402 t = build2 (MODIFY_EXPR, valist_type, valist, t); 6403 6404 ofs = (8 - size) % 4; 6405 if (ofs != 0) 6406 t = fold_build_pointer_plus_hwi (t, ofs); 6407 6408 t = fold_convert (ptr, t); 6409 t = build_va_arg_indirect_ref (t); 6410 6411 if (indirect) 6412 t = build_va_arg_indirect_ref (t); 6413 6414 return t; 6415 } 6416 } 6417 6418 /* True if MODE is valid for the target. By "valid", we mean able to 6419 be manipulated in non-trivial ways. In particular, this means all 6420 the arithmetic is supported. 6421 6422 Currently, TImode is not valid as the HP 64-bit runtime documentation 6423 doesn't document the alignment and calling conventions for this type. 6424 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and 6425 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */ 6426 6427 static bool 6428 pa_scalar_mode_supported_p (scalar_mode mode) 6429 { 6430 int precision = GET_MODE_PRECISION (mode); 6431 6432 switch (GET_MODE_CLASS (mode)) 6433 { 6434 case MODE_PARTIAL_INT: 6435 case MODE_INT: 6436 if (precision == CHAR_TYPE_SIZE) 6437 return true; 6438 if (precision == SHORT_TYPE_SIZE) 6439 return true; 6440 if (precision == INT_TYPE_SIZE) 6441 return true; 6442 if (precision == LONG_TYPE_SIZE) 6443 return true; 6444 if (precision == LONG_LONG_TYPE_SIZE) 6445 return true; 6446 return false; 6447 6448 case MODE_FLOAT: 6449 if (precision == FLOAT_TYPE_SIZE) 6450 return true; 6451 if (precision == DOUBLE_TYPE_SIZE) 6452 return true; 6453 if (precision == LONG_DOUBLE_TYPE_SIZE) 6454 return true; 6455 return false; 6456 6457 case MODE_DECIMAL_FLOAT: 6458 return false; 6459 6460 default: 6461 gcc_unreachable (); 6462 } 6463 } 6464 6465 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and 6466 it branches into the delay slot. Otherwise, return FALSE. */ 6467 6468 static bool 6469 branch_to_delay_slot_p (rtx_insn *insn) 6470 { 6471 rtx_insn *jump_insn; 6472 6473 if (dbr_sequence_length ()) 6474 return FALSE; 6475 6476 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn)); 6477 while (insn) 6478 { 6479 insn = next_active_insn (insn); 6480 if (jump_insn == insn) 6481 return TRUE; 6482 6483 /* We can't rely on the length of asms. So, we return FALSE when 6484 the branch is followed by an asm. */ 6485 if (!insn 6486 || GET_CODE (PATTERN (insn)) == ASM_INPUT 6487 || asm_noperands (PATTERN (insn)) >= 0 6488 || get_attr_length (insn) > 0) 6489 break; 6490 } 6491 6492 return FALSE; 6493 } 6494 6495 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6496 6497 This occurs when INSN has an unfilled delay slot and is followed 6498 by an asm. Disaster can occur if the asm is empty and the jump 6499 branches into the delay slot. So, we add a nop in the delay slot 6500 when this occurs. */ 6501 6502 static bool 6503 branch_needs_nop_p (rtx_insn *insn) 6504 { 6505 rtx_insn *jump_insn; 6506 6507 if (dbr_sequence_length ()) 6508 return FALSE; 6509 6510 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn)); 6511 while (insn) 6512 { 6513 insn = next_active_insn (insn); 6514 if (!insn || jump_insn == insn) 6515 return TRUE; 6516 6517 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT 6518 || asm_noperands (PATTERN (insn)) >= 0) 6519 && get_attr_length (insn) > 0) 6520 break; 6521 } 6522 6523 return FALSE; 6524 } 6525 6526 /* Return TRUE if INSN, a forward jump insn, can use nullification 6527 to skip the following instruction. This avoids an extra cycle due 6528 to a mis-predicted branch when we fall through. */ 6529 6530 static bool 6531 use_skip_p (rtx_insn *insn) 6532 { 6533 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn)); 6534 6535 while (insn) 6536 { 6537 insn = next_active_insn (insn); 6538 6539 /* We can't rely on the length of asms, so we can't skip asms. */ 6540 if (!insn 6541 || GET_CODE (PATTERN (insn)) == ASM_INPUT 6542 || asm_noperands (PATTERN (insn)) >= 0) 6543 break; 6544 if (get_attr_length (insn) == 4 6545 && jump_insn == next_active_insn (insn)) 6546 return TRUE; 6547 if (get_attr_length (insn) > 0) 6548 break; 6549 } 6550 6551 return FALSE; 6552 } 6553 6554 /* This routine handles all the normal conditional branch sequences we 6555 might need to generate. It handles compare immediate vs compare 6556 register, nullification of delay slots, varying length branches, 6557 negated branches, and all combinations of the above. It returns the 6558 output appropriate to emit the branch corresponding to all given 6559 parameters. */ 6560 6561 const char * 6562 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn) 6563 { 6564 static char buf[100]; 6565 bool useskip; 6566 int nullify = INSN_ANNULLED_BRANCH_P (insn); 6567 int length = get_attr_length (insn); 6568 int xdelay; 6569 6570 /* A conditional branch to the following instruction (e.g. the delay slot) 6571 is asking for a disaster. This can happen when not optimizing and 6572 when jump optimization fails. 6573 6574 While it is usually safe to emit nothing, this can fail if the 6575 preceding instruction is a nullified branch with an empty delay 6576 slot and the same branch target as this branch. We could check 6577 for this but jump optimization should eliminate nop jumps. It 6578 is always safe to emit a nop. */ 6579 if (branch_to_delay_slot_p (insn)) 6580 return "nop"; 6581 6582 /* The doubleword form of the cmpib instruction doesn't have the LEU 6583 and GTU conditions while the cmpb instruction does. Since we accept 6584 zero for cmpb, we must ensure that we use cmpb for the comparison. */ 6585 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx) 6586 operands[2] = gen_rtx_REG (DImode, 0); 6587 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx) 6588 operands[1] = gen_rtx_REG (DImode, 0); 6589 6590 /* If this is a long branch with its delay slot unfilled, set `nullify' 6591 as it can nullify the delay slot and save a nop. 
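   (The `,n' completer asks the hardware to nullify the instruction in the delay slot, so no explicit nop needs to be emitted to fill it.)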
*/ 6592 if (length == 8 && dbr_sequence_length () == 0) 6593 nullify = 1; 6594 6595 /* If this is a short forward conditional branch which did not get 6596 its delay slot filled, the delay slot can still be nullified. */ 6597 if (! nullify && length == 4 && dbr_sequence_length () == 0) 6598 nullify = forward_branch_p (insn); 6599 6600 /* A forward branch over a single nullified insn can be done with a 6601 comclr instruction. This avoids a single cycle penalty due to 6602 mis-predicted branch if we fall through (branch not taken). */ 6603 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; 6604 6605 switch (length) 6606 { 6607 /* All short conditional branches except backwards with an unfilled 6608 delay slot. */ 6609 case 4: 6610 if (useskip) 6611 strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); 6612 else 6613 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6614 if (GET_MODE (operands[1]) == DImode) 6615 strcat (buf, "*"); 6616 if (negated) 6617 strcat (buf, "%B3"); 6618 else 6619 strcat (buf, "%S3"); 6620 if (useskip) 6621 strcat (buf, " %2,%r1,%%r0"); 6622 else if (nullify) 6623 { 6624 if (branch_needs_nop_p (insn)) 6625 strcat (buf, ",n %2,%r1,%0%#"); 6626 else 6627 strcat (buf, ",n %2,%r1,%0"); 6628 } 6629 else 6630 strcat (buf, " %2,%r1,%0"); 6631 break; 6632 6633 /* All long conditionals. Note a short backward branch with an 6634 unfilled delay slot is treated just like a long backward branch 6635 with an unfilled delay slot. */ 6636 case 8: 6637 /* Handle weird backwards branch with a filled delay slot 6638 which is nullified. */ 6639 if (dbr_sequence_length () != 0 6640 && ! forward_branch_p (insn) 6641 && nullify) 6642 { 6643 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6644 if (GET_MODE (operands[1]) == DImode) 6645 strcat (buf, "*"); 6646 if (negated) 6647 strcat (buf, "%S3"); 6648 else 6649 strcat (buf, "%B3"); 6650 strcat (buf, ",n %2,%r1,.+12\n\tb %0"); 6651 } 6652 /* Handle short backwards branch with an unfilled delay slot. 6653 Using a comb;nop rather than comiclr;bl saves 1 cycle for both 6654 taken and untaken branches. */ 6655 else if (dbr_sequence_length () == 0 6656 && ! forward_branch_p (insn) 6657 && INSN_ADDRESSES_SET_P () 6658 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 6659 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 6660 { 6661 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6662 if (GET_MODE (operands[1]) == DImode) 6663 strcat (buf, "*"); 6664 if (negated) 6665 strcat (buf, "%B3 %2,%r1,%0%#"); 6666 else 6667 strcat (buf, "%S3 %2,%r1,%0%#"); 6668 } 6669 else 6670 { 6671 strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); 6672 if (GET_MODE (operands[1]) == DImode) 6673 strcat (buf, "*"); 6674 if (negated) 6675 strcat (buf, "%S3"); 6676 else 6677 strcat (buf, "%B3"); 6678 if (nullify) 6679 strcat (buf, " %2,%r1,%%r0\n\tb,n %0"); 6680 else 6681 strcat (buf, " %2,%r1,%%r0\n\tb %0"); 6682 } 6683 break; 6684 6685 default: 6686 /* The reversed conditional branch must branch over one additional 6687 instruction if the delay slot is filled and needs to be extracted 6688 by pa_output_lbranch. If the delay slot is empty or this is a 6689 nullified forward branch, the instruction after the reversed 6690 condition branch must be nullified. */ 6691 if (dbr_sequence_length () == 0 6692 || (nullify && forward_branch_p (insn))) 6693 { 6694 nullify = 1; 6695 xdelay = 0; 6696 operands[4] = GEN_INT (length); 6697 } 6698 else 6699 { 6700 xdelay = 1; 6701 operands[4] = GEN_INT (length + 4); 6702 } 6703 6704 /* Create a reversed conditional branch which branches around 6705 the following insns. 
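   For example, an out-of-range `cmpb,<' is emitted as `cmpb,>=,n %r2,%r1,.+20' (operands and distance illustrative) followed by the unconditional long-branch sequence produced by pa_output_lbranch.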
*/ 6706 if (GET_MODE (operands[1]) != DImode) 6707 { 6708 if (nullify) 6709 { 6710 if (negated) 6711 strcpy (buf, 6712 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}"); 6713 else 6714 strcpy (buf, 6715 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}"); 6716 } 6717 else 6718 { 6719 if (negated) 6720 strcpy (buf, 6721 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}"); 6722 else 6723 strcpy (buf, 6724 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}"); 6725 } 6726 } 6727 else 6728 { 6729 if (nullify) 6730 { 6731 if (negated) 6732 strcpy (buf, 6733 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}"); 6734 else 6735 strcpy (buf, 6736 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}"); 6737 } 6738 else 6739 { 6740 if (negated) 6741 strcpy (buf, 6742 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}"); 6743 else 6744 strcpy (buf, 6745 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}"); 6746 } 6747 } 6748 6749 output_asm_insn (buf, operands); 6750 return pa_output_lbranch (operands[0], insn, xdelay); 6751 } 6752 return buf; 6753 } 6754 6755 /* Output a PIC pc-relative instruction sequence to load the address of 6756 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref 6757 or a code label. OPERANDS[1] specifies the register to use to load 6758 the program counter. OPERANDS[3] may be used for label generation. 6759 The sequence is always three instructions in length. The program 6760 counter recorded for PA 1.X is eight bytes more than that for PA 2.0. 6761 Register %r1 is clobbered. */ 6762 6763 static void 6764 pa_output_pic_pcrel_sequence (rtx *operands) 6765 { 6766 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0])); 6767 if (TARGET_PA_20) 6768 { 6769 /* We can use mfia to determine the current program counter. */ 6770 if (TARGET_SOM || !TARGET_GAS) 6771 { 6772 operands[3] = gen_label_rtx (); 6773 targetm.asm_out.internal_label (asm_out_file, "L", 6774 CODE_LABEL_NUMBER (operands[3])); 6775 output_asm_insn ("mfia %1", operands); 6776 output_asm_insn ("addil L'%0-%l3,%1", operands); 6777 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands); 6778 } 6779 else 6780 { 6781 output_asm_insn ("mfia %1", operands); 6782 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands); 6783 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands); 6784 } 6785 } 6786 else 6787 { 6788 /* We need to use a branch to determine the current program counter. */ 6789 output_asm_insn ("{bl|b,l} .+8,%1", operands); 6790 if (TARGET_SOM || !TARGET_GAS) 6791 { 6792 operands[3] = gen_label_rtx (); 6793 output_asm_insn ("addil L'%0-%l3,%1", operands); 6794 targetm.asm_out.internal_label (asm_out_file, "L", 6795 CODE_LABEL_NUMBER (operands[3])); 6796 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands); 6797 } 6798 else 6799 { 6800 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands); 6801 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands); 6802 } 6803 } 6804 } 6805 6806 /* This routine handles output of long unconditional branches that 6807 exceed the maximum range of a simple branch instruction. Since 6808 we don't have a register available for the branch, we save register 6809 %r1 in the frame marker, load the branch destination DEST into %r1, 6810 execute the branch, and restore %r1 in the delay slot of the branch. 6811 6812 Since long branches may have an insn in the delay slot and the 6813 delay slot is used to restore %r1, we in general need to extract 6814 this insn and execute it before the branch.
However, to facilitate 6815 use of this function by conditional branches, we also provide an 6816 option to not extract the delay insn so that it will be emitted 6817 after the long branch. So, if there is an insn in the delay slot, 6818 it is extracted if XDELAY is nonzero. 6819 6820 The lengths of the various long-branch sequences are 20, 16 and 24 6821 bytes for the portable runtime, non-PIC and PIC cases, respectively. */ 6822 6823 const char * 6824 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay) 6825 { 6826 rtx xoperands[4]; 6827 6828 xoperands[0] = dest; 6829 6830 /* First, free up the delay slot. */ 6831 if (xdelay && dbr_sequence_length () != 0) 6832 { 6833 /* We can't handle a jump in the delay slot. */ 6834 gcc_assert (! JUMP_P (NEXT_INSN (insn))); 6835 6836 final_scan_insn (NEXT_INSN (insn), asm_out_file, 6837 optimize, 0, NULL); 6838 6839 /* Now delete the delay insn. */ 6840 SET_INSN_DELETED (NEXT_INSN (insn)); 6841 } 6842 6843 /* Output an insn to save %r1. The runtime documentation doesn't 6844 specify whether the "Clean Up" slot in the caller's frame can 6845 be clobbered by the callee. It isn't copied by HP's builtin 6846 alloca, so this suggests that it can be clobbered if necessary. 6847 The "Static Link" location is copied by HP builtin alloca, so 6848 we avoid using it. Using the cleanup slot might be a problem 6849 if we have to interoperate with languages that pass cleanup 6850 information. However, it should be possible to handle these 6851 situations with GCC's asm feature. 6852 6853 The "Current RP" slot is reserved for the called procedure, so 6854 we try to use it when we don't have a frame of our own. It's 6855 rather unlikely that we won't have a frame when we need to emit 6856 a very long branch. 6857 6858 Really the way to go long term is a register scavenger; go to 6859 the target of the jump and find a register which we can use 6860 as a scratch to hold the value in %r1. Then, we wouldn't have 6861 to free up the delay slot or clobber a slot that may be needed 6862 for other purposes. */ 6863 if (TARGET_64BIT) 6864 { 6865 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 6866 /* Use the return pointer slot in the frame marker. */ 6867 output_asm_insn ("std %%r1,-16(%%r30)", xoperands); 6868 else 6869 /* Use the slot at -40 in the frame marker since HP builtin 6870 alloca doesn't copy it. */ 6871 output_asm_insn ("std %%r1,-40(%%r30)", xoperands); 6872 } 6873 else 6874 { 6875 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 6876 /* Use the return pointer slot in the frame marker. */ 6877 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands); 6878 else 6879 /* Use the "Clean Up" slot in the frame marker. In GCC, 6880 the only other use of this location is for copying a 6881 floating point double argument from a floating-point 6882 register to two general registers. The copy is done 6883 as an "atomic" operation when outputting a call, so it 6884 won't interfere with our using the location here. */ 6885 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands); 6886 } 6887 6888 if (TARGET_PORTABLE_RUNTIME) 6889 { 6890 output_asm_insn ("ldil L'%0,%%r1", xoperands); 6891 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands); 6892 output_asm_insn ("bv %%r0(%%r1)", xoperands); 6893 } 6894 else if (flag_pic) 6895 { 6896 xoperands[1] = gen_rtx_REG (Pmode, 1); 6897 xoperands[2] = xoperands[1]; 6898 pa_output_pic_pcrel_sequence (xoperands); 6899 output_asm_insn ("bv %%r0(%%r1)", xoperands); 6900 } 6901 else 6902 /* Now output a very long branch to the original target.
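   For a 32-bit non-PIC function with a frame, the complete sequence is thus stw %r1,-12(%r30), ldil L'dest,%r1, be R'dest(%sr4,%r1) and, in the delay slot of the be, ldw -12(%r30),%r1, where dest is illustrative.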
*/ 6903 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands); 6904 6905 /* Now restore the value of %r1 in the delay slot. */ 6906 if (TARGET_64BIT) 6907 { 6908 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 6909 return "ldd -16(%%r30),%%r1"; 6910 else 6911 return "ldd -40(%%r30),%%r1"; 6912 } 6913 else 6914 { 6915 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 6916 return "ldw -20(%%r30),%%r1"; 6917 else 6918 return "ldw -12(%%r30),%%r1"; 6919 } 6920 } 6921 6922 /* This routine handles all the branch-on-bit conditional branch sequences we 6923 might need to generate. It handles nullification of delay slots, 6924 varying length branches, negated branches and all combinations of the 6925 above. It returns the appropriate output template to emit the branch. */ 6926 6927 const char * 6928 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which) 6929 { 6930 static char buf[100]; 6931 bool useskip; 6932 int nullify = INSN_ANNULLED_BRANCH_P (insn); 6933 int length = get_attr_length (insn); 6934 int xdelay; 6935 6936 /* A conditional branch to the following instruction (e.g. the delay slot) is 6937 asking for a disaster. I do not think this can happen as this pattern 6938 is only used when optimizing; jump optimization should eliminate the 6939 jump. But be prepared just in case. */ 6940 6941 if (branch_to_delay_slot_p (insn)) 6942 return "nop"; 6943 6944 /* If this is a long branch with its delay slot unfilled, set `nullify' 6945 as it can nullify the delay slot and save a nop. */ 6946 if (length == 8 && dbr_sequence_length () == 0) 6947 nullify = 1; 6948 6949 /* If this is a short forward conditional branch which did not get 6950 its delay slot filled, the delay slot can still be nullified. */ 6951 if (! nullify && length == 4 && dbr_sequence_length () == 0) 6952 nullify = forward_branch_p (insn); 6953 6954 /* A forward branch over a single nullified insn can be done with an 6955 extrs instruction. This avoids a single cycle penalty due to 6956 mis-predicted branch if we fall through (branch not taken). */ 6957 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; 6958 6959 switch (length) 6960 { 6961 6962 /* All short conditional branches except backwards with an unfilled 6963 delay slot. */ 6964 case 4: 6965 if (useskip) 6966 strcpy (buf, "{extrs,|extrw,s,}"); 6967 else 6968 strcpy (buf, "bb,"); 6969 if (useskip && GET_MODE (operands[0]) == DImode) 6970 strcpy (buf, "extrd,s,*"); 6971 else if (GET_MODE (operands[0]) == DImode) 6972 strcpy (buf, "bb,*"); 6973 if ((which == 0 && negated) 6974 || (which == 1 && ! negated)) 6975 strcat (buf, ">="); 6976 else 6977 strcat (buf, "<"); 6978 if (useskip) 6979 strcat (buf, " %0,%1,1,%%r0"); 6980 else if (nullify && negated) 6981 { 6982 if (branch_needs_nop_p (insn)) 6983 strcat (buf, ",n %0,%1,%3%#"); 6984 else 6985 strcat (buf, ",n %0,%1,%3"); 6986 } 6987 else if (nullify && ! negated) 6988 { 6989 if (branch_needs_nop_p (insn)) 6990 strcat (buf, ",n %0,%1,%2%#"); 6991 else 6992 strcat (buf, ",n %0,%1,%2"); 6993 } 6994 else if (! nullify && negated) 6995 strcat (buf, " %0,%1,%3"); 6996 else if (! nullify && ! negated) 6997 strcat (buf, " %0,%1,%2"); 6998 break; 6999 7000 /* All long conditionals. Note a short backward branch with an 7001 unfilled delay slot is treated just like a long backward branch 7002 with an unfilled delay slot. */ 7003 case 8: 7004 /* Handle weird backwards branch with a filled delay slot 7005 which is nullified. */ 7006 if (dbr_sequence_length () != 0 7007 && !
forward_branch_p (insn) 7008 && nullify) 7009 { 7010 strcpy (buf, "bb,"); 7011 if (GET_MODE (operands[0]) == DImode) 7012 strcat (buf, "*"); 7013 if ((which == 0 && negated) 7014 || (which == 1 && ! negated)) 7015 strcat (buf, "<"); 7016 else 7017 strcat (buf, ">="); 7018 if (negated) 7019 strcat (buf, ",n %0,%1,.+12\n\tb %3"); 7020 else 7021 strcat (buf, ",n %0,%1,.+12\n\tb %2"); 7022 } 7023 /* Handle short backwards branch with an unfilled delay slot. 7024 Using a bb;nop rather than extrs;bl saves 1 cycle for both 7025 taken and untaken branches. */ 7026 else if (dbr_sequence_length () == 0 7027 && ! forward_branch_p (insn) 7028 && INSN_ADDRESSES_SET_P () 7029 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7030 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7031 { 7032 strcpy (buf, "bb,"); 7033 if (GET_MODE (operands[0]) == DImode) 7034 strcat (buf, "*"); 7035 if ((which == 0 && negated) 7036 || (which == 1 && ! negated)) 7037 strcat (buf, ">="); 7038 else 7039 strcat (buf, "<"); 7040 if (negated) 7041 strcat (buf, " %0,%1,%3%#"); 7042 else 7043 strcat (buf, " %0,%1,%2%#"); 7044 } 7045 else 7046 { 7047 if (GET_MODE (operands[0]) == DImode) 7048 strcpy (buf, "extrd,s,*"); 7049 else 7050 strcpy (buf, "{extrs,|extrw,s,}"); 7051 if ((which == 0 && negated) 7052 || (which == 1 && ! negated)) 7053 strcat (buf, "<"); 7054 else 7055 strcat (buf, ">="); 7056 if (nullify && negated) 7057 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3"); 7058 else if (nullify && ! negated) 7059 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2"); 7060 else if (negated) 7061 strcat (buf, " %0,%1,1,%%r0\n\tb %3"); 7062 else 7063 strcat (buf, " %0,%1,1,%%r0\n\tb %2"); 7064 } 7065 break; 7066 7067 default: 7068 /* The reversed conditional branch must branch over one additional 7069 instruction if the delay slot is filled and needs to be extracted 7070 by pa_output_lbranch. If the delay slot is empty or this is a 7071 nullified forward branch, the instruction after the reversed 7072 condition branch must be nullified. */ 7073 if (dbr_sequence_length () == 0 7074 || (nullify && forward_branch_p (insn))) 7075 { 7076 nullify = 1; 7077 xdelay = 0; 7078 operands[4] = GEN_INT (length); 7079 } 7080 else 7081 { 7082 xdelay = 1; 7083 operands[4] = GEN_INT (length + 4); 7084 } 7085 7086 if (GET_MODE (operands[0]) == DImode) 7087 strcpy (buf, "bb,*"); 7088 else 7089 strcpy (buf, "bb,"); 7090 if ((which == 0 && negated) 7091 || (which == 1 && !negated)) 7092 strcat (buf, "<"); 7093 else 7094 strcat (buf, ">="); 7095 if (nullify) 7096 strcat (buf, ",n %0,%1,.+%4"); 7097 else 7098 strcat (buf, " %0,%1,.+%4"); 7099 output_asm_insn (buf, operands); 7100 return pa_output_lbranch (negated ? operands[3] : operands[2], 7101 insn, xdelay); 7102 } 7103 return buf; 7104 } 7105 7106 /* This routine handles all the branch-on-variable-bit conditional branch 7107 sequences we might need to generate. It handles nullification of delay 7108 slots, varying length branches, negated branches and all combinations 7109 of the above. It returns the appropriate output template to emit the 7110 branch. */ 7111 7112 const char * 7113 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, 7114 int which) 7115 { 7116 static char buf[100]; 7117 bool useskip; 7118 int nullify = INSN_ANNULLED_BRANCH_P (insn); 7119 int length = get_attr_length (insn); 7120 int xdelay; 7121 7122 /* A conditional branch to the following instruction (e.g. the delay slot) is 7123 asking for a disaster.
I do not think this can happen as this pattern 7124 is only used when optimizing; jump optimization should eliminate the 7125 jump. But be prepared just in case. */ 7126 7127 if (branch_to_delay_slot_p (insn)) 7128 return "nop"; 7129 7130 /* If this is a long branch with its delay slot unfilled, set `nullify' 7131 as it can nullify the delay slot and save a nop. */ 7132 if (length == 8 && dbr_sequence_length () == 0) 7133 nullify = 1; 7134 7135 /* If this is a short forward conditional branch which did not get 7136 its delay slot filled, the delay slot can still be nullified. */ 7137 if (! nullify && length == 4 && dbr_sequence_length () == 0) 7138 nullify = forward_branch_p (insn); 7139 7140 /* A forward branch over a single nullified insn can be done with an 7141 extrs instruction. This avoids a single cycle penalty due to 7142 mis-predicted branch if we fall through (branch not taken). */ 7143 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; 7144 7145 switch (length) 7146 { 7147 7148 /* All short conditional branches except backwards with an unfilled 7149 delay slot. */ 7150 case 4: 7151 if (useskip) 7152 strcpy (buf, "{vextrs,|extrw,s,}"); 7153 else 7154 strcpy (buf, "{bvb,|bb,}"); 7155 if (useskip && GET_MODE (operands[0]) == DImode) 7156 strcpy (buf, "extrd,s,*"); 7157 else if (GET_MODE (operands[0]) == DImode) 7158 strcpy (buf, "bb,*"); 7159 if ((which == 0 && negated) 7160 || (which == 1 && ! negated)) 7161 strcat (buf, ">="); 7162 else 7163 strcat (buf, "<"); 7164 if (useskip) 7165 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}"); 7166 else if (nullify && negated) 7167 { 7168 if (branch_needs_nop_p (insn)) 7169 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}"); 7170 else 7171 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}"); 7172 } 7173 else if (nullify && ! negated) 7174 { 7175 if (branch_needs_nop_p (insn)) 7176 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}"); 7177 else 7178 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}"); 7179 } 7180 else if (! nullify && negated) 7181 strcat (buf, "{ %0,%3| %0,%%sar,%3}"); 7182 else if (! nullify && ! negated) 7183 strcat (buf, "{ %0,%2| %0,%%sar,%2}"); 7184 break; 7185 7186 /* All long conditionals. Note a short backward branch with an 7187 unfilled delay slot is treated just like a long backward branch 7188 with an unfilled delay slot. */ 7189 case 8: 7190 /* Handle weird backwards branch with a filled delay slot 7191 which is nullified. */ 7192 if (dbr_sequence_length () != 0 7193 && ! forward_branch_p (insn) 7194 && nullify) 7195 { 7196 strcpy (buf, "{bvb,|bb,}"); 7197 if (GET_MODE (operands[0]) == DImode) 7198 strcat (buf, "*"); 7199 if ((which == 0 && negated) 7200 || (which == 1 && ! negated)) 7201 strcat (buf, "<"); 7202 else 7203 strcat (buf, ">="); 7204 if (negated) 7205 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}"); 7206 else 7207 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}"); 7208 } 7209 /* Handle short backwards branch with an unfilled delay slot. 7210 Using a bb;nop rather than extrs;bl saves 1 cycle for both 7211 taken and untaken branches. */ 7212 else if (dbr_sequence_length () == 0 7213 && ! forward_branch_p (insn) 7214 && INSN_ADDRESSES_SET_P () 7215 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7216 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7217 { 7218 strcpy (buf, "{bvb,|bb,}"); 7219 if (GET_MODE (operands[0]) == DImode) 7220 strcat (buf, "*"); 7221 if ((which == 0 && negated) 7222 || (which == 1 && !
negated)) 7223 strcat (buf, ">="); 7224 else 7225 strcat (buf, "<"); 7226 if (negated) 7227 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}"); 7228 else 7229 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}"); 7230 } 7231 else 7232 { 7233 strcpy (buf, "{vextrs,|extrw,s,}"); 7234 if (GET_MODE (operands[0]) == DImode) 7235 strcpy (buf, "extrd,s,*"); 7236 if ((which == 0 && negated) 7237 || (which == 1 && ! negated)) 7238 strcat (buf, "<"); 7239 else 7240 strcat (buf, ">="); 7241 if (nullify && negated) 7242 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}"); 7243 else if (nullify && ! negated) 7244 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}"); 7245 else if (negated) 7246 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}"); 7247 else 7248 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}"); 7249 } 7250 break; 7251 7252 default: 7253 /* The reversed conditional branch must branch over one additional 7254 instruction if the delay slot is filled and needs to be extracted 7255 by pa_output_lbranch. If the delay slot is empty or this is a 7256 nullified forward branch, the instruction after the reversed 7257 condition branch must be nullified. */ 7258 if (dbr_sequence_length () == 0 7259 || (nullify && forward_branch_p (insn))) 7260 { 7261 nullify = 1; 7262 xdelay = 0; 7263 operands[4] = GEN_INT (length); 7264 } 7265 else 7266 { 7267 xdelay = 1; 7268 operands[4] = GEN_INT (length + 4); 7269 } 7270 7271 if (GET_MODE (operands[0]) == DImode) 7272 strcpy (buf, "bb,*"); 7273 else 7274 strcpy (buf, "{bvb,|bb,}"); 7275 if ((which == 0 && negated) 7276 || (which == 1 && !negated)) 7277 strcat (buf, "<"); 7278 else 7279 strcat (buf, ">="); 7280 if (nullify) 7281 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}"); 7282 else 7283 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}"); 7284 output_asm_insn (buf, operands); 7285 return pa_output_lbranch (negated ? operands[3] : operands[2], 7286 insn, xdelay); 7287 } 7288 return buf; 7289 } 7290 7291 /* Return the output template for emitting a dbra type insn. 7292 7293 Note it may perform some output operations on its own before 7294 returning the final output string. */ 7295 const char * 7296 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative) 7297 { 7298 int length = get_attr_length (insn); 7299 7300 /* A conditional branch to the following instruction (e.g. the delay slot) is 7301 asking for a disaster. Be prepared! */ 7302 7303 if (branch_to_delay_slot_p (insn)) 7304 { 7305 if (which_alternative == 0) 7306 return "ldo %1(%0),%0"; 7307 else if (which_alternative == 1) 7308 { 7309 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands); 7310 output_asm_insn ("ldw -16(%%r30),%4", operands); 7311 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands); 7312 return "{fldws|fldw} -16(%%r30),%0"; 7313 } 7314 else 7315 { 7316 output_asm_insn ("ldw %0,%4", operands); 7317 return "ldo %1(%4),%4\n\tstw %4,%0"; 7318 } 7319 } 7320 7321 if (which_alternative == 0) 7322 { 7323 int nullify = INSN_ANNULLED_BRANCH_P (insn); 7324 int xdelay; 7325 7326 /* If this is a long branch with its delay slot unfilled, set `nullify' 7327 as it can nullify the delay slot and save a nop. */ 7328 if (length == 8 && dbr_sequence_length () == 0) 7329 nullify = 1; 7330 7331 /* If this is a short forward conditional branch which did not get 7332 its delay slot filled, the delay slot can still be nullified. */ 7333 if (! 
nullify && length == 4 && dbr_sequence_length () == 0) 7334 nullify = forward_branch_p (insn); 7335 7336 switch (length) 7337 { 7338 case 4: 7339 if (nullify) 7340 { 7341 if (branch_needs_nop_p (insn)) 7342 return "addib,%C2,n %1,%0,%3%#"; 7343 else 7344 return "addib,%C2,n %1,%0,%3"; 7345 } 7346 else 7347 return "addib,%C2 %1,%0,%3"; 7348 7349 case 8: 7350 /* Handle weird backwards branch with a filled delay slot 7351 which is nullified. */ 7352 if (dbr_sequence_length () != 0 7353 && ! forward_branch_p (insn) 7354 && nullify) 7355 return "addib,%N2,n %1,%0,.+12\n\tb %3"; 7356 /* Handle short backwards branch with an unfilled delay slot. 7357 Using an addb;nop rather than addi;bl saves 1 cycle for both 7358 taken and untaken branches. */ 7359 else if (dbr_sequence_length () == 0 7360 && ! forward_branch_p (insn) 7361 && INSN_ADDRESSES_SET_P () 7362 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7363 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7364 return "addib,%C2 %1,%0,%3%#"; 7365 7366 /* Handle normal cases. */ 7367 if (nullify) 7368 return "addi,%N2 %1,%0,%0\n\tb,n %3"; 7369 else 7370 return "addi,%N2 %1,%0,%0\n\tb %3"; 7371 7372 default: 7373 /* The reversed conditional branch must branch over one additional 7374 instruction if the delay slot is filled and needs to be extracted 7375 by pa_output_lbranch. If the delay slot is empty or this is a 7376 nullified forward branch, the instruction after the reversed 7377 condition branch must be nullified. */ 7378 if (dbr_sequence_length () == 0 7379 || (nullify && forward_branch_p (insn))) 7380 { 7381 nullify = 1; 7382 xdelay = 0; 7383 operands[4] = GEN_INT (length); 7384 } 7385 else 7386 { 7387 xdelay = 1; 7388 operands[4] = GEN_INT (length + 4); 7389 } 7390 7391 if (nullify) 7392 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands); 7393 else 7394 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands); 7395 7396 return pa_output_lbranch (operands[3], insn, xdelay); 7397 } 7398 7399 } 7400 /* Deal with gross reload from FP register case. */ 7401 else if (which_alternative == 1) 7402 { 7403 /* Move loop counter from FP register to MEM then into a GR, 7404 increment the GR, store the GR into MEM, and finally reload 7405 the FP register from MEM from within the branch's delay slot. */ 7406 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4", 7407 operands); 7408 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands); 7409 if (length == 24) 7410 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0"; 7411 else if (length == 28) 7412 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; 7413 else 7414 { 7415 operands[5] = GEN_INT (length - 16); 7416 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands); 7417 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); 7418 return pa_output_lbranch (operands[3], insn, 0); 7419 } 7420 } 7421 /* Deal with gross reload from memory case. */ 7422 else 7423 { 7424 /* Reload loop counter from memory; the store back to memory 7425 happens in the branch's delay slot.
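   In the short case this is just an ldw into the scratch GR, an addib conditional branch, and the stw back in its delay slot, as the templates below show.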
*/ 7426 output_asm_insn ("ldw %0,%4", operands); 7427 if (length == 12) 7428 return "addib,%C2 %1,%4,%3\n\tstw %4,%0"; 7429 else if (length == 16) 7430 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0"; 7431 else 7432 { 7433 operands[5] = GEN_INT (length - 4); 7434 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands); 7435 return pa_output_lbranch (operands[3], insn, 0); 7436 } 7437 } 7438 } 7439 7440 /* Return the output template for emitting a movb type insn. 7441 7442 Note it may perform some output operations on its own before 7443 returning the final output string. */ 7444 const char * 7445 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative, 7446 int reverse_comparison) 7447 { 7448 int length = get_attr_length (insn); 7449 7450 /* A conditional branch to the following instruction (e.g. the delay slot) is 7451 asking for a disaster. Be prepared! */ 7452 7453 if (branch_to_delay_slot_p (insn)) 7454 { 7455 if (which_alternative == 0) 7456 return "copy %1,%0"; 7457 else if (which_alternative == 1) 7458 { 7459 output_asm_insn ("stw %1,-16(%%r30)", operands); 7460 return "{fldws|fldw} -16(%%r30),%0"; 7461 } 7462 else if (which_alternative == 2) 7463 return "stw %1,%0"; 7464 else 7465 return "mtsar %r1"; 7466 } 7467 7468 /* Support the second variant. */ 7469 if (reverse_comparison) 7470 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2]))); 7471 7472 if (which_alternative == 0) 7473 { 7474 int nullify = INSN_ANNULLED_BRANCH_P (insn); 7475 int xdelay; 7476 7477 /* If this is a long branch with its delay slot unfilled, set `nullify' 7478 as it can nullify the delay slot and save a nop. */ 7479 if (length == 8 && dbr_sequence_length () == 0) 7480 nullify = 1; 7481 7482 /* If this is a short forward conditional branch which did not get 7483 its delay slot filled, the delay slot can still be nullified. */ 7484 if (! nullify && length == 4 && dbr_sequence_length () == 0) 7485 nullify = forward_branch_p (insn); 7486 7487 switch (length) 7488 { 7489 case 4: 7490 if (nullify) 7491 { 7492 if (branch_needs_nop_p (insn)) 7493 return "movb,%C2,n %1,%0,%3%#"; 7494 else 7495 return "movb,%C2,n %1,%0,%3"; 7496 } 7497 else 7498 return "movb,%C2 %1,%0,%3"; 7499 7500 case 8: 7501 /* Handle weird backwards branch with a filled delay slot 7502 which is nullified. */ 7503 if (dbr_sequence_length () != 0 7504 && ! forward_branch_p (insn) 7505 && nullify) 7506 return "movb,%N2,n %1,%0,.+12\n\tb %3"; 7507 7508 /* Handle short backwards branch with an unfilled delay slot. 7509 Using a movb;nop rather than or;bl saves 1 cycle for both 7510 taken and untaken branches. */ 7511 else if (dbr_sequence_length () == 0 7512 && ! forward_branch_p (insn) 7513 && INSN_ADDRESSES_SET_P () 7514 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7515 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7516 return "movb,%C2 %1,%0,%3%#"; 7517 /* Handle normal cases. */ 7518 if (nullify) 7519 return "or,%N2 %1,%%r0,%0\n\tb,n %3"; 7520 else 7521 return "or,%N2 %1,%%r0,%0\n\tb %3"; 7522 7523 default: 7524 /* The reversed conditional branch must branch over one additional 7525 instruction if the delay slot is filled and needs to be extracted 7526 by pa_output_lbranch. If the delay slot is empty or this is a 7527 nullified forward branch, the instruction after the reversed 7528 condition branch must be nullified. 
*/ 7529 if (dbr_sequence_length () == 0 7530 || (nullify && forward_branch_p (insn))) 7531 { 7532 nullify = 1; 7533 xdelay = 0; 7534 operands[4] = GEN_INT (length); 7535 } 7536 else 7537 { 7538 xdelay = 1; 7539 operands[4] = GEN_INT (length + 4); 7540 } 7541 7542 if (nullify) 7543 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands); 7544 else 7545 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands); 7546 7547 return pa_output_lbranch (operands[3], insn, xdelay); 7548 } 7549 } 7550 /* Deal with gross reload for FP destination register case. */ 7551 else if (which_alternative == 1) 7552 { 7553 /* Move source register to MEM, perform the branch test, then 7554 finally load the FP register from MEM from within the branch's 7555 delay slot. */ 7556 output_asm_insn ("stw %1,-16(%%r30)", operands); 7557 if (length == 12) 7558 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0"; 7559 else if (length == 16) 7560 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; 7561 else 7562 { 7563 operands[4] = GEN_INT (length - 4); 7564 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands); 7565 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); 7566 return pa_output_lbranch (operands[3], insn, 0); 7567 } 7568 } 7569 /* Deal with gross reload from memory case. */ 7570 else if (which_alternative == 2) 7571 { 7572 /* Reload loop counter from memory, the store back to memory 7573 happens in the branch's delay slot. */ 7574 if (length == 8) 7575 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0"; 7576 else if (length == 12) 7577 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0"; 7578 else 7579 { 7580 operands[4] = GEN_INT (length); 7581 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0", 7582 operands); 7583 return pa_output_lbranch (operands[3], insn, 0); 7584 } 7585 } 7586 /* Handle SAR as a destination. */ 7587 else 7588 { 7589 if (length == 8) 7590 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1"; 7591 else if (length == 12) 7592 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1"; 7593 else 7594 { 7595 operands[4] = GEN_INT (length); 7596 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1", 7597 operands); 7598 return pa_output_lbranch (operands[3], insn, 0); 7599 } 7600 } 7601 } 7602 7603 /* Copy any FP arguments in INSN into integer registers. */ 7604 static void 7605 copy_fp_args (rtx_insn *insn) 7606 { 7607 rtx link; 7608 rtx xoperands[2]; 7609 7610 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) 7611 { 7612 int arg_mode, regno; 7613 rtx use = XEXP (link, 0); 7614 7615 if (! (GET_CODE (use) == USE 7616 && GET_CODE (XEXP (use, 0)) == REG 7617 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) 7618 continue; 7619 7620 arg_mode = GET_MODE (XEXP (use, 0)); 7621 regno = REGNO (XEXP (use, 0)); 7622 7623 /* Is it a floating point register? */ 7624 if (regno >= 32 && regno <= 39) 7625 { 7626 /* Copy the FP register into an integer register via memory. 
*/ 7627 if (arg_mode == SFmode) 7628 { 7629 xoperands[0] = XEXP (use, 0); 7630 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2); 7631 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands); 7632 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); 7633 } 7634 else 7635 { 7636 xoperands[0] = XEXP (use, 0); 7637 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2); 7638 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands); 7639 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands); 7640 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); 7641 } 7642 } 7643 } 7644 } 7645 7646 /* Compute length of the FP argument copy sequence for INSN. */ 7647 static int 7648 length_fp_args (rtx_insn *insn) 7649 { 7650 int length = 0; 7651 rtx link; 7652 7653 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) 7654 { 7655 int arg_mode, regno; 7656 rtx use = XEXP (link, 0); 7657 7658 if (! (GET_CODE (use) == USE 7659 && GET_CODE (XEXP (use, 0)) == REG 7660 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) 7661 continue; 7662 7663 arg_mode = GET_MODE (XEXP (use, 0)); 7664 regno = REGNO (XEXP (use, 0)); 7665 7666 /* Is it a floating point register? */ 7667 if (regno >= 32 && regno <= 39) 7668 { 7669 if (arg_mode == SFmode) 7670 length += 8; 7671 else 7672 length += 12; 7673 } 7674 } 7675 7676 return length; 7677 } 7678 7679 /* Return the attribute length for the millicode call instruction INSN. 7680 The length must match the code generated by pa_output_millicode_call. 7681 We include the delay slot in the returned length as it is better to 7682 over estimate the length than to under estimate it. */ 7683 7684 int 7685 pa_attr_length_millicode_call (rtx_insn *insn) 7686 { 7687 unsigned long distance = -1; 7688 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 7689 7690 if (INSN_ADDRESSES_SET_P ()) 7691 { 7692 distance = (total + insn_current_reference_address (insn)); 7693 if (distance < total) 7694 distance = -1; 7695 } 7696 7697 if (TARGET_64BIT) 7698 { 7699 if (!TARGET_LONG_CALLS && distance < 7600000) 7700 return 8; 7701 7702 return 20; 7703 } 7704 else if (TARGET_PORTABLE_RUNTIME) 7705 return 24; 7706 else 7707 { 7708 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET) 7709 return 8; 7710 7711 if (!flag_pic) 7712 return 12; 7713 7714 return 24; 7715 } 7716 } 7717 7718 /* INSN is a function call. 7719 7720 CALL_DEST is the routine we are calling. */ 7721 7722 const char * 7723 pa_output_millicode_call (rtx_insn *insn, rtx call_dest) 7724 { 7725 int attr_length = get_attr_length (insn); 7726 int seq_length = dbr_sequence_length (); 7727 rtx xoperands[4]; 7728 7729 xoperands[0] = call_dest; 7730 7731 /* Handle the common case where we are sure that the branch will 7732 reach the beginning of the $CODE$ subspace. The within reach 7733 form of the $$sh_func_adrs call has a length of 28. Because it 7734 has an attribute type of sh_func_adrs, it never has a nonzero 7735 sequence length (i.e., the delay slot is never filled). */ 7736 if (!TARGET_LONG_CALLS 7737 && (attr_length == 8 7738 || (attr_length == 28 7739 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS))) 7740 { 7741 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31); 7742 output_asm_insn ("{bl|b,l} %0,%1", xoperands); 7743 } 7744 else 7745 { 7746 if (TARGET_64BIT) 7747 { 7748 /* It might seem that one insn could be saved by accessing 7749 the millicode function using the linkage table. 
However, 7750 this doesn't work in shared libraries and other dynamically 7751 loaded objects. Using a pc-relative sequence also avoids 7752 problems related to the implicit use of the gp register. */ 7753 xoperands[1] = gen_rtx_REG (Pmode, 1); 7754 xoperands[2] = xoperands[1]; 7755 pa_output_pic_pcrel_sequence (xoperands); 7756 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 7757 } 7758 else if (TARGET_PORTABLE_RUNTIME) 7759 { 7760 /* Pure portable runtime doesn't allow be/ble; we also don't 7761 have PIC support in the assembler/linker, so this sequence 7762 is needed. */ 7763 7764 /* Get the address of our target into %r1. */ 7765 output_asm_insn ("ldil L'%0,%%r1", xoperands); 7766 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands); 7767 7768 /* Get our return address into %r31. */ 7769 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands); 7770 output_asm_insn ("addi 8,%%r31,%%r31", xoperands); 7771 7772 /* Jump to our target address in %r1. */ 7773 output_asm_insn ("bv %%r0(%%r1)", xoperands); 7774 } 7775 else if (!flag_pic) 7776 { 7777 output_asm_insn ("ldil L'%0,%%r1", xoperands); 7778 if (TARGET_PA_20) 7779 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands); 7780 else 7781 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands); 7782 } 7783 else 7784 { 7785 xoperands[1] = gen_rtx_REG (Pmode, 31); 7786 xoperands[2] = gen_rtx_REG (Pmode, 1); 7787 pa_output_pic_pcrel_sequence (xoperands); 7788 7789 /* Adjust return address. */ 7790 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands); 7791 7792 /* Jump to our target address in %r1. */ 7793 output_asm_insn ("bv %%r0(%%r1)", xoperands); 7794 } 7795 } 7796 7797 if (seq_length == 0) 7798 output_asm_insn ("nop", xoperands); 7799 7800 return ""; 7801 } 7802 7803 /* Return the attribute length of the call instruction INSN. The SIBCALL 7804 flag indicates whether INSN is a regular call or a sibling call. The 7805 length returned must be longer than the code actually generated by 7806 pa_output_call. Since branch shortening is done before delay branch 7807 sequencing, there is no way to determine whether or not the delay 7808 slot will be filled during branch shortening. Even when the delay 7809 slot is filled, we may have to add a nop if the delay slot contains 7810 a branch that can't reach its target. Thus, we always have to include 7811 the delay slot in the length estimate. This used to be done in 7812 pa_adjust_insn_length but we do it here now as some sequences always 7813 fill the delay slot and we can save four bytes in the estimate for 7814 these sequences. */ 7815 7816 int 7817 pa_attr_length_call (rtx_insn *insn, int sibcall) 7818 { 7819 int local_call; 7820 rtx call, call_dest; 7821 tree call_decl; 7822 int length = 0; 7823 rtx pat = PATTERN (insn); 7824 unsigned long distance = -1; 7825 7826 gcc_assert (CALL_P (insn)); 7827 7828 if (INSN_ADDRESSES_SET_P ()) 7829 { 7830 unsigned long total; 7831 7832 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 7833 distance = (total + insn_current_reference_address (insn)); 7834 if (distance < total) 7835 distance = -1; 7836 } 7837 7838 gcc_assert (GET_CODE (pat) == PARALLEL); 7839 7840 /* Get the call rtx. */ 7841 call = XVECEXP (pat, 0, 0); 7842 if (GET_CODE (call) == SET) 7843 call = SET_SRC (call); 7844 7845 gcc_assert (GET_CODE (call) == CALL); 7846 7847 /* Determine if this is a local call. 
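
     (A local call is a call to a symbol that binds locally -- see the
     targetm.binds_local_p test below -- and so cannot be preempted by a
     definition in another load module.)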
*/ 7848 call_dest = XEXP (XEXP (call, 0), 0); 7849 call_decl = SYMBOL_REF_DECL (call_dest); 7850 local_call = call_decl && targetm.binds_local_p (call_decl); 7851 7852 /* pc-relative branch. */ 7853 if (!TARGET_LONG_CALLS 7854 && ((TARGET_PA_20 && !sibcall && distance < 7600000) 7855 || distance < MAX_PCREL17F_OFFSET)) 7856 length += 8; 7857 7858 /* 64-bit plabel sequence. */ 7859 else if (TARGET_64BIT && !local_call) 7860 length += 24; 7861 7862 /* non-pic long absolute branch sequence. */ 7863 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) 7864 length += 12; 7865 7866 /* long pc-relative branch sequence. */ 7867 else if (TARGET_LONG_PIC_SDIFF_CALL 7868 || (TARGET_GAS && !TARGET_SOM && local_call)) 7869 { 7870 length += 20; 7871 7872 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 7873 length += 8; 7874 } 7875 7876 /* 32-bit plabel sequence. */ 7877 else 7878 { 7879 length += 32; 7880 7881 if (TARGET_SOM) 7882 length += length_fp_args (insn); 7883 7884 if (flag_pic) 7885 length += 4; 7886 7887 if (!TARGET_PA_20) 7888 { 7889 if (!sibcall) 7890 length += 8; 7891 7892 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 7893 length += 8; 7894 } 7895 } 7896 7897 return length; 7898 } 7899 7900 /* INSN is a function call. 7901 7902 CALL_DEST is the routine we are calling. */ 7903 7904 const char * 7905 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall) 7906 { 7907 int seq_length = dbr_sequence_length (); 7908 tree call_decl = SYMBOL_REF_DECL (call_dest); 7909 int local_call = call_decl && targetm.binds_local_p (call_decl); 7910 rtx xoperands[4]; 7911 7912 xoperands[0] = call_dest; 7913 7914 /* Handle the common case where we're sure that the branch will reach 7915 the beginning of the "$CODE$" subspace. This is the beginning of 7916 the current function if we are in a named section. */ 7917 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8) 7918 { 7919 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2); 7920 output_asm_insn ("{bl|b,l} %0,%1", xoperands); 7921 } 7922 else 7923 { 7924 if (TARGET_64BIT && !local_call) 7925 { 7926 /* ??? As far as I can tell, the HP linker doesn't support the 7927 long pc-relative sequence described in the 64-bit runtime 7928 architecture. So, we use a slightly longer indirect call. */ 7929 xoperands[0] = pa_get_deferred_plabel (call_dest); 7930 xoperands[1] = gen_label_rtx (); 7931 7932 /* Put the load of %r27 into the delay slot. We don't need to 7933 do anything when generating fast indirect calls. */ 7934 if (seq_length != 0) 7935 { 7936 final_scan_insn (NEXT_INSN (insn), asm_out_file, 7937 optimize, 0, NULL); 7938 7939 /* Now delete the delay insn. */ 7940 SET_INSN_DELETED (NEXT_INSN (insn)); 7941 } 7942 7943 output_asm_insn ("addil LT'%0,%%r27", xoperands); 7944 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands); 7945 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands); 7946 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands); 7947 output_asm_insn ("bve,l (%%r2),%%r2", xoperands); 7948 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); 7949 seq_length = 1; 7950 } 7951 else 7952 { 7953 int indirect_call = 0; 7954 7955 /* Emit a long call. There are several different sequences 7956 of increasing length and complexity. In most cases, 7957 they don't allow an instruction in the delay slot. 
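
		 For example, the simplest long non-PIC absolute case below
		 boils down to the sketch ("target" standing for operand %0):

		     ldil L'target,%r1
		     ble R'target(%sr4,%r1)
		     copy %r31,%r2

		 while the PIC and plabel cases need progressively longer
		 sequences.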
*/
7958 	      if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7959 		  && !TARGET_LONG_PIC_SDIFF_CALL
7960 		  && !(TARGET_GAS && !TARGET_SOM && local_call)
7961 		  && !TARGET_64BIT)
7962 		indirect_call = 1;
7963 
7964 	      if (seq_length != 0
7965 		  && !sibcall
7966 		  && (!TARGET_PA_20
7967 		      || indirect_call
7968 		      || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7969 		{
7970 		  /* A non-jump insn in the delay slot.  By definition we can
7971 		     emit this insn before the call (and in fact before
7972 		     argument relocation).  */
7973 		  final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7974 				   NULL);
7975 
7976 		  /* Now delete the delay insn.  */
7977 		  SET_INSN_DELETED (NEXT_INSN (insn));
7978 		  seq_length = 0;
7979 		}
7980 
7981 	      if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7982 		{
7983 		  /* This is the best sequence for making long calls in
7984 		     non-pic code.  Unfortunately, GNU ld doesn't provide
7985 		     the stub needed for external calls, and GAS's support
7986 		     for this with the SOM linker is buggy.  It is safe
7987 		     to use this for local calls.  */
7988 		  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7989 		  if (sibcall)
7990 		    output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7991 		  else
7992 		    {
7993 		      if (TARGET_PA_20)
7994 			output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7995 					 xoperands);
7996 		      else
7997 			output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7998 
7999 		      output_asm_insn ("copy %%r31,%%r2", xoperands);
8000 		      seq_length = 1;
8001 		    }
8002 		}
8003 	      else
8004 		{
8005 		  /* The HP assembler and linker can handle relocations for
8006 		     the difference of two symbols.  The HP assembler
8007 		     recognizes the sequence as a pc-relative call and
8008 		     the linker provides stubs when needed.  */
8009 
8010 		  /* GAS currently can't generate the relocations that
8011 		     are needed for the SOM linker under HP-UX using this
8012 		     sequence.  The GNU linker doesn't generate the stubs
8013 		     that are needed for external calls on TARGET_ELF32
8014 		     with this sequence.  For now, we have to use a longer
8015 		     plabel sequence when using GAS for non local calls.  */
8016 		  if (TARGET_LONG_PIC_SDIFF_CALL
8017 		      || (TARGET_GAS && !TARGET_SOM && local_call))
8018 		    {
8019 		      xoperands[1] = gen_rtx_REG (Pmode, 1);
8020 		      xoperands[2] = xoperands[1];
8021 		      pa_output_pic_pcrel_sequence (xoperands);
8022 		    }
8023 		  else
8024 		    {
8025 		      /* Emit a long plabel-based call sequence.  This is
8026 			 essentially an inline implementation of $$dyncall.
8027 			 We don't actually try to call $$dyncall as this is
8028 			 as difficult as calling the function itself.  */
8029 		      xoperands[0] = pa_get_deferred_plabel (call_dest);
8030 		      xoperands[1] = gen_label_rtx ();
8031 
8032 		      /* Since the call is indirect, FP arguments in registers
8033 			 need to be copied to the general registers.  Then, the
8034 			 argument relocation stub will copy them back.  */
8035 		      if (TARGET_SOM)
8036 			copy_fp_args (insn);
8037 
8038 		      if (flag_pic)
8039 			{
8040 			  output_asm_insn ("addil LT'%0,%%r19", xoperands);
8041 			  output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8042 			  output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8043 			}
8044 		      else
8045 			{
8046 			  output_asm_insn ("addil LR'%0-$global$,%%r27",
8047 					   xoperands);
8048 			  output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8049 					   xoperands);
8050 			}
8051 
8052 		      output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8053 		      output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8054 		      /* Should this be an ordered load to ensure the target
8055 			 address is loaded before the global pointer?
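
			 The word at 0(%r22) is the function's target address
			 and the word at 4(%r22) is its global pointer, hence
			 the pair of loads that follows.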
*/ 8056 output_asm_insn ("ldw 0(%%r22),%%r1", xoperands); 8057 output_asm_insn ("ldw 4(%%r22),%%r19", xoperands); 8058 8059 if (!sibcall && !TARGET_PA_20) 8060 { 8061 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands); 8062 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 8063 output_asm_insn ("addi 8,%%r2,%%r2", xoperands); 8064 else 8065 output_asm_insn ("addi 16,%%r2,%%r2", xoperands); 8066 } 8067 } 8068 8069 if (TARGET_PA_20) 8070 { 8071 if (sibcall) 8072 output_asm_insn ("bve (%%r1)", xoperands); 8073 else 8074 { 8075 if (indirect_call) 8076 { 8077 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 8078 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands); 8079 seq_length = 1; 8080 } 8081 else 8082 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 8083 } 8084 } 8085 else 8086 { 8087 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 8088 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0", 8089 xoperands); 8090 8091 if (sibcall) 8092 { 8093 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 8094 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands); 8095 else 8096 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands); 8097 } 8098 else 8099 { 8100 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 8101 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands); 8102 else 8103 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands); 8104 8105 if (indirect_call) 8106 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands); 8107 else 8108 output_asm_insn ("copy %%r31,%%r2", xoperands); 8109 seq_length = 1; 8110 } 8111 } 8112 } 8113 } 8114 } 8115 8116 if (seq_length == 0) 8117 output_asm_insn ("nop", xoperands); 8118 8119 return ""; 8120 } 8121 8122 /* Return the attribute length of the indirect call instruction INSN. 8123 The length must match the code generated by output_indirect call. 8124 The returned length includes the delay slot. Currently, the delay 8125 slot of an indirect call sequence is not exposed and it is used by 8126 the sequence itself. */ 8127 8128 int 8129 pa_attr_length_indirect_call (rtx_insn *insn) 8130 { 8131 unsigned long distance = -1; 8132 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 8133 8134 if (INSN_ADDRESSES_SET_P ()) 8135 { 8136 distance = (total + insn_current_reference_address (insn)); 8137 if (distance < total) 8138 distance = -1; 8139 } 8140 8141 if (TARGET_64BIT) 8142 return 12; 8143 8144 if (TARGET_FAST_INDIRECT_CALLS) 8145 return 8; 8146 8147 if (TARGET_PORTABLE_RUNTIME) 8148 return 16; 8149 8150 if (!TARGET_LONG_CALLS 8151 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000) 8152 || distance < MAX_PCREL17F_OFFSET)) 8153 return 8; 8154 8155 /* Out of reach, can use ble. */ 8156 if (!flag_pic) 8157 return 12; 8158 8159 /* Inline versions of $$dyncall. */ 8160 if (!optimize_size) 8161 { 8162 if (TARGET_NO_SPACE_REGS) 8163 return 28; 8164 8165 if (TARGET_PA_20) 8166 return 32; 8167 } 8168 8169 /* Long PIC pc-relative call. */ 8170 return 20; 8171 } 8172 8173 const char * 8174 pa_output_indirect_call (rtx_insn *insn, rtx call_dest) 8175 { 8176 rtx xoperands[4]; 8177 int length; 8178 8179 if (TARGET_64BIT) 8180 { 8181 xoperands[0] = call_dest; 8182 output_asm_insn ("ldd 16(%0),%%r2\n\t" 8183 "bve,l (%%r2),%%r2\n\t" 8184 "ldd 24(%0),%%r27", xoperands); 8185 return ""; 8186 } 8187 8188 /* First the special case for kernels, level 0 systems, etc. 
*/
8189   if (TARGET_FAST_INDIRECT_CALLS)
8190     {
8191       pa_output_arg_descriptor (insn);
8192       if (TARGET_PA_20)
8193 	return "bve,l,n (%%r22),%%r2\n\tnop";
8194       return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8195     }
8196 
8197   if (TARGET_PORTABLE_RUNTIME)
8198     {
8199       output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8200 		       "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8201       pa_output_arg_descriptor (insn);
8202       return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8203     }
8204 
8205   /* Now the normal case -- we can reach $$dyncall directly or
8206      we're sure that we can get there via a long-branch stub.
8207 
8208      No need to check target flags as the length uniquely identifies
8209      the remaining cases.  */
8210   length = pa_attr_length_indirect_call (insn);
8211   if (length == 8)
8212     {
8213       pa_output_arg_descriptor (insn);
8214 
8215       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8216 	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8217 	 variant of the B,L instruction can't be used on the SOM target.  */
8218       if (TARGET_PA_20 && !TARGET_SOM)
8219 	return "b,l,n $$dyncall,%%r2\n\tnop";
8220       else
8221 	return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8222     }
8223 
8224   /* Long millicode call, but we are not generating PIC or portable runtime
8225      code.  */
8226   if (length == 12)
8227     {
8228       output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8229       pa_output_arg_descriptor (insn);
8230       return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8231     }
8232 
8233   /* The long PIC pc-relative call sequence is five instructions.  So,
8234      let's use an inline version of $$dyncall when the calling sequence
8235      has a roughly similar number of instructions and we are not optimizing
8236      for size.  We need two instructions to load the return pointer plus
8237      the $$dyncall implementation.  */
8238   if (!optimize_size)
8239     {
8240       if (TARGET_NO_SPACE_REGS)
8241 	{
8242 	  pa_output_arg_descriptor (insn);
8243 	  output_asm_insn ("bl .+8,%%r2\n\t"
8244 			   "ldo 20(%%r2),%%r2\n\t"
8245 			   "extru,<> %%r22,30,1,%%r0\n\t"
8246 			   "bv,n %%r0(%%r22)\n\t"
8247 			   "ldw -2(%%r22),%%r21\n\t"
8248 			   "bv %%r0(%%r21)\n\t"
8249 			   "ldw 2(%%r22),%%r19", xoperands);
8250 	  return "";
8251 	}
8252       if (TARGET_PA_20)
8253 	{
8254 	  pa_output_arg_descriptor (insn);
8255 	  output_asm_insn ("bl .+8,%%r2\n\t"
8256 			   "ldo 24(%%r2),%%r2\n\t"
8257 			   "stw %%r2,-24(%%sp)\n\t"
8258 			   "extru,<> %%r22,30,1,%%r0\n\t"
8259 			   "bve,n (%%r22)\n\t"
8260 			   "ldw -2(%%r22),%%r21\n\t"
8261 			   "bve (%%r21)\n\t"
8262 			   "ldw 2(%%r22),%%r19", xoperands);
8263 	  return "";
8264 	}
8265     }
8266 
8267   /* We need a long PIC call to $$dyncall.  */
8268   xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8269   xoperands[1] = gen_rtx_REG (Pmode, 2);
8270   xoperands[2] = gen_rtx_REG (Pmode, 1);
8271   pa_output_pic_pcrel_sequence (xoperands);
8272   pa_output_arg_descriptor (insn);
8273   return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8274 }
8275 
8276 /* In HPUX 8.0's shared library scheme, special relocations are needed
8277    for function labels if they might be passed to a function
8278    in a shared library (because shared libraries don't live in code
8279    space), and special magic is needed to construct their address.
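
   The magic is implemented by pa_encode_label below, which prepends
   '@' to the symbol name (so foo becomes @foo); pa_strip_name_encoding
   undoes the transformation.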
*/
8280 
8281 void
8282 pa_encode_label (rtx sym)
8283 {
8284   const char *str = XSTR (sym, 0);
8285   int len = strlen (str) + 1;
8286   char *newstr, *p;
8287 
8288   p = newstr = XALLOCAVEC (char, len + 1);
8289   *p++ = '@';
8290   strcpy (p, str);
8291 
8292   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8293 }
8294 
8295 static void
8296 pa_encode_section_info (tree decl, rtx rtl, int first)
8297 {
8298   int old_referenced = 0;
8299 
8300   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8301     old_referenced
8302       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8303 
8304   default_encode_section_info (decl, rtl, first);
8305 
8306   if (first && TEXT_SPACE_P (decl))
8307     {
8308       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8309       if (TREE_CODE (decl) == FUNCTION_DECL)
8310 	pa_encode_label (XEXP (rtl, 0));
8311     }
8312   else if (old_referenced)
8313     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8314 }
8315 
8316 /* This is sort of inverse to pa_encode_section_info.  */
8317 
8318 static const char *
8319 pa_strip_name_encoding (const char *str)
8320 {
8321   str += (*str == '@');
8322   str += (*str == '*');
8323   return str;
8324 }
8325 
8326 /* Returns 1 if OP is a function label involved in a simple addition
8327    with a constant.  Used to keep certain patterns from matching
8328    during instruction combination.  */
8329 int
8330 pa_is_function_label_plus_const (rtx op)
8331 {
8332   /* Strip off any CONST.  */
8333   if (GET_CODE (op) == CONST)
8334     op = XEXP (op, 0);
8335 
8336   return (GET_CODE (op) == PLUS
8337 	  && function_label_operand (XEXP (op, 0), VOIDmode)
8338 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8339 }
8340 
8341 /* Output assembly code for a thunk to FUNCTION.  */
8342 
8343 static void
8344 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8345 			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8346 			tree function)
8347 {
8348   static unsigned int current_thunk_number;
8349   int val_14 = VAL_14_BITS_P (delta);
8350   unsigned int old_last_address = last_address, nbytes = 0;
8351   char label[17];
8352   rtx xoperands[4];
8353 
8354   xoperands[0] = XEXP (DECL_RTL (function), 0);
8355   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8356   xoperands[2] = GEN_INT (delta);
8357 
8358   final_start_function (emit_barrier (), file, 1);
8359 
8360   /* Output the thunk.  We know that the function is in the same
8361      translation unit (i.e., the same space) as the thunk, and that
8362      thunks are output after their method.  Thus, we don't need an
8363      external branch to reach the function.  With SOM and GAS,
8364      functions and thunks are effectively in different sections.
8365      Thus, we can always use an IA-relative branch and the linker
8366      will add a long branch stub if necessary.
8367 
8368      However, we have to be careful when generating PIC code on the
8369      SOM port to ensure that the sequence does not transfer to an
8370      import stub for the target function as this could clobber the
8371      return value saved at SP-24.  This would also apply to the
8372      32-bit linux port if the multi-space model is implemented.  */
8373   if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8374        && !(flag_pic && TREE_PUBLIC (function))
8375        && (TARGET_GAS || last_address < 262132))
8376       || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8377 	  && ((targetm_common.have_named_sections
8378 	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
8379 	       /* The GNU 64-bit linker has rather poor stub management.
8380 So, we use a long branch from thunks that aren't in 8381 the same section as the target function. */ 8382 && ((!TARGET_64BIT 8383 && (DECL_SECTION_NAME (thunk_fndecl) 8384 != DECL_SECTION_NAME (function))) 8385 || ((DECL_SECTION_NAME (thunk_fndecl) 8386 == DECL_SECTION_NAME (function)) 8387 && last_address < 262132))) 8388 /* In this case, we need to be able to reach the start of 8389 the stub table even though the function is likely closer 8390 and can be jumped to directly. */ 8391 || (targetm_common.have_named_sections 8392 && DECL_SECTION_NAME (thunk_fndecl) == NULL 8393 && DECL_SECTION_NAME (function) == NULL 8394 && total_code_bytes < MAX_PCREL17F_OFFSET) 8395 /* Likewise. */ 8396 || (!targetm_common.have_named_sections 8397 && total_code_bytes < MAX_PCREL17F_OFFSET)))) 8398 { 8399 if (!val_14) 8400 output_asm_insn ("addil L'%2,%%r26", xoperands); 8401 8402 output_asm_insn ("b %0", xoperands); 8403 8404 if (val_14) 8405 { 8406 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8407 nbytes += 8; 8408 } 8409 else 8410 { 8411 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8412 nbytes += 12; 8413 } 8414 } 8415 else if (TARGET_64BIT) 8416 { 8417 rtx xop[4]; 8418 8419 /* We only have one call-clobbered scratch register, so we can't 8420 make use of the delay slot if delta doesn't fit in 14 bits. */ 8421 if (!val_14) 8422 { 8423 output_asm_insn ("addil L'%2,%%r26", xoperands); 8424 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8425 } 8426 8427 /* Load function address into %r1. */ 8428 xop[0] = xoperands[0]; 8429 xop[1] = gen_rtx_REG (Pmode, 1); 8430 xop[2] = xop[1]; 8431 pa_output_pic_pcrel_sequence (xop); 8432 8433 if (val_14) 8434 { 8435 output_asm_insn ("bv %%r0(%%r1)", xoperands); 8436 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8437 nbytes += 20; 8438 } 8439 else 8440 { 8441 output_asm_insn ("bv,n %%r0(%%r1)", xoperands); 8442 nbytes += 24; 8443 } 8444 } 8445 else if (TARGET_PORTABLE_RUNTIME) 8446 { 8447 output_asm_insn ("ldil L'%0,%%r1", xoperands); 8448 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands); 8449 8450 if (!val_14) 8451 output_asm_insn ("ldil L'%2,%%r26", xoperands); 8452 8453 output_asm_insn ("bv %%r0(%%r22)", xoperands); 8454 8455 if (val_14) 8456 { 8457 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8458 nbytes += 16; 8459 } 8460 else 8461 { 8462 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands); 8463 nbytes += 20; 8464 } 8465 } 8466 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) 8467 { 8468 /* The function is accessible from outside this module. The only 8469 way to avoid an import stub between the thunk and function is to 8470 call the function directly with an indirect sequence similar to 8471 that used by $$dyncall. This is possible because $$dyncall acts 8472 as the import stub in an indirect call. 
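
     The sequence below fetches the function's plabel from a local data
     word (the LTHN label emitted into the data section after the thunk),
     clears the plabel bit if it is set (the bb,>= and depi pair), and
     loads the target address and global pointer before branching.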
*/ 8473 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number); 8474 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label); 8475 output_asm_insn ("addil LT'%3,%%r19", xoperands); 8476 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands); 8477 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8478 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); 8479 output_asm_insn ("depi 0,31,2,%%r22", xoperands); 8480 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands); 8481 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8482 8483 if (!val_14) 8484 { 8485 output_asm_insn ("addil L'%2,%%r26", xoperands); 8486 nbytes += 4; 8487 } 8488 8489 if (TARGET_PA_20) 8490 { 8491 output_asm_insn ("bve (%%r22)", xoperands); 8492 nbytes += 36; 8493 } 8494 else if (TARGET_NO_SPACE_REGS) 8495 { 8496 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands); 8497 nbytes += 36; 8498 } 8499 else 8500 { 8501 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands); 8502 output_asm_insn ("mtsp %%r21,%%sr0", xoperands); 8503 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands); 8504 nbytes += 44; 8505 } 8506 8507 if (val_14) 8508 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8509 else 8510 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8511 } 8512 else if (flag_pic) 8513 { 8514 rtx xop[4]; 8515 8516 /* Load function address into %r22. */ 8517 xop[0] = xoperands[0]; 8518 xop[1] = gen_rtx_REG (Pmode, 1); 8519 xop[2] = gen_rtx_REG (Pmode, 22); 8520 pa_output_pic_pcrel_sequence (xop); 8521 8522 if (!val_14) 8523 output_asm_insn ("addil L'%2,%%r26", xoperands); 8524 8525 output_asm_insn ("bv %%r0(%%r22)", xoperands); 8526 8527 if (val_14) 8528 { 8529 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8530 nbytes += 20; 8531 } 8532 else 8533 { 8534 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8535 nbytes += 24; 8536 } 8537 } 8538 else 8539 { 8540 if (!val_14) 8541 output_asm_insn ("addil L'%2,%%r26", xoperands); 8542 8543 output_asm_insn ("ldil L'%0,%%r22", xoperands); 8544 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands); 8545 8546 if (val_14) 8547 { 8548 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8549 nbytes += 12; 8550 } 8551 else 8552 { 8553 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8554 nbytes += 16; 8555 } 8556 } 8557 8558 final_end_function (); 8559 8560 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) 8561 { 8562 switch_to_section (data_section); 8563 output_asm_insn (".align 4", xoperands); 8564 ASM_OUTPUT_LABEL (file, label); 8565 output_asm_insn (".word P'%0", xoperands); 8566 } 8567 8568 current_thunk_number++; 8569 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) 8570 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); 8571 last_address += nbytes; 8572 if (old_last_address > last_address) 8573 last_address = UINT_MAX; 8574 update_total_code_bytes (nbytes); 8575 } 8576 8577 /* Only direct calls to static functions are allowed to be sibling (tail) 8578 call optimized. 8579 8580 This restriction is necessary because some linker generated stubs will 8581 store return pointers into rp' in some cases which might clobber a 8582 live value already in rp'. 8583 8584 In a sibcall the current function and the target function share stack 8585 space. Thus if the path to the current function and the path to the 8586 target function save a value in rp', they save the value into the 8587 same stack slot, which has undesirable consequences. 
8588 
8589    Because of the deferred binding nature of shared libraries any function
8590    with external scope could be in a different load module and thus require
8591    rp' to be saved when calling that function.  So sibcall optimizations
8592    can only be safe for static functions.
8593 
8594    Note that GCC never needs return value relocations, so we don't have to
8595    worry about static calls with return value relocations (which require
8596    saving rp').
8597 
8598    It is safe to perform a sibcall optimization when the target function
8599    will never return.  */
8600 static bool
8601 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8602 {
8603   /* Sibcalls are not ok because the arg pointer register is not a fixed
8604      register.  This prevents the sibcall optimization from occurring.  In
8605      addition, there are problems with stub placement using GNU ld.  This
8606      is because a normal sibcall branch uses a 17-bit relocation while
8607      a regular call branch uses a 22-bit relocation.  As a result, more
8608      care needs to be taken in the placement of long-branch stubs.  */
8609   if (TARGET_64BIT)
8610     return false;
8611 
8612   if (TARGET_PORTABLE_RUNTIME)
8613     return false;
8614 
8615   /* Sibcalls are only ok within a translation unit.  */
8616   return decl && targetm.binds_local_p (decl);
8617 }
8618 
8619 /* ??? Addition is not commutative on the PA due to the weird implicit
8620    space register selection rules for memory addresses.  Therefore, we
8621    don't consider a + b == b + a, as this might be inside a MEM.  */
8622 static bool
8623 pa_commutative_p (const_rtx x, int outer_code)
8624 {
8625   return (COMMUTATIVE_P (x)
8626 	  && (TARGET_NO_SPACE_REGS
8627 	      || (outer_code != UNKNOWN && outer_code != MEM)
8628 	      || GET_CODE (x) != PLUS));
8629 }
8630 
8631 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8632    use in fmpyadd instructions.  */
8633 int
8634 pa_fmpyaddoperands (rtx *operands)
8635 {
8636   machine_mode mode = GET_MODE (operands[0]);
8637 
8638   /* Must be a floating point mode.  */
8639   if (mode != SFmode && mode != DFmode)
8640     return 0;
8641 
8642   /* All modes must be the same.  */
8643   if (! (mode == GET_MODE (operands[1])
8644 	 && mode == GET_MODE (operands[2])
8645 	 && mode == GET_MODE (operands[3])
8646 	 && mode == GET_MODE (operands[4])
8647 	 && mode == GET_MODE (operands[5])))
8648     return 0;
8649 
8650   /* All operands must be registers.  */
8651   if (! (GET_CODE (operands[1]) == REG
8652 	 && GET_CODE (operands[2]) == REG
8653 	 && GET_CODE (operands[3]) == REG
8654 	 && GET_CODE (operands[4]) == REG
8655 	 && GET_CODE (operands[5]) == REG))
8656     return 0;
8657 
8658   /* Only 2 real operands to the addition.  One of the input operands must
8659      be the same as the output operand.  */
8660   if (! rtx_equal_p (operands[3], operands[4])
8661       && ! rtx_equal_p (operands[3], operands[5]))
8662     return 0;
8663 
8664   /* Inout operand of add cannot conflict with any operands from multiply.  */
8665   if (rtx_equal_p (operands[3], operands[0])
8666       || rtx_equal_p (operands[3], operands[1])
8667       || rtx_equal_p (operands[3], operands[2]))
8668     return 0;
8669 
8670   /* multiply cannot feed into addition operands.  */
8671   if (rtx_equal_p (operands[4], operands[0])
8672       || rtx_equal_p (operands[5], operands[0]))
8673     return 0;
8674 
8675   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.
*/ 8676 if (mode == SFmode 8677 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS 8678 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS 8679 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS 8680 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS 8681 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS 8682 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) 8683 return 0; 8684 8685 /* Passed. Operands are suitable for fmpyadd. */ 8686 return 1; 8687 } 8688 8689 #if !defined(USE_COLLECT2) 8690 static void 8691 pa_asm_out_constructor (rtx symbol, int priority) 8692 { 8693 if (!function_label_operand (symbol, VOIDmode)) 8694 pa_encode_label (symbol); 8695 8696 #ifdef CTORS_SECTION_ASM_OP 8697 default_ctor_section_asm_out_constructor (symbol, priority); 8698 #else 8699 # ifdef TARGET_ASM_NAMED_SECTION 8700 default_named_section_asm_out_constructor (symbol, priority); 8701 # else 8702 default_stabs_asm_out_constructor (symbol, priority); 8703 # endif 8704 #endif 8705 } 8706 8707 static void 8708 pa_asm_out_destructor (rtx symbol, int priority) 8709 { 8710 if (!function_label_operand (symbol, VOIDmode)) 8711 pa_encode_label (symbol); 8712 8713 #ifdef DTORS_SECTION_ASM_OP 8714 default_dtor_section_asm_out_destructor (symbol, priority); 8715 #else 8716 # ifdef TARGET_ASM_NAMED_SECTION 8717 default_named_section_asm_out_destructor (symbol, priority); 8718 # else 8719 default_stabs_asm_out_destructor (symbol, priority); 8720 # endif 8721 #endif 8722 } 8723 #endif 8724 8725 /* This function places uninitialized global data in the bss section. 8726 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this 8727 function on the SOM port to prevent uninitialized global data from 8728 being placed in the data section. */ 8729 8730 void 8731 pa_asm_output_aligned_bss (FILE *stream, 8732 const char *name, 8733 unsigned HOST_WIDE_INT size, 8734 unsigned int align) 8735 { 8736 switch_to_section (bss_section); 8737 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8738 8739 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE 8740 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); 8741 #endif 8742 8743 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE 8744 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); 8745 #endif 8746 8747 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8748 ASM_OUTPUT_LABEL (stream, name); 8749 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); 8750 } 8751 8752 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive 8753 that doesn't allow the alignment of global common storage to be directly 8754 specified. The SOM linker aligns common storage based on the rounded 8755 value of the NUM_BYTES parameter in the .comm directive. It's not 8756 possible to use the .align directive as it doesn't affect the alignment 8757 of the label associated with a .comm directive. */ 8758 8759 void 8760 pa_asm_output_aligned_common (FILE *stream, 8761 const char *name, 8762 unsigned HOST_WIDE_INT size, 8763 unsigned int align) 8764 { 8765 unsigned int max_common_align; 8766 8767 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64); 8768 if (align > max_common_align) 8769 { 8770 warning (0, "alignment (%u) for %s exceeds maximum alignment " 8771 "for global common data. 
Using %u", 8772 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT); 8773 align = max_common_align; 8774 } 8775 8776 switch_to_section (bss_section); 8777 8778 assemble_name (stream, name); 8779 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n", 8780 MAX (size, align / BITS_PER_UNIT)); 8781 } 8782 8783 /* We can't use .comm for local common storage as the SOM linker effectively 8784 treats the symbol as universal and uses the same storage for local symbols 8785 with the same name in different object files. The .block directive 8786 reserves an uninitialized block of storage. However, it's not common 8787 storage. Fortunately, GCC never requests common storage with the same 8788 name in any given translation unit. */ 8789 8790 void 8791 pa_asm_output_aligned_local (FILE *stream, 8792 const char *name, 8793 unsigned HOST_WIDE_INT size, 8794 unsigned int align) 8795 { 8796 switch_to_section (bss_section); 8797 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8798 8799 #ifdef LOCAL_ASM_OP 8800 fprintf (stream, "%s", LOCAL_ASM_OP); 8801 assemble_name (stream, name); 8802 fprintf (stream, "\n"); 8803 #endif 8804 8805 ASM_OUTPUT_LABEL (stream, name); 8806 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); 8807 } 8808 8809 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for 8810 use in fmpysub instructions. */ 8811 int 8812 pa_fmpysuboperands (rtx *operands) 8813 { 8814 machine_mode mode = GET_MODE (operands[0]); 8815 8816 /* Must be a floating point mode. */ 8817 if (mode != SFmode && mode != DFmode) 8818 return 0; 8819 8820 /* All modes must be the same. */ 8821 if (! (mode == GET_MODE (operands[1]) 8822 && mode == GET_MODE (operands[2]) 8823 && mode == GET_MODE (operands[3]) 8824 && mode == GET_MODE (operands[4]) 8825 && mode == GET_MODE (operands[5]))) 8826 return 0; 8827 8828 /* All operands must be registers. */ 8829 if (! (GET_CODE (operands[1]) == REG 8830 && GET_CODE (operands[2]) == REG 8831 && GET_CODE (operands[3]) == REG 8832 && GET_CODE (operands[4]) == REG 8833 && GET_CODE (operands[5]) == REG)) 8834 return 0; 8835 8836 /* Only 2 real operands to the subtraction. Subtraction is not a commutative 8837 operation, so operands[4] must be the same as operand[3]. */ 8838 if (! rtx_equal_p (operands[3], operands[4])) 8839 return 0; 8840 8841 /* multiply cannot feed into subtraction. */ 8842 if (rtx_equal_p (operands[5], operands[0])) 8843 return 0; 8844 8845 /* Inout operand of sub cannot conflict with any operands from multiply. */ 8846 if (rtx_equal_p (operands[3], operands[0]) 8847 || rtx_equal_p (operands[3], operands[1]) 8848 || rtx_equal_p (operands[3], operands[2])) 8849 return 0; 8850 8851 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ 8852 if (mode == SFmode 8853 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS 8854 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS 8855 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS 8856 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS 8857 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS 8858 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) 8859 return 0; 8860 8861 /* Passed. Operands are suitable for fmpysub. */ 8862 return 1; 8863 } 8864 8865 /* Return 1 if the given constant is 2, 4, or 8. These are the valid 8866 constants for a MULT embedded inside a memory address. 
*/ 8867 int 8868 pa_mem_shadd_constant_p (int val) 8869 { 8870 if (val == 2 || val == 4 || val == 8) 8871 return 1; 8872 else 8873 return 0; 8874 } 8875 8876 /* Return 1 if the given constant is 1, 2, or 3. These are the valid 8877 constants for shadd instructions. */ 8878 int 8879 pa_shadd_constant_p (int val) 8880 { 8881 if (val == 1 || val == 2 || val == 3) 8882 return 1; 8883 else 8884 return 0; 8885 } 8886 8887 /* Return TRUE if INSN branches forward. */ 8888 8889 static bool 8890 forward_branch_p (rtx_insn *insn) 8891 { 8892 rtx lab = JUMP_LABEL (insn); 8893 8894 /* The INSN must have a jump label. */ 8895 gcc_assert (lab != NULL_RTX); 8896 8897 if (INSN_ADDRESSES_SET_P ()) 8898 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn)); 8899 8900 while (insn) 8901 { 8902 if (insn == lab) 8903 return true; 8904 else 8905 insn = NEXT_INSN (insn); 8906 } 8907 8908 return false; 8909 } 8910 8911 /* Output an unconditional move and branch insn. */ 8912 8913 const char * 8914 pa_output_parallel_movb (rtx *operands, rtx_insn *insn) 8915 { 8916 int length = get_attr_length (insn); 8917 8918 /* These are the cases in which we win. */ 8919 if (length == 4) 8920 return "mov%I1b,tr %1,%0,%2"; 8921 8922 /* None of the following cases win, but they don't lose either. */ 8923 if (length == 8) 8924 { 8925 if (dbr_sequence_length () == 0) 8926 { 8927 /* Nothing in the delay slot, fake it by putting the combined 8928 insn (the copy or add) in the delay slot of a bl. */ 8929 if (GET_CODE (operands[1]) == CONST_INT) 8930 return "b %2\n\tldi %1,%0"; 8931 else 8932 return "b %2\n\tcopy %1,%0"; 8933 } 8934 else 8935 { 8936 /* Something in the delay slot, but we've got a long branch. */ 8937 if (GET_CODE (operands[1]) == CONST_INT) 8938 return "ldi %1,%0\n\tb %2"; 8939 else 8940 return "copy %1,%0\n\tb %2"; 8941 } 8942 } 8943 8944 if (GET_CODE (operands[1]) == CONST_INT) 8945 output_asm_insn ("ldi %1,%0", operands); 8946 else 8947 output_asm_insn ("copy %1,%0", operands); 8948 return pa_output_lbranch (operands[2], insn, 1); 8949 } 8950 8951 /* Output an unconditional add and branch insn. */ 8952 8953 const char * 8954 pa_output_parallel_addb (rtx *operands, rtx_insn *insn) 8955 { 8956 int length = get_attr_length (insn); 8957 8958 /* To make life easy we want operand0 to be the shared input/output 8959 operand and operand1 to be the readonly operand. */ 8960 if (operands[0] == operands[1]) 8961 operands[1] = operands[2]; 8962 8963 /* These are the cases in which we win. */ 8964 if (length == 4) 8965 return "add%I1b,tr %1,%0,%3"; 8966 8967 /* None of the following cases win, but they don't lose either. */ 8968 if (length == 8) 8969 { 8970 if (dbr_sequence_length () == 0) 8971 /* Nothing in the delay slot, fake it by putting the combined 8972 insn (the copy or add) in the delay slot of a bl. */ 8973 return "b %3\n\tadd%I1 %1,%0,%0"; 8974 else 8975 /* Something in the delay slot, but we've got a long branch. */ 8976 return "add%I1 %1,%0,%0\n\tb %3"; 8977 } 8978 8979 output_asm_insn ("add%I1 %1,%0,%0", operands); 8980 return pa_output_lbranch (operands[3], insn, 1); 8981 } 8982 8983 /* We use this hook to perform a PA specific optimization which is difficult 8984 to do in earlier passes. */ 8985 8986 static void 8987 pa_reorg (void) 8988 { 8989 remove_useless_addtr_insns (1); 8990 8991 if (pa_cpu < PROCESSOR_8000) 8992 pa_combine_instructions (); 8993 } 8994 8995 /* The PA has a number of odd instructions which can perform multiple 8996 tasks at once. 
On first generation PA machines (PA1.0 and PA1.1)
8997    it may be profitable to combine two instructions into one instruction
8998    with two outputs.  It's not profitable on PA2.0 machines because the
8999    two outputs would take two slots in the reorder buffers.
9000 
9001    This routine finds instructions which can be combined and combines
9002    them.  We only support some of the potential combinations, and we
9003    only try common ways to find suitable instructions.
9004 
9005       * addb can add two registers or a register and a small integer
9006       and jump to a nearby (+-8k) location.  Normally the jump to the
9007       nearby location is conditional on the result of the add, but by
9008       using the "true" condition we can make the jump unconditional.
9009       Thus addb can perform two independent operations in one insn.
9010 
9011       * movb is similar to addb in that it can perform a reg->reg
9012       or small immediate->reg copy and jump to a nearby (+-8k) location.
9013 
9014       * fmpyadd and fmpysub can perform an FP multiply and either an
9015       FP add or FP sub if the operands of the multiply and add/sub are
9016       independent (there are other minor restrictions).  Note both
9017       the fmpy and fadd/fsub can in theory move to better spots according
9018       to data dependencies, but for now we require the fmpy stay at a
9019       fixed location.
9020 
9021       * Many of the memory operations can perform pre & post updates
9022       of index registers.  GCC's pre/post increment/decrement addressing
9023       is far too simple to take advantage of all the possibilities.  This
9024       pass may not be suitable since those insns may not be independent.
9025 
9026       * comclr can compare two ints or an int and a register, nullify
9027       the following instruction and zero some other register.  This
9028       is more difficult to use as it's harder to find an insn which
9029       will generate a comclr than finding something like an unconditional
9030       branch.  (conditional moves & long branches create comclr insns).
9031 
9032       * Most arithmetic operations can conditionally skip the next
9033       instruction.  They can be viewed as "perform this operation
9034       and conditionally jump to this nearby location" (where nearby
9035       is an insn away).  These are difficult to use due to the
9036       branch length restrictions.  */
9037 
9038 static void
9039 pa_combine_instructions (void)
9040 {
9041   rtx_insn *anchor;
9042 
9043   /* This can get expensive since the basic algorithm is on the
9044      order of O(n^2) (or worse).  Only do it for -O2 or higher
9045      levels of optimization.  */
9046   if (optimize < 2)
9047     return;
9048 
9049   /* Walk down the list of insns looking for "anchor" insns which
9050      may be combined with "floating" insns.  As the name implies,
9051      "anchor" instructions don't move, while "floating" insns may
9052      move around.  */
9053   rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9054   rtx_insn *new_rtx = make_insn_raw (par);
9055 
9056   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9057     {
9058       enum attr_pa_combine_type anchor_attr;
9059       enum attr_pa_combine_type floater_attr;
9060 
9061       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9062 	 Also ignore any special USE insns.  */
9063       if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9064 	  || GET_CODE (PATTERN (anchor)) == USE
9065 	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
9066 	continue;
9067 
9068       anchor_attr = get_attr_pa_combine_type (anchor);
9069       /* See if anchor is an insn suitable for combination.
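
	 (An fmpy anchor pairs with an fadd/fsub floater and vice versa,
	 while an unconditional backward branch anchors an add/move
	 floater.)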
*/ 9070 if (anchor_attr == PA_COMBINE_TYPE_FMPY 9071 || anchor_attr == PA_COMBINE_TYPE_FADDSUB 9072 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH 9073 && ! forward_branch_p (anchor))) 9074 { 9075 rtx_insn *floater; 9076 9077 for (floater = PREV_INSN (anchor); 9078 floater; 9079 floater = PREV_INSN (floater)) 9080 { 9081 if (NOTE_P (floater) 9082 || (NONJUMP_INSN_P (floater) 9083 && (GET_CODE (PATTERN (floater)) == USE 9084 || GET_CODE (PATTERN (floater)) == CLOBBER))) 9085 continue; 9086 9087 /* Anything except a regular INSN will stop our search. */ 9088 if (! NONJUMP_INSN_P (floater)) 9089 { 9090 floater = NULL; 9091 break; 9092 } 9093 9094 /* See if FLOATER is suitable for combination with the 9095 anchor. */ 9096 floater_attr = get_attr_pa_combine_type (floater); 9097 if ((anchor_attr == PA_COMBINE_TYPE_FMPY 9098 && floater_attr == PA_COMBINE_TYPE_FADDSUB) 9099 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9100 && floater_attr == PA_COMBINE_TYPE_FMPY)) 9101 { 9102 /* If ANCHOR and FLOATER can be combined, then we're 9103 done with this pass. */ 9104 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9105 SET_DEST (PATTERN (floater)), 9106 XEXP (SET_SRC (PATTERN (floater)), 0), 9107 XEXP (SET_SRC (PATTERN (floater)), 1))) 9108 break; 9109 } 9110 9111 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH 9112 && floater_attr == PA_COMBINE_TYPE_ADDMOVE) 9113 { 9114 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS) 9115 { 9116 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9117 SET_DEST (PATTERN (floater)), 9118 XEXP (SET_SRC (PATTERN (floater)), 0), 9119 XEXP (SET_SRC (PATTERN (floater)), 1))) 9120 break; 9121 } 9122 else 9123 { 9124 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9125 SET_DEST (PATTERN (floater)), 9126 SET_SRC (PATTERN (floater)), 9127 SET_SRC (PATTERN (floater)))) 9128 break; 9129 } 9130 } 9131 } 9132 9133 /* If we didn't find anything on the backwards scan try forwards. */ 9134 if (!floater 9135 && (anchor_attr == PA_COMBINE_TYPE_FMPY 9136 || anchor_attr == PA_COMBINE_TYPE_FADDSUB)) 9137 { 9138 for (floater = anchor; floater; floater = NEXT_INSN (floater)) 9139 { 9140 if (NOTE_P (floater) 9141 || (NONJUMP_INSN_P (floater) 9142 && (GET_CODE (PATTERN (floater)) == USE 9143 || GET_CODE (PATTERN (floater)) == CLOBBER))) 9144 9145 continue; 9146 9147 /* Anything except a regular INSN will stop our search. */ 9148 if (! NONJUMP_INSN_P (floater)) 9149 { 9150 floater = NULL; 9151 break; 9152 } 9153 9154 /* See if FLOATER is suitable for combination with the 9155 anchor. */ 9156 floater_attr = get_attr_pa_combine_type (floater); 9157 if ((anchor_attr == PA_COMBINE_TYPE_FMPY 9158 && floater_attr == PA_COMBINE_TYPE_FADDSUB) 9159 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9160 && floater_attr == PA_COMBINE_TYPE_FMPY)) 9161 { 9162 /* If ANCHOR and FLOATER can be combined, then we're 9163 done with this pass. */ 9164 if (pa_can_combine_p (new_rtx, anchor, floater, 1, 9165 SET_DEST (PATTERN (floater)), 9166 XEXP (SET_SRC (PATTERN (floater)), 9167 0), 9168 XEXP (SET_SRC (PATTERN (floater)), 9169 1))) 9170 break; 9171 } 9172 } 9173 } 9174 9175 /* FLOATER will be nonzero if we found a suitable floating 9176 insn for combination with ANCHOR. */ 9177 if (floater 9178 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9179 || anchor_attr == PA_COMBINE_TYPE_FMPY)) 9180 { 9181 /* Emit the new instruction and delete the old anchor. 
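
	     The replacement is a two-element PARALLEL holding the anchor
	     and floater patterns -- the same shape that pa_can_combine_p
	     already test-recognized.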
*/ 9182 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)), 9183 copy_rtx (PATTERN (floater))); 9184 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp); 9185 emit_insn_before (temp, anchor); 9186 9187 SET_INSN_DELETED (anchor); 9188 9189 /* Emit a special USE insn for FLOATER, then delete 9190 the floating insn. */ 9191 temp = copy_rtx (PATTERN (floater)); 9192 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater); 9193 delete_insn (floater); 9194 9195 continue; 9196 } 9197 else if (floater 9198 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH) 9199 { 9200 /* Emit the new_jump instruction and delete the old anchor. */ 9201 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)), 9202 copy_rtx (PATTERN (floater))); 9203 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp); 9204 temp = emit_jump_insn_before (temp, anchor); 9205 9206 JUMP_LABEL (temp) = JUMP_LABEL (anchor); 9207 SET_INSN_DELETED (anchor); 9208 9209 /* Emit a special USE insn for FLOATER, then delete 9210 the floating insn. */ 9211 temp = copy_rtx (PATTERN (floater)); 9212 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater); 9213 delete_insn (floater); 9214 continue; 9215 } 9216 } 9217 } 9218 } 9219 9220 static int 9221 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater, 9222 int reversed, rtx dest, 9223 rtx src1, rtx src2) 9224 { 9225 int insn_code_number; 9226 rtx_insn *start, *end; 9227 9228 /* Create a PARALLEL with the patterns of ANCHOR and 9229 FLOATER, try to recognize it, then test constraints 9230 for the resulting pattern. 9231 9232 If the pattern doesn't match or the constraints 9233 aren't met keep searching for a suitable floater 9234 insn. */ 9235 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor); 9236 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater); 9237 INSN_CODE (new_rtx) = -1; 9238 insn_code_number = recog_memoized (new_rtx); 9239 basic_block bb = BLOCK_FOR_INSN (anchor); 9240 if (insn_code_number < 0 9241 || (extract_insn (new_rtx), 9242 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb)))) 9243 return 0; 9244 9245 if (reversed) 9246 { 9247 start = anchor; 9248 end = floater; 9249 } 9250 else 9251 { 9252 start = floater; 9253 end = anchor; 9254 } 9255 9256 /* There's up to three operands to consider. One 9257 output and two inputs. 9258 9259 The output must not be used between FLOATER & ANCHOR 9260 exclusive. The inputs must not be set between 9261 FLOATER and ANCHOR exclusive. */ 9262 9263 if (reg_used_between_p (dest, start, end)) 9264 return 0; 9265 9266 if (reg_set_between_p (src1, start, end)) 9267 return 0; 9268 9269 if (reg_set_between_p (src2, start, end)) 9270 return 0; 9271 9272 /* If we get here, then everything is good. */ 9273 return 1; 9274 } 9275 9276 /* Return nonzero if references for INSN are delayed. 9277 9278 Millicode insns are actually function calls with some special 9279 constraints on arguments and register usage. 9280 9281 Millicode calls always expect their arguments in the integer argument 9282 registers, and always return their result in %r29 (ret1). They 9283 are expected to clobber their arguments, %r1, %r29, and the return 9284 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else. 9285 9286 This function tells reorg that the references to arguments and 9287 millicode calls do not appear to happen until after the millicode call. 9288 This allows reorg to put insns which set the argument registers into the 9289 delay slot of the millicode call -- thus they act more like traditional 9290 CALL_INSNs. 
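
   For example, an insn that loads an argument into %r26 immediately
   before a millicode call can be moved into the call's delay slot.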
9291 9292 Note we cannot consider side effects of the insn to be delayed because 9293 the branch and link insn will clobber the return pointer. If we happened 9294 to use the return pointer in the delay slot of the call, then we lose. 9295 9296 get_attr_type will try to recognize the given insn, so make sure to 9297 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns 9298 in particular. */ 9299 int 9300 pa_insn_refs_are_delayed (rtx_insn *insn) 9301 { 9302 return ((NONJUMP_INSN_P (insn) 9303 && GET_CODE (PATTERN (insn)) != SEQUENCE 9304 && GET_CODE (PATTERN (insn)) != USE 9305 && GET_CODE (PATTERN (insn)) != CLOBBER 9306 && get_attr_type (insn) == TYPE_MILLI)); 9307 } 9308 9309 /* Promote the return value, but not the arguments. */ 9310 9311 static machine_mode 9312 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, 9313 machine_mode mode, 9314 int *punsignedp ATTRIBUTE_UNUSED, 9315 const_tree fntype ATTRIBUTE_UNUSED, 9316 int for_return) 9317 { 9318 if (for_return == 0) 9319 return mode; 9320 return promote_mode (type, mode, punsignedp); 9321 } 9322 9323 /* On the HP-PA the value is found in register(s) 28(-29), unless 9324 the mode is SF or DF. Then the value is returned in fr4 (32). 9325 9326 This must perform the same promotions as PROMOTE_MODE, else promoting 9327 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly. 9328 9329 Small structures must be returned in a PARALLEL on PA64 in order 9330 to match the HP Compiler ABI. */ 9331 9332 static rtx 9333 pa_function_value (const_tree valtype, 9334 const_tree func ATTRIBUTE_UNUSED, 9335 bool outgoing ATTRIBUTE_UNUSED) 9336 { 9337 machine_mode valmode; 9338 9339 if (AGGREGATE_TYPE_P (valtype) 9340 || TREE_CODE (valtype) == COMPLEX_TYPE 9341 || TREE_CODE (valtype) == VECTOR_TYPE) 9342 { 9343 HOST_WIDE_INT valsize = int_size_in_bytes (valtype); 9344 9345 /* Handle aggregates that fit exactly in a word or double word. */ 9346 if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD) 9347 return gen_rtx_REG (TYPE_MODE (valtype), 28); 9348 9349 if (TARGET_64BIT) 9350 { 9351 /* Aggregates with a size less than or equal to 128 bits are 9352 returned in GR 28(-29). They are left justified. The pad 9353 bits are undefined. Larger aggregates are returned in 9354 memory. */ 9355 rtx loc[2]; 9356 int i, offset = 0; 9357 int ub = valsize <= UNITS_PER_WORD ? 1 : 2; 9358 9359 for (i = 0; i < ub; i++) 9360 { 9361 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 9362 gen_rtx_REG (DImode, 28 + i), 9363 GEN_INT (offset)); 9364 offset += 8; 9365 } 9366 9367 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc)); 9368 } 9369 else if (valsize > UNITS_PER_WORD) 9370 { 9371 /* Aggregates 5 to 8 bytes in size are returned in general 9372 registers r28-r29 in the same manner as other non 9373 floating-point objects. The data is right-justified and 9374 zero-extended to 64 bits. This is opposite to the normal 9375 justification used on big endian targets and requires 9376 special treatment. 
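
	     For example, a 6-byte structure is returned in the low-order
	     48 bits of r28/r29 rather than left-justified as normal
	     big-endian padding rules would place it.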
/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else promoting
   return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */

static rtx
pa_function_value (const_tree valtype,
		   const_tree func ATTRIBUTE_UNUSED,
		   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      HOST_WIDE_INT valsize = int_size_in_bytes (valtype);

      /* Handle aggregates that fit exactly in a word or double word.  */
      if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
	return gen_rtx_REG (TYPE_MODE (valtype), 28);

      if (TARGET_64BIT)
	{
	  /* Aggregates with a size less than or equal to 128 bits are
	     returned in GR 28(-29).  They are left justified.  The pad
	     bits are undefined.  Larger aggregates are returned in
	     memory.  */
	  rtx loc[2];
	  int i, offset = 0;
	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;

	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, 28 + i),
					  GEN_INT (offset));
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
	}
      else if (valsize > UNITS_PER_WORD)
	{
	  /* Aggregates 5 to 8 bytes in size are returned in general
	     registers r28-r29 in the same manner as other non
	     floating-point objects.  The data is right-justified and
	     zero-extended to 64 bits.  This is opposite to the normal
	     justification used on big endian targets and requires
	     special treatment.  */
	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
				       gen_rtx_REG (DImode, 28), const0_rtx);
	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	}
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}
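/* A few concrete return locations produced above (illustrative):

     double		      -> (reg:DF 32), i.e. fr4, unless soft float
     small int or pointer     -> (reg 28) in word_mode
     8-byte struct	      -> (reg 28) in the struct's double-word mode
     6-byte struct, 32-bit    -> one-entry PARALLEL over (reg:DI 28),
				 right justified per the comment above
     6-byte struct, 64-bit    -> one-entry PARALLEL over (reg:DI 28),
				 left justified.  */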
/* Implement the TARGET_LIBCALL_VALUE hook.  */

static rtx
pa_libcall_value (machine_mode mode,
		  const_rtx fun ATTRIBUTE_UNUSED)
{
  if (! TARGET_SOFT_FLOAT
      && (mode == SFmode || mode == DFmode))
    return gen_rtx_REG (mode, 32);
  else
    return gen_rtx_REG (mode, 28);
}

/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */

static bool
pa_function_value_regno_p (const unsigned int regno)
{
  if (regno == 28
      || (! TARGET_SOFT_FLOAT && regno == 32))
    return true;

  return false;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
			 const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int arg_size = pa_function_arg_size (mode, type);

  cum->nargs_prototype--;
  cum->words += (arg_size
		 + ((cum->words & 01)
		    && type != NULL_TREE
		    && arg_size > 1));
}
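/* Worked example for the advance above (illustrative): on the 32-bit
   port, after f (int a, double b) has consumed A (cum->words == 1),
   the DFmode argument B has arg_size == 2 and cum->words is odd, so
   the expression adds 2 + 1 and cum->words becomes 4.  The extra word
   is the alignment pad slot skipped so that B starts on an even word
   boundary.  */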
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
static rtx
pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
		 const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = pa_function_arg_size (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
	return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
	alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
	return NULL_RTX;
    }

  /* The 32-bit ABIs and the 64-bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

	 Remember, gprs grow towards smaller register numbers while
	 fprs grow to higher register numbers.  Also remember that
	 although FP regs are 32-bit addressable, we pretend that
	 the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
	 treatment.  */
      if (arg_size > 1
	  || mode == BLKmode
	  || (type && (AGGREGATE_TYPE_P (type)
		       || TREE_CODE (type) == COMPLEX_TYPE
		       || TREE_CODE (type) == VECTOR_TYPE)))
	{
	  /* Double-extended precision (80-bit), quad-precision (128-bit)
	     and aggregates including complex numbers are aligned on
	     128-bit boundaries.  The first eight 64-bit argument slots
	     are associated one-to-one, with general registers r26
	     through r19, and also with floating-point registers fr4
	     through fr11.  Arguments larger than one word are always
	     passed in general registers.

	     Using a PARALLEL with a word mode register results in left
	     justified data on a big-endian target.  */

	  rtx loc[8];
	  int i, offset = 0, ub = arg_size;

	  /* Align the base register.  */
	  gpr_reg_base -= alignment;

	  ub = MIN (ub, max_arg_words - cum->words - alignment);
	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, gpr_reg_base),
					  GEN_INT (offset));
	      gpr_reg_base -= 1;
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
	}
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
	 which registers we must use.  */
      if (arg_size > 1)
	{
	  if (cum->words)
	    {
	      gpr_reg_base = 23;
	      fpr_reg_base = 38;
	    }
	  else
	    {
	      gpr_reg_base = 25;
	      fpr_reg_base = 34;
	    }

	  /* Structures 5 to 8 bytes in size are passed in the general
	     registers in the same manner as other non floating-point
	     objects.  The data is right-justified and zero-extended
	     to 64 bits.  This is opposite to the normal justification
	     used on big endian targets and requires special treatment.
	     We now define BLOCK_REG_PADDING to pad these objects.
	     Aggregates, complex and vector types are passed in the same
	     manner as structures.  */
	  if (mode == BLKmode
	      || (type && (AGGREGATE_TYPE_P (type)
			   || TREE_CODE (type) == COMPLEX_TYPE
			   || TREE_CODE (type) == VECTOR_TYPE)))
	    {
	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (DImode, gpr_reg_base),
					   const0_rtx);
	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	    }
	}
      else
	{
	  /* We have a single word (32 bits).  A simple computation
	     will get us the register #s we need.  */
	  gpr_reg_base = 26 - cum->words;
	  fpr_reg_base = 32 + 2 * cum->words;
	}
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
	  is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
	  pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
	  registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
	 calls with the 32-bit ABI and the HP assembler since there is no
	 way to specify argument locations in static functions.  */
      || (!TARGET_64BIT
	  && !TARGET_GAS
	  && !cum->incoming
	  && cum->indirect
	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
	= gen_rtx_PARALLEL
	    (mode,
	     gen_rtvec (2,
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, fpr_reg_base),
					   const0_rtx),
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, gpr_reg_base),
					   const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
	  /* Indirect calls in the normal 32-bit ABI require all arguments
	     to be passed in general registers.  */
	  || (!TARGET_PORTABLE_RUNTIME
	      && !TARGET_64BIT
	      && !TARGET_ELF32
	      && cum->indirect)
	  /* If the parameter is not a scalar floating-point parameter,
	     then it belongs in GPRs.  */
	  || GET_MODE_CLASS (mode) != MODE_FLOAT
	  /* Structure with single SFmode field belongs in GPR.  */
	  || (type && AGGREGATE_TYPE_P (type)))
	retval = gen_rtx_REG (mode, gpr_reg_base);
      else
	retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}

/* Arguments larger than one word are double word aligned.  */

static unsigned int
pa_function_arg_boundary (machine_mode mode, const_tree type)
{
  bool singleword = (type
		     ? (integer_zerop (TYPE_SIZE (type))
			|| !TREE_CONSTANT (TYPE_SIZE (type))
			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);

  return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
}

/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
		      tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (pa_function_arg_size (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + pa_function_arg_size (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
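/* Worked example for pa_arg_partial_bytes (illustrative): on the
   64-bit port with cum->words == 6 and a four-word argument, the
   argument neither fits in the two remaining register slots nor
   starts on the stack, so it is split: (8 - 6 - 0) * 8 == 16 bytes
   go in registers and the remaining 16 bytes go on the stack.  */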
/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
	{
	  /* We only want to emit a .nsubspa directive once at the
	     start of the function.  */
	  cfun->machine->in_nsubspa = 1;

	  /* Create a new subspace for the text.  This provides
	     better stub placement and one-only functions.  */
	  if (cfun->decl
	      && DECL_ONE_ONLY (cfun->decl)
	      && !DECL_WEAK (cfun->decl))
	    {
	      output_section_asm_op ("\t.SPACE $TEXT$\n"
				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
				     "ACCESS=44,SORT=24,COMDAT");
	      return;
	    }
	}
      else
	{
	  /* There isn't a current function or the body of the current
	     function has been completed.  So, we are changing to the
	     text section to output debugging information.  Thus, we
	     need to forget that we are in the text section so that
	     varasm.c will call us when text_section is selected again.  */
	  gcc_assert (!cfun || !cfun->machine
		      || cfun->machine->in_nsubspa == 2);
	  in_section = NULL;
	}
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}

/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
			   "\t.SPACE $TEXT$\n"
			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
			   "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
			   som_output_comdat_data_section_asm_op,
			   "\t.SPACE $PRIVATE$\n"
			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
			   "ACCESS=31,SORT=24,COMDAT");

  if (flag_tm)
    som_tm_clone_table_section
      = get_unnamed_section (0, output_section_asm_op,
			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");

  /* HPUX ld generates incorrect GOT entries for "T" fixups which
     reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems handling
     the difference of two symbols, which is the other correct way to
     reference constant data during PIC code generation.

     Thus, we can't put constant data needing relocation in the $TEXT$
     space during PIC generation.

     Previously, we placed all constant data into the $DATA$ subspace
     when generating PIC code.  This reduces sharing, but it works
     correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
     This puts constant data not needing relocation into the $TEXT$
     space.  */
  readonly_data_section = som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}
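/* For reference, the directives emitted through the callbacks above
   look like this in the assembler output.  For a one-only function
   under GAS:

	.SPACE $TEXT$
	.NSUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,SORT=24,COMDAT

   and for ordinary code with the HP assembler:

	.SPACE $TEXT$
	.SUBSPA $CODE$  */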
/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */

static section *
pa_som_tm_clone_table_section (void)
{
  return som_tm_clone_table_section;
}

/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc
   cannot be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
	  || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !(reloc & pa_reloc_rw_mask ()))
    {
      if (TARGET_SOM
	  && DECL_ONE_ONLY (exp)
	  && !DECL_WEAK (exp))
	return som_one_only_readonly_data_section;
      else
	return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp)
	   && !(reloc & pa_reloc_rw_mask ()))
    return readonly_data_section;
  else if (TARGET_SOM
	   && TREE_CODE (exp) == VAR_DECL
	   && DECL_ONE_ONLY (exp)
	   && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}

/* Implement pa_elf_select_rtx_section.  If X is a function label operand
   and the function is in a COMDAT group, place the plabel reference in the
   .data.rel.ro.local section.  The linker ignores references to symbols in
   discarded sections from this section.  */

static section *
pa_elf_select_rtx_section (machine_mode mode, rtx x,
			   unsigned HOST_WIDE_INT align)
{
  if (function_label_operand (x, VOIDmode))
    {
      tree decl = SYMBOL_REF_DECL (x);

      if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
	return get_named_section (NULL, ".data.rel.ro.local", 1);
    }

  return default_elf_select_rtx_section (mode, x, align);
}

/* Implement pa_reloc_rw_mask.  */

static int
pa_reloc_rw_mask (void)
{
  if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
    return 3;

  /* HP linker does not support global relocs in readonly memory.  */
  return TARGET_SOM ? 2 : 0;
}

static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}
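/* For example, pa_globalize_label emits the following for a global
   data symbol "foo":

	.EXPORT foo,DATA

   The mask returned by pa_reloc_rw_mask follows the generic
   convention used by varasm when classifying sections: bit 0 set
   forces data with relocations to local symbols into writable
   sections, and bit 1 set does the same for relocations to global
   symbols, so 3 disallows both in read-only sections while 2 only
   disallows the latter.  */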
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
	  || int_size_in_bytes (type) <= 0);
}
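/* Concrete cases for the predicate above: a 12-byte structure is
   returned in memory on the 32-bit ports (12 > 8) but in registers
   on the 64-bit ports (12 <= 16); variable-sized objects (size -1)
   and empty types (size 0) always go in memory.  */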
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */

/* Vector of extern_symbol entries.  */
static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  gcc_assert (file == asm_out_file);
  extern_symbol p = {decl, name};
  vec_safe_push (extern_symbols, p);
}
#endif

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_file_end (void)
{
#ifdef ASM_OUTPUT_EXTERNAL_REAL
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();
#endif

  output_deferred_plabels ();

#ifdef ASM_OUTPUT_EXTERNAL_REAL
  for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  vec_free (extern_symbols);
#endif

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
pa_can_change_mode_class (machine_mode from, machine_mode to,
			  reg_class_t rclass)
{
  if (from == to)
    return true;

  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return true;

  /* Reject changes to/from modes with zero size.  */
  if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return false;

  /* There is no way to load QImode or HImode values directly from memory
     to a FP register.  SImode loads to the FP registers are not zero
     extended.  On the 64-bit target, this conflicts with the definition
     of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
     except for DImode to SImode on the 64-bit target.  It is handled by
     register renaming in pa_print_operand.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return TARGET_64BIT && from == DImode && to == SImode;

  /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return false;

  return true;
}
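/* Examples for the hook above (illustrative): taking a word-sized
   SImode subreg of a DImode general register is allowed, since
   keeping the same size or narrowing to a word is fine in the GPRs.
   The same mode change in a floating-point register class is rejected
   except for the DImode-to-SImode case on the 64-bit target, and
   widening a word-sized value to a double-word mode is refused
   because TARGET_HARD_REGNO_MODE_OK constrains where multi-word
   modes may live.  */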
/* Implement TARGET_MODES_TIEABLE_P.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tying these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
   TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

static bool
pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}


/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))


/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from text segment to stack location
   and then patched with pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      if (TARGET_PA_20)
	{
	  fputs ("\tmfia %r20\n", f);
	  fputs ("\tldw 48(%r20),%r22\n", f);
	  fputs ("\tcopy %r22,%r21\n", f);
	  fputs ("\tbb,>=,n %r22,30,.+16\n", f);
	  fputs ("\tdepwi 0,31,2,%r22\n", f);
	  fputs ("\tldw 0(%r22),%r21\n", f);
	  fputs ("\tldw 4(%r22),%r19\n", f);
	  fputs ("\tbve (%r21)\n", f);
	  fputs ("\tldw 52(%r1),%r29\n", f);
	  fputs ("\t.word 0\n", f);
	  fputs ("\t.word 0\n", f);
	  fputs ("\t.word 0\n", f);
	}
      else
	{
	  if (ASSEMBLER_DIALECT == 0)
	    {
	      fputs ("\tbl .+8,%r20\n", f);
	      fputs ("\tdepi 0,31,2,%r20\n", f);
	    }
	  else
	    {
	      fputs ("\tb,l .+8,%r20\n", f);
	      fputs ("\tdepwi 0,31,2,%r20\n", f);
	    }
	  fputs ("\tldw 40(%r20),%r22\n", f);
	  fputs ("\tcopy %r22,%r21\n", f);
	  fputs ("\tbb,>=,n %r22,30,.+16\n", f);
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("\tdepi 0,31,2,%r22\n", f);
	  else
	    fputs ("\tdepwi 0,31,2,%r22\n", f);
	  fputs ("\tldw 0(%r22),%r21\n", f);
	  fputs ("\tldw 4(%r22),%r19\n", f);
	  fputs ("\tldsid (%r21),%r1\n", f);
	  fputs ("\tmtsp %r1,%sr0\n", f);
	  fputs ("\tbe 0(%sr0,%r21)\n", f);
	  fputs ("\tldw 44(%r20),%r29\n", f);
	}
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r27\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\tldd 16(%r27),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 24(%r27),%r27\n", f);
      fputs ("\t.dword 0 ; fptr\n", f);
      fputs ("\t.dword 0 ; static link\n", f);
    }
}
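/* Sketch of the patched 32-bit trampoline (offsets from its start;
   see pa_trampoline_init below):

     0-47   code copied from the template above
     48     address of the target function
     52     static chain value
     56,60  function address and global pointer words of the plabel
	    used to call the trampoline

   In the PA 1.x variant, "bl .+8,%r20" leaves %r20 pointing 8 bytes
   past the start of the trampoline, which is why its loads use the
   displacements 40 and 44 to reach offsets 48 and 52.  */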
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 48.
   Move the static chain value to trampoline template at offset 52.
   Move the trampoline address to trampoline template at offset 56.
   Move r19 to trampoline template at offset 60.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 52);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 60);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
					     TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
							    r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
					     TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }

#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 56.  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
  return addr;
}

static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}

static rtx
pa_internal_arg_pointer (void)
{
  /* The argument pointer and the hard frame pointer are the same in
     the 32-bit runtime, so we don't need a copy.  */
  if (TARGET_64BIT)
    return copy_to_reg (virtual_incoming_args_rtx);
  else
    return virtual_incoming_args_rtx;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.  */

static bool
pa_can_eliminate (const int from, const int to)
{
  /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
  if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
    return false;

  return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  ? ! frame_pointer_needed
	  : true);
}

/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */
HOST_WIDE_INT
pa_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
      && to == STACK_POINTER_REGNUM)
    offset = -pa_compute_frame_size (get_frame_size (), 0);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}
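/* Worked example (illustrative): the PA stack grows upward and the
   frame pointer sits at the low end of the frame, so if
   pa_compute_frame_size returns 128, the elimination above replaces
   the frame pointer with the stack pointer plus -128; a slot at
   fp+8 is then addressed as sp-120.  */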
static void
pa_conditional_register_usage (void)
{
  int i;

  if (!TARGET_64BIT && !TARGET_PA_11)
    {
      for (i = 56; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
    {
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}

/* Target hook for c_mode_for_suffix.  */

static machine_mode
pa_c_mode_for_suffix (char suffix)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      if (suffix == 'q')
	return TFmode;
    }

  return VOIDmode;
}

/* Target hook for function_section.  */

static section *
pa_function_section (tree decl, enum node_frequency freq,
		     bool startup, bool exit)
{
  /* Put functions in text section if target doesn't have named sections.  */
  if (!targetm_common.have_named_sections)
    return text_section;

  /* Force nested functions into the same section as the containing
     function.  */
  if (decl
      && DECL_SECTION_NAME (decl) == NULL
      && DECL_CONTEXT (decl) != NULL_TREE
      && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
      && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
    return function_section (DECL_CONTEXT (decl));

  /* Otherwise, use the default function section.  */
  return default_function_section (decl, freq, startup, exit);
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   In 64-bit mode, we reject CONST_DOUBLEs.  We also reject CONST_INTs
   that need more than three instructions to load prior to reload.  This
   limit is somewhat arbitrary.  It takes three instructions to load a
   CONST_INT from memory but two are memory accesses.  It may be better
   to increase the allowed range for CONST_INTs.  We may also be able
   to handle CONST_DOUBLEs.  */

static bool
pa_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (tls_referenced_p (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (UINTVAL (x)))
    return false;

  if (function_label_operand (x, mode))
    return false;

  return true;
}

/* Implement TARGET_SECTION_TYPE_FLAGS.  */

static unsigned int
pa_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags;

  flags = default_section_type_flags (decl, name, reloc);

  /* Function labels are placed in the constant pool.  This can
     cause a section conflict if decls are put in ".data.rel.ro"
     or ".data.rel.ro.local" using the __attribute__ construct.  */
  if (strcmp (name, ".data.rel.ro") == 0
      || strcmp (name, ".data.rel.ro.local") == 0)
    flags |= SECTION_WRITE | SECTION_RELRO;

  return flags;
}
/* pa_legitimate_address_p recognizes an RTL expression that is a
   valid memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
   REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
   available with floating point loads and stores, and integer loads.
   We get better code by allowing indexed addresses in the initial
   RTL generation.

   The acceptance of indexed addresses as legitimate implies that we
   must provide patterns for doing indexed integer stores, or the move
   expanders must force the address of an indexed store to a register.
   We have adopted the latter approach.

   Another function of pa_legitimate_address_p is to ensure that
   the base register is a valid pointer for indexed instructions.
   On targets that have non-equivalent space registers, we have to
   know at the time of assembler output which register in a REG+REG
   pair is the base register.  The REG_POINTER flag is sometimes lost
   in reload and the following passes, so it can't be relied on during
   code generation.  Thus, we either have to canonicalize the order
   of the registers in REG+REG indexed addresses, or treat REG+REG
   addresses separately and provide patterns for both permutations.

   The latter approach requires several hundred additional lines of
   code in pa.md.  The downside to canonicalizing is that a PLUS
   in the wrong order can't combine to form a scaled indexed
   memory operand.  As we won't need to canonicalize the operands if
   the REG_POINTER lossage can be fixed, it seems better to canonicalize.

   We initially break out scaled indexed addresses in canonical order
   in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
   scaled indexed addresses during RTL generation.  However, fold_rtx
   has its own opinion on how the operands of a PLUS should be ordered.
   If one of the operands is equivalent to a constant, it will make
   that operand the second operand.  As the base register is likely to
   be equivalent to a SYMBOL_REF, we have made it the second operand.

   pa_legitimate_address_p accepts REG+REG as legitimate when the
   operands are in the order INDEX+BASE on targets with non-equivalent
   space registers, and in any order on targets with equivalent space
   registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.

   We treat a SYMBOL_REF as legitimate if it is part of the current
   function's constant pool, because such addresses can actually be
   output as REG+SMALLINT.  */
static bool
pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
		  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
	  && REG_P (XEXP (x, 0))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  if (GET_CODE (x) == PLUS)
    {
      rtx base, index;

      /* For REG+REG, the base register should be in XEXP (x, 1),
	 so check it first.  */
      if (REG_P (XEXP (x, 1))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
	base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
	base = XEXP (x, 0), index = XEXP (x, 1);
      else
	return false;

      if (GET_CODE (index) == CONST_INT)
	{
	  if (INT_5_BITS (index))
	    return true;

	  /* When INT14_OK_STRICT is false, a secondary reload is needed
	     to adjust the displacement of SImode and DImode floating point
	     instructions but this may fail when the register also needs
	     reloading.  So, we return false when STRICT is true.  We
	     also reject long displacements for float mode addresses since
	     the majority of accesses will use floating point instructions
	     that don't support 14-bit offsets.  */
	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  return base14_operand (index, mode);
	}

      if (!TARGET_DISABLE_INDEXING
	  /* Only accept the "canonical" INDEX+BASE operand order
	     on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      ? REG_P (index)
	      : (base == XEXP (x, 1) && REG_P (index)
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (base))
		     || REG_POINTER (base))
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (index))
		     || !REG_POINTER (index))))
	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
		     : REG_OK_FOR_INDEX_P (index))
	  && borx_reg_operand (base, Pmode)
	  && borx_reg_operand (index, Pmode))
	return true;

      if (!TARGET_DISABLE_INDEXING
	  && GET_CODE (index) == MULT
	  /* Only accept base operands with the REG_POINTER flag prior to
	     reload on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      || (base == XEXP (x, 1)
		  && (reload_completed
		      || (reload_in_progress && HARD_REGISTER_P (base))
		      || REG_POINTER (base))))
	  && REG_P (XEXP (index, 0))
	  && GET_MODE (XEXP (index, 0)) == Pmode
	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
	  && GET_CODE (XEXP (index, 1)) == CONST_INT
	  && INTVAL (XEXP (index, 1))
	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	  && borx_reg_operand (base, Pmode))
	return true;

      return false;
    }

  if (GET_CODE (x) == LO_SUM)
    {
      rtx y = XEXP (x, 0);

      if (GET_CODE (y) == SUBREG)
	y = SUBREG_REG (y);

      if (REG_P (y)
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
		     : REG_OK_FOR_BASE_P (y)))
	{
	  /* Needed for -fPIC  */
	  if (mode == Pmode
	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
	    return true;

	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  if (CONSTANT_P (XEXP (x, 1)))
	    return true;
	}
      return false;
    }

  if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
    return true;

  return false;
}
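/* Some concrete address forms and how the predicate above treats
   them (illustrative):

     (plus (reg) (const_int 12))
	ok: short 5-bit displacements are always valid, and longer
	14-bit displacements are checked via base14_operand
     (plus (reg INDEX) (reg BASE))
	ok in the canonical INDEX+BASE order when BASE is known (or
	can still be proven) to be a pointer
     (plus (mult (reg) (const_int 4)) (reg BASE))
	ok for SImode, since the scale must equal the mode size.  */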
/* Look for machine dependent ways to make the invalid address AD a
   valid address.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This makes reload inheritance and reload_cse work better since Z
   can be reused.

   There may be more opportunities to improve code with this hook.  */

rtx
pa_legitimize_reload_address (rtx ad, machine_mode mode,
			      int opnum, int type,
			      int ind_levels ATTRIBUTE_UNUSED)
{
  long offset, newoffset, mask;
  rtx new_rtx, temp = NULL_RTX;

  mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);

  if (optimize && GET_CODE (ad) == PLUS)
    temp = simplify_binary_operation (PLUS, Pmode,
				      XEXP (ad, 0), XEXP (ad, 1));

  new_rtx = temp ? temp : ad;

  if (optimize
      && GET_CODE (new_rtx) == PLUS
      && GET_CODE (XEXP (new_rtx, 0)) == REG
      && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
    {
      offset = INTVAL (XEXP (new_rtx, 1));

      /* Choose rounding direction.  Round up if we are >= halfway.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~mask) + mask + 1;
      else
	newoffset = offset & ~mask;

      /* Ensure that long displacements are aligned.  */
      if (mask == 0x3fff
	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
	      || (TARGET_64BIT && (mode) == DImode)))
	newoffset &= ~(GET_MODE_SIZE (mode) - 1);

      if (newoffset != 0 && VAL_14_BITS_P (newoffset))
	{
	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
			       GEN_INT (newoffset));
	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  return ad;
	}
    }

  return NULL_RTX;
}
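/* Worked example (illustrative): with MODE == SImode the mask is
   0x3fff.  For AD == (plus (reg X) (const_int 0x12345)), we have
   offset & mask == 0x2345, which is at or above the halfway point
   0x2000, so we round up to Y == 0x14000.  The address becomes

     (plus (plus (reg X) (const_int 0x14000)) (const_int -0x1cbb))

   where X + 0x14000 is reloaded into a base register that reload can
   reuse, and -0x1cbb fits in a 14-bit displacement.  */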
/* Output address vector.  */

void
pa_output_addr_vec (rtx lab, rtx body)
{
  int idx, vlen = XVECLEN (body, 0);

  if (!TARGET_SOM)
    fputs ("\t.align 4\n", asm_out_file);
  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* Output address difference vector.  */

void
pa_output_addr_diff_vec (rtx lab, rtx body)
{
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
	(asm_out_file,
	 body,
	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
	 CODE_LABEL_NUMBER (base));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* This is a helper function for the other atomic operations.  This function
   emits a loop that contains SEQ that iterates until a compare-and-swap
   operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
   a set of instructions that takes a value from OLD_REG as an input and
   produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
   set to the current contents of MEM.  After SEQ, a compare-and-swap will
   attempt to update MEM with NEW_REG.  The function returns true when the
   loop was generated successfully.  */

static bool
pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
{
  machine_mode mode = GET_MODE (mem);
  rtx_code_label *label;
  rtx cmp_reg, success, oldval;

  /* The loop we want to generate looks like

	cmp_reg = mem;
      label:
	old_reg = cmp_reg;
	seq;
	(success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
	if (!success)
	  goto label;

     Note that we only do the plain load from memory once.  Subsequent
     iterations use the value loaded by the compare-and-swap pattern.  */

  label = gen_label_rtx ();
  cmp_reg = gen_reg_rtx (mode);

  emit_move_insn (cmp_reg, mem);
  emit_label (label);
  emit_move_insn (old_reg, cmp_reg);
  if (seq)
    emit_insn (seq);

  success = NULL_RTX;
  oldval = cmp_reg;
  if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
				       new_reg, false, MEMMODEL_SYNC_SEQ_CST,
				       MEMMODEL_RELAXED))
    return false;

  if (oldval != cmp_reg)
    emit_move_insn (cmp_reg, oldval);

  /* Mark this jump predicted not taken.  */
  emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
			   GET_MODE (success), 1, label,
			   profile_probability::guessed_never ());
  return true;
}

/* This function tries to implement an atomic exchange operation using a
   compare_and_swap loop.  VAL is written to *MEM.  The previous contents of
   *MEM are returned, using TARGET if possible.  No memory model is required
   since a compare_and_swap loop is seq-cst.  */

rtx
pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
{
  machine_mode mode = GET_MODE (mem);

  if (can_compare_and_swap_p (mode, true))
    {
      if (!target || !register_operand (target, mode))
	target = gen_reg_rtx (mode);
      if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
	return target;
    }

  return NULL_RTX;
}
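/* Sketch of how the loop helper above is used to build a full atomic
   read-modify-write (illustrative; the modify step is a placeholder):

     rtx old_reg = gen_reg_rtx (mode);
     rtx new_reg = gen_reg_rtx (mode);
     start_sequence ();
     ... emit insns computing new_reg from old_reg ...
     rtx seq = get_insns ();
     end_sequence ();
     pa_expand_compare_and_swap_loop (mem, old_reg, new_reg, seq);

   Each iteration recomputes NEW_REG from the value observed in MEM
   and retries until the compare-and-swap succeeds.  */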
/* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
   arguments passed by hidden reference in the 32-bit HP runtime.  Users
   can override this behavior for better compatibility with OpenMP at the
   risk of library incompatibilities.  Arguments are always passed by value
   in the 64-bit HP runtime.  */

static bool
pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED,
		  machine_mode mode ATTRIBUTE_UNUSED,
		  const_tree type ATTRIBUTE_UNUSED,
		  bool named ATTRIBUTE_UNUSED)
{
  return !TARGET_CALLER_COPIES;
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
{
  return PA_HARD_REGNO_NREGS (regno, mode);
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return PA_HARD_REGNO_MODE_OK (regno, mode);
}

/* Implement TARGET_STARTING_FRAME_OFFSET.

   On the 32-bit ports, we reserve one slot for the previous frame
   pointer and one fill slot.  The fill slot is for compatibility
   with HP compiled programs.  On the 64-bit ports, we reserve one
   slot for the previous frame pointer.  */

static HOST_WIDE_INT
pa_starting_frame_offset (void)
{
  return 8;
}

/* Figure out the size in words of the function argument.  The size
   returned by this function should always be greater than zero because
   we pass variable and zero sized objects by reference.  */

HOST_WIDE_INT
pa_function_arg_size (machine_mode mode, const_tree type)
{
  HOST_WIDE_INT size;

  size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
  return CEIL (size, UNITS_PER_WORD);
}

#include "gt-pa.h"