/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2013 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "dbxout.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "function.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "df.h"
#include "opts.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, enum machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (enum machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, enum machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, enum machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, enum machine_mode,
                            const_tree, bool);
static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        enum machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
                                                   enum machine_mode, int *,
                                                   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static enum machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (enum machine_mode, rtx);
static bool pa_legitimate_constant_p (enum machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (enum machine_mode, rtx, bool);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */
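/* Each hook below is first #undef'd to drop the default definition
   supplied by target-def.h and then redefined to the PA-specific
   version; TARGET_INITIALIZER then collects the final set of hooks
   into the targetm structure defined below.  */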

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
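  /* For example, "-mfixed-range=%fr20-%fr23" marks %fr20 through %fr23
     as fixed, and several comma-separated ranges may be given:
     "-mfixed-range=%fr20-%fr23,%r25-%r26".  (Illustrative register
     names; anything decode_reg_name accepts will work.)  */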

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (targetm_common.except_unwind_info (&global_options) == UI_DWARF2
          && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

#ifdef AUTO_INC_DEC
  /* FIXME: Disable auto increment and decrement processing until reload
     is completed.  See PR middle-end 56791.  */
  flag_auto_inc_dec = reload_completed;
#endif

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   enum machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

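/* For example, with the HP-UX long-double library in effect, the
   builtins registered above let user code write
     __float128 y = __builtin_fabsq (x);
   which expands to a call to _U_Qfabs, while __builtin_infq simply
   materializes a TFmode infinity constant from memory.  (Illustrative
   use; see pa_init_builtins and pa_expand_builtin above.)  */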
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          /* Make sure we have a label and not a note.  */
          if (LABEL_P (orig))
            LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (Pmode, base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= (mask + 1) / 2
          Y = (<large int> & ~mask) + mask + 1        Round up.
        else
          Y = (<large int> & ~mask)                   Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
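
   For example, with the 14-bit mask 0x3fff and memory (X + 0x4010):
   0x4010 & 0x3fff is 0x10, which is below the halfway point 0x2000,
   so we round down and Y = 0x4000.  We emit Z = X + 0x4000 and use
   memory (Z + 0x10); a nearby memory (X + 0x4020) rounds to the same
   Y and can share Z.  (Illustrative values.)
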
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (pa_tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      HOST_WIDE_INT newoffset;
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine cannot
         handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */
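  /* A shadd_constant is 2, 4 or 8, so the multiply and the add can be
     done with a single sh1add, sh2add or sh3add instruction; e.g.
     a * 8 + b becomes "sh3add a,b,t".  (Illustrative operands.)  */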

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                             gen_rtx_MULT (Pmode,
                                                           reg2,
                                                           GEN_INT (val)),
                                             reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_MULT (Pmode,
                                            XEXP (XEXP (XEXP (x, 0), 0), 0),
                                            XEXP (XEXP (XEXP (x, 0), 0), 1)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          HOST_WIDE_INT val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
            (Pmode, gen_rtx_PLUS (Pmode,
                                  gen_rtx_MULT (Pmode, reg1,
                                                XEXP (XEXP (idx, 0), 1)),
                                  base));
        }

      /* If B + C is still a valid base register, then add them.  */
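      /* E.g., if IDX is (plus (mult (a) (8)) (40)) and BASE is a
         pointer register B, then 40 is within range, so we compute
         reg1 = B + 40 and return (plus (mult (a) (8)) (reg1)) as a
         scaled indexed address.  (Illustrative operands.)  */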
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          HOST_WIDE_INT val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                                 gen_rtx_MULT (Pmode,
                                                               reg2,
                                                               GEN_INT (val)),
                                                 reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_MULT (Pmode, reg1,
                                                    XEXP (XEXP (idx, 0), 1)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
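          /* E.g., for (plus (mult (reg) (8)) (const (plus (symbol_ref)
             (64)))): 64 divides evenly by 8, so the symbol_ref is
             forced into a base register and 64/8 = 8 is added to REG
             before scaling.  (Illustrative values.)  */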
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              gen_rtx_MULT (Pmode,
                                                            reg2,
                                                            GEN_INT (val)),
                                              reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode, regx2,
                                                       XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG, REG+CONST and LO_SUM addresses cost 1, a bare
   HIGH costs 2, and everything else, symbolic constants included,
   costs 4.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, enum machine_mode mode ATTRIBUTE_UNUSED,
                   addr_space_t as ATTRIBUTE_UNUSED,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
                int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
        factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = factor * factor * COSTS_N_INSNS (8);
      else
        *total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
        factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* This includes shNadd insns.  */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A size N times larger than UNITS_PER_WORD needs N times as
         many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
      if (factor == 0)
        factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return pa_tls_referenced_p (x);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp, true);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && (MEM_P (operand1)
          || (GET_CODE (operand1) == SUBREG
              && MEM_P (XEXP (operand1, 0))))
      && !floating_point_store_memory_operand (operand1, mode))
    {
      if (GET_CODE (operand1) == SUBREG)
        operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (reg_plus_base_memory_operand (operand1, mode)
          && !(TARGET_PA_20
               && !TARGET_ELF32
               && INT_14_BITS (XEXP (XEXP (operand1, 0), 1))))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand1, 0), 0),
                                          scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
                              replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
           && fp_reg_operand (operand1, mode)
           && (MEM_P (operand0)
               || (GET_CODE (operand0) == SUBREG
                   && MEM_P (XEXP (operand0, 0))))
           && !floating_point_store_memory_operand (operand0, mode))
    {
      if (GET_CODE (operand0) == SUBREG)
        operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
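      /* A 14-bit signed displacement covers -8192..8191; e.g., the
         offset in (mem (plus (reg) (16384))) must first be loaded
         into the scratch register.  (Illustrative offset.)  */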
      if (reg_plus_base_memory_operand (operand0, mode)
          && !(TARGET_PA_20
               && !TARGET_ELF32
               && INT_14_BITS (XEXP (XEXP (operand0, 0), 1))))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand0, 0), 0),
                                          scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
                              replace_equiv_address (operand0, scratch_reg),
                              operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.  For the most part,
     this is only necessary for SImode and DImode.

     Use scratch_reg to hold the address of the memory location.  */
  else if (scratch_reg
           && CONSTANT_P (operand1)
           && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      if (operand1 == CONST0_RTX (mode))
        {
          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
          return 1;
        }

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
         memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      pa_emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
                              replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory or a constant.  */
  else if (scratch_reg
           && GET_CODE (operand0) == REG
           && REGNO (operand0) < FIRST_PSEUDO_REGISTER
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
           && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
    {
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (GET_CODE (operand1) == MEM
          && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
        {
          /* We are reloading the address into the scratch register, so we
             want to make sure the scratch register is a full register.  */
          scratch_reg = force_mode (word_mode, scratch_reg);

          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand1, 0), 0),
                                          scratch_reg));

          /* Now we are going to load the scratch register from memory,
             we want to load it in the same width as the original MEM,
             which must be the same as the width of the ultimate destination,
             OPERAND0.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg,
                          replace_equiv_address (operand1, scratch_reg));
        }
      else
        {
          /* We want to load the scratch register using the same mode as
             the ultimate destination.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg, operand1);
        }

      /* And emit the insn to set the ultimate destination.  We know that
We know that 1815 the scratch register has the same mode as the destination at this 1816 point. */ 1817 emit_move_insn (operand0, scratch_reg); 1818 return 1; 1819 } 1820 /* Handle the most common case: storing into a register. */ 1821 else if (register_operand (operand0, mode)) 1822 { 1823 /* Legitimize TLS symbol references. This happens for references 1824 that aren't a legitimate constant. */ 1825 if (PA_SYMBOL_REF_TLS_P (operand1)) 1826 operand1 = legitimize_tls_address (operand1); 1827 1828 if (register_operand (operand1, mode) 1829 || (GET_CODE (operand1) == CONST_INT 1830 && pa_cint_ok_for_move (INTVAL (operand1))) 1831 || (operand1 == CONST0_RTX (mode)) 1832 || (GET_CODE (operand1) == HIGH 1833 && !symbolic_operand (XEXP (operand1, 0), VOIDmode)) 1834 /* Only `general_operands' can come here, so MEM is ok. */ 1835 || GET_CODE (operand1) == MEM) 1836 { 1837 /* Various sets are created during RTL generation which don't 1838 have the REG_POINTER flag correctly set. After the CSE pass, 1839 instruction recognition can fail if we don't consistently 1840 set this flag when performing register copies. This should 1841 also improve the opportunities for creating insns that use 1842 unscaled indexing. */ 1843 if (REG_P (operand0) && REG_P (operand1)) 1844 { 1845 if (REG_POINTER (operand1) 1846 && !REG_POINTER (operand0) 1847 && !HARD_REGISTER_P (operand0)) 1848 copy_reg_pointer (operand0, operand1); 1849 } 1850 1851 /* When MEMs are broken out, the REG_POINTER flag doesn't 1852 get set. In some cases, we can set the REG_POINTER flag 1853 from the declaration for the MEM. */ 1854 if (REG_P (operand0) 1855 && GET_CODE (operand1) == MEM 1856 && !REG_POINTER (operand0)) 1857 { 1858 tree decl = MEM_EXPR (operand1); 1859 1860 /* Set the register pointer flag and register alignment 1861 if the declaration for this memory reference is a 1862 pointer type. */ 1863 if (decl) 1864 { 1865 tree type; 1866 1867 /* If this is a COMPONENT_REF, use the FIELD_DECL from 1868 tree operand 1. */ 1869 if (TREE_CODE (decl) == COMPONENT_REF) 1870 decl = TREE_OPERAND (decl, 1); 1871 1872 type = TREE_TYPE (decl); 1873 type = strip_array_types (type); 1874 1875 if (POINTER_TYPE_P (type)) 1876 { 1877 int align; 1878 1879 type = TREE_TYPE (type); 1880 /* Using TYPE_ALIGN_OK is rather conservative as 1881 only the ada frontend actually sets it. */ 1882 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type) 1883 : BITS_PER_UNIT); 1884 mark_reg_pointer (operand0, align); 1885 } 1886 } 1887 } 1888 1889 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); 1890 return 1; 1891 } 1892 } 1893 else if (GET_CODE (operand0) == MEM) 1894 { 1895 if (mode == DFmode && operand1 == CONST0_RTX (mode) 1896 && !(reload_in_progress || reload_completed)) 1897 { 1898 rtx temp = gen_reg_rtx (DFmode); 1899 1900 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1)); 1901 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp)); 1902 return 1; 1903 } 1904 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode)) 1905 { 1906 /* Run this case quickly. */ 1907 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); 1908 return 1; 1909 } 1910 if (! (reload_in_progress || reload_completed)) 1911 { 1912 operands[0] = validize_mem (operand0); 1913 operands[1] = operand1 = force_reg (mode, operand1); 1914 } 1915 } 1916 1917 /* Simplify the source if we need to. 1918 Note we do have to handle function labels here, even though we do 1919 not consider them legitimate constants. Loop optimizations can 1920 call the emit_move_xxx with one as a source. 
*/ 1921 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode)) 1922 || (GET_CODE (operand1) == HIGH 1923 && symbolic_operand (XEXP (operand1, 0), mode)) 1924 || function_label_operand (operand1, VOIDmode) 1925 || pa_tls_referenced_p (operand1)) 1926 { 1927 int ishighonly = 0; 1928 1929 if (GET_CODE (operand1) == HIGH) 1930 { 1931 ishighonly = 1; 1932 operand1 = XEXP (operand1, 0); 1933 } 1934 if (symbolic_operand (operand1, mode)) 1935 { 1936 /* Argh. The assembler and linker can't handle arithmetic 1937 involving plabels. 1938 1939 So we force the plabel into memory, load operand0 from 1940 the memory location, then add in the constant part. */ 1941 if ((GET_CODE (operand1) == CONST 1942 && GET_CODE (XEXP (operand1, 0)) == PLUS 1943 && function_label_operand (XEXP (XEXP (operand1, 0), 0), 1944 VOIDmode)) 1945 || function_label_operand (operand1, VOIDmode)) 1946 { 1947 rtx temp, const_part; 1948 1949 /* Figure out what (if any) scratch register to use. */ 1950 if (reload_in_progress || reload_completed) 1951 { 1952 scratch_reg = scratch_reg ? scratch_reg : operand0; 1953 /* SCRATCH_REG will hold an address and maybe the actual 1954 data. We want it in WORD_MODE regardless of what mode it 1955 was originally given to us. */ 1956 scratch_reg = force_mode (word_mode, scratch_reg); 1957 } 1958 else if (flag_pic) 1959 scratch_reg = gen_reg_rtx (Pmode); 1960 1961 if (GET_CODE (operand1) == CONST) 1962 { 1963 /* Save away the constant part of the expression. */ 1964 const_part = XEXP (XEXP (operand1, 0), 1); 1965 gcc_assert (GET_CODE (const_part) == CONST_INT); 1966 1967 /* Force the function label into memory. */ 1968 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0)); 1969 } 1970 else 1971 { 1972 /* No constant part. */ 1973 const_part = NULL_RTX; 1974 1975 /* Force the function label into memory. */ 1976 temp = force_const_mem (mode, operand1); 1977 } 1978 1979 1980 /* Get the address of the memory location. PIC-ify it if 1981 necessary. */ 1982 temp = XEXP (temp, 0); 1983 if (flag_pic) 1984 temp = legitimize_pic_address (temp, mode, scratch_reg); 1985 1986 /* Put the address of the memory location into our destination 1987 register. */ 1988 operands[1] = temp; 1989 pa_emit_move_sequence (operands, mode, scratch_reg); 1990 1991 /* Now load from the memory location into our destination 1992 register. */ 1993 operands[1] = gen_rtx_MEM (Pmode, operands[0]); 1994 pa_emit_move_sequence (operands, mode, scratch_reg); 1995 1996 /* And add back in the constant part. */ 1997 if (const_part != NULL_RTX) 1998 expand_inc (operand0, const_part); 1999 2000 return 1; 2001 } 2002 2003 if (flag_pic) 2004 { 2005 rtx temp; 2006 2007 if (reload_in_progress || reload_completed) 2008 { 2009 temp = scratch_reg ? scratch_reg : operand0; 2010 /* TEMP will hold an address and maybe the actual 2011 data. We want it in WORD_MODE regardless of what mode it 2012 was originally given to us. */ 2013 temp = force_mode (word_mode, temp); 2014 } 2015 else 2016 temp = gen_reg_rtx (Pmode); 2017 2018 /* (const (plus (symbol) (const_int))) must be forced to 2019 memory during/after reload if the const_int will not fit 2020 in 14 bits. 
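(Hypothetical example: a reference such as symbol+32768 has a constant part outside the signed 14-bit range [-8192, 8191] and therefore must take the force-to-memory path below.)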
*/ 2021 if (GET_CODE (operand1) == CONST 2022 && GET_CODE (XEXP (operand1, 0)) == PLUS 2023 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT 2024 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)) 2025 && (reload_completed || reload_in_progress) 2026 && flag_pic) 2027 { 2028 rtx const_mem = force_const_mem (mode, operand1); 2029 operands[1] = legitimize_pic_address (XEXP (const_mem, 0), 2030 mode, temp); 2031 operands[1] = replace_equiv_address (const_mem, operands[1]); 2032 pa_emit_move_sequence (operands, mode, temp); 2033 } 2034 else 2035 { 2036 operands[1] = legitimize_pic_address (operand1, mode, temp); 2037 if (REG_P (operand0) && REG_P (operands[1])) 2038 copy_reg_pointer (operand0, operands[1]); 2039 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1])); 2040 } 2041 } 2042 /* On the HPPA, references to data space are supposed to use dp, 2043 register 27, but showing it in the RTL inhibits various cse 2044 and loop optimizations. */ 2045 else 2046 { 2047 rtx temp, set; 2048 2049 if (reload_in_progress || reload_completed) 2050 { 2051 temp = scratch_reg ? scratch_reg : operand0; 2052 /* TEMP will hold an address and maybe the actual 2053 data. We want it in WORD_MODE regardless of what mode it 2054 was originally given to us. */ 2055 temp = force_mode (word_mode, temp); 2056 } 2057 else 2058 temp = gen_reg_rtx (mode); 2059 2060 /* Loading a SYMBOL_REF into a register makes that register 2061 safe to be used as the base in an indexed address. 2062 2063 Don't mark hard registers though. That loses. */ 2064 if (GET_CODE (operand0) == REG 2065 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) 2066 mark_reg_pointer (operand0, BITS_PER_UNIT); 2067 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER) 2068 mark_reg_pointer (temp, BITS_PER_UNIT); 2069 2070 if (ishighonly) 2071 set = gen_rtx_SET (mode, operand0, temp); 2072 else 2073 set = gen_rtx_SET (VOIDmode, 2074 operand0, 2075 gen_rtx_LO_SUM (mode, temp, operand1)); 2076 2077 emit_insn (gen_rtx_SET (VOIDmode, 2078 temp, 2079 gen_rtx_HIGH (mode, operand1))); 2080 emit_insn (set); 2081 2082 } 2083 return 1; 2084 } 2085 else if (pa_tls_referenced_p (operand1)) 2086 { 2087 rtx tmp = operand1; 2088 rtx addend = NULL; 2089 2090 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) 2091 { 2092 addend = XEXP (XEXP (tmp, 0), 1); 2093 tmp = XEXP (XEXP (tmp, 0), 0); 2094 } 2095 2096 gcc_assert (GET_CODE (tmp) == SYMBOL_REF); 2097 tmp = legitimize_tls_address (tmp); 2098 if (addend) 2099 { 2100 tmp = gen_rtx_PLUS (mode, tmp, addend); 2101 tmp = force_operand (tmp, operands[0]); 2102 } 2103 operands[1] = tmp; 2104 } 2105 else if (GET_CODE (operand1) != CONST_INT 2106 || !pa_cint_ok_for_move (INTVAL (operand1))) 2107 { 2108 rtx insn, temp; 2109 rtx op1 = operand1; 2110 HOST_WIDE_INT value = 0; 2111 HOST_WIDE_INT insv = 0; 2112 int insert = 0; 2113 2114 if (GET_CODE (operand1) == CONST_INT) 2115 value = INTVAL (operand1); 2116 2117 if (TARGET_64BIT 2118 && GET_CODE (operand1) == CONST_INT 2119 && HOST_BITS_PER_WIDE_INT > 32 2120 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32) 2121 { 2122 HOST_WIDE_INT nval; 2123 2124 /* Extract the low order 32 bits of the value and sign extend. 2125 If the new value is the same as the original value, we can 2126 use the original value as-is. If the new value is 2127 different, we use it and insert the most-significant 32-bits 2128 of the original value into the final result. 
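(Worked example, for illustration: for value 0x100000000 the low-order 32 bits sign extend to nval == 0, which differs from value, so we load 0 first and later insert insv == 1, the upper 32 bits, with the insv code further below.)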
*/ 2129 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1)) 2130 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31); 2131 if (value != nval) 2132 { 2133 #if HOST_BITS_PER_WIDE_INT > 32 2134 insv = value >= 0 ? value >> 32 : ~(~value >> 32); 2135 #endif 2136 insert = 1; 2137 value = nval; 2138 operand1 = GEN_INT (nval); 2139 } 2140 } 2141 2142 if (reload_in_progress || reload_completed) 2143 temp = scratch_reg ? scratch_reg : operand0; 2144 else 2145 temp = gen_reg_rtx (mode); 2146 2147 /* We don't directly split DImode constants on 32-bit targets 2148 because PLUS uses an 11-bit immediate and the insn sequence 2149 generated is not as efficient as the one using HIGH/LO_SUM. */ 2150 if (GET_CODE (operand1) == CONST_INT 2151 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD 2152 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT 2153 && !insert) 2154 { 2155 /* Directly break constant into high and low parts. This 2156 provides better optimization opportunities because various 2157 passes recognize constants split with PLUS but not LO_SUM. 2158 We use a 14-bit signed low part except when the addition 2159 of 0x4000 to the high part might change the sign of the 2160 high part. */ 2161 HOST_WIDE_INT low = value & 0x3fff; 2162 HOST_WIDE_INT high = value & ~ 0x3fff; 2163 2164 if (low >= 0x2000) 2165 { 2166 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000)) 2167 high += 0x2000; 2168 else 2169 high += 0x4000; 2170 } 2171 2172 low = value - high; 2173 2174 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high))); 2175 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low)); 2176 } 2177 else 2178 { 2179 emit_insn (gen_rtx_SET (VOIDmode, temp, 2180 gen_rtx_HIGH (mode, operand1))); 2181 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1); 2182 } 2183 2184 insn = emit_move_insn (operands[0], operands[1]); 2185 2186 /* Now insert the most significant 32 bits of the value 2187 into the register. When we don't have a second register 2188 available, it could take up to nine instructions to load 2189 a 64-bit integer constant. Prior to reload, we force 2190 constants that would take more than three instructions 2191 to load to the constant pool. During and after reload, 2192 we have to handle all possible values. */ 2193 if (insert) 2194 { 2195 /* Use a HIGH/LO_SUM/INSV sequence if we have a second 2196 register and the value to be inserted is outside the 2197 range that can be loaded with three depdi instructions. */ 2198 if (temp != operand0 && (insv >= 16384 || insv < -16384)) 2199 { 2200 operand1 = GEN_INT (insv); 2201 2202 emit_insn (gen_rtx_SET (VOIDmode, temp, 2203 gen_rtx_HIGH (mode, operand1))); 2204 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1)); 2205 if (mode == DImode) 2206 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32), 2207 const0_rtx, temp)); 2208 else 2209 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32), 2210 const0_rtx, temp)); 2211 } 2212 else 2213 { 2214 int len = 5, pos = 27; 2215 2216 /* Insert the bits using the depdi instruction. */ 2217 while (pos >= 0) 2218 { 2219 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16; 2220 HOST_WIDE_INT sign = v5 < 0; 2221 2222 /* Left extend the insertion. */ 2223 insv = (insv >= 0 ? insv >> len : ~(~insv >> len)); 2224 while (pos > 0 && (insv & 1) == sign) 2225 { 2226 insv = (insv >= 0 ? 
insv >> 1 : ~(~insv >> 1)); 2227 len += 1; 2228 pos -= 1; 2229 } 2230 2231 if (mode == DImode) 2232 insn = emit_insn (gen_insvdi (operand0, 2233 GEN_INT (len), 2234 GEN_INT (pos), 2235 GEN_INT (v5))); 2236 else 2237 insn = emit_insn (gen_insvsi (operand0, 2238 GEN_INT (len), 2239 GEN_INT (pos), 2240 GEN_INT (v5))); 2241 2242 len = pos > 0 && pos < 5 ? pos : 5; 2243 pos -= len; 2244 } 2245 } 2246 } 2247 2248 set_unique_reg_note (insn, REG_EQUAL, op1); 2249 2250 return 1; 2251 } 2252 } 2253 /* Now have insn-emit do whatever it normally does. */ 2254 return 0; 2255 } 2256 2257 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning 2258 it will need a link/runtime reloc). */ 2259 2260 int 2261 pa_reloc_needed (tree exp) 2262 { 2263 int reloc = 0; 2264 2265 switch (TREE_CODE (exp)) 2266 { 2267 case ADDR_EXPR: 2268 return 1; 2269 2270 case POINTER_PLUS_EXPR: 2271 case PLUS_EXPR: 2272 case MINUS_EXPR: 2273 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0)); 2274 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1)); 2275 break; 2276 2277 CASE_CONVERT: 2278 case NON_LVALUE_EXPR: 2279 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0)); 2280 break; 2281 2282 case CONSTRUCTOR: 2283 { 2284 tree value; 2285 unsigned HOST_WIDE_INT ix; 2286 2287 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value) 2288 if (value) 2289 reloc |= pa_reloc_needed (value); 2290 } 2291 break; 2292 2293 case ERROR_MARK: 2294 break; 2295 2296 default: 2297 break; 2298 } 2299 return reloc; 2300 } 2301 2302 2303 /* Return the best assembler insn template 2304 for moving operands[1] into operands[0] as a fullword. */ 2305 const char * 2306 pa_singlemove_string (rtx *operands) 2307 { 2308 HOST_WIDE_INT intval; 2309 2310 if (GET_CODE (operands[0]) == MEM) 2311 return "stw %r1,%0"; 2312 if (GET_CODE (operands[1]) == MEM) 2313 return "ldw %1,%0"; 2314 if (GET_CODE (operands[1]) == CONST_DOUBLE) 2315 { 2316 long i; 2317 REAL_VALUE_TYPE d; 2318 2319 gcc_assert (GET_MODE (operands[1]) == SFmode); 2320 2321 /* Translate the CONST_DOUBLE to a CONST_INT with the same target 2322 bit pattern. */ 2323 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]); 2324 REAL_VALUE_TO_TARGET_SINGLE (d, i); 2325 2326 operands[1] = GEN_INT (i); 2327 /* Fall through to CONST_INT case. */ 2328 } 2329 if (GET_CODE (operands[1]) == CONST_INT) 2330 { 2331 intval = INTVAL (operands[1]); 2332 2333 if (VAL_14_BITS_P (intval)) 2334 return "ldi %1,%0"; 2335 else if ((intval & 0x7ff) == 0) 2336 return "ldil L'%1,%0"; 2337 else if (pa_zdepi_cint_p (intval)) 2338 return "{zdepi %Z1,%0|depwi,z %Z1,%0}"; 2339 else 2340 return "ldil L'%1,%0\n\tldo R'%1(%0),%0"; 2341 } 2342 return "copy %1,%0"; 2343 } 2344 2345 2346 /* Compute position (in OP[1]) and width (in OP[2]) 2347 useful for copying IMM to a register using the zdepi 2348 instructions. Store the immediate value to insert in OP[0]. */ 2349 static void 2350 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) 2351 { 2352 int lsb, len; 2353 2354 /* Find the least significant set bit in IMM. */ 2355 for (lsb = 0; lsb < 32; lsb++) 2356 { 2357 if ((imm & 1) != 0) 2358 break; 2359 imm >>= 1; 2360 } 2361 2362 /* Choose variants based on *sign* of the 5-bit field. */ 2363 if ((imm & 0x10) == 0) 2364 len = (lsb <= 28) ? 4 : 32 - lsb; 2365 else 2366 { 2367 /* Find the width of the bitstring in IMM. */ 2368 for (len = 5; len < 32 - lsb; len++) 2369 { 2370 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) 2371 break; 2372 } 2373 2374 /* Sign extend IMM as a 5-bit value. 
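(E.g., if the low five bits of IMM are 10001 in binary, i.e. 17, then (17 & 0xf) - 0x10 yields -15, the two's-complement value of that 5-bit field.)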
*/ 2375 imm = (imm & 0xf) - 0x10; 2376 } 2377 2378 op[0] = imm; 2379 op[1] = 31 - lsb; 2380 op[2] = len; 2381 } 2382 2383 /* Compute position (in OP[1]) and width (in OP[2]) 2384 useful for copying IMM to a register using the depdi,z 2385 instructions. Store the immediate value to insert in OP[0]. */ 2386 2387 static void 2388 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) 2389 { 2390 int lsb, len, maxlen; 2391 2392 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64); 2393 2394 /* Find the least significant set bit in IMM. */ 2395 for (lsb = 0; lsb < maxlen; lsb++) 2396 { 2397 if ((imm & 1) != 0) 2398 break; 2399 imm >>= 1; 2400 } 2401 2402 /* Choose variants based on *sign* of the 5-bit field. */ 2403 if ((imm & 0x10) == 0) 2404 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb; 2405 else 2406 { 2407 /* Find the width of the bitstring in IMM. */ 2408 for (len = 5; len < maxlen - lsb; len++) 2409 { 2410 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) 2411 break; 2412 } 2413 2414 /* Extend length if host is narrow and IMM is negative. */ 2415 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb) 2416 len += 32; 2417 2418 /* Sign extend IMM as a 5-bit value. */ 2419 imm = (imm & 0xf) - 0x10; 2420 } 2421 2422 op[0] = imm; 2423 op[1] = 63 - lsb; 2424 op[2] = len; 2425 } 2426 2427 /* Output assembler code to perform a doubleword move insn 2428 with operands OPERANDS. */ 2429 2430 const char * 2431 pa_output_move_double (rtx *operands) 2432 { 2433 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1; 2434 rtx latehalf[2]; 2435 rtx addreg0 = 0, addreg1 = 0; 2436 2437 /* First classify both operands. */ 2438 2439 if (REG_P (operands[0])) 2440 optype0 = REGOP; 2441 else if (offsettable_memref_p (operands[0])) 2442 optype0 = OFFSOP; 2443 else if (GET_CODE (operands[0]) == MEM) 2444 optype0 = MEMOP; 2445 else 2446 optype0 = RNDOP; 2447 2448 if (REG_P (operands[1])) 2449 optype1 = REGOP; 2450 else if (CONSTANT_P (operands[1])) 2451 optype1 = CNSTOP; 2452 else if (offsettable_memref_p (operands[1])) 2453 optype1 = OFFSOP; 2454 else if (GET_CODE (operands[1]) == MEM) 2455 optype1 = MEMOP; 2456 else 2457 optype1 = RNDOP; 2458 2459 /* Check for the cases that the operand constraints are not 2460 supposed to allow to happen. */ 2461 gcc_assert (optype0 == REGOP || optype1 == REGOP); 2462 2463 /* Handle copies between general and floating registers. */ 2464 2465 if (optype0 == REGOP && optype1 == REGOP 2466 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1])) 2467 { 2468 if (FP_REG_P (operands[0])) 2469 { 2470 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands); 2471 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands); 2472 return "{fldds|fldd} -16(%%sp),%0"; 2473 } 2474 else 2475 { 2476 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands); 2477 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands); 2478 return "{ldws|ldw} -12(%%sp),%R0"; 2479 } 2480 } 2481 2482 /* Handle auto decrementing and incrementing loads and stores 2483 specifically, since the structure of the function doesn't work 2484 for them without major modification. Do it better when we learn 2485 this port about the general inc/dec addressing of PA. 2486 (This was written by tege. Chide him if it doesn't work.) */ 2487 2488 if (optype0 == MEMOP) 2489 { 2490 /* We have to output the address syntax ourselves, since print_operand 2491 doesn't deal with the addresses we want to use. Fix this later. 
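(As an illustration of the POST_INC case below: the ,ma completer stores the low word and bumps the base by 8 in one instruction, and the high word is then stored at -4 relative to the already-updated base, avoiding a separate address update.)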
*/ 2492 2493 rtx addr = XEXP (operands[0], 0); 2494 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) 2495 { 2496 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); 2497 2498 operands[0] = XEXP (addr, 0); 2499 gcc_assert (GET_CODE (operands[1]) == REG 2500 && GET_CODE (operands[0]) == REG); 2501 2502 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); 2503 2504 /* No overlap between high target register and address 2505 register. (We do this in a non-obvious way to 2506 save a register file writeback) */ 2507 if (GET_CODE (addr) == POST_INC) 2508 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)"; 2509 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)"; 2510 } 2511 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 2512 { 2513 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); 2514 2515 operands[0] = XEXP (addr, 0); 2516 gcc_assert (GET_CODE (operands[1]) == REG 2517 && GET_CODE (operands[0]) == REG); 2518 2519 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); 2520 /* No overlap between high target register and address 2521 register. (We do this in a non-obvious way to save a 2522 register file writeback) */ 2523 if (GET_CODE (addr) == PRE_INC) 2524 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)"; 2525 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)"; 2526 } 2527 } 2528 if (optype1 == MEMOP) 2529 { 2530 /* We have to output the address syntax ourselves, since print_operand 2531 doesn't deal with the addresses we want to use. Fix this later. */ 2532 2533 rtx addr = XEXP (operands[1], 0); 2534 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) 2535 { 2536 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); 2537 2538 operands[1] = XEXP (addr, 0); 2539 gcc_assert (GET_CODE (operands[0]) == REG 2540 && GET_CODE (operands[1]) == REG); 2541 2542 if (!reg_overlap_mentioned_p (high_reg, addr)) 2543 { 2544 /* No overlap between high target register and address 2545 register. (We do this in a non-obvious way to 2546 save a register file writeback) */ 2547 if (GET_CODE (addr) == POST_INC) 2548 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0"; 2549 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0"; 2550 } 2551 else 2552 { 2553 /* This is an undefined situation. We should load into the 2554 address register *and* update that register. Probably 2555 we don't need to handle this at all. */ 2556 if (GET_CODE (addr) == POST_INC) 2557 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0"; 2558 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0"; 2559 } 2560 } 2561 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 2562 { 2563 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); 2564 2565 operands[1] = XEXP (addr, 0); 2566 gcc_assert (GET_CODE (operands[0]) == REG 2567 && GET_CODE (operands[1]) == REG); 2568 2569 if (!reg_overlap_mentioned_p (high_reg, addr)) 2570 { 2571 /* No overlap between high target register and address 2572 register. (We do this in a non-obvious way to 2573 save a register file writeback) */ 2574 if (GET_CODE (addr) == PRE_INC) 2575 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0"; 2576 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0"; 2577 } 2578 else 2579 { 2580 /* This is an undefined situation. We should load into the 2581 address register *and* update that register. Probably 2582 we don't need to handle this at all. 
*/ 2583 if (GET_CODE (addr) == PRE_INC) 2584 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0"; 2585 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0"; 2586 } 2587 } 2588 else if (GET_CODE (addr) == PLUS 2589 && GET_CODE (XEXP (addr, 0)) == MULT) 2590 { 2591 rtx xoperands[4]; 2592 2593 /* Load address into left half of destination register. */ 2594 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); 2595 xoperands[1] = XEXP (addr, 1); 2596 xoperands[2] = XEXP (XEXP (addr, 0), 0); 2597 xoperands[3] = XEXP (XEXP (addr, 0), 1); 2598 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}", 2599 xoperands); 2600 return "ldw 4(%0),%R0\n\tldw 0(%0),%0"; 2601 } 2602 else if (GET_CODE (addr) == PLUS 2603 && REG_P (XEXP (addr, 0)) 2604 && REG_P (XEXP (addr, 1))) 2605 { 2606 rtx xoperands[3]; 2607 2608 /* Load address into left half of destination register. */ 2609 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); 2610 xoperands[1] = XEXP (addr, 0); 2611 xoperands[2] = XEXP (addr, 1); 2612 output_asm_insn ("{addl|add,l} %1,%2,%0", 2613 xoperands); 2614 return "ldw 4(%0),%R0\n\tldw 0(%0),%0"; 2615 } 2616 } 2617 2618 /* If an operand is an unoffsettable memory ref, find a register 2619 we can increment temporarily to make it refer to the second word. */ 2620 2621 if (optype0 == MEMOP) 2622 addreg0 = find_addr_reg (XEXP (operands[0], 0)); 2623 2624 if (optype1 == MEMOP) 2625 addreg1 = find_addr_reg (XEXP (operands[1], 0)); 2626 2627 /* Ok, we can do one word at a time. 2628 Normally we do the low-numbered word first. 2629 2630 In either case, set up in LATEHALF the operands to use 2631 for the high-numbered word and in some cases alter the 2632 operands in OPERANDS to be suitable for the low-numbered word. */ 2633 2634 if (optype0 == REGOP) 2635 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); 2636 else if (optype0 == OFFSOP) 2637 latehalf[0] = adjust_address_nv (operands[0], SImode, 4); 2638 else 2639 latehalf[0] = operands[0]; 2640 2641 if (optype1 == REGOP) 2642 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); 2643 else if (optype1 == OFFSOP) 2644 latehalf[1] = adjust_address_nv (operands[1], SImode, 4); 2645 else if (optype1 == CNSTOP) 2646 split_double (operands[1], &operands[1], &latehalf[1]); 2647 else 2648 latehalf[1] = operands[1]; 2649 2650 /* If the first move would clobber the source of the second one, 2651 do them in the other order. 2652 2653 This can happen in two cases: 2654 2655 mem -> register where the first half of the destination register 2656 is the same register used in the memory's address. Reload 2657 can create such insns. 2658 2659 mem in this case will be either register indirect or register 2660 indirect plus a valid offset. 2661 2662 register -> register move where REGNO(dst) == REGNO(src) + 1; 2663 someone (Tim/Tege?) claimed this can happen for parameter loads. 2664 2665 Handle mem -> register case first. */ 2666 if (optype0 == REGOP 2667 && (optype1 == MEMOP || optype1 == OFFSOP) 2668 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1, 2669 operands[1], 0)) 2670 { 2671 /* Do the late half first. */ 2672 if (addreg1) 2673 output_asm_insn ("ldo 4(%0),%0", &addreg1); 2674 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2675 2676 /* Then clobber. */ 2677 if (addreg1) 2678 output_asm_insn ("ldo -4(%0),%0", &addreg1); 2679 return pa_singlemove_string (operands); 2680 } 2681 2682 /* Now handle register -> register case. 
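(Example: with source pair (r4,r5) and destination pair (r5,r6), copying the low word first would overwrite r5, the source's high word, so the late half must be moved first.)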
*/ 2683 if (optype0 == REGOP && optype1 == REGOP 2684 && REGNO (operands[0]) == REGNO (operands[1]) + 1) 2685 { 2686 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2687 return pa_singlemove_string (operands); 2688 } 2689 2690 /* Normal case: do the two words, low-numbered first. */ 2691 2692 output_asm_insn (pa_singlemove_string (operands), operands); 2693 2694 /* Make any unoffsettable addresses point at high-numbered word. */ 2695 if (addreg0) 2696 output_asm_insn ("ldo 4(%0),%0", &addreg0); 2697 if (addreg1) 2698 output_asm_insn ("ldo 4(%0),%0", &addreg1); 2699 2700 /* Do that word. */ 2701 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2702 2703 /* Undo the adds we just did. */ 2704 if (addreg0) 2705 output_asm_insn ("ldo -4(%0),%0", &addreg0); 2706 if (addreg1) 2707 output_asm_insn ("ldo -4(%0),%0", &addreg1); 2708 2709 return ""; 2710 } 2711 2712 const char * 2713 pa_output_fp_move_double (rtx *operands) 2714 { 2715 if (FP_REG_P (operands[0])) 2716 { 2717 if (FP_REG_P (operands[1]) 2718 || operands[1] == CONST0_RTX (GET_MODE (operands[0]))) 2719 output_asm_insn ("fcpy,dbl %f1,%0", operands); 2720 else 2721 output_asm_insn ("fldd%F1 %1,%0", operands); 2722 } 2723 else if (FP_REG_P (operands[1])) 2724 { 2725 output_asm_insn ("fstd%F0 %1,%0", operands); 2726 } 2727 else 2728 { 2729 rtx xoperands[2]; 2730 2731 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0]))); 2732 2733 /* This is a pain. You have to be prepared to deal with an 2734 arbitrary address here including pre/post increment/decrement. 2735 2736 So avoid this in the MD. */ 2737 gcc_assert (GET_CODE (operands[0]) == REG); 2738 2739 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); 2740 xoperands[0] = operands[0]; 2741 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands); 2742 } 2743 return ""; 2744 } 2745 2746 /* Return a REG that occurs in ADDR with coefficient 1. 2747 ADDR can be effectively incremented by incrementing REG. */ 2748 2749 static rtx 2750 find_addr_reg (rtx addr) 2751 { 2752 while (GET_CODE (addr) == PLUS) 2753 { 2754 if (GET_CODE (XEXP (addr, 0)) == REG) 2755 addr = XEXP (addr, 0); 2756 else if (GET_CODE (XEXP (addr, 1)) == REG) 2757 addr = XEXP (addr, 1); 2758 else if (CONSTANT_P (XEXP (addr, 0))) 2759 addr = XEXP (addr, 1); 2760 else if (CONSTANT_P (XEXP (addr, 1))) 2761 addr = XEXP (addr, 0); 2762 else 2763 gcc_unreachable (); 2764 } 2765 gcc_assert (GET_CODE (addr) == REG); 2766 return addr; 2767 } 2768 2769 /* Emit code to perform a block move. 2770 2771 OPERANDS[0] is the destination pointer as a REG, clobbered. 2772 OPERANDS[1] is the source pointer as a REG, clobbered. 2773 OPERANDS[2] is a register for temporary storage. 2774 OPERANDS[3] is a register for temporary storage. 2775 OPERANDS[4] is the size as a CONST_INT. 2776 OPERANDS[5] is the alignment safe to use, as a CONST_INT. 2777 OPERANDS[6] is another temporary register. */ 2778 2779 const char * 2780 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) 2781 { 2782 HOST_WIDE_INT align = INTVAL (operands[5]); 2783 unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]); 2784 2785 /* We can't move more than a word at a time because the PA 2786 has no integer move insns longer than a word. (Could use fp mem ops?) */ 2787 if (align > (TARGET_64BIT ? 8 : 4)) 2788 align = (TARGET_64BIT ? 8 : 4); 2789 2790 /* Note that we know each loop below will execute at least twice 2791 (else we would have open-coded the copy). 
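(Illustration with made-up numbers: for align == 4 and n_bytes == 24 the counter starts at 16 and each iteration copies 8 bytes; the addib,>= falls through after three iterations, once the counter goes negative.)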
*/ 2792 switch (align) 2793 { 2794 case 8: 2795 /* Pre-adjust the loop counter. */ 2796 operands[4] = GEN_INT (n_bytes - 16); 2797 output_asm_insn ("ldi %4,%2", operands); 2798 2799 /* Copying loop. */ 2800 output_asm_insn ("ldd,ma 8(%1),%3", operands); 2801 output_asm_insn ("ldd,ma 8(%1),%6", operands); 2802 output_asm_insn ("std,ma %3,8(%0)", operands); 2803 output_asm_insn ("addib,>= -16,%2,.-12", operands); 2804 output_asm_insn ("std,ma %6,8(%0)", operands); 2805 2806 /* Handle the residual. There could be up to 15 bytes of 2807 residual to copy! */ 2808 if (n_bytes % 16 != 0) 2809 { 2810 operands[4] = GEN_INT (n_bytes % 8); 2811 if (n_bytes % 16 >= 8) 2812 output_asm_insn ("ldd,ma 8(%1),%3", operands); 2813 if (n_bytes % 8 != 0) 2814 output_asm_insn ("ldd 0(%1),%6", operands); 2815 if (n_bytes % 16 >= 8) 2816 output_asm_insn ("std,ma %3,8(%0)", operands); 2817 if (n_bytes % 8 != 0) 2818 output_asm_insn ("stdby,e %6,%4(%0)", operands); 2819 } 2820 return ""; 2821 2822 case 4: 2823 /* Pre-adjust the loop counter. */ 2824 operands[4] = GEN_INT (n_bytes - 8); 2825 output_asm_insn ("ldi %4,%2", operands); 2826 2827 /* Copying loop. */ 2828 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); 2829 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands); 2830 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); 2831 output_asm_insn ("addib,>= -8,%2,.-12", operands); 2832 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands); 2833 2834 /* Handle the residual. There could be up to 7 bytes of 2835 residual to copy! */ 2836 if (n_bytes % 8 != 0) 2837 { 2838 operands[4] = GEN_INT (n_bytes % 4); 2839 if (n_bytes % 8 >= 4) 2840 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); 2841 if (n_bytes % 4 != 0) 2842 output_asm_insn ("ldw 0(%1),%6", operands); 2843 if (n_bytes % 8 >= 4) 2844 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); 2845 if (n_bytes % 4 != 0) 2846 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands); 2847 } 2848 return ""; 2849 2850 case 2: 2851 /* Pre-adjust the loop counter. */ 2852 operands[4] = GEN_INT (n_bytes - 4); 2853 output_asm_insn ("ldi %4,%2", operands); 2854 2855 /* Copying loop. */ 2856 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); 2857 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands); 2858 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); 2859 output_asm_insn ("addib,>= -4,%2,.-12", operands); 2860 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands); 2861 2862 /* Handle the residual. */ 2863 if (n_bytes % 4 != 0) 2864 { 2865 if (n_bytes % 4 >= 2) 2866 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); 2867 if (n_bytes % 2 != 0) 2868 output_asm_insn ("ldb 0(%1),%6", operands); 2869 if (n_bytes % 4 >= 2) 2870 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); 2871 if (n_bytes % 2 != 0) 2872 output_asm_insn ("stb %6,0(%0)", operands); 2873 } 2874 return ""; 2875 2876 case 1: 2877 /* Pre-adjust the loop counter. */ 2878 operands[4] = GEN_INT (n_bytes - 2); 2879 output_asm_insn ("ldi %4,%2", operands); 2880 2881 /* Copying loop. */ 2882 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands); 2883 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands); 2884 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands); 2885 output_asm_insn ("addib,>= -2,%2,.-12", operands); 2886 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands); 2887 2888 /* Handle the residual. 
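(At most one byte can remain here, since the align == 1 loop above moves two bytes per iteration.)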
*/ 2889 if (n_bytes % 2 != 0) 2890 { 2891 output_asm_insn ("ldb 0(%1),%3", operands); 2892 output_asm_insn ("stb %3,0(%0)", operands); 2893 } 2894 return ""; 2895 2896 default: 2897 gcc_unreachable (); 2898 } 2899 } 2900 2901 /* Count the number of insns necessary to handle this block move. 2902 2903 Basic structure is the same as pa_output_block_move, except that we 2904 count insns rather than emit them. */ 2905 2906 static int 2907 compute_movmem_length (rtx insn) 2908 { 2909 rtx pat = PATTERN (insn); 2910 unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0)); 2911 unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0)); 2912 unsigned int n_insns = 0; 2913 2914 /* We can't move more than a word at a time because the PA 2915 has no integer move insns longer than a word. (Could use fp mem ops?) */ 2916 if (align > (TARGET_64BIT ? 8 : 4)) 2917 align = (TARGET_64BIT ? 8 : 4); 2918 2919 /* The basic copying loop. */ 2920 n_insns = 6; 2921 2922 /* Residuals. */ 2923 if (n_bytes % (2 * align) != 0) 2924 { 2925 if ((n_bytes % (2 * align)) >= align) 2926 n_insns += 2; 2927 2928 if ((n_bytes % align) != 0) 2929 n_insns += 2; 2930 } 2931 2932 /* Lengths are expressed in bytes now; each insn is 4 bytes. */ 2933 return n_insns * 4; 2934 } 2935 2936 /* Emit code to perform a block clear. 2937 2938 OPERANDS[0] is the destination pointer as a REG, clobbered. 2939 OPERANDS[1] is a register for temporary storage. 2940 OPERANDS[2] is the size as a CONST_INT. 2941 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */ 2942 2943 const char * 2944 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) 2945 { 2946 HOST_WIDE_INT align = INTVAL (operands[3]); 2947 unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]); 2948 2949 /* We can't clear more than a word at a time because the PA 2950 has no integer move insns longer than a word. */ 2951 if (align > (TARGET_64BIT ? 8 : 4)) 2952 align = (TARGET_64BIT ? 8 : 4); 2953 2954 /* Note that we know each loop below will execute at least twice 2955 (else we would have open-coded the clear). */ 2956 switch (align) 2957 { 2958 case 8: 2959 /* Pre-adjust the loop counter. */ 2960 operands[2] = GEN_INT (n_bytes - 16); 2961 output_asm_insn ("ldi %2,%1", operands); 2962 2963 /* Loop. */ 2964 output_asm_insn ("std,ma %%r0,8(%0)", operands); 2965 output_asm_insn ("addib,>= -16,%1,.-4", operands); 2966 output_asm_insn ("std,ma %%r0,8(%0)", operands); 2967 2968 /* Handle the residual. There could be up to 15 bytes of 2969 residual to clear! */ 2970 if (n_bytes % 16 != 0) 2971 { 2972 operands[2] = GEN_INT (n_bytes % 8); 2973 if (n_bytes % 16 >= 8) 2974 output_asm_insn ("std,ma %%r0,8(%0)", operands); 2975 if (n_bytes % 8 != 0) 2976 output_asm_insn ("stdby,e %%r0,%2(%0)", operands); 2977 } 2978 return ""; 2979 2980 case 4: 2981 /* Pre-adjust the loop counter. */ 2982 operands[2] = GEN_INT (n_bytes - 8); 2983 output_asm_insn ("ldi %2,%1", operands); 2984 2985 /* Loop. */ 2986 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 2987 output_asm_insn ("addib,>= -8,%1,.-4", operands); 2988 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 2989 2990 /* Handle the residual. There could be up to 7 bytes of 2991 residual to clear! 
*/ 2992 if (n_bytes % 8 != 0) 2993 { 2994 operands[2] = GEN_INT (n_bytes % 4); 2995 if (n_bytes % 8 >= 4) 2996 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 2997 if (n_bytes % 4 != 0) 2998 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands); 2999 } 3000 return ""; 3001 3002 case 2: 3003 /* Pre-adjust the loop counter. */ 3004 operands[2] = GEN_INT (n_bytes - 4); 3005 output_asm_insn ("ldi %2,%1", operands); 3006 3007 /* Loop. */ 3008 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3009 output_asm_insn ("addib,>= -4,%1,.-4", operands); 3010 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3011 3012 /* Handle the residual. */ 3013 if (n_bytes % 4 != 0) 3014 { 3015 if (n_bytes % 4 >= 2) 3016 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3017 if (n_bytes % 2 != 0) 3018 output_asm_insn ("stb %%r0,0(%0)", operands); 3019 } 3020 return ""; 3021 3022 case 1: 3023 /* Pre-adjust the loop counter. */ 3024 operands[2] = GEN_INT (n_bytes - 2); 3025 output_asm_insn ("ldi %2,%1", operands); 3026 3027 /* Loop. */ 3028 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); 3029 output_asm_insn ("addib,>= -2,%1,.-4", operands); 3030 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); 3031 3032 /* Handle the residual. */ 3033 if (n_bytes % 2 != 0) 3034 output_asm_insn ("stb %%r0,0(%0)", operands); 3035 3036 return ""; 3037 3038 default: 3039 gcc_unreachable (); 3040 } 3041 } 3042 3043 /* Count the number of insns necessary to handle this block clear. 3044 3045 Basic structure is the same as pa_output_block_clear, except that we 3046 count insns rather than emit them. */ 3047 3048 static int 3049 compute_clrmem_length (rtx insn) 3050 { 3051 rtx pat = PATTERN (insn); 3052 unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0)); 3053 unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0)); 3054 unsigned int n_insns = 0; 3055 3056 /* We can't clear more than a word at a time because the PA 3057 has no integer move insns longer than a word. */ 3058 if (align > (TARGET_64BIT ? 8 : 4)) 3059 align = (TARGET_64BIT ? 8 : 4); 3060 3061 /* The basic loop. */ 3062 n_insns = 4; 3063 3064 /* Residuals. */ 3065 if (n_bytes % (2 * align) != 0) 3066 { 3067 if ((n_bytes % (2 * align)) >= align) 3068 n_insns++; 3069 3070 if ((n_bytes % align) != 0) 3071 n_insns++; 3072 } 3073 3074 /* Lengths are expressed in bytes now; each insn is 4 bytes. */ 3075 return n_insns * 4; 3076 } 3077 3078 3079 const char * 3080 pa_output_and (rtx *operands) 3081 { 3082 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) 3083 { 3084 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3085 int ls0, ls1, ms0, p, len; 3086 3087 for (ls0 = 0; ls0 < 32; ls0++) 3088 if ((mask & (1 << ls0)) == 0) 3089 break; 3090 3091 for (ls1 = ls0; ls1 < 32; ls1++) 3092 if ((mask & (1 << ls1)) != 0) 3093 break; 3094 3095 for (ms0 = ls1; ms0 < 32; ms0++) 3096 if ((mask & (1 << ms0)) == 0) 3097 break; 3098 3099 gcc_assert (ms0 == 32); 3100 3101 if (ls1 == 32) 3102 { 3103 len = ls0; 3104 3105 gcc_assert (len); 3106 3107 operands[2] = GEN_INT (len); 3108 return "{extru|extrw,u} %1,31,%2,%0"; 3109 } 3110 else 3111 { 3112 /* We could use this `depi' for the case above as well, but `depi' 3113 requires one more register file access than an `extru'. 
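(Illustrative case: mask 0xfffffff0 gives ls0 == 0 and ls1 == 4, so we emit depwi 0,31,4,%0, depositing a 4-bit field of zeros at the right end of the word.)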
*/ 3114 3115 p = 31 - ls0; 3116 len = ls1 - ls0; 3117 3118 operands[2] = GEN_INT (p); 3119 operands[3] = GEN_INT (len); 3120 return "{depi|depwi} 0,%2,%3,%0"; 3121 } 3122 } 3123 else 3124 return "and %1,%2,%0"; 3125 } 3126 3127 /* Return a string to perform a bitwise-and of operands[1] with operands[2] 3128 storing the result in operands[0]. */ 3129 const char * 3130 pa_output_64bit_and (rtx *operands) 3131 { 3132 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) 3133 { 3134 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3135 int ls0, ls1, ms0, p, len; 3136 3137 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++) 3138 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0) 3139 break; 3140 3141 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++) 3142 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0) 3143 break; 3144 3145 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++) 3146 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0) 3147 break; 3148 3149 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT); 3150 3151 if (ls1 == HOST_BITS_PER_WIDE_INT) 3152 { 3153 len = ls0; 3154 3155 gcc_assert (len); 3156 3157 operands[2] = GEN_INT (len); 3158 return "extrd,u %1,63,%2,%0"; 3159 } 3160 else 3161 { 3162 /* We could use this `depi' for the case above as well, but `depi' 3163 requires one more register file access than an `extru'. */ 3164 3165 p = 63 - ls0; 3166 len = ls1 - ls0; 3167 3168 operands[2] = GEN_INT (p); 3169 operands[3] = GEN_INT (len); 3170 return "depdi 0,%2,%3,%0"; 3171 } 3172 } 3173 else 3174 return "and %1,%2,%0"; 3175 } 3176 3177 const char * 3178 pa_output_ior (rtx *operands) 3179 { 3180 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3181 int bs0, bs1, p, len; 3182 3183 if (INTVAL (operands[2]) == 0) 3184 return "copy %1,%0"; 3185 3186 for (bs0 = 0; bs0 < 32; bs0++) 3187 if ((mask & (1 << bs0)) != 0) 3188 break; 3189 3190 for (bs1 = bs0; bs1 < 32; bs1++) 3191 if ((mask & (1 << bs1)) == 0) 3192 break; 3193 3194 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); 3195 3196 p = 31 - bs0; 3197 len = bs1 - bs0; 3198 3199 operands[2] = GEN_INT (p); 3200 operands[3] = GEN_INT (len); 3201 return "{depi|depwi} -1,%2,%3,%0"; 3202 } 3203 3204 /* Return a string to perform a bitwise inclusive-or of operands[1] with 3205 operands[2], storing the result in operands[0]. */ 3206 const char * 3207 pa_output_64bit_ior (rtx *operands) 3208 { 3209 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3210 int bs0, bs1, p, len; 3211 3212 if (INTVAL (operands[2]) == 0) 3213 return "copy %1,%0"; 3214 3215 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++) 3216 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0) 3217 break; 3218 3219 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++) 3220 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0) 3221 break; 3222 3223 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT 3224 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); 3225 3226 p = 63 - bs0; 3227 len = bs1 - bs0; 3228 3229 operands[2] = GEN_INT (p); 3230 operands[3] = GEN_INT (len); 3231 return "depdi -1,%2,%3,%0"; 3232 } 3233 3234 /* Target hook for assembling integer objects. This code handles 3235 aligned SI and DI integers specially since function references 3236 must be preceded by P%. */ 3237 3238 static bool 3239 pa_assemble_integer (rtx x, unsigned int size, int aligned_p) 3240 { 3241 if (size == UNITS_PER_WORD 3242 && aligned_p 3243 && function_label_operand (x, VOIDmode)) 3244 { 3245 fputs (size == 8? 
"\t.dword\t" : "\t.word\t", asm_out_file); 3246 3247 /* We don't want an OPD when generating fast indirect calls. */ 3248 if (!TARGET_FAST_INDIRECT_CALLS) 3249 fputs ("P%", asm_out_file); 3250 3251 output_addr_const (asm_out_file, x); 3252 fputc ('\n', asm_out_file); 3253 return true; 3254 } 3255 return default_assemble_integer (x, size, aligned_p); 3256 } 3257 3258 /* Output an ascii string. */ 3259 void 3260 pa_output_ascii (FILE *file, const char *p, int size) 3261 { 3262 int i; 3263 int chars_output; 3264 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */ 3265 3266 /* The HP assembler can only take strings of 256 characters at one 3267 time. This is a limitation on input line length, *not* the 3268 length of the string. Sigh. Even worse, it seems that the 3269 restriction is in number of input characters (see \xnn & 3270 \whatever). So we have to do this very carefully. */ 3271 3272 fputs ("\t.STRING \"", file); 3273 3274 chars_output = 0; 3275 for (i = 0; i < size; i += 4) 3276 { 3277 int co = 0; 3278 int io = 0; 3279 for (io = 0, co = 0; io < MIN (4, size - i); io++) 3280 { 3281 register unsigned int c = (unsigned char) p[i + io]; 3282 3283 if (c == '\"' || c == '\\') 3284 partial_output[co++] = '\\'; 3285 if (c >= ' ' && c < 0177) 3286 partial_output[co++] = c; 3287 else 3288 { 3289 unsigned int hexd; 3290 partial_output[co++] = '\\'; 3291 partial_output[co++] = 'x'; 3292 hexd = c / 16 - 0 + '0'; 3293 if (hexd > '9') 3294 hexd -= '9' - 'a' + 1; 3295 partial_output[co++] = hexd; 3296 hexd = c % 16 - 0 + '0'; 3297 if (hexd > '9') 3298 hexd -= '9' - 'a' + 1; 3299 partial_output[co++] = hexd; 3300 } 3301 } 3302 if (chars_output + co > 243) 3303 { 3304 fputs ("\"\n\t.STRING \"", file); 3305 chars_output = 0; 3306 } 3307 fwrite (partial_output, 1, (size_t) co, file); 3308 chars_output += co; 3309 co = 0; 3310 } 3311 fputs ("\"\n", file); 3312 } 3313 3314 /* Try to rewrite floating point comparisons & branches to avoid 3315 useless add,tr insns. 3316 3317 CHECK_NOTES is nonzero if we should examine REG_DEAD notes 3318 to see if FPCC is dead. CHECK_NOTES is nonzero for the 3319 first attempt to remove useless add,tr insns. It is zero 3320 for the second pass as reorg sometimes leaves bogus REG_DEAD 3321 notes lying around. 3322 3323 When CHECK_NOTES is zero we can only eliminate add,tr insns 3324 when there's a 1:1 correspondence between fcmp and ftest/fbranch 3325 instructions. */ 3326 static void 3327 remove_useless_addtr_insns (int check_notes) 3328 { 3329 rtx insn; 3330 static int pass = 0; 3331 3332 /* This is fairly cheap, so always run it when optimizing. */ 3333 if (optimize > 0) 3334 { 3335 int fcmp_count = 0; 3336 int fbranch_count = 0; 3337 3338 /* Walk all the insns in this function looking for fcmp & fbranch 3339 instructions. Keep track of how many of each we find. */ 3340 for (insn = get_insns (); insn; insn = next_insn (insn)) 3341 { 3342 rtx tmp; 3343 3344 /* Ignore anything that isn't an INSN or a JUMP_INSN. */ 3345 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN) 3346 continue; 3347 3348 tmp = PATTERN (insn); 3349 3350 /* It must be a set. */ 3351 if (GET_CODE (tmp) != SET) 3352 continue; 3353 3354 /* If the destination is CCFP, then we've found an fcmp insn. */ 3355 tmp = SET_DEST (tmp); 3356 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0) 3357 { 3358 fcmp_count++; 3359 continue; 3360 } 3361 3362 tmp = PATTERN (insn); 3363 /* If this is an fbranch instruction, bump the fbranch counter. 
*/ 3364 if (GET_CODE (tmp) == SET 3365 && SET_DEST (tmp) == pc_rtx 3366 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE 3367 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE 3368 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG 3369 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0) 3370 { 3371 fbranch_count++; 3372 continue; 3373 } 3374 } 3375 3376 3377 /* Find all floating point compare + branch insns. If possible, 3378 reverse the comparison & the branch to avoid add,tr insns. */ 3379 for (insn = get_insns (); insn; insn = next_insn (insn)) 3380 { 3381 rtx tmp, next; 3382 3383 /* Ignore anything that isn't an INSN. */ 3384 if (GET_CODE (insn) != INSN) 3385 continue; 3386 3387 tmp = PATTERN (insn); 3388 3389 /* It must be a set. */ 3390 if (GET_CODE (tmp) != SET) 3391 continue; 3392 3393 /* The destination must be CCFP, which is register zero. */ 3394 tmp = SET_DEST (tmp); 3395 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0) 3396 continue; 3397 3398 /* INSN should be a set of CCFP. 3399 3400 See if the result of this insn is used in a reversed FP 3401 conditional branch. If so, reverse our condition and 3402 the branch. Doing so avoids useless add,tr insns. */ 3403 next = next_insn (insn); 3404 while (next) 3405 { 3406 /* Jumps, calls and labels stop our search. */ 3407 if (GET_CODE (next) == JUMP_INSN 3408 || GET_CODE (next) == CALL_INSN 3409 || GET_CODE (next) == CODE_LABEL) 3410 break; 3411 3412 /* As does another fcmp insn. */ 3413 if (GET_CODE (next) == INSN 3414 && GET_CODE (PATTERN (next)) == SET 3415 && GET_CODE (SET_DEST (PATTERN (next))) == REG 3416 && REGNO (SET_DEST (PATTERN (next))) == 0) 3417 break; 3418 3419 next = next_insn (next); 3420 } 3421 3422 /* Is NEXT_INSN a branch? */ 3423 if (next 3424 && GET_CODE (next) == JUMP_INSN) 3425 { 3426 rtx pattern = PATTERN (next); 3427 3428 /* If it is a reversed fp conditional branch (e.g. uses add,tr) 3429 and CCFP dies, then reverse our conditional and the branch 3430 to avoid the add,tr. */ 3431 if (GET_CODE (pattern) == SET 3432 && SET_DEST (pattern) == pc_rtx 3433 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE 3434 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE 3435 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG 3436 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0 3437 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC 3438 && (fcmp_count == fbranch_count 3439 || (check_notes 3440 && find_regno_note (next, REG_DEAD, 0)))) 3441 { 3442 /* Reverse the branch. */ 3443 tmp = XEXP (SET_SRC (pattern), 1); 3444 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2); 3445 XEXP (SET_SRC (pattern), 2) = tmp; 3446 INSN_CODE (next) = -1; 3447 3448 /* Reverse our condition. */ 3449 tmp = PATTERN (insn); 3450 PUT_CODE (XEXP (tmp, 1), 3451 (reverse_condition_maybe_unordered 3452 (GET_CODE (XEXP (tmp, 1))))); 3453 } 3454 } 3455 } 3456 } 3457 3458 pass = !pass; 3459 3460 } 3461 3462 /* You may have trouble believing this, but this is the 32 bit HP-PA 3463 stack layout. Wow. 
3464 3465 Offset Contents 3466 3467 Variable arguments (optional; any number may be allocated) 3468 3469 SP-(4*(N+9)) arg word N 3470 : : 3471 SP-56 arg word 5 3472 SP-52 arg word 4 3473 3474 Fixed arguments (must be allocated; may remain unused) 3475 3476 SP-48 arg word 3 3477 SP-44 arg word 2 3478 SP-40 arg word 1 3479 SP-36 arg word 0 3480 3481 Frame Marker 3482 3483 SP-32 External Data Pointer (DP) 3484 SP-28 External sr4 3485 SP-24 External/stub RP (RP') 3486 SP-20 Current RP 3487 SP-16 Static Link 3488 SP-12 Clean up 3489 SP-8 Calling Stub RP (RP'') 3490 SP-4 Previous SP 3491 3492 Top of Frame 3493 3494 SP-0 Stack Pointer (points to next available address) 3495 3496 */ 3497 3498 /* This function saves registers as follows. Registers marked with ' are 3499 this function's registers (as opposed to the previous function's). 3500 If a frame_pointer isn't needed, r4 is saved as a general register; 3501 the space for the frame pointer is still allocated, though, to keep 3502 things simple. 3503 3504 3505 Top of Frame 3506 3507 SP (FP') Previous FP 3508 SP + 4 Alignment filler (sigh) 3509 SP + 8 Space for locals reserved here. 3510 . 3511 . 3512 . 3513 SP + n All call saved registers used. 3514 . 3515 . 3516 . 3517 SP + o All call saved fp registers used. 3518 . 3519 . 3520 . 3521 SP + p (SP') points to next available address. 3522 3523 */ 3524 3525 /* Global variables set by output_function_prologue(). */ 3526 /* Size of frame. Need to know this to emit return insns from 3527 leaf procedures. */ 3528 static HOST_WIDE_INT actual_fsize, local_fsize; 3529 static int save_fregs; 3530 3531 /* Emit RTL to store REG at the memory location specified by BASE+DISP. 3532 Handle case where DISP > 8k by using the add_high_const patterns. 3533 3534 Note in DISP > 8k case, we will leave the high part of the address 3535 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */ 3536 3537 static void 3538 store_reg (int reg, HOST_WIDE_INT disp, int base) 3539 { 3540 rtx insn, dest, src, basereg; 3541 3542 src = gen_rtx_REG (word_mode, reg); 3543 basereg = gen_rtx_REG (Pmode, base); 3544 if (VAL_14_BITS_P (disp)) 3545 { 3546 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp)); 3547 insn = emit_move_insn (dest, src); 3548 } 3549 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 3550 { 3551 rtx delta = GEN_INT (disp); 3552 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3553 3554 emit_move_insn (tmpreg, delta); 3555 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 3556 if (DO_FRAME_NOTES) 3557 { 3558 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3559 gen_rtx_SET (VOIDmode, tmpreg, 3560 gen_rtx_PLUS (Pmode, basereg, delta))); 3561 RTX_FRAME_RELATED_P (insn) = 1; 3562 } 3563 dest = gen_rtx_MEM (word_mode, tmpreg); 3564 insn = emit_move_insn (dest, src); 3565 } 3566 else 3567 { 3568 rtx delta = GEN_INT (disp); 3569 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); 3570 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3571 3572 emit_move_insn (tmpreg, high); 3573 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 3574 insn = emit_move_insn (dest, src); 3575 if (DO_FRAME_NOTES) 3576 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3577 gen_rtx_SET (VOIDmode, 3578 gen_rtx_MEM (word_mode, 3579 gen_rtx_PLUS (word_mode, 3580 basereg, 3581 delta)), 3582 src)); 3583 } 3584 3585 if (DO_FRAME_NOTES) 3586 RTX_FRAME_RELATED_P (insn) = 1; 3587 } 3588 3589 /* Emit RTL to store REG at the memory location specified by BASE and then 3590 add MOD to BASE. 
MOD must be <= 8k. */ 3591 3592 static void 3593 store_reg_modify (int base, int reg, HOST_WIDE_INT mod) 3594 { 3595 rtx insn, basereg, srcreg, delta; 3596 3597 gcc_assert (VAL_14_BITS_P (mod)); 3598 3599 basereg = gen_rtx_REG (Pmode, base); 3600 srcreg = gen_rtx_REG (word_mode, reg); 3601 delta = GEN_INT (mod); 3602 3603 insn = emit_insn (gen_post_store (basereg, srcreg, delta)); 3604 if (DO_FRAME_NOTES) 3605 { 3606 RTX_FRAME_RELATED_P (insn) = 1; 3607 3608 /* RTX_FRAME_RELATED_P must be set on each frame related set 3609 in a parallel with more than one element. */ 3610 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1; 3611 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; 3612 } 3613 } 3614 3615 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case 3616 where DISP > 8k by using the add_high_const patterns. NOTE indicates 3617 whether to add a frame note or not. 3618 3619 In the DISP > 8k case, we leave the high part of the address in %r1. 3620 There is code in expand_hppa_{prologue,epilogue} that knows about this. */ 3621 3622 static void 3623 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note) 3624 { 3625 rtx insn; 3626 3627 if (VAL_14_BITS_P (disp)) 3628 { 3629 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3630 plus_constant (Pmode, 3631 gen_rtx_REG (Pmode, base), disp)); 3632 } 3633 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 3634 { 3635 rtx basereg = gen_rtx_REG (Pmode, base); 3636 rtx delta = GEN_INT (disp); 3637 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3638 3639 emit_move_insn (tmpreg, delta); 3640 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3641 gen_rtx_PLUS (Pmode, tmpreg, basereg)); 3642 if (DO_FRAME_NOTES) 3643 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3644 gen_rtx_SET (VOIDmode, tmpreg, 3645 gen_rtx_PLUS (Pmode, basereg, delta))); 3646 } 3647 else 3648 { 3649 rtx basereg = gen_rtx_REG (Pmode, base); 3650 rtx delta = GEN_INT (disp); 3651 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3652 3653 emit_move_insn (tmpreg, 3654 gen_rtx_PLUS (Pmode, basereg, 3655 gen_rtx_HIGH (Pmode, delta))); 3656 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3657 gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 3658 } 3659 3660 if (DO_FRAME_NOTES && note) 3661 RTX_FRAME_RELATED_P (insn) = 1; 3662 } 3663 3664 HOST_WIDE_INT 3665 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live) 3666 { 3667 int freg_saved = 0; 3668 int i, j; 3669 3670 /* The code in pa_expand_prologue and pa_expand_epilogue must 3671 be consistent with the rounding and size calculation done here. 3672 Change them at the same time. */ 3673 3674 /* We do our own stack alignment. First, round the size of the 3675 stack locals up to a word boundary. */ 3676 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); 3677 3678 /* Space for previous frame pointer + filler. If any frame is 3679 allocated, we need to add in the STARTING_FRAME_OFFSET. We 3680 waste some space here for the sake of HP compatibility. The 3681 first slot is only used when the frame pointer is needed. */ 3682 if (size || frame_pointer_needed) 3683 size += STARTING_FRAME_OFFSET; 3684 3685 /* If the current function calls __builtin_eh_return, then we need 3686 to allocate stack space for registers that will hold data for 3687 the exception handler. 
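(The loop below merely counts how many EH data registers the target defines; e.g., with four such registers we reserve four extra words.)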
*/ 3688 if (DO_FRAME_NOTES && crtl->calls_eh_return) 3689 { 3690 unsigned int i; 3691 3692 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i) 3693 continue; 3694 size += i * UNITS_PER_WORD; 3695 } 3696 3697 /* Account for space used by the callee general register saves. */ 3698 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--) 3699 if (df_regs_ever_live_p (i)) 3700 size += UNITS_PER_WORD; 3701 3702 /* Account for space used by the callee floating point register saves. */ 3703 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 3704 if (df_regs_ever_live_p (i) 3705 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1))) 3706 { 3707 freg_saved = 1; 3708 3709 /* We always save both halves of the FP register, so always 3710 increment the frame size by 8 bytes. */ 3711 size += 8; 3712 } 3713 3714 /* If any of the floating registers are saved, account for the 3715 alignment needed for the floating point register save block. */ 3716 if (freg_saved) 3717 { 3718 size = (size + 7) & ~7; 3719 if (fregs_live) 3720 *fregs_live = 1; 3721 } 3722 3723 /* The various ABIs include space for the outgoing parameters in the 3724 size of the current function's stack frame. We don't need to align 3725 for the outgoing arguments as their alignment is set by the final 3726 rounding for the frame as a whole. */ 3727 size += crtl->outgoing_args_size; 3728 3729 /* Allocate space for the fixed frame marker. This space must be 3730 allocated for any function that makes calls or allocates 3731 stack space. */ 3732 if (!crtl->is_leaf || size) 3733 size += TARGET_64BIT ? 48 : 32; 3734 3735 /* Finally, round to the preferred stack boundary. */ 3736 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1) 3737 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)); 3738 } 3739 3740 /* Generate the assembly code for function entry. FILE is a stdio 3741 stream to output the code to. SIZE is an int: how many units of 3742 temporary storage to allocate. 3743 3744 Refer to the array `regs_ever_live' to determine which registers to 3745 save; `regs_ever_live[I]' is nonzero if register number I is ever 3746 used in the function. This function is responsible for knowing 3747 which registers should not be saved even if used. */ 3748 3749 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block 3750 of memory. If any fpu reg is used in the function, we allocate 3751 such a block here, at the bottom of the frame, just in case it's needed. 3752 3753 If this function is a leaf procedure, then we may choose not 3754 to do a "save" insn. The decision about whether or not 3755 to do this is made in regclass.c. */ 3756 3757 static void 3758 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 3759 { 3760 /* The function's label and associated .PROC must never be 3761 separated and must be output *after* any profiling declarations 3762 to avoid changing spaces/subspaces within a procedure. */ 3763 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); 3764 fputs ("\t.PROC\n", file); 3765 3766 /* pa_expand_prologue does the dirty work now. We just need 3767 to output the assembler directives which denote the start 3768 of a function. 
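As a sketch only (the frame size and register count are illustrative, not taken from any particular build), the directives emitted below for a small non-leaf function that saves RP might read:

	.PROC
	.CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=3
	.ENTRY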
*/ 3769 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize); 3770 if (crtl->is_leaf) 3771 fputs (",NO_CALLS", file); 3772 else 3773 fputs (",CALLS", file); 3774 if (rp_saved) 3775 fputs (",SAVE_RP", file); 3776 3777 /* The SAVE_SP flag is used to indicate that register %r3 is stored 3778 at the beginning of the frame and that it is used as the frame 3779 pointer for the frame. We do this because our current frame 3780 layout doesn't conform to that specified in the HP runtime 3781 documentation and we need a way to indicate to programs such as 3782 GDB where %r3 is saved. The SAVE_SP flag was chosen because it 3783 isn't used by HP compilers but is supported by the assembler. 3784 However, SAVE_SP is supposed to indicate that the previous stack 3785 pointer has been saved in the frame marker. */ 3786 if (frame_pointer_needed) 3787 fputs (",SAVE_SP", file); 3788 3789 /* Pass on information about the number of callee register saves 3790 performed in the prologue. 3791 3792 The compiler is supposed to pass the highest register number 3793 saved, the assembler then has to adjust that number before 3794 entering it into the unwind descriptor (to account for any 3795 caller saved registers with lower register numbers than the 3796 first callee saved register). */ 3797 if (gr_saved) 3798 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2); 3799 3800 if (fr_saved) 3801 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11); 3802 3803 fputs ("\n\t.ENTRY\n", file); 3804 3805 remove_useless_addtr_insns (0); 3806 } 3807 3808 void 3809 pa_expand_prologue (void) 3810 { 3811 int merge_sp_adjust_with_store = 0; 3812 HOST_WIDE_INT size = get_frame_size (); 3813 HOST_WIDE_INT offset; 3814 int i; 3815 rtx insn, tmpreg; 3816 3817 gr_saved = 0; 3818 fr_saved = 0; 3819 save_fregs = 0; 3820 3821 /* Compute total size for frame pointer, filler, locals and rounding to 3822 the next word boundary. Similar code appears in pa_compute_frame_size 3823 and must be changed in tandem with this code. */ 3824 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); 3825 if (local_fsize || frame_pointer_needed) 3826 local_fsize += STARTING_FRAME_OFFSET; 3827 3828 actual_fsize = pa_compute_frame_size (size, &save_fregs); 3829 if (flag_stack_usage_info) 3830 current_function_static_stack_size = actual_fsize; 3831 3832 /* Compute a few things we will use often. */ 3833 tmpreg = gen_rtx_REG (word_mode, 1); 3834 3835 /* Save RP first. The calling conventions manual states RP will 3836 always be stored into the caller's frame at sp - 20 or sp - 16 3837 depending on which ABI is in use. */ 3838 if (df_regs_ever_live_p (2) || crtl->calls_eh_return) 3839 { 3840 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM); 3841 rp_saved = true; 3842 } 3843 else 3844 rp_saved = false; 3845 3846 /* Allocate the local frame and set up the frame pointer if needed. */ 3847 if (actual_fsize != 0) 3848 { 3849 if (frame_pointer_needed) 3850 { 3851 /* Copy the old frame pointer temporarily into %r1. Set up the 3852 new stack pointer, then store away the saved old frame pointer 3853 into the stack at sp and at the same time update the stack 3854 pointer by actual_fsize bytes. Two versions, first 3855 handles small (<8k) frames. The second handles large (>=8k) 3856 frames. 
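The 8k boundary comes from the 14-bit signed displacement of the post-modify store (see VAL_14_BITS_P): a single stwm can only reach offsets below 8192, so a large frame is built from an 8192-64 byte stwm followed by an explicit add of the remainder.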
*/ 3857 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx); 3858 if (DO_FRAME_NOTES) 3859 RTX_FRAME_RELATED_P (insn) = 1; 3860 3861 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 3862 if (DO_FRAME_NOTES) 3863 RTX_FRAME_RELATED_P (insn) = 1; 3864 3865 if (VAL_14_BITS_P (actual_fsize)) 3866 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize); 3867 else 3868 { 3869 /* It is incorrect to store the saved frame pointer at *sp, 3870 then increment sp (writes beyond the current stack boundary). 3871 3872 So instead use stwm to store at *sp and post-increment the 3873 stack pointer as an atomic operation. Then increment sp to 3874 finish allocating the new frame. */ 3875 HOST_WIDE_INT adjust1 = 8192 - 64; 3876 HOST_WIDE_INT adjust2 = actual_fsize - adjust1; 3877 3878 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1); 3879 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 3880 adjust2, 1); 3881 } 3882 3883 /* We set SAVE_SP in frames that need a frame pointer. Thus, 3884 we need to store the previous stack pointer (frame pointer) 3885 into the frame marker on targets that use the HP unwind 3886 library. This allows the HP unwind library to be used to 3887 unwind GCC frames. However, we are not fully compatible 3888 with the HP library because our frame layout differs from 3889 that specified in the HP runtime specification. 3890 3891 We don't want a frame note on this instruction as the frame 3892 marker moves during dynamic stack allocation. 3893 3894 This instruction also serves as a blockage to prevent 3895 register spills from being scheduled before the stack 3896 pointer is raised. This is necessary as we store 3897 registers using the frame pointer as a base register, 3898 and the frame pointer is set before sp is raised. */ 3899 if (TARGET_HPUX_UNWIND_LIBRARY) 3900 { 3901 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, 3902 GEN_INT (TARGET_64BIT ? -8 : -4)); 3903 3904 emit_move_insn (gen_rtx_MEM (word_mode, addr), 3905 hard_frame_pointer_rtx); 3906 } 3907 else 3908 emit_insn (gen_blockage ()); 3909 } 3910 /* No frame pointer needed. */ 3911 else 3912 { 3913 /* In some cases we can perform the first callee register save 3914 and allocate the stack frame at the same time. If so, just 3915 make a note of it and defer allocating the frame until saving 3916 the callee registers. */ 3917 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0) 3918 merge_sp_adjust_with_store = 1; 3919 /* Cannot optimize. Adjust the stack frame by actual_fsize 3920 bytes. */ 3921 else 3922 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 3923 actual_fsize, 1); 3924 } 3925 } 3926 3927 /* Normal register save. 3928 3929 Do not save the frame pointer in the frame_pointer_needed case. It 3930 was done earlier. */ 3931 if (frame_pointer_needed) 3932 { 3933 offset = local_fsize; 3934 3935 /* Saving the EH return data registers in the frame is the simplest 3936 way to get the frame unwind information emitted. We put them 3937 just before the general registers. */ 3938 if (DO_FRAME_NOTES && crtl->calls_eh_return) 3939 { 3940 unsigned int i, regno; 3941 3942 for (i = 0; ; ++i) 3943 { 3944 regno = EH_RETURN_DATA_REGNO (i); 3945 if (regno == INVALID_REGNUM) 3946 break; 3947 3948 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); 3949 offset += UNITS_PER_WORD; 3950 } 3951 } 3952 3953 for (i = 18; i >= 4; i--) 3954 if (df_regs_ever_live_p (i) && !
call_used_regs[i]) 3955 { 3956 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM); 3957 offset += UNITS_PER_WORD; 3958 gr_saved++; 3959 } 3960 /* Account for %r3 which is saved in a special place. */ 3961 gr_saved++; 3962 } 3963 /* No frame pointer needed. */ 3964 else 3965 { 3966 offset = local_fsize - actual_fsize; 3967 3968 /* Saving the EH return data registers in the frame is the simplest 3969 way to get the frame unwind information emitted. */ 3970 if (DO_FRAME_NOTES && crtl->calls_eh_return) 3971 { 3972 unsigned int i, regno; 3973 3974 for (i = 0; ; ++i) 3975 { 3976 regno = EH_RETURN_DATA_REGNO (i); 3977 if (regno == INVALID_REGNUM) 3978 break; 3979 3980 /* If merge_sp_adjust_with_store is nonzero, then we can 3981 optimize the first save. */ 3982 if (merge_sp_adjust_with_store) 3983 { 3984 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset); 3985 merge_sp_adjust_with_store = 0; 3986 } 3987 else 3988 store_reg (regno, offset, STACK_POINTER_REGNUM); 3989 offset += UNITS_PER_WORD; 3990 } 3991 } 3992 3993 for (i = 18; i >= 3; i--) 3994 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) 3995 { 3996 /* If merge_sp_adjust_with_store is nonzero, then we can 3997 optimize the first GR save. */ 3998 if (merge_sp_adjust_with_store) 3999 { 4000 store_reg_modify (STACK_POINTER_REGNUM, i, -offset); 4001 merge_sp_adjust_with_store = 0; 4002 } 4003 else 4004 store_reg (i, offset, STACK_POINTER_REGNUM); 4005 offset += UNITS_PER_WORD; 4006 gr_saved++; 4007 } 4008 4009 /* If we wanted to merge the SP adjustment with a GR save, but we never 4010 did any GR saves, then just emit the adjustment here. */ 4011 if (merge_sp_adjust_with_store) 4012 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4013 actual_fsize, 1); 4014 } 4015 4016 /* The hppa calling conventions say that %r19, the pic offset 4017 register, is saved at sp - 32 (in this function's frame) 4018 when generating PIC code. FIXME: What is the correct thing 4019 to do for functions which make no calls and allocate no 4020 frame? Do we need to allocate a frame, or can we just omit 4021 the save? For now we'll just omit the save. 4022 4023 We don't want a note on this insn as the frame marker can 4024 move if there is a dynamic stack allocation. */ 4025 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT) 4026 { 4027 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32)); 4028 4029 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx); 4030 4031 } 4032 4033 /* Align pointer properly (doubleword boundary). */ 4034 offset = (offset + 7) & ~7; 4035 4036 /* Floating point register store. */ 4037 if (save_fregs) 4038 { 4039 rtx base; 4040 4041 /* First get the frame or stack pointer to the start of the FP register 4042 save area. */ 4043 if (frame_pointer_needed) 4044 { 4045 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); 4046 base = hard_frame_pointer_rtx; 4047 } 4048 else 4049 { 4050 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); 4051 base = stack_pointer_rtx; 4052 } 4053 4054 /* Now actually save the FP registers. */ 4055 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 4056 { 4057 if (df_regs_ever_live_p (i) 4058 || (! 
TARGET_64BIT && df_regs_ever_live_p (i + 1))) 4059 { 4060 rtx addr, insn, reg; 4061 addr = gen_rtx_MEM (DFmode, 4062 gen_rtx_POST_INC (word_mode, tmpreg)); 4063 reg = gen_rtx_REG (DFmode, i); 4064 insn = emit_move_insn (addr, reg); 4065 if (DO_FRAME_NOTES) 4066 { 4067 RTX_FRAME_RELATED_P (insn) = 1; 4068 if (TARGET_64BIT) 4069 { 4070 rtx mem = gen_rtx_MEM (DFmode, 4071 plus_constant (Pmode, base, 4072 offset)); 4073 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 4074 gen_rtx_SET (VOIDmode, mem, reg)); 4075 } 4076 else 4077 { 4078 rtx meml = gen_rtx_MEM (SFmode, 4079 plus_constant (Pmode, base, 4080 offset)); 4081 rtx memr = gen_rtx_MEM (SFmode, 4082 plus_constant (Pmode, base, 4083 offset + 4)); 4084 rtx regl = gen_rtx_REG (SFmode, i); 4085 rtx regr = gen_rtx_REG (SFmode, i + 1); 4086 rtx setl = gen_rtx_SET (VOIDmode, meml, regl); 4087 rtx setr = gen_rtx_SET (VOIDmode, memr, regr); 4088 rtvec vec; 4089 4090 RTX_FRAME_RELATED_P (setl) = 1; 4091 RTX_FRAME_RELATED_P (setr) = 1; 4092 vec = gen_rtvec (2, setl, setr); 4093 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 4094 gen_rtx_SEQUENCE (VOIDmode, vec)); 4095 } 4096 } 4097 offset += GET_MODE_SIZE (DFmode); 4098 fr_saved++; 4099 } 4100 } 4101 } 4102 } 4103 4104 /* Emit RTL to load REG from the memory location specified by BASE+DISP. 4105 Handle case where DISP > 8k by using the add_high_const patterns. */ 4106 4107 static void 4108 load_reg (int reg, HOST_WIDE_INT disp, int base) 4109 { 4110 rtx dest = gen_rtx_REG (word_mode, reg); 4111 rtx basereg = gen_rtx_REG (Pmode, base); 4112 rtx src; 4113 4114 if (VAL_14_BITS_P (disp)) 4115 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp)); 4116 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 4117 { 4118 rtx delta = GEN_INT (disp); 4119 rtx tmpreg = gen_rtx_REG (Pmode, 1); 4120 4121 emit_move_insn (tmpreg, delta); 4122 if (TARGET_DISABLE_INDEXING) 4123 { 4124 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 4125 src = gen_rtx_MEM (word_mode, tmpreg); 4126 } 4127 else 4128 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 4129 } 4130 else 4131 { 4132 rtx delta = GEN_INT (disp); 4133 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); 4134 rtx tmpreg = gen_rtx_REG (Pmode, 1); 4135 4136 emit_move_insn (tmpreg, high); 4137 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 4138 } 4139 4140 emit_move_insn (dest, src); 4141 } 4142 4143 /* Update the total code bytes output to the text section. */ 4144 4145 static void 4146 update_total_code_bytes (unsigned int nbytes) 4147 { 4148 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM) 4149 && !IN_NAMED_SECTION_P (cfun->decl)) 4150 { 4151 unsigned int old_total = total_code_bytes; 4152 4153 total_code_bytes += nbytes; 4154 4155 /* Be prepared to handle overflows. */ 4156 if (old_total > total_code_bytes) 4157 total_code_bytes = UINT_MAX; 4158 } 4159 } 4160 4161 /* This function generates the assembly code for function exit. 4162 Args are as for output_function_prologue (). 4163 4164 The function epilogue should not depend on the current stack 4165 pointer! It should use the frame pointer only. This is mandatory 4166 because of alloca; we also take advantage of it to omit stack 4167 adjustments before returning. */ 4168 4169 static void 4170 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 4171 { 4172 rtx insn = get_last_insn (); 4173 bool extra_nop; 4174 4175 /* pa_expand_epilogue does the dirty work now. 
We just need 4176 to output the assembler directives which denote the end 4177 of a function. 4178 4179 To make debuggers happy, emit a nop if the epilogue was completely 4180 eliminated due to a volatile call as the last insn in the 4181 current function. That way the return address (in %r2) will 4182 always point to a valid instruction in the current function. */ 4183 4184 /* Get the last real insn. */ 4185 if (GET_CODE (insn) == NOTE) 4186 insn = prev_real_insn (insn); 4187 4188 /* If it is a sequence, then look inside. */ 4189 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) 4190 insn = XVECEXP (PATTERN (insn), 0, 0); 4191 4192 /* If insn is a CALL_INSN, then it must be a call to a volatile 4193 function (otherwise there would be epilogue insns). */ 4194 if (insn && GET_CODE (insn) == CALL_INSN) 4195 { 4196 fputs ("\tnop\n", file); 4197 extra_nop = true; 4198 } 4199 else 4200 extra_nop = false; 4201 4202 fputs ("\t.EXIT\n\t.PROCEND\n", file); 4203 4204 if (TARGET_SOM && TARGET_GAS) 4205 { 4206 /* We are done with this subspace except possibly for some additional 4207 debug information. Forget that we are in this subspace to ensure 4208 that the next function is output in its own subspace. */ 4209 in_section = NULL; 4210 cfun->machine->in_nsubspa = 2; 4211 } 4212 4213 /* Thunks do their own insn accounting. */ 4214 if (cfun->is_thunk) 4215 return; 4216 4217 if (INSN_ADDRESSES_SET_P ()) 4218 { 4219 last_address = extra_nop ? 4 : 0; 4220 insn = get_last_nonnote_insn (); 4221 if (insn) 4222 { 4223 last_address += INSN_ADDRESSES (INSN_UID (insn)); 4224 if (INSN_P (insn)) 4225 last_address += insn_default_length (insn); 4226 } 4227 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) 4228 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); 4229 } 4230 else 4231 last_address = UINT_MAX; 4232 4233 /* Finally, update the total number of code bytes output so far. */ 4234 update_total_code_bytes (last_address); 4235 } 4236 4237 void 4238 pa_expand_epilogue (void) 4239 { 4240 rtx tmpreg; 4241 HOST_WIDE_INT offset; 4242 HOST_WIDE_INT ret_off = 0; 4243 int i; 4244 int merge_sp_adjust_with_load = 0; 4245 4246 /* We will use this often. */ 4247 tmpreg = gen_rtx_REG (word_mode, 1); 4248 4249 /* Try to restore RP early to avoid load/use interlocks when 4250 RP gets used in the return (bv) instruction. This appears to still 4251 be necessary even when we schedule the prologue and epilogue. */ 4252 if (rp_saved) 4253 { 4254 ret_off = TARGET_64BIT ? -16 : -20; 4255 if (frame_pointer_needed) 4256 { 4257 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM); 4258 ret_off = 0; 4259 } 4260 else 4261 { 4262 /* No frame pointer, and stack is smaller than 8k. */ 4263 if (VAL_14_BITS_P (ret_off - actual_fsize)) 4264 { 4265 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM); 4266 ret_off = 0; 4267 } 4268 } 4269 } 4270 4271 /* General register restores. */ 4272 if (frame_pointer_needed) 4273 { 4274 offset = local_fsize; 4275 4276 /* If the current function calls __builtin_eh_return, then we need 4277 to restore the saved EH data registers. */ 4278 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4279 { 4280 unsigned int i, regno; 4281 4282 for (i = 0; ; ++i) 4283 { 4284 regno = EH_RETURN_DATA_REGNO (i); 4285 if (regno == INVALID_REGNUM) 4286 break; 4287 4288 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); 4289 offset += UNITS_PER_WORD; 4290 } 4291 } 4292 4293 for (i = 18; i >= 4; i--) 4294 if (df_regs_ever_live_p (i) && ! 
call_used_regs[i]) 4295 { 4296 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM); 4297 offset += UNITS_PER_WORD; 4298 } 4299 } 4300 else 4301 { 4302 offset = local_fsize - actual_fsize; 4303 4304 /* If the current function calls __builtin_eh_return, then we need 4305 to restore the saved EH data registers. */ 4306 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4307 { 4308 unsigned int i, regno; 4309 4310 for (i = 0; ; ++i) 4311 { 4312 regno = EH_RETURN_DATA_REGNO (i); 4313 if (regno == INVALID_REGNUM) 4314 break; 4315 4316 /* Only for the first load. 4317 merge_sp_adjust_with_load holds the register load 4318 with which we will merge the sp adjustment. */ 4319 if (merge_sp_adjust_with_load == 0 4320 && local_fsize == 0 4321 && VAL_14_BITS_P (-actual_fsize)) 4322 merge_sp_adjust_with_load = regno; 4323 else 4324 load_reg (regno, offset, STACK_POINTER_REGNUM); 4325 offset += UNITS_PER_WORD; 4326 } 4327 } 4328 4329 for (i = 18; i >= 3; i--) 4330 { 4331 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) 4332 { 4333 /* Only for the first load. 4334 merge_sp_adjust_with_load holds the register load 4335 with which we will merge the sp adjustment. */ 4336 if (merge_sp_adjust_with_load == 0 4337 && local_fsize == 0 4338 && VAL_14_BITS_P (-actual_fsize)) 4339 merge_sp_adjust_with_load = i; 4340 else 4341 load_reg (i, offset, STACK_POINTER_REGNUM); 4342 offset += UNITS_PER_WORD; 4343 } 4344 } 4345 } 4346 4347 /* Align pointer properly (doubleword boundary). */ 4348 offset = (offset + 7) & ~7; 4349 4350 /* FP register restores. */ 4351 if (save_fregs) 4352 { 4353 /* Adjust the register to index off of. */ 4354 if (frame_pointer_needed) 4355 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); 4356 else 4357 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); 4358 4359 /* Actually do the restores now. */ 4360 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 4361 if (df_regs_ever_live_p (i) 4362 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1))) 4363 { 4364 rtx src = gen_rtx_MEM (DFmode, 4365 gen_rtx_POST_INC (word_mode, tmpreg)); 4366 rtx dest = gen_rtx_REG (DFmode, i); 4367 emit_move_insn (dest, src); 4368 } 4369 } 4370 4371 /* Emit a blockage insn here to keep these insns from being moved to 4372 an earlier spot in the epilogue, or into the main instruction stream. 4373 4374 This is necessary as we must not cut the stack back before all the 4375 restores are finished. */ 4376 emit_insn (gen_blockage ()); 4377 4378 /* Reset stack pointer (and possibly frame pointer). The stack 4379 pointer is initially set to fp + 64 to avoid a race condition. */ 4380 if (frame_pointer_needed) 4381 { 4382 rtx delta = GEN_INT (-64); 4383 4384 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0); 4385 emit_insn (gen_pre_load (hard_frame_pointer_rtx, 4386 stack_pointer_rtx, delta)); 4387 } 4388 /* If we were deferring a callee register restore, do it now. */ 4389 else if (merge_sp_adjust_with_load) 4390 { 4391 rtx delta = GEN_INT (-actual_fsize); 4392 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load); 4393 4394 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta)); 4395 } 4396 else if (actual_fsize != 0) 4397 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4398 - actual_fsize, 0); 4399 4400 /* If we haven't restored %r2 yet (no frame pointer, and a stack 4401 frame greater than 8k), do so now. 
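At this point the frame has been cut back, so the RP slot at sp - 20 (sp - 16 for TARGET_64BIT) recorded in ret_off is once again reachable with a short displacement.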
*/ 4402 if (ret_off != 0) 4403 load_reg (2, ret_off, STACK_POINTER_REGNUM); 4404 4405 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4406 { 4407 rtx sa = EH_RETURN_STACKADJ_RTX; 4408 4409 emit_insn (gen_blockage ()); 4410 emit_insn (TARGET_64BIT 4411 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa) 4412 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa)); 4413 } 4414 } 4415 4416 bool 4417 pa_can_use_return_insn (void) 4418 { 4419 if (!reload_completed) 4420 return false; 4421 4422 if (frame_pointer_needed) 4423 return false; 4424 4425 if (df_regs_ever_live_p (2)) 4426 return false; 4427 4428 if (crtl->profile) 4429 return false; 4430 4431 return pa_compute_frame_size (get_frame_size (), 0) == 0; 4432 } 4433 4434 rtx 4435 hppa_pic_save_rtx (void) 4436 { 4437 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM); 4438 } 4439 4440 #ifndef NO_DEFERRED_PROFILE_COUNTERS 4441 #define NO_DEFERRED_PROFILE_COUNTERS 0 4442 #endif 4443 4444 4445 /* Vector of funcdef numbers. */ 4446 static vec<int> funcdef_nos; 4447 4448 /* Output deferred profile counters. */ 4449 static void 4450 output_deferred_profile_counters (void) 4451 { 4452 unsigned int i; 4453 int align, n; 4454 4455 if (funcdef_nos.is_empty ()) 4456 return; 4457 4458 switch_to_section (data_section); 4459 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE); 4460 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); 4461 4462 for (i = 0; funcdef_nos.iterate (i, &n); i++) 4463 { 4464 targetm.asm_out.internal_label (asm_out_file, "LP", n); 4465 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1); 4466 } 4467 4468 funcdef_nos.release (); 4469 } 4470 4471 void 4472 hppa_profile_hook (int label_no) 4473 { 4474 /* We use SImode for the address of the function in both 32 and 4475 64-bit code to avoid having to provide DImode versions of the 4476 lcla2 and load_offset_label_address insn patterns. */ 4477 rtx reg = gen_reg_rtx (SImode); 4478 rtx label_rtx = gen_label_rtx (); 4479 rtx begin_label_rtx, call_insn; 4480 char begin_label_name[16]; 4481 4482 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL, 4483 label_no); 4484 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name)); 4485 4486 if (TARGET_64BIT) 4487 emit_move_insn (arg_pointer_rtx, 4488 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, 4489 GEN_INT (64))); 4490 4491 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2)); 4492 4493 /* The address of the function is loaded into %r25 with an instruction- 4494 relative sequence that avoids the use of relocations. The sequence 4495 is split so that the load_offset_label_address instruction can 4496 occupy the delay slot of the call to _mcount. 
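Schematically (illustrative only): lcla1/lcla2 leave the address of the local label in REG, and load_offset_label_address then adds the difference between the FUNC_BEGIN_PROLOG label and that local label to form the final address in %r25.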
*/ 4497 if (TARGET_PA_20) 4498 emit_insn (gen_lcla2 (reg, label_rtx)); 4499 else 4500 emit_insn (gen_lcla1 (reg, label_rtx)); 4501 4502 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25), 4503 reg, begin_label_rtx, label_rtx)); 4504 4505 #if !NO_DEFERRED_PROFILE_COUNTERS 4506 { 4507 rtx count_label_rtx, addr, r24; 4508 char count_label_name[16]; 4509 4510 funcdef_nos.safe_push (label_no); 4511 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no); 4512 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name)); 4513 4514 addr = force_reg (Pmode, count_label_rtx); 4515 r24 = gen_rtx_REG (Pmode, 24); 4516 emit_move_insn (r24, addr); 4517 4518 call_insn = 4519 emit_call_insn (gen_call (gen_rtx_MEM (Pmode, 4520 gen_rtx_SYMBOL_REF (Pmode, 4521 "_mcount")), 4522 GEN_INT (TARGET_64BIT ? 24 : 12))); 4523 4524 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24); 4525 } 4526 #else 4527 4528 call_insn = 4529 emit_call_insn (gen_call (gen_rtx_MEM (Pmode, 4530 gen_rtx_SYMBOL_REF (Pmode, 4531 "_mcount")), 4532 GEN_INT (TARGET_64BIT ? 16 : 8))); 4533 4534 #endif 4535 4536 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25)); 4537 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26)); 4538 4539 /* Indicate the _mcount call cannot throw, nor will it execute a 4540 non-local goto. */ 4541 make_reg_eh_region_note_nothrow_nononlocal (call_insn); 4542 } 4543 4544 /* Fetch the return address for the frame COUNT steps up from 4545 the current frame, after the prologue. FRAMEADDR is the 4546 frame pointer of the COUNT frame. 4547 4548 We want to ignore any export stub remnants here. To handle this, 4549 we examine the code at the return address, and if it is an export 4550 stub, we return a memory rtx for the stub return address stored 4551 at frame-24. 4552 4553 The value returned is used in two different ways: 4554 4555 1. To find a function's caller. 4556 4557 2. To change the return address for a function. 4558 4559 This function handles most instances of case 1; however, it will 4560 fail if there are two levels of stubs to execute on the return 4561 path. The only way I believe that can happen is if the return value 4562 needs a parameter relocation, which never happens for C code. 4563 4564 This function handles most instances of case 2; however, it will 4565 fail if we did not originally have stub code on the return path 4566 but will need stub code on the new return path. This can happen if 4567 the caller & callee are both in the main program, but the new 4568 return location is in a shared library. */ 4569 4570 rtx 4571 pa_return_addr_rtx (int count, rtx frameaddr) 4572 { 4573 rtx label; 4574 rtx rp; 4575 rtx saved_rp; 4576 rtx ins; 4577 4578 /* The instruction stream at the return address of a PA1.X export stub is: 4579 4580 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp 4581 0x004010a1 | stub+12: ldsid (sr0,rp),r1 4582 0x00011820 | stub+16: mtsp r1,sr0 4583 0xe0400002 | stub+20: be,n 0(sr0,rp) 4584 4585 0xe0400002 must be specified as -532676606 so that it won't be 4586 rejected as an invalid immediate operand on 64-bit hosts. 
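Likewise, the PA2.0 pattern 0xe840d002 below has to be written as -398405630.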
4587 4588 The instruction stream at the return address of a PA2.0 export stub is: 4589 4590 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp 4591 0xe840d002 | stub+12: bve,n (rp) 4592 */ 4593 4594 HOST_WIDE_INT insns[4]; 4595 int i, len; 4596 4597 if (count != 0) 4598 return NULL_RTX; 4599 4600 rp = get_hard_reg_initial_val (Pmode, 2); 4601 4602 if (TARGET_64BIT || TARGET_NO_SPACE_REGS) 4603 return rp; 4604 4605 /* If there is no export stub then just use the value saved from 4606 the return pointer register. */ 4607 4608 saved_rp = gen_reg_rtx (Pmode); 4609 emit_move_insn (saved_rp, rp); 4610 4611 /* Get pointer to the instruction stream. We have to mask out the 4612 privilege level from the two low order bits of the return address 4613 pointer here so that ins will point to the start of the first 4614 instruction that would have been executed if we returned. */ 4615 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR)); 4616 label = gen_label_rtx (); 4617 4618 if (TARGET_PA_20) 4619 { 4620 insns[0] = 0x4bc23fd1; 4621 insns[1] = -398405630; 4622 len = 2; 4623 } 4624 else 4625 { 4626 insns[0] = 0x4bc23fd1; 4627 insns[1] = 0x004010a1; 4628 insns[2] = 0x00011820; 4629 insns[3] = -532676606; 4630 len = 4; 4631 } 4632 4633 /* Check the instruction stream at the normal return address for the 4634 export stub. If it is an export stub, then our return address is 4635 really in -24[frameaddr]. */ 4636 4637 for (i = 0; i < len; i++) 4638 { 4639 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4)); 4640 rtx op1 = GEN_INT (insns[i]); 4641 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label); 4642 } 4643 4644 /* Here we know that our return address points to an export 4645 stub. We don't want to return the address of the export stub, 4646 but rather the return address of the export stub. That return 4647 address is stored at -24[frameaddr]. */ 4648 4649 emit_move_insn (saved_rp, 4650 gen_rtx_MEM (Pmode, 4651 memory_address (Pmode, 4652 plus_constant (Pmode, frameaddr, 4653 -24)))); 4654 4655 emit_label (label); 4656 4657 return saved_rp; 4658 } 4659 4660 void 4661 pa_emit_bcond_fp (rtx operands[]) 4662 { 4663 enum rtx_code code = GET_CODE (operands[0]); 4664 rtx operand0 = operands[1]; 4665 rtx operand1 = operands[2]; 4666 rtx label = operands[3]; 4667 4668 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0), 4669 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1))); 4670 4671 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, 4672 gen_rtx_IF_THEN_ELSE (VOIDmode, 4673 gen_rtx_fmt_ee (NE, 4674 VOIDmode, 4675 gen_rtx_REG (CCFPmode, 0), 4676 const0_rtx), 4677 gen_rtx_LABEL_REF (VOIDmode, label), 4678 pc_rtx))); 4679 4680 } 4681 4682 /* Adjust the cost of a scheduling dependency. Return the new cost of 4683 the dependency LINK of INSN on DEP_INSN. COST is the current cost. */ 4684 4685 static int 4686 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) 4687 { 4688 enum attr_type attr_type; 4689 4690 /* Don't adjust costs for a pa8000 chip; also do not adjust any 4691 true dependencies, as they are described with bypasses now. */ 4692 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0) 4693 return cost; 4694 4695 if (! recog_memoized (insn)) 4696 return 0; 4697 4698 attr_type = get_attr_type (insn); 4699 4700 switch (REG_NOTE_KIND (link)) 4701 { 4702 case REG_DEP_ANTI: 4703 /* Anti dependency; DEP_INSN reads a register that INSN writes some 4704 cycles later.
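For example (registers illustrative), if DEP_INSN is an fadd reading %fr4 and INSN is an fldw that overwrites %fr4, the load must be held back until the fadd has fetched its operands; the cases below therefore charge the arithmetic operation's latency minus one (fpload) or minus two (fpalu) cycles.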
*/ 4705 4706 if (attr_type == TYPE_FPLOAD) 4707 { 4708 rtx pat = PATTERN (insn); 4709 rtx dep_pat = PATTERN (dep_insn); 4710 if (GET_CODE (pat) == PARALLEL) 4711 { 4712 /* This happens for the fldXs,mb patterns. */ 4713 pat = XVECEXP (pat, 0, 0); 4714 } 4715 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4716 /* If this happens, we have to extend this to schedule 4717 optimally. Return 0 for now. */ 4718 return 0; 4719 4720 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) 4721 { 4722 if (! recog_memoized (dep_insn)) 4723 return 0; 4724 switch (get_attr_type (dep_insn)) 4725 { 4726 case TYPE_FPALU: 4727 case TYPE_FPMULSGL: 4728 case TYPE_FPMULDBL: 4729 case TYPE_FPDIVSGL: 4730 case TYPE_FPDIVDBL: 4731 case TYPE_FPSQRTSGL: 4732 case TYPE_FPSQRTDBL: 4733 /* A fpload can't be issued until one cycle before a 4734 preceding arithmetic operation has finished if 4735 the target of the fpload is any of the sources 4736 (or destination) of the arithmetic operation. */ 4737 return insn_default_latency (dep_insn) - 1; 4738 4739 default: 4740 return 0; 4741 } 4742 } 4743 } 4744 else if (attr_type == TYPE_FPALU) 4745 { 4746 rtx pat = PATTERN (insn); 4747 rtx dep_pat = PATTERN (dep_insn); 4748 if (GET_CODE (pat) == PARALLEL) 4749 { 4750 /* This happens for the fldXs,mb patterns. */ 4751 pat = XVECEXP (pat, 0, 0); 4752 } 4753 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4754 /* If this happens, we have to extend this to schedule 4755 optimally. Return 0 for now. */ 4756 return 0; 4757 4758 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) 4759 { 4760 if (! recog_memoized (dep_insn)) 4761 return 0; 4762 switch (get_attr_type (dep_insn)) 4763 { 4764 case TYPE_FPDIVSGL: 4765 case TYPE_FPDIVDBL: 4766 case TYPE_FPSQRTSGL: 4767 case TYPE_FPSQRTDBL: 4768 /* An ALU flop can't be issued until two cycles before a 4769 preceding divide or sqrt operation has finished if 4770 the target of the ALU flop is any of the sources 4771 (or destination) of the divide or sqrt operation. */ 4772 return insn_default_latency (dep_insn) - 2; 4773 4774 default: 4775 return 0; 4776 } 4777 } 4778 } 4779 4780 /* For other anti dependencies, the cost is 0. */ 4781 return 0; 4782 4783 case REG_DEP_OUTPUT: 4784 /* Output dependency; DEP_INSN writes a register that INSN writes some 4785 cycles later. */ 4786 if (attr_type == TYPE_FPLOAD) 4787 { 4788 rtx pat = PATTERN (insn); 4789 rtx dep_pat = PATTERN (dep_insn); 4790 if (GET_CODE (pat) == PARALLEL) 4791 { 4792 /* This happens for the fldXs,mb patterns. */ 4793 pat = XVECEXP (pat, 0, 0); 4794 } 4795 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4796 /* If this happens, we have to extend this to schedule 4797 optimally. Return 0 for now. */ 4798 return 0; 4799 4800 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) 4801 { 4802 if (! recog_memoized (dep_insn)) 4803 return 0; 4804 switch (get_attr_type (dep_insn)) 4805 { 4806 case TYPE_FPALU: 4807 case TYPE_FPMULSGL: 4808 case TYPE_FPMULDBL: 4809 case TYPE_FPDIVSGL: 4810 case TYPE_FPDIVDBL: 4811 case TYPE_FPSQRTSGL: 4812 case TYPE_FPSQRTDBL: 4813 /* A fpload can't be issued until one cycle before a 4814 preceding arithmetic operation has finished if 4815 the target of the fpload is the destination of the 4816 arithmetic operation. 4817 4818 Exception: For PA7100LC, PA7200 and PA7300, the cost 4819 is 3 cycles, unless they bundle together. We also 4820 pay the penalty if the second insn is a fpload. 
*/ 4821 return insn_default_latency (dep_insn) - 1; 4822 4823 default: 4824 return 0; 4825 } 4826 } 4827 } 4828 else if (attr_type == TYPE_FPALU) 4829 { 4830 rtx pat = PATTERN (insn); 4831 rtx dep_pat = PATTERN (dep_insn); 4832 if (GET_CODE (pat) == PARALLEL) 4833 { 4834 /* This happens for the fldXs,mb patterns. */ 4835 pat = XVECEXP (pat, 0, 0); 4836 } 4837 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4838 /* If this happens, we have to extend this to schedule 4839 optimally. Return 0 for now. */ 4840 return 0; 4841 4842 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) 4843 { 4844 if (! recog_memoized (dep_insn)) 4845 return 0; 4846 switch (get_attr_type (dep_insn)) 4847 { 4848 case TYPE_FPDIVSGL: 4849 case TYPE_FPDIVDBL: 4850 case TYPE_FPSQRTSGL: 4851 case TYPE_FPSQRTDBL: 4852 /* An ALU flop can't be issued until two cycles before a 4853 preceding divide or sqrt operation has finished if 4854 the target of the ALU flop is also the target of 4855 the divide or sqrt operation. */ 4856 return insn_default_latency (dep_insn) - 2; 4857 4858 default: 4859 return 0; 4860 } 4861 } 4862 } 4863 4864 /* For other output dependencies, the cost is 0. */ 4865 return 0; 4866 4867 default: 4868 gcc_unreachable (); 4869 } 4870 } 4871 4872 /* Adjust scheduling priorities. We use this to try and keep addil 4873 and the next use of %r1 close together. */ 4874 static int 4875 pa_adjust_priority (rtx insn, int priority) 4876 { 4877 rtx set = single_set (insn); 4878 rtx src, dest; 4879 if (set) 4880 { 4881 src = SET_SRC (set); 4882 dest = SET_DEST (set); 4883 if (GET_CODE (src) == LO_SUM 4884 && symbolic_operand (XEXP (src, 1), VOIDmode) 4885 && ! read_only_operand (XEXP (src, 1), VOIDmode)) 4886 priority >>= 3; 4887 4888 else if (GET_CODE (src) == MEM 4889 && GET_CODE (XEXP (src, 0)) == LO_SUM 4890 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode) 4891 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode)) 4892 priority >>= 1; 4893 4894 else if (GET_CODE (dest) == MEM 4895 && GET_CODE (XEXP (dest, 0)) == LO_SUM 4896 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode) 4897 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)) 4898 priority >>= 3; 4899 } 4900 return priority; 4901 } 4902 4903 /* The 700 can only issue a single insn at a time. 4904 The 7XXX processors can issue two insns at a time. 4905 The 8000 can issue 4 insns at a time. */ 4906 static int 4907 pa_issue_rate (void) 4908 { 4909 switch (pa_cpu) 4910 { 4911 case PROCESSOR_700: return 1; 4912 case PROCESSOR_7100: return 2; 4913 case PROCESSOR_7100LC: return 2; 4914 case PROCESSOR_7200: return 2; 4915 case PROCESSOR_7300: return 2; 4916 case PROCESSOR_8000: return 4; 4917 4918 default: 4919 gcc_unreachable (); 4920 } 4921 } 4922 4923 4924 4925 /* Return any length plus adjustment needed by INSN which already has 4926 its length computed as LENGTH. Return LENGTH if no adjustment is 4927 necessary. 4928 4929 Also compute the length of an inline block move here as it is too 4930 complicated to express as a length attribute in pa.md. */ 4931 int 4932 pa_adjust_insn_length (rtx insn, int length) 4933 { 4934 rtx pat = PATTERN (insn); 4935 4936 /* If length is negative or undefined, provide initial length. 
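(The unsigned comparison below folds both cases into a single test: a LENGTH of -1, or any other negative value, converts to a value >= INT_MAX.)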
*/ 4937 if ((unsigned int) length >= INT_MAX) 4938 { 4939 if (GET_CODE (pat) == SEQUENCE) 4940 insn = XVECEXP (pat, 0, 0); 4941 4942 switch (get_attr_type (insn)) 4943 { 4944 case TYPE_MILLI: 4945 length = pa_attr_length_millicode_call (insn); 4946 break; 4947 case TYPE_CALL: 4948 length = pa_attr_length_call (insn, 0); 4949 break; 4950 case TYPE_SIBCALL: 4951 length = pa_attr_length_call (insn, 1); 4952 break; 4953 case TYPE_DYNCALL: 4954 length = pa_attr_length_indirect_call (insn); 4955 break; 4956 case TYPE_SH_FUNC_ADRS: 4957 length = pa_attr_length_millicode_call (insn) + 20; 4958 break; 4959 default: 4960 gcc_unreachable (); 4961 } 4962 } 4963 4964 /* Jumps inside switch tables which have unfilled delay slots need 4965 adjustment. */ 4966 if (GET_CODE (insn) == JUMP_INSN 4967 && GET_CODE (pat) == PARALLEL 4968 && get_attr_type (insn) == TYPE_BTABLE_BRANCH) 4969 length += 4; 4970 /* Block move pattern. */ 4971 else if (GET_CODE (insn) == INSN 4972 && GET_CODE (pat) == PARALLEL 4973 && GET_CODE (XVECEXP (pat, 0, 0)) == SET 4974 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM 4975 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM 4976 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode 4977 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) 4978 length += compute_movmem_length (insn) - 4; 4979 /* Block clear pattern. */ 4980 else if (GET_CODE (insn) == INSN 4981 && GET_CODE (pat) == PARALLEL 4982 && GET_CODE (XVECEXP (pat, 0, 0)) == SET 4983 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM 4984 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx 4985 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode) 4986 length += compute_clrmem_length (insn) - 4; 4987 /* Conditional branch with an unfilled delay slot. */ 4988 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn)) 4989 { 4990 /* Adjust a short backwards conditional with an unfilled delay slot. */ 4991 if (GET_CODE (pat) == SET 4992 && length == 4 4993 && JUMP_LABEL (insn) != NULL_RTX 4994 && ! forward_branch_p (insn)) 4995 length += 4; 4996 else if (GET_CODE (pat) == PARALLEL 4997 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH 4998 && length == 4) 4999 length += 4; 5000 /* Adjust dbra insn with short backwards conditional branch with 5001 unfilled delay slot -- only for the case where the counter is in a 5002 general register. */ 5003 else if (GET_CODE (pat) == PARALLEL 5004 && GET_CODE (XVECEXP (pat, 0, 1)) == SET 5005 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG 5006 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0)) 5007 && length == 4 5008 && ! forward_branch_p (insn)) 5009 length += 4; 5010 } 5011 return length; 5012 } 5013 5014 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */ 5015 5016 static bool 5017 pa_print_operand_punct_valid_p (unsigned char code) 5018 { 5019 if (code == '@' 5020 || code == '#' 5021 || code == '*' 5022 || code == '^') 5023 return true; 5024 5025 return false; 5026 } 5027 5028 /* Print operand X (an rtx) in assembler syntax to file FILE. 5029 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. 5030 For `%' followed by punctuation, CODE is the punctuation and X is null. */ 5031 5032 void 5033 pa_print_operand (FILE *file, rtx x, int code) 5034 { 5035 switch (code) 5036 { 5037 case '#': 5038 /* Output a 'nop' if there's nothing for the delay slot.
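For instance (illustrative), a 'bv %r0(%r2)' return whose delay slot was left unfilled gets an explicit nop printed after it.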
*/ 5039 if (dbr_sequence_length () == 0) 5040 fputs ("\n\tnop", file); 5041 return; 5042 case '*': 5043 /* Output a nullification completer if there's nothing for the */ 5044 /* delay slot or nullification is requested. */ 5045 if (dbr_sequence_length () == 0 || 5046 (final_sequence && 5047 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))) 5048 fputs (",n", file); 5049 return; 5050 case 'R': 5051 /* Print out the second register name of a register pair. 5052 I.e., R (6) => 7. */ 5053 fputs (reg_names[REGNO (x) + 1], file); 5054 return; 5055 case 'r': 5056 /* A register or zero. */ 5057 if (x == const0_rtx 5058 || (x == CONST0_RTX (DFmode)) 5059 || (x == CONST0_RTX (SFmode))) 5060 { 5061 fputs ("%r0", file); 5062 return; 5063 } 5064 else 5065 break; 5066 case 'f': 5067 /* A register or zero (floating point). */ 5068 if (x == const0_rtx 5069 || (x == CONST0_RTX (DFmode)) 5070 || (x == CONST0_RTX (SFmode))) 5071 { 5072 fputs ("%fr0", file); 5073 return; 5074 } 5075 else 5076 break; 5077 case 'A': 5078 { 5079 rtx xoperands[2]; 5080 5081 xoperands[0] = XEXP (XEXP (x, 0), 0); 5082 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0); 5083 pa_output_global_address (file, xoperands[1], 0); 5084 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]); 5085 return; 5086 } 5087 5088 case 'C': /* Plain (C)ondition */ 5089 case 'X': 5090 switch (GET_CODE (x)) 5091 { 5092 case EQ: 5093 fputs ("=", file); break; 5094 case NE: 5095 fputs ("<>", file); break; 5096 case GT: 5097 fputs (">", file); break; 5098 case GE: 5099 fputs (">=", file); break; 5100 case GEU: 5101 fputs (">>=", file); break; 5102 case GTU: 5103 fputs (">>", file); break; 5104 case LT: 5105 fputs ("<", file); break; 5106 case LE: 5107 fputs ("<=", file); break; 5108 case LEU: 5109 fputs ("<<=", file); break; 5110 case LTU: 5111 fputs ("<<", file); break; 5112 default: 5113 gcc_unreachable (); 5114 } 5115 return; 5116 case 'N': /* Condition, (N)egated */ 5117 switch (GET_CODE (x)) 5118 { 5119 case EQ: 5120 fputs ("<>", file); break; 5121 case NE: 5122 fputs ("=", file); break; 5123 case GT: 5124 fputs ("<=", file); break; 5125 case GE: 5126 fputs ("<", file); break; 5127 case GEU: 5128 fputs ("<<", file); break; 5129 case GTU: 5130 fputs ("<<=", file); break; 5131 case LT: 5132 fputs (">=", file); break; 5133 case LE: 5134 fputs (">", file); break; 5135 case LEU: 5136 fputs (">>", file); break; 5137 case LTU: 5138 fputs (">>=", file); break; 5139 default: 5140 gcc_unreachable (); 5141 } 5142 return; 5143 /* For floating point comparisons. Note that the output 5144 predicates are the complement of the desired mode. The 5145 conditions for GT, GE, LT, LE and LTGT cause an invalid 5146 operation exception if the result is unordered and this 5147 exception is enabled in the floating-point status register. 
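For example, a GT comparison is printed below as its complement '!>', so the conditional branch consuming the result tests the negated condition.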
*/ 5148 case 'Y': 5149 switch (GET_CODE (x)) 5150 { 5151 case EQ: 5152 fputs ("!=", file); break; 5153 case NE: 5154 fputs ("=", file); break; 5155 case GT: 5156 fputs ("!>", file); break; 5157 case GE: 5158 fputs ("!>=", file); break; 5159 case LT: 5160 fputs ("!<", file); break; 5161 case LE: 5162 fputs ("!<=", file); break; 5163 case LTGT: 5164 fputs ("!<>", file); break; 5165 case UNLE: 5166 fputs ("!?<=", file); break; 5167 case UNLT: 5168 fputs ("!?<", file); break; 5169 case UNGE: 5170 fputs ("!?>=", file); break; 5171 case UNGT: 5172 fputs ("!?>", file); break; 5173 case UNEQ: 5174 fputs ("!?=", file); break; 5175 case UNORDERED: 5176 fputs ("!?", file); break; 5177 case ORDERED: 5178 fputs ("?", file); break; 5179 default: 5180 gcc_unreachable (); 5181 } 5182 return; 5183 case 'S': /* Condition, operands are (S)wapped. */ 5184 switch (GET_CODE (x)) 5185 { 5186 case EQ: 5187 fputs ("=", file); break; 5188 case NE: 5189 fputs ("<>", file); break; 5190 case GT: 5191 fputs ("<", file); break; 5192 case GE: 5193 fputs ("<=", file); break; 5194 case GEU: 5195 fputs ("<<=", file); break; 5196 case GTU: 5197 fputs ("<<", file); break; 5198 case LT: 5199 fputs (">", file); break; 5200 case LE: 5201 fputs (">=", file); break; 5202 case LEU: 5203 fputs (">>=", file); break; 5204 case LTU: 5205 fputs (">>", file); break; 5206 default: 5207 gcc_unreachable (); 5208 } 5209 return; 5210 case 'B': /* Condition, (B)oth swapped and negate. */ 5211 switch (GET_CODE (x)) 5212 { 5213 case EQ: 5214 fputs ("<>", file); break; 5215 case NE: 5216 fputs ("=", file); break; 5217 case GT: 5218 fputs (">=", file); break; 5219 case GE: 5220 fputs (">", file); break; 5221 case GEU: 5222 fputs (">>", file); break; 5223 case GTU: 5224 fputs (">>=", file); break; 5225 case LT: 5226 fputs ("<=", file); break; 5227 case LE: 5228 fputs ("<", file); break; 5229 case LEU: 5230 fputs ("<<", file); break; 5231 case LTU: 5232 fputs ("<<=", file); break; 5233 default: 5234 gcc_unreachable (); 5235 } 5236 return; 5237 case 'k': 5238 gcc_assert (GET_CODE (x) == CONST_INT); 5239 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x)); 5240 return; 5241 case 'Q': 5242 gcc_assert (GET_CODE (x) == CONST_INT); 5243 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63)); 5244 return; 5245 case 'L': 5246 gcc_assert (GET_CODE (x) == CONST_INT); 5247 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31)); 5248 return; 5249 case 'O': 5250 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0); 5251 fprintf (file, "%d", exact_log2 (INTVAL (x))); 5252 return; 5253 case 'p': 5254 gcc_assert (GET_CODE (x) == CONST_INT); 5255 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63)); 5256 return; 5257 case 'P': 5258 gcc_assert (GET_CODE (x) == CONST_INT); 5259 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31)); 5260 return; 5261 case 'I': 5262 if (GET_CODE (x) == CONST_INT) 5263 fputs ("i", file); 5264 return; 5265 case 'M': 5266 case 'F': 5267 switch (GET_CODE (XEXP (x, 0))) 5268 { 5269 case PRE_DEC: 5270 case PRE_INC: 5271 if (ASSEMBLER_DIALECT == 0) 5272 fputs ("s,mb", file); 5273 else 5274 fputs (",mb", file); 5275 break; 5276 case POST_DEC: 5277 case POST_INC: 5278 if (ASSEMBLER_DIALECT == 0) 5279 fputs ("s,ma", file); 5280 else 5281 fputs (",ma", file); 5282 break; 5283 case PLUS: 5284 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG 5285 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) 5286 { 5287 if (ASSEMBLER_DIALECT == 0) 5288 fputs ("x", file); 5289 } 5290 else if (GET_CODE (XEXP (XEXP (x, 
0), 0)) == MULT 5291 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) 5292 { 5293 if (ASSEMBLER_DIALECT == 0) 5294 fputs ("x,s", file); 5295 else 5296 fputs (",s", file); 5297 } 5298 else if (code == 'F' && ASSEMBLER_DIALECT == 0) 5299 fputs ("s", file); 5300 break; 5301 default: 5302 if (code == 'F' && ASSEMBLER_DIALECT == 0) 5303 fputs ("s", file); 5304 break; 5305 } 5306 return; 5307 case 'G': 5308 pa_output_global_address (file, x, 0); 5309 return; 5310 case 'H': 5311 pa_output_global_address (file, x, 1); 5312 return; 5313 case 0: /* Don't do anything special */ 5314 break; 5315 case 'Z': 5316 { 5317 unsigned op[3]; 5318 compute_zdepwi_operands (INTVAL (x), op); 5319 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); 5320 return; 5321 } 5322 case 'z': 5323 { 5324 unsigned op[3]; 5325 compute_zdepdi_operands (INTVAL (x), op); 5326 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); 5327 return; 5328 } 5329 case 'c': 5330 /* We can get here from a .vtable_inherit due to our 5331 CONSTANT_ADDRESS_P rejecting perfectly good constant 5332 addresses. */ 5333 break; 5334 default: 5335 gcc_unreachable (); 5336 } 5337 if (GET_CODE (x) == REG) 5338 { 5339 fputs (reg_names [REGNO (x)], file); 5340 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4) 5341 { 5342 fputs ("R", file); 5343 return; 5344 } 5345 if (FP_REG_P (x) 5346 && GET_MODE_SIZE (GET_MODE (x)) <= 4 5347 && (REGNO (x) & 1) == 0) 5348 fputs ("L", file); 5349 } 5350 else if (GET_CODE (x) == MEM) 5351 { 5352 int size = GET_MODE_SIZE (GET_MODE (x)); 5353 rtx base = NULL_RTX; 5354 switch (GET_CODE (XEXP (x, 0))) 5355 { 5356 case PRE_DEC: 5357 case POST_DEC: 5358 base = XEXP (XEXP (x, 0), 0); 5359 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]); 5360 break; 5361 case PRE_INC: 5362 case POST_INC: 5363 base = XEXP (XEXP (x, 0), 0); 5364 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]); 5365 break; 5366 case PLUS: 5367 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) 5368 fprintf (file, "%s(%s)", 5369 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))], 5370 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]); 5371 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) 5372 fprintf (file, "%s(%s)", 5373 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))], 5374 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]); 5375 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG 5376 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) 5377 { 5378 /* Because the REG_POINTER flag can get lost during reload, 5379 pa_legitimate_address_p canonicalizes the order of the 5380 index and base registers in the combined move patterns. */ 5381 rtx base = XEXP (XEXP (x, 0), 1); 5382 rtx index = XEXP (XEXP (x, 0), 0); 5383 5384 fprintf (file, "%s(%s)", 5385 reg_names [REGNO (index)], reg_names [REGNO (base)]); 5386 } 5387 else 5388 output_address (XEXP (x, 0)); 5389 break; 5390 default: 5391 output_address (XEXP (x, 0)); 5392 break; 5393 } 5394 } 5395 else 5396 output_addr_const (file, x); 5397 } 5398 5399 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */ 5400 5401 void 5402 pa_output_global_address (FILE *file, rtx x, int round_constant) 5403 { 5404 5405 /* Imagine (high (const (plus ...))). 
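We strip the HIGH wrapper here so that the CONST handling below sees the (plus ...) directly.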
*/ 5406 if (GET_CODE (x) == HIGH) 5407 x = XEXP (x, 0); 5408 5409 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode)) 5410 output_addr_const (file, x); 5411 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic) 5412 { 5413 output_addr_const (file, x); 5414 fputs ("-$global$", file); 5415 } 5416 else if (GET_CODE (x) == CONST) 5417 { 5418 const char *sep = ""; 5419 int offset = 0; /* assembler wants -$global$ at end */ 5420 rtx base = NULL_RTX; 5421 5422 switch (GET_CODE (XEXP (XEXP (x, 0), 0))) 5423 { 5424 case LABEL_REF: 5425 case SYMBOL_REF: 5426 base = XEXP (XEXP (x, 0), 0); 5427 output_addr_const (file, base); 5428 break; 5429 case CONST_INT: 5430 offset = INTVAL (XEXP (XEXP (x, 0), 0)); 5431 break; 5432 default: 5433 gcc_unreachable (); 5434 } 5435 5436 switch (GET_CODE (XEXP (XEXP (x, 0), 1))) 5437 { 5438 case LABEL_REF: 5439 case SYMBOL_REF: 5440 base = XEXP (XEXP (x, 0), 1); 5441 output_addr_const (file, base); 5442 break; 5443 case CONST_INT: 5444 offset = INTVAL (XEXP (XEXP (x, 0), 1)); 5445 break; 5446 default: 5447 gcc_unreachable (); 5448 } 5449 5450 /* How bogus. The compiler is apparently responsible for 5451 rounding the constant if it uses an LR field selector. 5452 5453 The linker and/or assembler seem a better place since 5454 they have to do this kind of thing already. 5455 5456 If we fail to do this, HP's optimizing linker may eliminate 5457 an addil, but not update the ldw/stw/ldo instruction that 5458 uses the result of the addil. */ 5459 if (round_constant) 5460 offset = ((offset + 0x1000) & ~0x1fff); 5461 5462 switch (GET_CODE (XEXP (x, 0))) 5463 { 5464 case PLUS: 5465 if (offset < 0) 5466 { 5467 offset = -offset; 5468 sep = "-"; 5469 } 5470 else 5471 sep = "+"; 5472 break; 5473 5474 case MINUS: 5475 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF); 5476 sep = "-"; 5477 break; 5478 5479 default: 5480 gcc_unreachable (); 5481 } 5482 5483 if (!read_only_operand (base, VOIDmode) && !flag_pic) 5484 fputs ("-$global$", file); 5485 if (offset) 5486 fprintf (file, "%s%d", sep, offset); 5487 } 5488 else 5489 output_addr_const (file, x); 5490 } 5491 5492 /* Output boilerplate text to appear at the beginning of the file. 5493 There are several possible versions. 
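For instance (illustrative), pa_som_file_start on a 32-bit PA1.1 target begins the assembly file roughly as:

	.LEVEL 1.1
	.SPACE $PRIVATE$
	.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
	...
	.SPACE $TEXT$
	.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
	.IMPORT $global$,DATA
	.IMPORT $$dyncall,MILLICODE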
*/ 5494 #define aputs(x) fputs(x, asm_out_file) 5495 static inline void 5496 pa_file_start_level (void) 5497 { 5498 if (TARGET_64BIT) 5499 aputs ("\t.LEVEL 2.0w\n"); 5500 else if (TARGET_PA_20) 5501 aputs ("\t.LEVEL 2.0\n"); 5502 else if (TARGET_PA_11) 5503 aputs ("\t.LEVEL 1.1\n"); 5504 else 5505 aputs ("\t.LEVEL 1.0\n"); 5506 } 5507 5508 static inline void 5509 pa_file_start_space (int sortspace) 5510 { 5511 aputs ("\t.SPACE $PRIVATE$"); 5512 if (sortspace) 5513 aputs (",SORT=16"); 5514 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"); 5515 if (flag_tm) 5516 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31"); 5517 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82" 5518 "\n\t.SPACE $TEXT$"); 5519 if (sortspace) 5520 aputs (",SORT=8"); 5521 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44" 5522 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n"); 5523 } 5524 5525 static inline void 5526 pa_file_start_file (int want_version) 5527 { 5528 if (write_symbols != NO_DEBUG) 5529 { 5530 output_file_directive (asm_out_file, main_input_filename); 5531 if (want_version) 5532 aputs ("\t.version\t\"01.01\"\n"); 5533 } 5534 } 5535 5536 static inline void 5537 pa_file_start_mcount (const char *aswhat) 5538 { 5539 if (profile_flag) 5540 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat); 5541 } 5542 5543 static void 5544 pa_elf_file_start (void) 5545 { 5546 pa_file_start_level (); 5547 pa_file_start_mcount ("ENTRY"); 5548 pa_file_start_file (0); 5549 } 5550 5551 static void 5552 pa_som_file_start (void) 5553 { 5554 pa_file_start_level (); 5555 pa_file_start_space (0); 5556 aputs ("\t.IMPORT $global$,DATA\n" 5557 "\t.IMPORT $$dyncall,MILLICODE\n"); 5558 pa_file_start_mcount ("CODE"); 5559 pa_file_start_file (0); 5560 } 5561 5562 static void 5563 pa_linux_file_start (void) 5564 { 5565 pa_file_start_file (0); 5566 pa_file_start_level (); 5567 pa_file_start_mcount ("CODE"); 5568 } 5569 5570 static void 5571 pa_hpux64_gas_file_start (void) 5572 { 5573 pa_file_start_level (); 5574 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE 5575 if (profile_flag) 5576 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function"); 5577 #endif 5578 pa_file_start_file (1); 5579 } 5580 5581 static void 5582 pa_hpux64_hpas_file_start (void) 5583 { 5584 pa_file_start_level (); 5585 pa_file_start_space (1); 5586 pa_file_start_mcount ("CODE"); 5587 pa_file_start_file (0); 5588 } 5589 #undef aputs 5590 5591 /* Search the deferred plabel list for SYMBOL and return its internal 5592 label. If an entry for SYMBOL is not found, a new entry is created. */ 5593 5594 rtx 5595 pa_get_deferred_plabel (rtx symbol) 5596 { 5597 const char *fname = XSTR (symbol, 0); 5598 size_t i; 5599 5600 /* See if we have already put this function on the list of deferred 5601 plabels. This list is generally small, so a linear search is not 5602 too ugly. If it proves too slow, replace it with something faster. */ 5603 for (i = 0; i < n_deferred_plabels; i++) 5604 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0) 5605 break; 5606 5607 /* If the deferred plabel list is empty, or this entry was not found 5608 on the list, create a new entry on the list.
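The vector is grown one element at a time with GGC_RESIZEVEC below, which is fine given how short the list normally stays.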
*/
  if (deferred_plabels == NULL || i == n_deferred_plabels)
    {
      tree id;

      if (deferred_plabels == 0)
        deferred_plabels = ggc_alloc_deferred_plabel ();
      else
        deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
                                          deferred_plabels,
                                          n_deferred_plabels + 1);

      i = n_deferred_plabels++;
      deferred_plabels[i].internal_label = gen_label_rtx ();
      deferred_plabels[i].symbol = symbol;

      /* Gross.  We have just implicitly taken the address of this
         function.  Mark it in the same manner as assemble_name.  */
      id = maybe_get_identifier (targetm.strip_name_encoding (fname));
      if (id)
        mark_referenced (id);
    }

  return deferred_plabels[i].internal_label;
}

static void
output_deferred_plabels (void)
{
  size_t i;

  /* If we have some deferred plabels, then we need to switch into the
     data or readonly data section, and align it to a word boundary
     (4 bytes in the 32-bit runtime, 8 bytes in the 64-bit runtime)
     before outputting the deferred plabels.  */
  if (n_deferred_plabels)
    {
      switch_to_section (flag_pic ? data_section : readonly_data_section);
      ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
    }

  /* Now output the deferred plabels.  */
  for (i = 0; i < n_deferred_plabels; i++)
    {
      targetm.asm_out.internal_label (asm_out_file, "L",
                 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
      assemble_integer (deferred_plabels[i].symbol,
                        TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
    }
}

/* Initialize optabs to point to emulation routines.  */

static void
pa_init_libfuncs (void)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
      set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
      set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
      set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
      set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
      set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
      set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
      set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
      set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

      set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
      set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
      set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
      set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
      set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
      set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
      set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");

      set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");

      set_conv_libfunc (sfix_optab, SImode, TFmode,
                        TARGET_64BIT ?
"__U_Qfcnvfxt_quad_to_sgl" 5690 : "_U_Qfcnvfxt_quad_to_sgl"); 5691 set_conv_libfunc (sfix_optab, DImode, TFmode, 5692 "_U_Qfcnvfxt_quad_to_dbl"); 5693 set_conv_libfunc (ufix_optab, SImode, TFmode, 5694 "_U_Qfcnvfxt_quad_to_usgl"); 5695 set_conv_libfunc (ufix_optab, DImode, TFmode, 5696 "_U_Qfcnvfxt_quad_to_udbl"); 5697 5698 set_conv_libfunc (sfloat_optab, TFmode, SImode, 5699 "_U_Qfcnvxf_sgl_to_quad"); 5700 set_conv_libfunc (sfloat_optab, TFmode, DImode, 5701 "_U_Qfcnvxf_dbl_to_quad"); 5702 set_conv_libfunc (ufloat_optab, TFmode, SImode, 5703 "_U_Qfcnvxf_usgl_to_quad"); 5704 set_conv_libfunc (ufloat_optab, TFmode, DImode, 5705 "_U_Qfcnvxf_udbl_to_quad"); 5706 } 5707 5708 if (TARGET_SYNC_LIBCALL) 5709 init_sync_libfuncs (UNITS_PER_WORD); 5710 } 5711 5712 /* HP's millicode routines mean something special to the assembler. 5713 Keep track of which ones we have used. */ 5714 5715 enum millicodes { remI, remU, divI, divU, mulI, end1000 }; 5716 static void import_milli (enum millicodes); 5717 static char imported[(int) end1000]; 5718 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"}; 5719 static const char import_string[] = ".IMPORT $$....,MILLICODE"; 5720 #define MILLI_START 10 5721 5722 static void 5723 import_milli (enum millicodes code) 5724 { 5725 char str[sizeof (import_string)]; 5726 5727 if (!imported[(int) code]) 5728 { 5729 imported[(int) code] = 1; 5730 strcpy (str, import_string); 5731 strncpy (str + MILLI_START, milli_names[(int) code], 4); 5732 output_asm_insn (str, 0); 5733 } 5734 } 5735 5736 /* The register constraints have put the operands and return value in 5737 the proper registers. */ 5738 5739 const char * 5740 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn) 5741 { 5742 import_milli (mulI); 5743 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI")); 5744 } 5745 5746 /* Emit the rtl for doing a division by a constant. */ 5747 5748 /* Do magic division millicodes exist for this value? */ 5749 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1}; 5750 5751 /* We'll use an array to keep track of the magic millicodes and 5752 whether or not we've used them already. [n][0] is signed, [n][1] is 5753 unsigned. */ 5754 5755 static int div_milli[16][2]; 5756 5757 int 5758 pa_emit_hpdiv_const (rtx *operands, int unsignedp) 5759 { 5760 if (GET_CODE (operands[2]) == CONST_INT 5761 && INTVAL (operands[2]) > 0 5762 && INTVAL (operands[2]) < 16 5763 && pa_magic_milli[INTVAL (operands[2])]) 5764 { 5765 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31); 5766 5767 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]); 5768 emit 5769 (gen_rtx_PARALLEL 5770 (VOIDmode, 5771 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29), 5772 gen_rtx_fmt_ee (unsignedp ? 
                                                    UDIV : DIV,
                                                    SImode,
                                                    gen_rtx_REG (SImode, 26),
                                                    operands[2])),
                     gen_rtx_CLOBBER (VOIDmode, operands[4]),
                     gen_rtx_CLOBBER (VOIDmode, operands[3]),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
                     gen_rtx_CLOBBER (VOIDmode, ret))));
      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
      return 1;
    }
  return 0;
}

const char *
pa_output_div_insn (rtx *operands, int unsignedp, rtx insn)
{
  HOST_WIDE_INT divisor;

  /* If the divisor is a constant, try to use one of the special
     opcodes.  */
  if (GET_CODE (operands[0]) == CONST_INT)
    {
      static char buf[100];
      divisor = INTVAL (operands[0]);
      if (!div_milli[divisor][unsignedp])
        {
          div_milli[divisor][unsignedp] = 1;
          if (unsignedp)
            output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
          else
            output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
        }
      if (unsignedp)
        {
          sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
                   INTVAL (operands[0]));
          return pa_output_millicode_call (insn,
                                           gen_rtx_SYMBOL_REF (SImode, buf));
        }
      else
        {
          sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
                   INTVAL (operands[0]));
          return pa_output_millicode_call (insn,
                                           gen_rtx_SYMBOL_REF (SImode, buf));
        }
    }
  /* Divisor isn't a special constant.  */
  else
    {
      if (unsignedp)
        {
          import_milli (divU);
          return pa_output_millicode_call (insn,
                                        gen_rtx_SYMBOL_REF (SImode, "$$divU"));
        }
      else
        {
          import_milli (divI);
          return pa_output_millicode_call (insn,
                                        gen_rtx_SYMBOL_REF (SImode, "$$divI"));
        }
    }
}

/* Output a $$rem millicode to do mod.  */

const char *
pa_output_mod_insn (int unsignedp, rtx insn)
{
  if (unsignedp)
    {
      import_milli (remU);
      return pa_output_millicode_call (insn,
                                       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
    }
  else
    {
      import_milli (remI);
      return pa_output_millicode_call (insn,
                                       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
    }
}

void
pa_output_arg_descriptor (rtx call_insn)
{
  const char *arg_regs[4];
  enum machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  /* We neither need nor want argument location descriptors for the
     64-bit runtime environment or the ELF32 environment.  */
  if (TARGET_64BIT || TARGET_ELF32)
    return;

  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
             asm_out_file);
      return;
    }

  gcc_assert (GET_CODE (call_insn) == CALL_INSN);
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
       link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
             && GET_CODE (XEXP (use, 0)) == REG
             && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
        continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      if (regno >= 23 && regno <= 26)
        {
          arg_regs[26 - regno] = "GR";
          if (arg_mode == DImode)
            arg_regs[25 - regno] = "GR";
        }
      else if (regno >= 32 && regno <= 39)
        {
          if (arg_mode == SFmode)
            arg_regs[(regno - 32) / 2] = "FR";
          else
            {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
              arg_regs[(regno - 34) / 2] = "FR";
              arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
              arg_regs[(regno - 34) / 2] = "FU";
              arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
            }
        }
    }
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
        {
          if (output_flag++)
            fputc (',', asm_out_file);
          fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
        }
    }
  fputc ('\n', asm_out_file);
}

/* Inform reload about cases where moving X with a mode MODE to or from
   a register in RCLASS requires an extra scratch or immediate register.
   Return the class needed for the immediate register.  */

static reg_class_t
pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
                     enum machine_mode mode, secondary_reload_info *sri)
{
  int regno;
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Handle the easy stuff first.  */
  if (rclass == R1_REGS)
    return NO_REGS;

  if (REG_P (x))
    {
      regno = REGNO (x);
      if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
        return NO_REGS;
    }
  else
    regno = -1;

  /* If we have something like (mem (mem (...))), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
    return NO_REGS;

  /* Trying to load a constant into a FP register during PIC code
     generation requires %r1 as a scratch register.  For float modes,
     the only legitimate constant is CONST0_RTX.  However, there are
     a few patterns that accept constant double operands.  */
  if (flag_pic
      && FP_REG_CLASS_P (rclass)
      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
    {
      switch (mode)
        {
        case SImode:
          sri->icode = CODE_FOR_reload_insi_r1;
          break;

        case DImode:
          sri->icode = CODE_FOR_reload_indi_r1;
          break;

        case SFmode:
          sri->icode = CODE_FOR_reload_insf_r1;
          break;

        case DFmode:
          sri->icode = CODE_FOR_reload_indf_r1;
          break;

        default:
          gcc_unreachable ();
        }
      return NO_REGS;
    }

  /* Secondary reloads of symbolic expressions require %r1 as a scratch
     register when we're generating PIC code or when the operand isn't
     readonly.
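     (For instance, under -fPIC the address of a global symbol must be
     constructed at run time rather than loaded as an immediate; the
     reload_insi_r1/reload_indi_r1 patterns selected below supply %r1
     as the scratch for that sequence.)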
*/ 5997 if (pa_symbolic_expression_p (x)) 5998 { 5999 if (GET_CODE (x) == HIGH) 6000 x = XEXP (x, 0); 6001 6002 if (flag_pic || !read_only_operand (x, VOIDmode)) 6003 { 6004 switch (mode) 6005 { 6006 case SImode: 6007 sri->icode = CODE_FOR_reload_insi_r1; 6008 break; 6009 6010 case DImode: 6011 sri->icode = CODE_FOR_reload_indi_r1; 6012 break; 6013 6014 default: 6015 gcc_unreachable (); 6016 } 6017 return NO_REGS; 6018 } 6019 } 6020 6021 /* Profiling showed the PA port spends about 1.3% of its compilation 6022 time in true_regnum from calls inside pa_secondary_reload_class. */ 6023 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG) 6024 regno = true_regnum (x); 6025 6026 /* Handle reloads for floating point loads and stores. */ 6027 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1) 6028 && FP_REG_CLASS_P (rclass)) 6029 { 6030 if (MEM_P (x)) 6031 { 6032 x = XEXP (x, 0); 6033 6034 /* We don't need an intermediate for indexed and LO_SUM DLT 6035 memory addresses. When INT14_OK_STRICT is true, it might 6036 appear that we could directly allow register indirect 6037 memory addresses. However, this doesn't work because we 6038 don't support SUBREGs in floating-point register copies 6039 and reload doesn't tell us when it's going to use a SUBREG. */ 6040 if (IS_INDEX_ADDR_P (x) 6041 || IS_LO_SUM_DLT_ADDR_P (x)) 6042 return NO_REGS; 6043 6044 /* Request intermediate general register. */ 6045 return GENERAL_REGS; 6046 } 6047 6048 /* Request a secondary reload with a general scratch register 6049 for everything else. ??? Could symbolic operands be handled 6050 directly when generating non-pic PA 2.0 code? */ 6051 sri->icode = (in_p 6052 ? direct_optab_handler (reload_in_optab, mode) 6053 : direct_optab_handler (reload_out_optab, mode)); 6054 return NO_REGS; 6055 } 6056 6057 /* A SAR<->FP register copy requires an intermediate general register 6058 and secondary memory. We need a secondary reload with a general 6059 scratch register for spills. */ 6060 if (rclass == SHIFT_REGS) 6061 { 6062 /* Handle spill. */ 6063 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0) 6064 { 6065 sri->icode = (in_p 6066 ? direct_optab_handler (reload_in_optab, mode) 6067 : direct_optab_handler (reload_out_optab, mode)); 6068 return NO_REGS; 6069 } 6070 6071 /* Handle FP copy. */ 6072 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno))) 6073 return GENERAL_REGS; 6074 } 6075 6076 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER 6077 && REGNO_REG_CLASS (regno) == SHIFT_REGS 6078 && FP_REG_CLASS_P (rclass)) 6079 return GENERAL_REGS; 6080 6081 return NO_REGS; 6082 } 6083 6084 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer 6085 is only marked as live on entry by df-scan when it is a fixed 6086 register. It isn't a fixed register in the 64-bit runtime, 6087 so we need to mark it here. */ 6088 6089 static void 6090 pa_extra_live_on_entry (bitmap regs) 6091 { 6092 if (TARGET_64BIT) 6093 bitmap_set_bit (regs, ARG_POINTER_REGNUM); 6094 } 6095 6096 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile 6097 to prevent it from being deleted. */ 6098 6099 rtx 6100 pa_eh_return_handler_rtx (void) 6101 { 6102 rtx tmp; 6103 6104 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx, 6105 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20)); 6106 tmp = gen_rtx_MEM (word_mode, tmp); 6107 tmp->volatil = 1; 6108 return tmp; 6109 } 6110 6111 /* In the 32-bit runtime, arguments larger than eight bytes are passed 6112 by invisible reference. 
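   (Concretely, a 16-byte struct argument is passed by reference in the
   32-bit runtime, while an 8-byte one is passed by value; see the size
   test below.)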
As a GCC extension, we also pass anything 6113 with a zero or variable size by reference. 6114 6115 The 64-bit runtime does not describe passing any types by invisible 6116 reference. The internals of GCC can't currently handle passing 6117 empty structures, and zero or variable length arrays when they are 6118 not passed entirely on the stack or by reference. Thus, as a GCC 6119 extension, we pass these types by reference. The HP compiler doesn't 6120 support these types, so hopefully there shouldn't be any compatibility 6121 issues. This may have to be revisited when HP releases a C99 compiler 6122 or updates the ABI. */ 6123 6124 static bool 6125 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, 6126 enum machine_mode mode, const_tree type, 6127 bool named ATTRIBUTE_UNUSED) 6128 { 6129 HOST_WIDE_INT size; 6130 6131 if (type) 6132 size = int_size_in_bytes (type); 6133 else 6134 size = GET_MODE_SIZE (mode); 6135 6136 if (TARGET_64BIT) 6137 return size <= 0; 6138 else 6139 return size <= 0 || size > 8; 6140 } 6141 6142 enum direction 6143 pa_function_arg_padding (enum machine_mode mode, const_tree type) 6144 { 6145 if (mode == BLKmode 6146 || (TARGET_64BIT 6147 && type 6148 && (AGGREGATE_TYPE_P (type) 6149 || TREE_CODE (type) == COMPLEX_TYPE 6150 || TREE_CODE (type) == VECTOR_TYPE))) 6151 { 6152 /* Return none if justification is not required. */ 6153 if (type 6154 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 6155 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0) 6156 return none; 6157 6158 /* The directions set here are ignored when a BLKmode argument larger 6159 than a word is placed in a register. Different code is used for 6160 the stack and registers. This makes it difficult to have a 6161 consistent data representation for both the stack and registers. 6162 For both runtimes, the justification and padding for arguments on 6163 the stack and in registers should be identical. */ 6164 if (TARGET_64BIT) 6165 /* The 64-bit runtime specifies left justification for aggregates. */ 6166 return upward; 6167 else 6168 /* The 32-bit runtime architecture specifies right justification. 6169 When the argument is passed on the stack, the argument is padded 6170 with garbage on the left. The HP compiler pads with zeros. */ 6171 return downward; 6172 } 6173 6174 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY) 6175 return downward; 6176 else 6177 return none; 6178 } 6179 6180 6181 /* Do what is necessary for `va_start'. We look at the current function 6182 to determine if stdargs or varargs is used and fill in an initial 6183 va_list. A pointer to this constructor is returned. */ 6184 6185 static rtx 6186 hppa_builtin_saveregs (void) 6187 { 6188 rtx offset, dest; 6189 tree fntype = TREE_TYPE (current_function_decl); 6190 int argadj = ((!stdarg_p (fntype)) 6191 ? UNITS_PER_WORD : 0); 6192 6193 if (argadj) 6194 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj); 6195 else 6196 offset = crtl->args.arg_offset_rtx; 6197 6198 if (TARGET_64BIT) 6199 { 6200 int i, off; 6201 6202 /* Adjust for varargs/stdarg differences. */ 6203 if (argadj) 6204 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj); 6205 else 6206 offset = crtl->args.arg_offset_rtx; 6207 6208 /* We need to save %r26 .. %r19 inclusive starting at offset -64 6209 from the incoming arg pointer and growing to larger addresses. 
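	 The layout relative to the incoming arg pointer is therefore
	 (one 8-byte slot per register, derived from the loop below):

	     -64: %r26   -56: %r25   -48: %r24   -40: %r23
	     -32: %r22   -24: %r21   -16: %r20    -8: %r19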
*/ 6210 for (i = 26, off = -64; i >= 19; i--, off += 8) 6211 emit_move_insn (gen_rtx_MEM (word_mode, 6212 plus_constant (Pmode, 6213 arg_pointer_rtx, off)), 6214 gen_rtx_REG (word_mode, i)); 6215 6216 /* The incoming args pointer points just beyond the flushback area; 6217 normally this is not a serious concern. However, when we are doing 6218 varargs/stdargs we want to make the arg pointer point to the start 6219 of the incoming argument area. */ 6220 emit_move_insn (virtual_incoming_args_rtx, 6221 plus_constant (Pmode, arg_pointer_rtx, -64)); 6222 6223 /* Now return a pointer to the first anonymous argument. */ 6224 return copy_to_reg (expand_binop (Pmode, add_optab, 6225 virtual_incoming_args_rtx, 6226 offset, 0, 0, OPTAB_LIB_WIDEN)); 6227 } 6228 6229 /* Store general registers on the stack. */ 6230 dest = gen_rtx_MEM (BLKmode, 6231 plus_constant (Pmode, crtl->args.internal_arg_pointer, 6232 -16)); 6233 set_mem_alias_set (dest, get_varargs_alias_set ()); 6234 set_mem_align (dest, BITS_PER_WORD); 6235 move_block_from_reg (23, dest, 4); 6236 6237 /* move_block_from_reg will emit code to store the argument registers 6238 individually as scalar stores. 6239 6240 However, other insns may later load from the same addresses for 6241 a structure load (passing a struct to a varargs routine). 6242 6243 The alias code assumes that such aliasing can never happen, so we 6244 have to keep memory referencing insns from moving up beyond the 6245 last argument register store. So we emit a blockage insn here. */ 6246 emit_insn (gen_blockage ()); 6247 6248 return copy_to_reg (expand_binop (Pmode, add_optab, 6249 crtl->args.internal_arg_pointer, 6250 offset, 0, 0, OPTAB_LIB_WIDEN)); 6251 } 6252 6253 static void 6254 hppa_va_start (tree valist, rtx nextarg) 6255 { 6256 nextarg = expand_builtin_saveregs (); 6257 std_expand_builtin_va_start (valist, nextarg); 6258 } 6259 6260 static tree 6261 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, 6262 gimple_seq *post_p) 6263 { 6264 if (TARGET_64BIT) 6265 { 6266 /* Args grow upward. We can use the generic routines. */ 6267 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 6268 } 6269 else /* !TARGET_64BIT */ 6270 { 6271 tree ptr = build_pointer_type (type); 6272 tree valist_type; 6273 tree t, u; 6274 unsigned int size, ofs; 6275 bool indirect; 6276 6277 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0); 6278 if (indirect) 6279 { 6280 type = ptr; 6281 ptr = build_pointer_type (type); 6282 } 6283 size = int_size_in_bytes (type); 6284 valist_type = TREE_TYPE (valist); 6285 6286 /* Args grow down. Not handled by generic routines. */ 6287 6288 u = fold_convert (sizetype, size_in_bytes (type)); 6289 u = fold_build1 (NEGATE_EXPR, sizetype, u); 6290 t = fold_build_pointer_plus (valist, u); 6291 6292 /* Align to 4 or 8 byte boundary depending on argument size. */ 6293 6294 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4)); 6295 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u); 6296 t = fold_convert (valist_type, t); 6297 6298 t = build2 (MODIFY_EXPR, valist_type, valist, t); 6299 6300 ofs = (8 - size) % 4; 6301 if (ofs != 0) 6302 t = fold_build_pointer_plus_hwi (t, ofs); 6303 6304 t = fold_convert (ptr, t); 6305 t = build_va_arg_indirect_ref (t); 6306 6307 if (indirect) 6308 t = build_va_arg_indirect_ref (t); 6309 6310 return t; 6311 } 6312 } 6313 6314 /* True if MODE is valid for the target. By "valid", we mean able to 6315 be manipulated in non-trivial ways. 
   In particular, this means all
   the arithmetic is supported.

   Currently, TImode is not valid as the HP 64-bit runtime documentation
   doesn't document the alignment and calling conventions for this type.
   Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
   2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */

static bool
pa_scalar_mode_supported_p (enum machine_mode mode)
{
  int precision = GET_MODE_PRECISION (mode);

  switch (GET_MODE_CLASS (mode))
    {
    case MODE_PARTIAL_INT:
    case MODE_INT:
      if (precision == CHAR_TYPE_SIZE)
        return true;
      if (precision == SHORT_TYPE_SIZE)
        return true;
      if (precision == INT_TYPE_SIZE)
        return true;
      if (precision == LONG_TYPE_SIZE)
        return true;
      if (precision == LONG_LONG_TYPE_SIZE)
        return true;
      return false;

    case MODE_FLOAT:
      if (precision == FLOAT_TYPE_SIZE)
        return true;
      if (precision == DOUBLE_TYPE_SIZE)
        return true;
      if (precision == LONG_DOUBLE_TYPE_SIZE)
        return true;
      return false;

    case MODE_DECIMAL_FLOAT:
      return false;

    default:
      gcc_unreachable ();
    }
}

/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
   it branches into the delay slot.  Otherwise, return FALSE.  */

static bool
branch_to_delay_slot_p (rtx insn)
{
  rtx jump_insn;

  if (dbr_sequence_length ())
    return FALSE;

  jump_insn = next_active_insn (JUMP_LABEL (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (jump_insn == insn)
        return TRUE;

      /* We can't rely on the length of asms.  So, we return FALSE when
         the branch is followed by an asm.  */
      if (!insn
          || GET_CODE (PATTERN (insn)) == ASM_INPUT
          || extract_asm_operands (PATTERN (insn)) != NULL_RTX
          || get_attr_length (insn) > 0)
        break;
    }

  return FALSE;
}

/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.

   This occurs when INSN has an unfilled delay slot and is followed
   by an asm.  Disaster can occur if the asm is empty and the jump
   branches into the delay slot.  So, we add a nop in the delay slot
   when this occurs.  */

static bool
branch_needs_nop_p (rtx insn)
{
  rtx jump_insn;

  if (dbr_sequence_length ())
    return FALSE;

  jump_insn = next_active_insn (JUMP_LABEL (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (!insn || jump_insn == insn)
        return TRUE;

      if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
            || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
          && get_attr_length (insn) > 0)
        break;
    }

  return FALSE;
}

/* Return TRUE if INSN, a forward jump insn, can use nullification
   to skip the following instruction.  This avoids an extra cycle due
   to a mis-predicted branch when we fall through.  */

static bool
use_skip_p (rtx insn)
{
  rtx jump_insn = next_active_insn (JUMP_LABEL (insn));

  while (insn)
    {
      insn = next_active_insn (insn);

      /* We can't rely on the length of asms, so we can't skip asms.
*/ 6436 if (!insn 6437 || GET_CODE (PATTERN (insn)) == ASM_INPUT 6438 || extract_asm_operands (PATTERN (insn)) != NULL_RTX) 6439 break; 6440 if (get_attr_length (insn) == 4 6441 && jump_insn == next_active_insn (insn)) 6442 return TRUE; 6443 if (get_attr_length (insn) > 0) 6444 break; 6445 } 6446 6447 return FALSE; 6448 } 6449 6450 /* This routine handles all the normal conditional branch sequences we 6451 might need to generate. It handles compare immediate vs compare 6452 register, nullification of delay slots, varying length branches, 6453 negated branches, and all combinations of the above. It returns the 6454 output appropriate to emit the branch corresponding to all given 6455 parameters. */ 6456 6457 const char * 6458 pa_output_cbranch (rtx *operands, int negated, rtx insn) 6459 { 6460 static char buf[100]; 6461 bool useskip; 6462 int nullify = INSN_ANNULLED_BRANCH_P (insn); 6463 int length = get_attr_length (insn); 6464 int xdelay; 6465 6466 /* A conditional branch to the following instruction (e.g. the delay slot) 6467 is asking for a disaster. This can happen when not optimizing and 6468 when jump optimization fails. 6469 6470 While it is usually safe to emit nothing, this can fail if the 6471 preceding instruction is a nullified branch with an empty delay 6472 slot and the same branch target as this branch. We could check 6473 for this but jump optimization should eliminate nop jumps. It 6474 is always safe to emit a nop. */ 6475 if (branch_to_delay_slot_p (insn)) 6476 return "nop"; 6477 6478 /* The doubleword form of the cmpib instruction doesn't have the LEU 6479 and GTU conditions while the cmpb instruction does. Since we accept 6480 zero for cmpb, we must ensure that we use cmpb for the comparison. */ 6481 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx) 6482 operands[2] = gen_rtx_REG (DImode, 0); 6483 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx) 6484 operands[1] = gen_rtx_REG (DImode, 0); 6485 6486 /* If this is a long branch with its delay slot unfilled, set `nullify' 6487 as it can nullify the delay slot and save a nop. */ 6488 if (length == 8 && dbr_sequence_length () == 0) 6489 nullify = 1; 6490 6491 /* If this is a short forward conditional branch which did not get 6492 its delay slot filled, the delay slot can still be nullified. */ 6493 if (! nullify && length == 4 && dbr_sequence_length () == 0) 6494 nullify = forward_branch_p (insn); 6495 6496 /* A forward branch over a single nullified insn can be done with a 6497 comclr instruction. This avoids a single cycle penalty due to 6498 mis-predicted branch if we fall through (branch not taken). */ 6499 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; 6500 6501 switch (length) 6502 { 6503 /* All short conditional branches except backwards with an unfilled 6504 delay slot. */ 6505 case 4: 6506 if (useskip) 6507 strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); 6508 else 6509 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6510 if (GET_MODE (operands[1]) == DImode) 6511 strcat (buf, "*"); 6512 if (negated) 6513 strcat (buf, "%B3"); 6514 else 6515 strcat (buf, "%S3"); 6516 if (useskip) 6517 strcat (buf, " %2,%r1,%%r0"); 6518 else if (nullify) 6519 { 6520 if (branch_needs_nop_p (insn)) 6521 strcat (buf, ",n %2,%r1,%0%#"); 6522 else 6523 strcat (buf, ",n %2,%r1,%0"); 6524 } 6525 else 6526 strcat (buf, " %2,%r1,%0"); 6527 break; 6528 6529 /* All long conditionals. 
Note a short backward branch with an 6530 unfilled delay slot is treated just like a long backward branch 6531 with an unfilled delay slot. */ 6532 case 8: 6533 /* Handle weird backwards branch with a filled delay slot 6534 which is nullified. */ 6535 if (dbr_sequence_length () != 0 6536 && ! forward_branch_p (insn) 6537 && nullify) 6538 { 6539 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6540 if (GET_MODE (operands[1]) == DImode) 6541 strcat (buf, "*"); 6542 if (negated) 6543 strcat (buf, "%S3"); 6544 else 6545 strcat (buf, "%B3"); 6546 strcat (buf, ",n %2,%r1,.+12\n\tb %0"); 6547 } 6548 /* Handle short backwards branch with an unfilled delay slot. 6549 Using a comb;nop rather than comiclr;bl saves 1 cycle for both 6550 taken and untaken branches. */ 6551 else if (dbr_sequence_length () == 0 6552 && ! forward_branch_p (insn) 6553 && INSN_ADDRESSES_SET_P () 6554 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 6555 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 6556 { 6557 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6558 if (GET_MODE (operands[1]) == DImode) 6559 strcat (buf, "*"); 6560 if (negated) 6561 strcat (buf, "%B3 %2,%r1,%0%#"); 6562 else 6563 strcat (buf, "%S3 %2,%r1,%0%#"); 6564 } 6565 else 6566 { 6567 strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); 6568 if (GET_MODE (operands[1]) == DImode) 6569 strcat (buf, "*"); 6570 if (negated) 6571 strcat (buf, "%S3"); 6572 else 6573 strcat (buf, "%B3"); 6574 if (nullify) 6575 strcat (buf, " %2,%r1,%%r0\n\tb,n %0"); 6576 else 6577 strcat (buf, " %2,%r1,%%r0\n\tb %0"); 6578 } 6579 break; 6580 6581 default: 6582 /* The reversed conditional branch must branch over one additional 6583 instruction if the delay slot is filled and needs to be extracted 6584 by pa_output_lbranch. If the delay slot is empty or this is a 6585 nullified forward branch, the instruction after the reversed 6586 condition branch must be nullified. */ 6587 if (dbr_sequence_length () == 0 6588 || (nullify && forward_branch_p (insn))) 6589 { 6590 nullify = 1; 6591 xdelay = 0; 6592 operands[4] = GEN_INT (length); 6593 } 6594 else 6595 { 6596 xdelay = 1; 6597 operands[4] = GEN_INT (length + 4); 6598 } 6599 6600 /* Create a reversed conditional branch which branches around 6601 the following insns. */ 6602 if (GET_MODE (operands[1]) != DImode) 6603 { 6604 if (nullify) 6605 { 6606 if (negated) 6607 strcpy (buf, 6608 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}"); 6609 else 6610 strcpy (buf, 6611 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}"); 6612 } 6613 else 6614 { 6615 if (negated) 6616 strcpy (buf, 6617 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}"); 6618 else 6619 strcpy (buf, 6620 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}"); 6621 } 6622 } 6623 else 6624 { 6625 if (nullify) 6626 { 6627 if (negated) 6628 strcpy (buf, 6629 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}"); 6630 else 6631 strcpy (buf, 6632 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}"); 6633 } 6634 else 6635 { 6636 if (negated) 6637 strcpy (buf, 6638 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}"); 6639 else 6640 strcpy (buf, 6641 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}"); 6642 } 6643 } 6644 6645 output_asm_insn (buf, operands); 6646 return pa_output_lbranch (operands[0], insn, xdelay); 6647 } 6648 return buf; 6649 } 6650 6651 /* This routine handles output of long unconditional branches that 6652 exceed the maximum range of a simple branch instruction. 
   Since
   we don't have a register available for the branch, we save register
   %r1 in the frame marker, load the branch destination DEST into %r1,
   execute the branch, and restore %r1 in the delay slot of the branch.

   Since long branches may have an insn in the delay slot and the
   delay slot is used to restore %r1, we in general need to extract
   this insn and execute it before the branch.  However, to facilitate
   use of this function by conditional branches, we also provide an
   option to not extract the delay insn so that it will be emitted
   after the long branch.  So, if there is an insn in the delay slot,
   it is extracted if XDELAY is nonzero.

   The lengths of the various long-branch sequences are 20, 16 and 24
   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */

const char *
pa_output_lbranch (rtx dest, rtx insn, int xdelay)
{
  rtx xoperands[2];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);

      final_scan_insn (NEXT_INSN (insn), asm_out_file,
                       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      SET_INSN_DELETED (NEXT_INSN (insn));
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the caller's frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; go to
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        /* Use the return pointer slot in the frame marker.  */
        output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
        /* Use the slot at -40 in the frame marker since HP builtin
           alloca doesn't copy it.  */
        output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        /* Use the return pointer slot in the frame marker.  */
        output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
        /* Use the "Clean Up" slot in the frame marker.  In GCC,
           the only other use of this location is for copying a
           floating point double argument from a floating-point
           register to two general registers.  The copy is done
           as an "atomic" operation when outputting a call, so it
           won't interfere with our using the location here.
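           As an illustration (a sketch, not literal output; TARGET
           stands for the actual branch destination), the 32-bit PIC
           case assembled with GAS then yields the 24-byte sequence:

               stw %r1,-12(%r30)
               bl .+8,%r1
               addil L'TARGET-$PIC_pcrel$0+4,%r1
               ldo R'TARGET-$PIC_pcrel$0+8(%r1),%r1
               bv %r0(%r1)
               ldw -12(%r30),%r1

           where the final ldw is the delay-slot restore of %r1.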
           */
        output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
      if (TARGET_SOM || !TARGET_GAS)
        {
          xoperands[1] = gen_label_rtx ();
          output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (xoperands[1]));
          output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
        }
      else
        {
          output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
          output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
        }
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        return "ldd -16(%%r30),%%r1";
      else
        return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        return "ldw -20(%%r30),%%r1";
      else
        return "ldw -12(%%r30),%%r1";
    }
}

/* This routine handles all the branch-on-bit conditional branch sequences we
   might need to generate.  It handles nullification of delay slots,
   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.  */

const char *
pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

    /* All short conditional branches except backwards with an unfilled
       delay slot.
*/ 6820 case 4: 6821 if (useskip) 6822 strcpy (buf, "{extrs,|extrw,s,}"); 6823 else 6824 strcpy (buf, "bb,"); 6825 if (useskip && GET_MODE (operands[0]) == DImode) 6826 strcpy (buf, "extrd,s,*"); 6827 else if (GET_MODE (operands[0]) == DImode) 6828 strcpy (buf, "bb,*"); 6829 if ((which == 0 && negated) 6830 || (which == 1 && ! negated)) 6831 strcat (buf, ">="); 6832 else 6833 strcat (buf, "<"); 6834 if (useskip) 6835 strcat (buf, " %0,%1,1,%%r0"); 6836 else if (nullify && negated) 6837 { 6838 if (branch_needs_nop_p (insn)) 6839 strcat (buf, ",n %0,%1,%3%#"); 6840 else 6841 strcat (buf, ",n %0,%1,%3"); 6842 } 6843 else if (nullify && ! negated) 6844 { 6845 if (branch_needs_nop_p (insn)) 6846 strcat (buf, ",n %0,%1,%2%#"); 6847 else 6848 strcat (buf, ",n %0,%1,%2"); 6849 } 6850 else if (! nullify && negated) 6851 strcat (buf, " %0,%1,%3"); 6852 else if (! nullify && ! negated) 6853 strcat (buf, " %0,%1,%2"); 6854 break; 6855 6856 /* All long conditionals. Note a short backward branch with an 6857 unfilled delay slot is treated just like a long backward branch 6858 with an unfilled delay slot. */ 6859 case 8: 6860 /* Handle weird backwards branch with a filled delay slot 6861 which is nullified. */ 6862 if (dbr_sequence_length () != 0 6863 && ! forward_branch_p (insn) 6864 && nullify) 6865 { 6866 strcpy (buf, "bb,"); 6867 if (GET_MODE (operands[0]) == DImode) 6868 strcat (buf, "*"); 6869 if ((which == 0 && negated) 6870 || (which == 1 && ! negated)) 6871 strcat (buf, "<"); 6872 else 6873 strcat (buf, ">="); 6874 if (negated) 6875 strcat (buf, ",n %0,%1,.+12\n\tb %3"); 6876 else 6877 strcat (buf, ",n %0,%1,.+12\n\tb %2"); 6878 } 6879 /* Handle short backwards branch with an unfilled delay slot. 6880 Using a bb;nop rather than extrs;bl saves 1 cycle for both 6881 taken and untaken branches. */ 6882 else if (dbr_sequence_length () == 0 6883 && ! forward_branch_p (insn) 6884 && INSN_ADDRESSES_SET_P () 6885 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 6886 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 6887 { 6888 strcpy (buf, "bb,"); 6889 if (GET_MODE (operands[0]) == DImode) 6890 strcat (buf, "*"); 6891 if ((which == 0 && negated) 6892 || (which == 1 && ! negated)) 6893 strcat (buf, ">="); 6894 else 6895 strcat (buf, "<"); 6896 if (negated) 6897 strcat (buf, " %0,%1,%3%#"); 6898 else 6899 strcat (buf, " %0,%1,%2%#"); 6900 } 6901 else 6902 { 6903 if (GET_MODE (operands[0]) == DImode) 6904 strcpy (buf, "extrd,s,*"); 6905 else 6906 strcpy (buf, "{extrs,|extrw,s,}"); 6907 if ((which == 0 && negated) 6908 || (which == 1 && ! negated)) 6909 strcat (buf, "<"); 6910 else 6911 strcat (buf, ">="); 6912 if (nullify && negated) 6913 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3"); 6914 else if (nullify && ! negated) 6915 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2"); 6916 else if (negated) 6917 strcat (buf, " %0,%1,1,%%r0\n\tb %3"); 6918 else 6919 strcat (buf, " %0,%1,1,%%r0\n\tb %2"); 6920 } 6921 break; 6922 6923 default: 6924 /* The reversed conditional branch must branch over one additional 6925 instruction if the delay slot is filled and needs to be extracted 6926 by pa_output_lbranch. If the delay slot is empty or this is a 6927 nullified forward branch, the instruction after the reversed 6928 condition branch must be nullified. 
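         For instance, with nullification in effect the reversed branch
         below takes a form like "bb,>=,n %0,%1,.+%4" (the condition and
         operands vary), skipping over the long-branch sequence that
         pa_output_lbranch then emits.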
         */
      if (dbr_sequence_length () == 0
          || (nullify && forward_branch_p (insn)))
        {
          nullify = 1;
          xdelay = 0;
          operands[4] = GEN_INT (length);
        }
      else
        {
          xdelay = 1;
          operands[4] = GEN_INT (length + 4);
        }

      if (GET_MODE (operands[0]) == DImode)
        strcpy (buf, "bb,*");
      else
        strcpy (buf, "bb,");
      if ((which == 0 && negated)
          || (which == 1 && !negated))
        strcat (buf, "<");
      else
        strcat (buf, ">=");
      if (nullify)
        strcat (buf, ",n %0,%1,.+%4");
      else
        strcat (buf, " %0,%1,.+%4");
      output_asm_insn (buf, operands);
      return pa_output_lbranch (negated ? operands[3] : operands[2],
                                insn, xdelay);
    }
  return buf;
}

/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.  */

const char *
pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn,
               int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

    /* All short conditional branches except backwards with an unfilled
       delay slot.  */
    case 4:
      if (useskip)
        strcpy (buf, "{vextrs,|extrw,s,}");
      else
        strcpy (buf, "{bvb,|bb,}");
      if (useskip && GET_MODE (operands[0]) == DImode)
        strcpy (buf, "extrd,s,*");
      else if (GET_MODE (operands[0]) == DImode)
        strcpy (buf, "bb,*");
      if ((which == 0 && negated)
          || (which == 1 && ! negated))
        strcat (buf, ">=");
      else
        strcat (buf, "<");
      if (useskip)
        strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
      else if (nullify && negated)
        {
          if (branch_needs_nop_p (insn))
            strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
          else
            strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
        }
      else if (nullify && ! negated)
        {
          if (branch_needs_nop_p (insn))
            strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
          else
            strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
        }
      else if (! nullify && negated)
        strcat (buf, "{ %0,%3| %0,%%sar,%3}");
      else if (! nullify && !
negated) 7039 strcat (buf, "{ %0,%2| %0,%%sar,%2}"); 7040 break; 7041 7042 /* All long conditionals. Note a short backward branch with an 7043 unfilled delay slot is treated just like a long backward branch 7044 with an unfilled delay slot. */ 7045 case 8: 7046 /* Handle weird backwards branch with a filled delay slot 7047 which is nullified. */ 7048 if (dbr_sequence_length () != 0 7049 && ! forward_branch_p (insn) 7050 && nullify) 7051 { 7052 strcpy (buf, "{bvb,|bb,}"); 7053 if (GET_MODE (operands[0]) == DImode) 7054 strcat (buf, "*"); 7055 if ((which == 0 && negated) 7056 || (which == 1 && ! negated)) 7057 strcat (buf, "<"); 7058 else 7059 strcat (buf, ">="); 7060 if (negated) 7061 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}"); 7062 else 7063 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}"); 7064 } 7065 /* Handle short backwards branch with an unfilled delay slot. 7066 Using a bb;nop rather than extrs;bl saves 1 cycle for both 7067 taken and untaken branches. */ 7068 else if (dbr_sequence_length () == 0 7069 && ! forward_branch_p (insn) 7070 && INSN_ADDRESSES_SET_P () 7071 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7072 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7073 { 7074 strcpy (buf, "{bvb,|bb,}"); 7075 if (GET_MODE (operands[0]) == DImode) 7076 strcat (buf, "*"); 7077 if ((which == 0 && negated) 7078 || (which == 1 && ! negated)) 7079 strcat (buf, ">="); 7080 else 7081 strcat (buf, "<"); 7082 if (negated) 7083 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}"); 7084 else 7085 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}"); 7086 } 7087 else 7088 { 7089 strcpy (buf, "{vextrs,|extrw,s,}"); 7090 if (GET_MODE (operands[0]) == DImode) 7091 strcpy (buf, "extrd,s,*"); 7092 if ((which == 0 && negated) 7093 || (which == 1 && ! negated)) 7094 strcat (buf, "<"); 7095 else 7096 strcat (buf, ">="); 7097 if (nullify && negated) 7098 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}"); 7099 else if (nullify && ! negated) 7100 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}"); 7101 else if (negated) 7102 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}"); 7103 else 7104 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}"); 7105 } 7106 break; 7107 7108 default: 7109 /* The reversed conditional branch must branch over one additional 7110 instruction if the delay slot is filled and needs to be extracted 7111 by pa_output_lbranch. If the delay slot is empty or this is a 7112 nullified forward branch, the instruction after the reversed 7113 condition branch must be nullified. */ 7114 if (dbr_sequence_length () == 0 7115 || (nullify && forward_branch_p (insn))) 7116 { 7117 nullify = 1; 7118 xdelay = 0; 7119 operands[4] = GEN_INT (length); 7120 } 7121 else 7122 { 7123 xdelay = 1; 7124 operands[4] = GEN_INT (length + 4); 7125 } 7126 7127 if (GET_MODE (operands[0]) == DImode) 7128 strcpy (buf, "bb,*"); 7129 else 7130 strcpy (buf, "{bvb,|bb,}"); 7131 if ((which == 0 && negated) 7132 || (which == 1 && !negated)) 7133 strcat (buf, "<"); 7134 else 7135 strcat (buf, ">="); 7136 if (nullify) 7137 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}"); 7138 else 7139 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}"); 7140 output_asm_insn (buf, operands); 7141 return pa_output_lbranch (negated ? operands[3] : operands[2], 7142 insn, xdelay); 7143 } 7144 return buf; 7145 } 7146 7147 /* Return the output template for emitting a dbra type insn. 7148 7149 Note it may perform some output operations on its own before 7150 returning the final output string. 
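   For a typical count-down loop, alternative 0 with a short reach
   reduces to a single decrement-and-branch insn built from the
   "addib,%C2 %1,%0,%3" template -- e.g. something like
   "addib,<> -1,%r3,L$0002" (illustrative operands), which adds -1
   to %r3 and branches while the result is nonzero.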
 */
const char *
pa_output_dbra (rtx *operands, rtx insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
        return "ldo %1(%0),%0";
      else if (which_alternative == 1)
        {
          output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
          output_asm_insn ("ldw -16(%%r30),%4", operands);
          output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
          return "{fldws|fldw} -16(%%r30),%0";
        }
      else
        {
          output_asm_insn ("ldw %0,%4", operands);
          return "ldo %1(%4),%4\n\tstw %4,%0";
        }
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
         as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
        nullify = 1;

      /* If this is a short forward conditional branch which did not get
         its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
        nullify = forward_branch_p (insn);

      switch (length)
        {
        case 4:
          if (nullify)
            {
              if (branch_needs_nop_p (insn))
                return "addib,%C2,n %1,%0,%3%#";
              else
                return "addib,%C2,n %1,%0,%3";
            }
          else
            return "addib,%C2 %1,%0,%3";

        case 8:
          /* Handle weird backwards branch with a filled delay slot
             which is nullified.  */
          if (dbr_sequence_length () != 0
              && ! forward_branch_p (insn)
              && nullify)
            return "addib,%N2,n %1,%0,.+12\n\tb %3";
          /* Handle short backwards branch with an unfilled delay slot.
             Using an addb;nop rather than addi;bl saves 1 cycle for both
             taken and untaken branches.  */
          else if (dbr_sequence_length () == 0
                   && ! forward_branch_p (insn)
                   && INSN_ADDRESSES_SET_P ()
                   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
            return "addib,%C2 %1,%0,%3%#";

          /* Handle normal cases.  */
          if (nullify)
            return "addi,%N2 %1,%0,%0\n\tb,n %3";
          else
            return "addi,%N2 %1,%0,%0\n\tb %3";

        default:
          /* The reversed conditional branch must branch over one additional
             instruction if the delay slot is filled and needs to be extracted
             by pa_output_lbranch.  If the delay slot is empty or this is a
             nullified forward branch, the instruction after the reversed
             condition branch must be nullified.  */
          if (dbr_sequence_length () == 0
              || (nullify && forward_branch_p (insn)))
            {
              nullify = 1;
              xdelay = 0;
              operands[4] = GEN_INT (length);
            }
          else
            {
              xdelay = 1;
              operands[4] = GEN_INT (length + 4);
            }

          if (nullify)
            output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
          else
            output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

          return pa_output_lbranch (operands[3], insn, xdelay);
        }

    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
         increment the GR, store the GR into MEM, and finally reload
         the FP register from MEM from within the branch's delay slot.
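         Concretely, the length-24 form comes out along these lines
         (an illustrative sketch, not literal output; "cond" stands
         for the %S2 condition):

             fstws %0,-16(%r30)
             ldw -16(%r30),%4
             ldo %1(%4),%4
             stw %4,-16(%r30)
             comb,cond %r0,%4,%3
             fldws -16(%r30),%0

         where the final fldws executes in the branch's delay slot.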
*/ 7262 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4", 7263 operands); 7264 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands); 7265 if (length == 24) 7266 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0"; 7267 else if (length == 28) 7268 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; 7269 else 7270 { 7271 operands[5] = GEN_INT (length - 16); 7272 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands); 7273 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); 7274 return pa_output_lbranch (operands[3], insn, 0); 7275 } 7276 } 7277 /* Deal with gross reload from memory case. */ 7278 else 7279 { 7280 /* Reload loop counter from memory, the store back to memory 7281 happens in the branch's delay slot. */ 7282 output_asm_insn ("ldw %0,%4", operands); 7283 if (length == 12) 7284 return "addib,%C2 %1,%4,%3\n\tstw %4,%0"; 7285 else if (length == 16) 7286 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0"; 7287 else 7288 { 7289 operands[5] = GEN_INT (length - 4); 7290 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands); 7291 return pa_output_lbranch (operands[3], insn, 0); 7292 } 7293 } 7294 } 7295 7296 /* Return the output template for emitting a movb type insn. 7297 7298 Note it may perform some output operations on its own before 7299 returning the final output string. */ 7300 const char * 7301 pa_output_movb (rtx *operands, rtx insn, int which_alternative, 7302 int reverse_comparison) 7303 { 7304 int length = get_attr_length (insn); 7305 7306 /* A conditional branch to the following instruction (e.g. the delay slot) is 7307 asking for a disaster. Be prepared! */ 7308 7309 if (branch_to_delay_slot_p (insn)) 7310 { 7311 if (which_alternative == 0) 7312 return "copy %1,%0"; 7313 else if (which_alternative == 1) 7314 { 7315 output_asm_insn ("stw %1,-16(%%r30)", operands); 7316 return "{fldws|fldw} -16(%%r30),%0"; 7317 } 7318 else if (which_alternative == 2) 7319 return "stw %1,%0"; 7320 else 7321 return "mtsar %r1"; 7322 } 7323 7324 /* Support the second variant. */ 7325 if (reverse_comparison) 7326 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2]))); 7327 7328 if (which_alternative == 0) 7329 { 7330 int nullify = INSN_ANNULLED_BRANCH_P (insn); 7331 int xdelay; 7332 7333 /* If this is a long branch with its delay slot unfilled, set `nullify' 7334 as it can nullify the delay slot and save a nop. */ 7335 if (length == 8 && dbr_sequence_length () == 0) 7336 nullify = 1; 7337 7338 /* If this is a short forward conditional branch which did not get 7339 its delay slot filled, the delay slot can still be nullified. */ 7340 if (! nullify && length == 4 && dbr_sequence_length () == 0) 7341 nullify = forward_branch_p (insn); 7342 7343 switch (length) 7344 { 7345 case 4: 7346 if (nullify) 7347 { 7348 if (branch_needs_nop_p (insn)) 7349 return "movb,%C2,n %1,%0,%3%#"; 7350 else 7351 return "movb,%C2,n %1,%0,%3"; 7352 } 7353 else 7354 return "movb,%C2 %1,%0,%3"; 7355 7356 case 8: 7357 /* Handle weird backwards branch with a filled delay slot 7358 which is nullified. */ 7359 if (dbr_sequence_length () != 0 7360 && ! forward_branch_p (insn) 7361 && nullify) 7362 return "movb,%N2,n %1,%0,.+12\n\tb %3"; 7363 7364 /* Handle short backwards branch with an unfilled delay slot. 7365 Using a movb;nop rather than or;bl saves 1 cycle for both 7366 taken and untaken branches. */ 7367 else if (dbr_sequence_length () == 0 7368 && ! 
forward_branch_p (insn) 7369 && INSN_ADDRESSES_SET_P () 7370 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7371 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7372 return "movb,%C2 %1,%0,%3%#"; 7373 /* Handle normal cases. */ 7374 if (nullify) 7375 return "or,%N2 %1,%%r0,%0\n\tb,n %3"; 7376 else 7377 return "or,%N2 %1,%%r0,%0\n\tb %3"; 7378 7379 default: 7380 /* The reversed conditional branch must branch over one additional 7381 instruction if the delay slot is filled and needs to be extracted 7382 by pa_output_lbranch. If the delay slot is empty or this is a 7383 nullified forward branch, the instruction after the reversed 7384 condition branch must be nullified. */ 7385 if (dbr_sequence_length () == 0 7386 || (nullify && forward_branch_p (insn))) 7387 { 7388 nullify = 1; 7389 xdelay = 0; 7390 operands[4] = GEN_INT (length); 7391 } 7392 else 7393 { 7394 xdelay = 1; 7395 operands[4] = GEN_INT (length + 4); 7396 } 7397 7398 if (nullify) 7399 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands); 7400 else 7401 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands); 7402 7403 return pa_output_lbranch (operands[3], insn, xdelay); 7404 } 7405 } 7406 /* Deal with gross reload for FP destination register case. */ 7407 else if (which_alternative == 1) 7408 { 7409 /* Move source register to MEM, perform the branch test, then 7410 finally load the FP register from MEM from within the branch's 7411 delay slot. */ 7412 output_asm_insn ("stw %1,-16(%%r30)", operands); 7413 if (length == 12) 7414 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0"; 7415 else if (length == 16) 7416 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; 7417 else 7418 { 7419 operands[4] = GEN_INT (length - 4); 7420 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands); 7421 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); 7422 return pa_output_lbranch (operands[3], insn, 0); 7423 } 7424 } 7425 /* Deal with gross reload from memory case. */ 7426 else if (which_alternative == 2) 7427 { 7428 /* Reload loop counter from memory, the store back to memory 7429 happens in the branch's delay slot. */ 7430 if (length == 8) 7431 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0"; 7432 else if (length == 12) 7433 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0"; 7434 else 7435 { 7436 operands[4] = GEN_INT (length); 7437 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0", 7438 operands); 7439 return pa_output_lbranch (operands[3], insn, 0); 7440 } 7441 } 7442 /* Handle SAR as a destination. */ 7443 else 7444 { 7445 if (length == 8) 7446 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1"; 7447 else if (length == 12) 7448 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1"; 7449 else 7450 { 7451 operands[4] = GEN_INT (length); 7452 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1", 7453 operands); 7454 return pa_output_lbranch (operands[3], insn, 0); 7455 } 7456 } 7457 } 7458 7459 /* Copy any FP arguments in INSN into integer registers. */ 7460 static void 7461 copy_fp_args (rtx insn) 7462 { 7463 rtx link; 7464 rtx xoperands[2]; 7465 7466 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) 7467 { 7468 int arg_mode, regno; 7469 rtx use = XEXP (link, 0); 7470 7471 if (! 
(GET_CODE (use) == USE 7472 && GET_CODE (XEXP (use, 0)) == REG 7473 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) 7474 continue; 7475 7476 arg_mode = GET_MODE (XEXP (use, 0)); 7477 regno = REGNO (XEXP (use, 0)); 7478 7479 /* Is it a floating point register? */ 7480 if (regno >= 32 && regno <= 39) 7481 { 7482 /* Copy the FP register into an integer register via memory. */ 7483 if (arg_mode == SFmode) 7484 { 7485 xoperands[0] = XEXP (use, 0); 7486 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2); 7487 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands); 7488 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); 7489 } 7490 else 7491 { 7492 xoperands[0] = XEXP (use, 0); 7493 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2); 7494 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands); 7495 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands); 7496 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); 7497 } 7498 } 7499 } 7500 } 7501 7502 /* Compute length of the FP argument copy sequence for INSN. */ 7503 static int 7504 length_fp_args (rtx insn) 7505 { 7506 int length = 0; 7507 rtx link; 7508 7509 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) 7510 { 7511 int arg_mode, regno; 7512 rtx use = XEXP (link, 0); 7513 7514 if (! (GET_CODE (use) == USE 7515 && GET_CODE (XEXP (use, 0)) == REG 7516 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) 7517 continue; 7518 7519 arg_mode = GET_MODE (XEXP (use, 0)); 7520 regno = REGNO (XEXP (use, 0)); 7521 7522 /* Is it a floating point register? */ 7523 if (regno >= 32 && regno <= 39) 7524 { 7525 if (arg_mode == SFmode) 7526 length += 8; 7527 else 7528 length += 12; 7529 } 7530 } 7531 7532 return length; 7533 } 7534 7535 /* Return the attribute length for the millicode call instruction INSN. 7536 The length must match the code generated by pa_output_millicode_call. 7537 We include the delay slot in the returned length as it is better to 7538 overestimate the length than to underestimate it. */ 7539 7540 int 7541 pa_attr_length_millicode_call (rtx insn) 7542 { 7543 unsigned long distance = -1; 7544 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 7545 7546 if (INSN_ADDRESSES_SET_P ()) 7547 { 7548 distance = (total + insn_current_reference_address (insn)); 7549 if (distance < total) 7550 distance = -1; 7551 } 7552 7553 if (TARGET_64BIT) 7554 { 7555 if (!TARGET_LONG_CALLS && distance < 7600000) 7556 return 8; 7557 7558 return 20; 7559 } 7560 else if (TARGET_PORTABLE_RUNTIME) 7561 return 24; 7562 else 7563 { 7564 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET) 7565 return 8; 7566 7567 if (!flag_pic) 7568 return 12; 7569 7570 return 24; 7571 } 7572 } 7573 7574 /* INSN is a function call. It may have an unconditional jump 7575 in its delay slot. 7576 7577 CALL_DEST is the routine we are calling. */ 7578 7579 const char * 7580 pa_output_millicode_call (rtx insn, rtx call_dest) 7581 { 7582 int attr_length = get_attr_length (insn); 7583 int seq_length = dbr_sequence_length (); 7584 int distance; 7585 rtx seq_insn; 7586 rtx xoperands[3]; 7587 7588 xoperands[0] = call_dest; 7589 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31); 7590 7591 /* Handle the common case where we are sure that the branch will 7592 reach the beginning of the $CODE$ subspace. The within reach 7593 form of the $$sh_func_adrs call has a length of 28.
Because it 7594 has an attribute type of sh_func_adrs, it never has a nonzero 7595 sequence length (i.e., the delay slot is never filled). */ 7596 if (!TARGET_LONG_CALLS 7597 && (attr_length == 8 7598 || (attr_length == 28 7599 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS))) 7600 { 7601 output_asm_insn ("{bl|b,l} %0,%2", xoperands); 7602 } 7603 else 7604 { 7605 if (TARGET_64BIT) 7606 { 7607 /* It might seem that one insn could be saved by accessing 7608 the millicode function using the linkage table. However, 7609 this doesn't work in shared libraries and other dynamically 7610 loaded objects. Using a pc-relative sequence also avoids 7611 problems related to the implicit use of the gp register. */ 7612 output_asm_insn ("b,l .+8,%%r1", xoperands); 7613 7614 if (TARGET_GAS) 7615 { 7616 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); 7617 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands); 7618 } 7619 else 7620 { 7621 xoperands[1] = gen_label_rtx (); 7622 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); 7623 targetm.asm_out.internal_label (asm_out_file, "L", 7624 CODE_LABEL_NUMBER (xoperands[1])); 7625 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); 7626 } 7627 7628 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 7629 } 7630 else if (TARGET_PORTABLE_RUNTIME) 7631 { 7632 /* Pure portable runtime doesn't allow be/ble; we also don't 7633 have PIC support in the assembler/linker, so this sequence 7634 is needed. */ 7635 7636 /* Get the address of our target into %r1. */ 7637 output_asm_insn ("ldil L'%0,%%r1", xoperands); 7638 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands); 7639 7640 /* Get our return address into %r31. */ 7641 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands); 7642 output_asm_insn ("addi 8,%%r31,%%r31", xoperands); 7643 7644 /* Jump to our target address in %r1. */ 7645 output_asm_insn ("bv %%r0(%%r1)", xoperands); 7646 } 7647 else if (!flag_pic) 7648 { 7649 output_asm_insn ("ldil L'%0,%%r1", xoperands); 7650 if (TARGET_PA_20) 7651 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands); 7652 else 7653 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands); 7654 } 7655 else 7656 { 7657 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 7658 output_asm_insn ("addi 16,%%r1,%%r31", xoperands); 7659 7660 if (TARGET_SOM || !TARGET_GAS) 7661 { 7662 /* The HP assembler can generate relocations for the 7663 difference of two symbols. GAS can do this for a 7664 millicode symbol but not an arbitrary external 7665 symbol when generating SOM output. */ 7666 xoperands[1] = gen_label_rtx (); 7667 targetm.asm_out.internal_label (asm_out_file, "L", 7668 CODE_LABEL_NUMBER (xoperands[1])); 7669 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); 7670 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); 7671 } 7672 else 7673 { 7674 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands); 7675 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1", 7676 xoperands); 7677 } 7678 7679 /* Jump to our target address in %r1. */ 7680 output_asm_insn ("bv %%r0(%%r1)", xoperands); 7681 } 7682 } 7683 7684 if (seq_length == 0) 7685 output_asm_insn ("nop", xoperands); 7686 7687 /* We are done if there isn't a jump in the delay slot. */ 7688 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) 7689 return ""; 7690 7691 /* This call has an unconditional jump in its delay slot. */ 7692 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); 7693 7694 /* See if the return address can be adjusted. Use the containing 7695 sequence insn's address. 
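If the displacement from the return point to the jump's target fits in 14 bits, the ldo emitted below adjusts the return pointer within the delay slot so that the millicode call returns directly to the jump's target. Otherwise, we must emit an explicit branch, which may itself be out of range (hence the ??? notes below).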
*/ 7696 if (INSN_ADDRESSES_SET_P ()) 7697 { 7698 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); 7699 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))) 7700 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8); 7701 7702 if (VAL_14_BITS_P (distance)) 7703 { 7704 xoperands[1] = gen_label_rtx (); 7705 output_asm_insn ("ldo %0-%1(%2),%2", xoperands); 7706 targetm.asm_out.internal_label (asm_out_file, "L", 7707 CODE_LABEL_NUMBER (xoperands[1])); 7708 } 7709 else 7710 /* ??? This branch may not reach its target. */ 7711 output_asm_insn ("nop\n\tb,n %0", xoperands); 7712 } 7713 else 7714 /* ??? This branch may not reach its target. */ 7715 output_asm_insn ("nop\n\tb,n %0", xoperands); 7716 7717 /* Delete the jump. */ 7718 SET_INSN_DELETED (NEXT_INSN (insn)); 7719 7720 return ""; 7721 } 7722 7723 /* Return the attribute length of the call instruction INSN. The SIBCALL 7724 flag indicates whether INSN is a regular call or a sibling call. The 7725 length returned must be longer than the code actually generated by 7726 pa_output_call. Since branch shortening is done before delay branch 7727 sequencing, there is no way to determine whether or not the delay 7728 slot will be filled during branch shortening. Even when the delay 7729 slot is filled, we may have to add a nop if the delay slot contains 7730 a branch that can't reach its target. Thus, we always have to include 7731 the delay slot in the length estimate. This used to be done in 7732 pa_adjust_insn_length but we do it here now as some sequences always 7733 fill the delay slot and we can save four bytes in the estimate for 7734 these sequences. */ 7735 7736 int 7737 pa_attr_length_call (rtx insn, int sibcall) 7738 { 7739 int local_call; 7740 rtx call, call_dest; 7741 tree call_decl; 7742 int length = 0; 7743 rtx pat = PATTERN (insn); 7744 unsigned long distance = -1; 7745 7746 gcc_assert (GET_CODE (insn) == CALL_INSN); 7747 7748 if (INSN_ADDRESSES_SET_P ()) 7749 { 7750 unsigned long total; 7751 7752 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 7753 distance = (total + insn_current_reference_address (insn)); 7754 if (distance < total) 7755 distance = -1; 7756 } 7757 7758 gcc_assert (GET_CODE (pat) == PARALLEL); 7759 7760 /* Get the call rtx. */ 7761 call = XVECEXP (pat, 0, 0); 7762 if (GET_CODE (call) == SET) 7763 call = SET_SRC (call); 7764 7765 gcc_assert (GET_CODE (call) == CALL); 7766 7767 /* Determine if this is a local call. */ 7768 call_dest = XEXP (XEXP (call, 0), 0); 7769 call_decl = SYMBOL_REF_DECL (call_dest); 7770 local_call = call_decl && targetm.binds_local_p (call_decl); 7771 7772 /* pc-relative branch. */ 7773 if (!TARGET_LONG_CALLS 7774 && ((TARGET_PA_20 && !sibcall && distance < 7600000) 7775 || distance < MAX_PCREL17F_OFFSET)) 7776 length += 8; 7777 7778 /* 64-bit plabel sequence. */ 7779 else if (TARGET_64BIT && !local_call) 7780 length += sibcall ? 28 : 24; 7781 7782 /* non-pic long absolute branch sequence. */ 7783 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) 7784 length += 12; 7785 7786 /* long pc-relative branch sequence. */ 7787 else if (TARGET_LONG_PIC_SDIFF_CALL 7788 || (TARGET_GAS && !TARGET_SOM 7789 && (TARGET_LONG_PIC_PCREL_CALL || local_call))) 7790 { 7791 length += 20; 7792 7793 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 7794 length += 8; 7795 } 7796 7797 /* 32-bit plabel sequence. 
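The base estimate is 32 bytes (eight instructions, including the delay slot). SOM targets also add the FP argument copy code, PIC adds one instruction, and pre-2.0 targets may need the return pointer adjustment and the space register loads (8 bytes each); see the increments below.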
*/ 7798 else 7799 { 7800 length += 32; 7801 7802 if (TARGET_SOM) 7803 length += length_fp_args (insn); 7804 7805 if (flag_pic) 7806 length += 4; 7807 7808 if (!TARGET_PA_20) 7809 { 7810 if (!sibcall) 7811 length += 8; 7812 7813 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 7814 length += 8; 7815 } 7816 } 7817 7818 return length; 7819 } 7820 7821 /* INSN is a function call. It may have an unconditional jump 7822 in its delay slot. 7823 7824 CALL_DEST is the routine we are calling. */ 7825 7826 const char * 7827 pa_output_call (rtx insn, rtx call_dest, int sibcall) 7828 { 7829 int delay_insn_deleted = 0; 7830 int delay_slot_filled = 0; 7831 int seq_length = dbr_sequence_length (); 7832 tree call_decl = SYMBOL_REF_DECL (call_dest); 7833 int local_call = call_decl && targetm.binds_local_p (call_decl); 7834 rtx xoperands[2]; 7835 7836 xoperands[0] = call_dest; 7837 7838 /* Handle the common case where we're sure that the branch will reach 7839 the beginning of the "$CODE$" subspace. This is the beginning of 7840 the current function if we are in a named section. */ 7841 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8) 7842 { 7843 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2); 7844 output_asm_insn ("{bl|b,l} %0,%1", xoperands); 7845 } 7846 else 7847 { 7848 if (TARGET_64BIT && !local_call) 7849 { 7850 /* ??? As far as I can tell, the HP linker doesn't support the 7851 long pc-relative sequence described in the 64-bit runtime 7852 architecture. So, we use a slightly longer indirect call. */ 7853 xoperands[0] = pa_get_deferred_plabel (call_dest); 7854 xoperands[1] = gen_label_rtx (); 7855 7856 /* If this isn't a sibcall, we put the load of %r27 into the 7857 delay slot. We can't do this in a sibcall as we don't 7858 have a second call-clobbered scratch register available. */ 7859 if (seq_length != 0 7860 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN 7861 && !sibcall) 7862 { 7863 final_scan_insn (NEXT_INSN (insn), asm_out_file, 7864 optimize, 0, NULL); 7865 7866 /* Now delete the delay insn. */ 7867 SET_INSN_DELETED (NEXT_INSN (insn)); 7868 delay_insn_deleted = 1; 7869 } 7870 7871 output_asm_insn ("addil LT'%0,%%r27", xoperands); 7872 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands); 7873 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands); 7874 7875 if (sibcall) 7876 { 7877 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); 7878 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands); 7879 output_asm_insn ("bve (%%r1)", xoperands); 7880 } 7881 else 7882 { 7883 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands); 7884 output_asm_insn ("bve,l (%%r2),%%r2", xoperands); 7885 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); 7886 delay_slot_filled = 1; 7887 } 7888 } 7889 else 7890 { 7891 int indirect_call = 0; 7892 7893 /* Emit a long call. There are several different sequences 7894 of increasing length and complexity. In most cases, 7895 they don't allow an instruction in the delay slot. */ 7896 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) 7897 && !TARGET_LONG_PIC_SDIFF_CALL 7898 && !(TARGET_GAS && !TARGET_SOM 7899 && (TARGET_LONG_PIC_PCREL_CALL || local_call)) 7900 && !TARGET_64BIT) 7901 indirect_call = 1; 7902 7903 if (seq_length != 0 7904 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN 7905 && !sibcall 7906 && (!TARGET_PA_20 7907 || indirect_call 7908 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic))) 7909 { 7910 /* A non-jump insn in the delay slot. By definition we can 7911 emit this insn before the call (and in fact before argument 7912 relocating).
*/ 7913 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 7914 NULL); 7915 7916 /* Now delete the delay insn. */ 7917 SET_INSN_DELETED (NEXT_INSN (insn)); 7918 delay_insn_deleted = 1; 7919 } 7920 7921 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) 7922 { 7923 /* This is the best sequence for making long calls in 7924 non-pic code. Unfortunately, GNU ld doesn't provide 7925 the stub needed for external calls, and GAS's support 7926 for this with the SOM linker is buggy. It is safe 7927 to use this for local calls. */ 7928 output_asm_insn ("ldil L'%0,%%r1", xoperands); 7929 if (sibcall) 7930 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands); 7931 else 7932 { 7933 if (TARGET_PA_20) 7934 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", 7935 xoperands); 7936 else 7937 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands); 7938 7939 output_asm_insn ("copy %%r31,%%r2", xoperands); 7940 delay_slot_filled = 1; 7941 } 7942 } 7943 else 7944 { 7945 if (TARGET_LONG_PIC_SDIFF_CALL) 7946 { 7947 /* The HP assembler and linker can handle relocations 7948 for the difference of two symbols. The HP assembler 7949 recognizes the sequence as a pc-relative call and 7950 the linker provides stubs when needed. */ 7951 xoperands[1] = gen_label_rtx (); 7952 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 7953 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); 7954 targetm.asm_out.internal_label (asm_out_file, "L", 7955 CODE_LABEL_NUMBER (xoperands[1])); 7956 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); 7957 } 7958 else if (TARGET_GAS && !TARGET_SOM 7959 && (TARGET_LONG_PIC_PCREL_CALL || local_call)) 7960 { 7961 /* GAS currently can't generate the relocations that 7962 are needed for the SOM linker under HP-UX using this 7963 sequence. The GNU linker doesn't generate the stubs 7964 that are needed for external calls on TARGET_ELF32 7965 with this sequence. For now, we have to use a 7966 longer plabel sequence when using GAS. */ 7967 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 7968 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", 7969 xoperands); 7970 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", 7971 xoperands); 7972 } 7973 else 7974 { 7975 /* Emit a long plabel-based call sequence. This is 7976 essentially an inline implementation of $$dyncall. 7977 We don't actually try to call $$dyncall as this is 7978 as difficult as calling the function itself. */ 7979 xoperands[0] = pa_get_deferred_plabel (call_dest); 7980 xoperands[1] = gen_label_rtx (); 7981 7982 /* Since the call is indirect, FP arguments in registers 7983 need to be copied to the general registers. Then, the 7984 argument relocation stub will copy them back. 
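For example, the first SFmode argument register is stored to -16(%sr0,%r30) and then reloaded into %r26; see copy_fp_args above.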
*/ 7985 if (TARGET_SOM) 7986 copy_fp_args (insn); 7987 7988 if (flag_pic) 7989 { 7990 output_asm_insn ("addil LT'%0,%%r19", xoperands); 7991 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands); 7992 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands); 7993 } 7994 else 7995 { 7996 output_asm_insn ("addil LR'%0-$global$,%%r27", 7997 xoperands); 7998 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1", 7999 xoperands); 8000 } 8001 8002 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands); 8003 output_asm_insn ("depi 0,31,2,%%r1", xoperands); 8004 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands); 8005 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands); 8006 8007 if (!sibcall && !TARGET_PA_20) 8008 { 8009 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands); 8010 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 8011 output_asm_insn ("addi 8,%%r2,%%r2", xoperands); 8012 else 8013 output_asm_insn ("addi 16,%%r2,%%r2", xoperands); 8014 } 8015 } 8016 8017 if (TARGET_PA_20) 8018 { 8019 if (sibcall) 8020 output_asm_insn ("bve (%%r1)", xoperands); 8021 else 8022 { 8023 if (indirect_call) 8024 { 8025 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 8026 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands); 8027 delay_slot_filled = 1; 8028 } 8029 else 8030 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 8031 } 8032 } 8033 else 8034 { 8035 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 8036 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0", 8037 xoperands); 8038 8039 if (sibcall) 8040 { 8041 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 8042 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands); 8043 else 8044 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands); 8045 } 8046 else 8047 { 8048 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 8049 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands); 8050 else 8051 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands); 8052 8053 if (indirect_call) 8054 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands); 8055 else 8056 output_asm_insn ("copy %%r31,%%r2", xoperands); 8057 delay_slot_filled = 1; 8058 } 8059 } 8060 } 8061 } 8062 } 8063 8064 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted)) 8065 output_asm_insn ("nop", xoperands); 8066 8067 /* We are done if there isn't a jump in the delay slot. */ 8068 if (seq_length == 0 8069 || delay_insn_deleted 8070 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) 8071 return ""; 8072 8073 /* A sibcall should never have a branch in the delay slot. */ 8074 gcc_assert (!sibcall); 8075 8076 /* This call has an unconditional jump in its delay slot. */ 8077 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); 8078 8079 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ()) 8080 { 8081 /* See if the return address can be adjusted. Use the containing 8082 sequence insn's address. This would break the regular call/return 8083 relationship assumed by the table based eh unwinder, so only do that 8084 if the call is not possibly throwing.
*/ 8085 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); 8086 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))) 8087 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8); 8088 8089 if (VAL_14_BITS_P (distance) 8090 && !(can_throw_internal (insn) || can_throw_external (insn))) 8091 { 8092 xoperands[1] = gen_label_rtx (); 8093 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands); 8094 targetm.asm_out.internal_label (asm_out_file, "L", 8095 CODE_LABEL_NUMBER (xoperands[1])); 8096 } 8097 else 8098 output_asm_insn ("nop\n\tb,n %0", xoperands); 8099 } 8100 else 8101 output_asm_insn ("b,n %0", xoperands); 8102 8103 /* Delete the jump. */ 8104 SET_INSN_DELETED (NEXT_INSN (insn)); 8105 8106 return ""; 8107 } 8108 8109 /* Return the attribute length of the indirect call instruction INSN. 8110 The length must match the code generated by pa_output_indirect_call. 8111 The returned length includes the delay slot. Currently, the delay 8112 slot of an indirect call sequence is not exposed and it is used by 8113 the sequence itself. */ 8114 8115 int 8116 pa_attr_length_indirect_call (rtx insn) 8117 { 8118 unsigned long distance = -1; 8119 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 8120 8121 if (INSN_ADDRESSES_SET_P ()) 8122 { 8123 distance = (total + insn_current_reference_address (insn)); 8124 if (distance < total) 8125 distance = -1; 8126 } 8127 8128 if (TARGET_64BIT) 8129 return 12; 8130 8131 if (TARGET_FAST_INDIRECT_CALLS 8132 || (!TARGET_LONG_CALLS 8133 && !TARGET_PORTABLE_RUNTIME 8134 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000) 8135 || distance < MAX_PCREL17F_OFFSET))) 8136 return 8; 8137 8138 if (flag_pic) 8139 return 24; 8140 8141 if (TARGET_PORTABLE_RUNTIME) 8142 return 20; 8143 8144 /* Out of reach, can use ble. */ 8145 return 12; 8146 } 8147 8148 const char * 8149 pa_output_indirect_call (rtx insn, rtx call_dest) 8150 { 8151 rtx xoperands[1]; 8152 8153 if (TARGET_64BIT) 8154 { 8155 xoperands[0] = call_dest; 8156 output_asm_insn ("ldd 16(%0),%%r2", xoperands); 8157 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands); 8158 return ""; 8159 } 8160 8161 /* First the special case for kernels, level 0 systems, etc. */ 8162 if (TARGET_FAST_INDIRECT_CALLS) 8163 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; 8164 8165 /* Now the normal case -- we can reach $$dyncall directly or 8166 we're sure that we can get there via a long-branch stub. 8167 8168 No need to check target flags as the length uniquely identifies 8169 the remaining cases. */ 8170 if (pa_attr_length_indirect_call (insn) == 8) 8171 { 8172 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to 8173 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit 8174 variant of the B,L instruction can't be used on the SOM target. */ 8175 if (TARGET_PA_20 && !TARGET_SOM) 8176 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31"; 8177 else 8178 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2"; 8179 } 8180 8181 /* Long millicode call, but we are not generating PIC or portable runtime 8182 code. */ 8183 if (pa_attr_length_indirect_call (insn) == 12) 8184 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2"; 8185 8186 /* Long millicode call for portable runtime.
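This is the five instruction (20 byte) sequence returned below: ldil/ldo form the address of $$dyncall in %r31, blr %r0,%r2 sets the return pointer, and an annulled bv plus a padding nop make the jump.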
*/ 8187 if (pa_attr_length_indirect_call (insn) == 20) 8188 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop"; 8189 8190 /* We need a long PIC call to $$dyncall. */ 8191 xoperands[0] = NULL_RTX; 8192 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 8193 if (TARGET_SOM || !TARGET_GAS) 8194 { 8195 xoperands[0] = gen_label_rtx (); 8196 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands); 8197 targetm.asm_out.internal_label (asm_out_file, "L", 8198 CODE_LABEL_NUMBER (xoperands[0])); 8199 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands); 8200 } 8201 else 8202 { 8203 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands); 8204 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1", 8205 xoperands); 8206 } 8207 output_asm_insn ("blr %%r0,%%r2", xoperands); 8208 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands); 8209 return ""; 8210 } 8211 8212 /* In HPUX 8.0's shared library scheme, special relocations are needed 8213 for function labels if they might be passed to a function 8214 in a shared library (because shared libraries don't live in code 8215 space), and special magic is needed to construct their address. */ 8216 8217 void 8218 pa_encode_label (rtx sym) 8219 { 8220 const char *str = XSTR (sym, 0); 8221 int len = strlen (str) + 1; 8222 char *newstr, *p; 8223 8224 p = newstr = XALLOCAVEC (char, len + 1); 8225 *p++ = '@'; 8226 strcpy (p, str); 8227 8228 XSTR (sym, 0) = ggc_alloc_string (newstr, len); 8229 } 8230 8231 static void 8232 pa_encode_section_info (tree decl, rtx rtl, int first) 8233 { 8234 int old_referenced = 0; 8235 8236 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF) 8237 old_referenced 8238 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED; 8239 8240 default_encode_section_info (decl, rtl, first); 8241 8242 if (first && TEXT_SPACE_P (decl)) 8243 { 8244 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1; 8245 if (TREE_CODE (decl) == FUNCTION_DECL) 8246 pa_encode_label (XEXP (rtl, 0)); 8247 } 8248 else if (old_referenced) 8249 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced; 8250 } 8251 8252 /* This is sort of inverse to pa_encode_section_info. */ 8253 8254 static const char * 8255 pa_strip_name_encoding (const char *str) 8256 { 8257 str += (*str == '@'); 8258 str += (*str == '*'); 8259 return str; 8260 } 8261 8262 /* Returns 1 if OP is a function label involved in a simple addition 8263 with a constant. Used to keep certain patterns from matching 8264 during instruction combination. */ 8265 int 8266 pa_is_function_label_plus_const (rtx op) 8267 { 8268 /* Strip off any CONST. */ 8269 if (GET_CODE (op) == CONST) 8270 op = XEXP (op, 0); 8271 8272 return (GET_CODE (op) == PLUS 8273 && function_label_operand (XEXP (op, 0), VOIDmode) 8274 && GET_CODE (XEXP (op, 1)) == CONST_INT); 8275 } 8276 8277 /* Output assembly code for a thunk to FUNCTION. */ 8278 8279 static void 8280 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta, 8281 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, 8282 tree function) 8283 { 8284 static unsigned int current_thunk_number; 8285 int val_14 = VAL_14_BITS_P (delta); 8286 unsigned int old_last_address = last_address, nbytes = 0; 8287 char label[16]; 8288 rtx xoperands[4]; 8289 8290 xoperands[0] = XEXP (DECL_RTL (function), 0); 8291 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0); 8292 xoperands[2] = GEN_INT (delta); 8293 8294 final_start_function (emit_barrier (), file, 1); 8295 8296 /* Output the thunk. 
We know that the function is in the same 8297 translation unit (i.e., the same space) as the thunk, and that 8298 thunks are output after their method. Thus, we don't need an 8299 external branch to reach the function. With SOM and GAS, 8300 functions and thunks are effectively in different sections. 8301 Thus, we can always use an IA-relative branch and the linker 8302 will add a long branch stub if necessary. 8303 8304 However, we have to be careful when generating PIC code on the 8305 SOM port to ensure that the sequence does not transfer to an 8306 import stub for the target function as this could clobber the 8307 return value saved at SP-24. This would also apply to the 8308 32-bit linux port if the multi-space model is implemented. */ 8309 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME 8310 && !(flag_pic && TREE_PUBLIC (function)) 8311 && (TARGET_GAS || last_address < 262132)) 8312 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME 8313 && ((targetm_common.have_named_sections 8314 && DECL_SECTION_NAME (thunk_fndecl) != NULL 8315 /* The GNU 64-bit linker has rather poor stub management. 8316 So, we use a long branch from thunks that aren't in 8317 the same section as the target function. */ 8318 && ((!TARGET_64BIT 8319 && (DECL_SECTION_NAME (thunk_fndecl) 8320 != DECL_SECTION_NAME (function))) 8321 || ((DECL_SECTION_NAME (thunk_fndecl) 8322 == DECL_SECTION_NAME (function)) 8323 && last_address < 262132))) 8324 /* In this case, we need to be able to reach the start of 8325 the stub table even though the function is likely closer 8326 and can be jumped to directly. */ 8327 || (targetm_common.have_named_sections 8328 && DECL_SECTION_NAME (thunk_fndecl) == NULL 8329 && DECL_SECTION_NAME (function) == NULL 8330 && total_code_bytes < MAX_PCREL17F_OFFSET) 8331 /* Likewise. */ 8332 || (!targetm_common.have_named_sections 8333 && total_code_bytes < MAX_PCREL17F_OFFSET)))) 8334 { 8335 if (!val_14) 8336 output_asm_insn ("addil L'%2,%%r26", xoperands); 8337 8338 output_asm_insn ("b %0", xoperands); 8339 8340 if (val_14) 8341 { 8342 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8343 nbytes += 8; 8344 } 8345 else 8346 { 8347 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8348 nbytes += 12; 8349 } 8350 } 8351 else if (TARGET_64BIT) 8352 { 8353 /* We only have one call-clobbered scratch register, so we can't 8354 make use of the delay slot if delta doesn't fit in 14 bits. */ 8355 if (!val_14) 8356 { 8357 output_asm_insn ("addil L'%2,%%r26", xoperands); 8358 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8359 } 8360 8361 output_asm_insn ("b,l .+8,%%r1", xoperands); 8362 8363 if (TARGET_GAS) 8364 { 8365 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); 8366 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands); 8367 } 8368 else 8369 { 8370 xoperands[3] = GEN_INT (val_14 ?
8 : 16); 8371 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands); 8372 } 8373 8374 if (val_14) 8375 { 8376 output_asm_insn ("bv %%r0(%%r1)", xoperands); 8377 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8378 nbytes += 20; 8379 } 8380 else 8381 { 8382 output_asm_insn ("bv,n %%r0(%%r1)", xoperands); 8383 nbytes += 24; 8384 } 8385 } 8386 else if (TARGET_PORTABLE_RUNTIME) 8387 { 8388 output_asm_insn ("ldil L'%0,%%r1", xoperands); 8389 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands); 8390 8391 if (!val_14) 8392 output_asm_insn ("addil L'%2,%%r26", xoperands); 8393 8394 output_asm_insn ("bv %%r0(%%r22)", xoperands); 8395 8396 if (val_14) 8397 { 8398 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8399 nbytes += 16; 8400 } 8401 else 8402 { 8403 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8404 nbytes += 20; 8405 } 8406 } 8407 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) 8408 { 8409 /* The function is accessible from outside this module. The only 8410 way to avoid an import stub between the thunk and function is to 8411 call the function directly with an indirect sequence similar to 8412 that used by $$dyncall. This is possible because $$dyncall acts 8413 as the import stub in an indirect call. */ 8414 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number); 8415 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label); 8416 output_asm_insn ("addil LT'%3,%%r19", xoperands); 8417 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands); 8418 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8419 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); 8420 output_asm_insn ("depi 0,31,2,%%r22", xoperands); 8421 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands); 8422 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8423 8424 if (!val_14) 8425 { 8426 output_asm_insn ("addil L'%2,%%r26", xoperands); 8427 nbytes += 4; 8428 } 8429 8430 if (TARGET_PA_20) 8431 { 8432 output_asm_insn ("bve (%%r22)", xoperands); 8433 nbytes += 36; 8434 } 8435 else if (TARGET_NO_SPACE_REGS) 8436 { 8437 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands); 8438 nbytes += 36; 8439 } 8440 else 8441 { 8442 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands); 8443 output_asm_insn ("mtsp %%r21,%%sr0", xoperands); 8444 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands); 8445 nbytes += 44; 8446 } 8447 8448 if (val_14) 8449 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8450 else 8451 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8452 } 8453 else if (flag_pic) 8454 { 8455 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 8456 8457 if (TARGET_SOM || !TARGET_GAS) 8458 { 8459 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands); 8460 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands); 8461 } 8462 else 8463 { 8464 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); 8465 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands); 8466 } 8467 8468 if (!val_14) 8469 output_asm_insn ("addil L'%2,%%r26", xoperands); 8470 8471 output_asm_insn ("bv %%r0(%%r22)", xoperands); 8472 8473 if (val_14) 8474 { 8475 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8476 nbytes += 20; 8477 } 8478 else 8479 { 8480 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8481 nbytes += 24; 8482 } 8483 } 8484 else 8485 { 8486 if (!val_14) 8487 output_asm_insn ("addil L'%2,%%r26", xoperands); 8488 8489 output_asm_insn ("ldil L'%0,%%r22", xoperands); 8490 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands); 8491 8492 if (val_14) 8493 { 8494 output_asm_insn ("ldo 
%2(%%r26),%%r26", xoperands); 8495 nbytes += 12; 8496 } 8497 else 8498 { 8499 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8500 nbytes += 16; 8501 } 8502 } 8503 8504 final_end_function (); 8505 8506 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) 8507 { 8508 switch_to_section (data_section); 8509 output_asm_insn (".align 4", xoperands); 8510 ASM_OUTPUT_LABEL (file, label); 8511 output_asm_insn (".word P'%0", xoperands); 8512 } 8513 8514 current_thunk_number++; 8515 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) 8516 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); 8517 last_address += nbytes; 8518 if (old_last_address > last_address) 8519 last_address = UINT_MAX; 8520 update_total_code_bytes (nbytes); 8521 } 8522 8523 /* Only direct calls to static functions are allowed to be sibling (tail) 8524 call optimized. 8525 8526 This restriction is necessary because some linker generated stubs will 8527 store return pointers into rp' in some cases which might clobber a 8528 live value already in rp'. 8529 8530 In a sibcall the current function and the target function share stack 8531 space. Thus if the path to the current function and the path to the 8532 target function save a value in rp', they save the value into the 8533 same stack slot, which has undesirable consequences. 8534 8535 Because of the deferred binding nature of shared libraries any function 8536 with external scope could be in a different load module and thus require 8537 rp' to be saved when calling that function. So sibcall optimizations 8538 can only be safe for static functions. 8539 8540 Note that GCC never needs return value relocations, so we don't have to 8541 worry about static calls with return value relocations (which require 8542 saving rp'). 8543 8544 It is safe to perform a sibcall optimization when the target function 8545 will never return. */ 8546 static bool 8547 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 8548 { 8549 if (TARGET_PORTABLE_RUNTIME) 8550 return false; 8551 8552 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in 8553 single subspace mode and the call is not indirect. As far as I know, 8554 there is no operating system support for the multiple subspace mode. 8555 It might be possible to support indirect calls if we didn't use 8556 $$dyncall (see the indirect sequence generated in pa_output_call). */ 8557 if (TARGET_ELF32) 8558 return (decl != NULL_TREE); 8559 8560 /* Sibcalls are not ok because the arg pointer register is not a fixed 8561 register. This prevents the sibcall optimization from occurring. In 8562 addition, there are problems with stub placement using GNU ld. This 8563 is because a normal sibcall branch uses a 17-bit relocation while 8564 a regular call branch uses a 22-bit relocation. As a result, more 8565 care needs to be taken in the placement of long-branch stubs. */ 8566 if (TARGET_64BIT) 8567 return false; 8568 8569 /* Sibcalls are only ok within a translation unit. */ 8570 return (decl && !TREE_PUBLIC (decl)); 8571 } 8572 8573 /* ??? Addition is not commutative on the PA due to the weird implicit 8574 space register selection rules for memory addresses. Therefore, we 8575 don't consider a + b == b + a, as this might be inside a MEM.
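For example, the space register for a memory reference is selected from the base register of the address, so with space registers enabled (mem (plus a b)) and (mem (plus b a)) can reference different spaces.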
*/ 8576 static bool 8577 pa_commutative_p (const_rtx x, int outer_code) 8578 { 8579 return (COMMUTATIVE_P (x) 8580 && (TARGET_NO_SPACE_REGS 8581 || (outer_code != UNKNOWN && outer_code != MEM) 8582 || GET_CODE (x) != PLUS)); 8583 } 8584 8585 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for 8586 use in fmpyadd instructions. */ 8587 int 8588 pa_fmpyaddoperands (rtx *operands) 8589 { 8590 enum machine_mode mode = GET_MODE (operands[0]); 8591 8592 /* Must be a floating point mode. */ 8593 if (mode != SFmode && mode != DFmode) 8594 return 0; 8595 8596 /* All modes must be the same. */ 8597 if (! (mode == GET_MODE (operands[1]) 8598 && mode == GET_MODE (operands[2]) 8599 && mode == GET_MODE (operands[3]) 8600 && mode == GET_MODE (operands[4]) 8601 && mode == GET_MODE (operands[5]))) 8602 return 0; 8603 8604 /* All operands must be registers. */ 8605 if (! (GET_CODE (operands[1]) == REG 8606 && GET_CODE (operands[2]) == REG 8607 && GET_CODE (operands[3]) == REG 8608 && GET_CODE (operands[4]) == REG 8609 && GET_CODE (operands[5]) == REG)) 8610 return 0; 8611 8612 /* Only 2 real operands to the addition. One of the input operands must 8613 be the same as the output operand. */ 8614 if (! rtx_equal_p (operands[3], operands[4]) 8615 && ! rtx_equal_p (operands[3], operands[5])) 8616 return 0; 8617 8618 /* Inout operand of add cannot conflict with any operands from multiply. */ 8619 if (rtx_equal_p (operands[3], operands[0]) 8620 || rtx_equal_p (operands[3], operands[1]) 8621 || rtx_equal_p (operands[3], operands[2])) 8622 return 0; 8623 8624 /* multiply cannot feed into addition operands. */ 8625 if (rtx_equal_p (operands[4], operands[0]) 8626 || rtx_equal_p (operands[5], operands[0])) 8627 return 0; 8628 8629 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ 8630 if (mode == SFmode 8631 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS 8632 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS 8633 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS 8634 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS 8635 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS 8636 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) 8637 return 0; 8638 8639 /* Passed. Operands are suitable for fmpyadd. */ 8640 return 1; 8641 } 8642 8643 #if !defined(USE_COLLECT2) 8644 static void 8645 pa_asm_out_constructor (rtx symbol, int priority) 8646 { 8647 if (!function_label_operand (symbol, VOIDmode)) 8648 pa_encode_label (symbol); 8649 8650 #ifdef CTORS_SECTION_ASM_OP 8651 default_ctor_section_asm_out_constructor (symbol, priority); 8652 #else 8653 # ifdef TARGET_ASM_NAMED_SECTION 8654 default_named_section_asm_out_constructor (symbol, priority); 8655 # else 8656 default_stabs_asm_out_constructor (symbol, priority); 8657 # endif 8658 #endif 8659 } 8660 8661 static void 8662 pa_asm_out_destructor (rtx symbol, int priority) 8663 { 8664 if (!function_label_operand (symbol, VOIDmode)) 8665 pa_encode_label (symbol); 8666 8667 #ifdef DTORS_SECTION_ASM_OP 8668 default_dtor_section_asm_out_destructor (symbol, priority); 8669 #else 8670 # ifdef TARGET_ASM_NAMED_SECTION 8671 default_named_section_asm_out_destructor (symbol, priority); 8672 # else 8673 default_stabs_asm_out_destructor (symbol, priority); 8674 # endif 8675 #endif 8676 } 8677 #endif 8678 8679 /* This function places uninitialized global data in the bss section. 
8680 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this 8681 function on the SOM port to prevent uninitialized global data from 8682 being placed in the data section. */ 8683 8684 void 8685 pa_asm_output_aligned_bss (FILE *stream, 8686 const char *name, 8687 unsigned HOST_WIDE_INT size, 8688 unsigned int align) 8689 { 8690 switch_to_section (bss_section); 8691 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8692 8693 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE 8694 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); 8695 #endif 8696 8697 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE 8698 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); 8699 #endif 8700 8701 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8702 ASM_OUTPUT_LABEL (stream, name); 8703 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); 8704 } 8705 8706 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive 8707 that doesn't allow the alignment of global common storage to be directly 8708 specified. The SOM linker aligns common storage based on the rounded 8709 value of the NUM_BYTES parameter in the .comm directive. It's not 8710 possible to use the .align directive as it doesn't affect the alignment 8711 of the label associated with a .comm directive. */ 8712 8713 void 8714 pa_asm_output_aligned_common (FILE *stream, 8715 const char *name, 8716 unsigned HOST_WIDE_INT size, 8717 unsigned int align) 8718 { 8719 unsigned int max_common_align; 8720 8721 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64); 8722 if (align > max_common_align) 8723 { 8724 warning (0, "alignment (%u) for %s exceeds maximum alignment " 8725 "for global common data. Using %u", 8726 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT); 8727 align = max_common_align; 8728 } 8729 8730 switch_to_section (bss_section); 8731 8732 assemble_name (stream, name); 8733 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n", 8734 MAX (size, align / BITS_PER_UNIT)); 8735 } 8736 8737 /* We can't use .comm for local common storage as the SOM linker effectively 8738 treats the symbol as universal and uses the same storage for local symbols 8739 with the same name in different object files. The .block directive 8740 reserves an uninitialized block of storage. However, it's not common 8741 storage. Fortunately, GCC never requests common storage with the same 8742 name in any given translation unit. */ 8743 8744 void 8745 pa_asm_output_aligned_local (FILE *stream, 8746 const char *name, 8747 unsigned HOST_WIDE_INT size, 8748 unsigned int align) 8749 { 8750 switch_to_section (bss_section); 8751 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8752 8753 #ifdef LOCAL_ASM_OP 8754 fprintf (stream, "%s", LOCAL_ASM_OP); 8755 assemble_name (stream, name); 8756 fprintf (stream, "\n"); 8757 #endif 8758 8759 ASM_OUTPUT_LABEL (stream, name); 8760 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); 8761 } 8762 8763 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for 8764 use in fmpysub instructions. */ 8765 int 8766 pa_fmpysuboperands (rtx *operands) 8767 { 8768 enum machine_mode mode = GET_MODE (operands[0]); 8769 8770 /* Must be a floating point mode. */ 8771 if (mode != SFmode && mode != DFmode) 8772 return 0; 8773 8774 /* All modes must be the same. */ 8775 if (! 
(mode == GET_MODE (operands[1]) 8776 && mode == GET_MODE (operands[2]) 8777 && mode == GET_MODE (operands[3]) 8778 && mode == GET_MODE (operands[4]) 8779 && mode == GET_MODE (operands[5]))) 8780 return 0; 8781 8782 /* All operands must be registers. */ 8783 if (! (GET_CODE (operands[1]) == REG 8784 && GET_CODE (operands[2]) == REG 8785 && GET_CODE (operands[3]) == REG 8786 && GET_CODE (operands[4]) == REG 8787 && GET_CODE (operands[5]) == REG)) 8788 return 0; 8789 8790 /* Only 2 real operands to the subtraction. Subtraction is not a commutative 8791 operation, so operands[4] must be the same as operands[3]. */ 8792 if (! rtx_equal_p (operands[3], operands[4])) 8793 return 0; 8794 8795 /* Multiply cannot feed into subtraction. */ 8796 if (rtx_equal_p (operands[5], operands[0])) 8797 return 0; 8798 8799 /* Inout operand of sub cannot conflict with any operands from multiply. */ 8800 if (rtx_equal_p (operands[3], operands[0]) 8801 || rtx_equal_p (operands[3], operands[1]) 8802 || rtx_equal_p (operands[3], operands[2])) 8803 return 0; 8804 8805 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ 8806 if (mode == SFmode 8807 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS 8808 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS 8809 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS 8810 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS 8811 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS 8812 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) 8813 return 0; 8814 8815 /* Passed. Operands are suitable for fmpysub. */ 8816 return 1; 8817 } 8818 8819 /* Return 1 if the given constant is 2, 4, or 8. These are the valid 8820 constants for shadd instructions. */ 8821 int 8822 pa_shadd_constant_p (int val) 8823 { 8824 if (val == 2 || val == 4 || val == 8) 8825 return 1; 8826 else 8827 return 0; 8828 } 8829 8830 /* Return TRUE if INSN branches forward. */ 8831 8832 static bool 8833 forward_branch_p (rtx insn) 8834 { 8835 rtx lab = JUMP_LABEL (insn); 8836 8837 /* The INSN must have a jump label. */ 8838 gcc_assert (lab != NULL_RTX); 8839 8840 if (INSN_ADDRESSES_SET_P ()) 8841 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn)); 8842 8843 while (insn) 8844 { 8845 if (insn == lab) 8846 return true; 8847 else 8848 insn = NEXT_INSN (insn); 8849 } 8850 8851 return false; 8852 } 8853 8854 /* Return 1 if INSN is in the delay slot of a call instruction. */ 8855 int 8856 pa_jump_in_call_delay (rtx insn) 8857 { 8858 8859 if (GET_CODE (insn) != JUMP_INSN) 8860 return 0; 8861 8862 if (PREV_INSN (insn) 8863 && PREV_INSN (PREV_INSN (insn)) 8864 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN) 8865 { 8866 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn))); 8867 8868 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE 8869 && XVECEXP (PATTERN (test_insn), 0, 1) == insn); 8870 8871 } 8872 else 8873 return 0; 8874 } 8875 8876 /* Output an unconditional move and branch insn. */ 8877 8878 const char * 8879 pa_output_parallel_movb (rtx *operands, rtx insn) 8880 { 8881 int length = get_attr_length (insn); 8882 8883 /* These are the cases in which we win. */ 8884 if (length == 4) 8885 return "mov%I1b,tr %1,%0,%2"; 8886 8887 /* None of the following cases win, but they don't lose either. */ 8888 if (length == 8) 8889 { 8890 if (dbr_sequence_length () == 0) 8891 { 8892 /* Nothing in the delay slot, fake it by putting the combined 8893 insn (the copy or add) in the delay slot of a bl.
*/ 8894 if (GET_CODE (operands[1]) == CONST_INT) 8895 return "b %2\n\tldi %1,%0"; 8896 else 8897 return "b %2\n\tcopy %1,%0"; 8898 } 8899 else 8900 { 8901 /* Something in the delay slot, but we've got a long branch. */ 8902 if (GET_CODE (operands[1]) == CONST_INT) 8903 return "ldi %1,%0\n\tb %2"; 8904 else 8905 return "copy %1,%0\n\tb %2"; 8906 } 8907 } 8908 8909 if (GET_CODE (operands[1]) == CONST_INT) 8910 output_asm_insn ("ldi %1,%0", operands); 8911 else 8912 output_asm_insn ("copy %1,%0", operands); 8913 return pa_output_lbranch (operands[2], insn, 1); 8914 } 8915 8916 /* Output an unconditional add and branch insn. */ 8917 8918 const char * 8919 pa_output_parallel_addb (rtx *operands, rtx insn) 8920 { 8921 int length = get_attr_length (insn); 8922 8923 /* To make life easy we want operand0 to be the shared input/output 8924 operand and operand1 to be the readonly operand. */ 8925 if (operands[0] == operands[1]) 8926 operands[1] = operands[2]; 8927 8928 /* These are the cases in which we win. */ 8929 if (length == 4) 8930 return "add%I1b,tr %1,%0,%3"; 8931 8932 /* None of the following cases win, but they don't lose either. */ 8933 if (length == 8) 8934 { 8935 if (dbr_sequence_length () == 0) 8936 /* Nothing in the delay slot, fake it by putting the combined 8937 insn (the copy or add) in the delay slot of a bl. */ 8938 return "b %3\n\tadd%I1 %1,%0,%0"; 8939 else 8940 /* Something in the delay slot, but we've got a long branch. */ 8941 return "add%I1 %1,%0,%0\n\tb %3"; 8942 } 8943 8944 output_asm_insn ("add%I1 %1,%0,%0", operands); 8945 return pa_output_lbranch (operands[3], insn, 1); 8946 } 8947 8948 /* Return nonzero if INSN (a jump insn) immediately follows a call 8949 to a named function. This is used to avoid filling the delay slot 8950 of the jump since it can usually be eliminated by modifying RP in 8951 the delay slot of the call. */ 8952 8953 int 8954 pa_following_call (rtx insn) 8955 { 8956 if (! TARGET_JUMP_IN_DELAY) 8957 return 0; 8958 8959 /* Find the previous real insn, skipping NOTEs. */ 8960 insn = PREV_INSN (insn); 8961 while (insn && GET_CODE (insn) == NOTE) 8962 insn = PREV_INSN (insn); 8963 8964 /* Check for CALL_INSNs and millicode calls. */ 8965 if (insn 8966 && ((GET_CODE (insn) == CALL_INSN 8967 && get_attr_type (insn) != TYPE_DYNCALL) 8968 || (GET_CODE (insn) == INSN 8969 && GET_CODE (PATTERN (insn)) != SEQUENCE 8970 && GET_CODE (PATTERN (insn)) != USE 8971 && GET_CODE (PATTERN (insn)) != CLOBBER 8972 && get_attr_type (insn) == TYPE_MILLI))) 8973 return 1; 8974 8975 return 0; 8976 } 8977 8978 /* We use this hook to perform a PA specific optimization which is difficult 8979 to do in earlier passes. 8980 8981 We want the delay slots of branches within jump tables to be filled. 8982 None of the compiler passes at the moment even has the notion that a 8983 PA jump table doesn't contain addresses, but instead contains actual 8984 instructions! 8985 8986 Because we actually jump into the table, the addresses of each entry 8987 must stay constant in relation to the beginning of the table (which 8988 itself must stay constant relative to the instruction to jump into 8989 it). I don't believe we can guarantee earlier passes of the compiler 8990 will adhere to those rules. 8991 8992 So, late in the compilation process we find all the jump tables, and 8993 expand them into real code -- e.g. each entry in the jump table vector 8994 will get an appropriate label followed by a jump to the final target. 
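In effect, a three-entry table becomes something like

	L$1:	b	L$case0
		nop
	L$2:	b	L$case1
		nop
	L$3:	b	L$case2
		nop

(a sketch; the labels are illustrative), where each slot is now a fixed-size branch whose trailing nop reorg may later replace by filling the branch's delay slot.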
8995 8996 Reorg and the final jump pass can then optimize these branches and 8997 fill their delay slots. We end up with smaller, more efficient code. 8998 8999 The jump instructions within the table are special; we must be able 9000 to identify them during assembly output (if the jumps don't get filled 9001 we need to emit a nop rather than nullifying the delay slot). We 9002 identify jumps in switch tables by using insns with the attribute 9003 type TYPE_BTABLE_BRANCH. 9004 9005 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB 9006 insns. This serves two purposes: first, it prevents jump.c from 9007 noticing that the last N entries in the table jump to the instruction 9008 immediately after the table and deleting the jumps. Second, those 9009 insns mark where we should emit .begin_brtab and .end_brtab directives 9010 when using GAS (allows for better link time optimizations). */ 9011 9012 static void 9013 pa_reorg (void) 9014 { 9015 rtx insn; 9016 9017 remove_useless_addtr_insns (1); 9018 9019 if (pa_cpu < PROCESSOR_8000) 9020 pa_combine_instructions (); 9021 9022 9023 /* This is fairly cheap, so always run it if optimizing. */ 9024 if (optimize > 0 && !TARGET_BIG_SWITCH) 9025 { 9026 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */ 9027 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 9028 { 9029 rtx pattern, tmp, location, label; 9030 unsigned int length, i; 9031 9032 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */ 9033 if (GET_CODE (insn) != JUMP_INSN 9034 || (GET_CODE (PATTERN (insn)) != ADDR_VEC 9035 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) 9036 continue; 9037 9038 /* Emit marker for the beginning of the branch table. */ 9039 emit_insn_before (gen_begin_brtab (), insn); 9040 9041 pattern = PATTERN (insn); 9042 location = PREV_INSN (insn); 9043 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC); 9044 9045 for (i = 0; i < length; i++) 9046 { 9047 /* Emit a label before each jump to keep jump.c from 9048 removing this code. */ 9049 tmp = gen_label_rtx (); 9050 LABEL_NUSES (tmp) = 1; 9051 emit_label_after (tmp, location); 9052 location = NEXT_INSN (location); 9053 9054 if (GET_CODE (pattern) == ADDR_VEC) 9055 label = XEXP (XVECEXP (pattern, 0, i), 0); 9056 else 9057 label = XEXP (XVECEXP (pattern, 1, i), 0); 9058 9059 tmp = gen_short_jump (label); 9060 9061 /* Emit the jump itself. */ 9062 tmp = emit_jump_insn_after (tmp, location); 9063 JUMP_LABEL (tmp) = label; 9064 LABEL_NUSES (label)++; 9065 location = NEXT_INSN (location); 9066 9067 /* Emit a BARRIER after the jump. */ 9068 emit_barrier_after (location); 9069 location = NEXT_INSN (location); 9070 } 9071 9072 /* Emit marker for the end of the branch table. */ 9073 emit_insn_before (gen_end_brtab (), location); 9074 location = NEXT_INSN (location); 9075 emit_barrier_after (location); 9076 9077 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */ 9078 delete_insn (insn); 9079 } 9080 } 9081 else 9082 { 9083 /* Still need brtab marker insns. FIXME: the presence of these 9084 markers disables output of the branch table to readonly memory, 9085 and any alignment directives that might be needed. Possibly, 9086 the begin_brtab insn should be output before the label for the 9087 table. This doesn't matter at the moment since the tables are 9088 always output in the text section. */ 9089 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 9090 { 9091 /* Find an ADDR_VEC insn.
*/ 9092 if (GET_CODE (insn) != JUMP_INSN 9093 || (GET_CODE (PATTERN (insn)) != ADDR_VEC 9094 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) 9095 continue; 9096 9097 /* Now generate markers for the beginning and end of the 9098 branch table. */ 9099 emit_insn_before (gen_begin_brtab (), insn); 9100 emit_insn_after (gen_end_brtab (), insn); 9101 } 9102 } 9103 } 9104 9105 /* The PA has a number of odd instructions which can perform multiple 9106 tasks at once. On first generation PA machines (PA1.0 and PA1.1) 9107 it may be profitable to combine two instructions into one instruction 9108 with two outputs. It's not profitable on PA2.0 machines because the 9109 two outputs would take two slots in the reorder buffers. 9110 9111 This routine finds instructions which can be combined and combines 9112 them. We only support some of the potential combinations, and we 9113 only try common ways to find suitable instructions. 9114 9115 * addb can add two registers or a register and a small integer 9116 and jump to a nearby (+-8k) location. Normally the jump to the 9117 nearby location is conditional on the result of the add, but by 9118 using the "true" condition we can make the jump unconditional. 9119 Thus addb can perform two independent operations in one insn. 9120 9121 * movb is similar to addb in that it can perform a reg->reg 9122 or small immediate->reg copy and jump to a nearby (+-8k) location. 9123 9124 * fmpyadd and fmpysub can perform an FP multiply and either an 9125 FP add or FP sub if the operands of the multiply and add/sub are 9126 independent (there are other minor restrictions). Note both 9127 the fmpy and fadd/fsub can in theory move to better spots according 9128 to data dependencies, but for now we require the fmpy stay at a 9129 fixed location. 9130 9131 * Many of the memory operations can perform pre & post updates 9132 of index registers. GCC's pre/post increment/decrement addressing 9133 is far too simple to take advantage of all the possibilities. This 9134 pass may not be suitable since those insns may not be independent. 9135 9136 * comclr can compare two ints or an int and a register, nullify 9137 the following instruction and zero some other register. This 9138 is more difficult to use as it's harder to find an insn which 9139 will generate a comclr than finding something like an unconditional 9140 branch. (Conditional moves & long branches create comclr insns.) 9141 9142 * Most arithmetic operations can conditionally skip the next 9143 instruction. They can be viewed as "perform this operation 9144 and conditionally jump to this nearby location" (where nearby 9145 is an insn away). These are difficult to use due to the 9146 branch length restrictions. */ 9147 9148 static void 9149 pa_combine_instructions (void) 9150 { 9151 rtx anchor, new_rtx; 9152 9153 /* This can get expensive since the basic algorithm is on the 9154 order of O(n^2) (or worse). Only do it for -O2 or higher 9155 levels of optimization. */ 9156 if (optimize < 2) 9157 return; 9158 9159 /* Walk down the list of insns looking for "anchor" insns which 9160 may be combined with "floating" insns. As the name implies, 9161 "anchor" instructions don't move, while "floating" insns may 9162 move around.
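For example, when the anchor is an fmpy and the floater a nearby independent fadd (or vice versa), the two patterns are wrapped below in a single PARALLEL which, if it matches one of the two-output fmpyadd/fmpysub patterns and passes pa_can_combine_p, replaces both insns.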
*/ 9163 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX)); 9164 new_rtx = make_insn_raw (new_rtx); 9165 9166 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor)) 9167 { 9168 enum attr_pa_combine_type anchor_attr; 9169 enum attr_pa_combine_type floater_attr; 9170 9171 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs. 9172 Also ignore any special USE insns. */ 9173 if ((GET_CODE (anchor) != INSN 9174 && GET_CODE (anchor) != JUMP_INSN 9175 && GET_CODE (anchor) != CALL_INSN) 9176 || GET_CODE (PATTERN (anchor)) == USE 9177 || GET_CODE (PATTERN (anchor)) == CLOBBER 9178 || GET_CODE (PATTERN (anchor)) == ADDR_VEC 9179 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC) 9180 continue; 9181 9182 anchor_attr = get_attr_pa_combine_type (anchor); 9183 /* See if anchor is an insn suitable for combination. */ 9184 if (anchor_attr == PA_COMBINE_TYPE_FMPY 9185 || anchor_attr == PA_COMBINE_TYPE_FADDSUB 9186 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH 9187 && ! forward_branch_p (anchor))) 9188 { 9189 rtx floater; 9190 9191 for (floater = PREV_INSN (anchor); 9192 floater; 9193 floater = PREV_INSN (floater)) 9194 { 9195 if (GET_CODE (floater) == NOTE 9196 || (GET_CODE (floater) == INSN 9197 && (GET_CODE (PATTERN (floater)) == USE 9198 || GET_CODE (PATTERN (floater)) == CLOBBER))) 9199 continue; 9200 9201 /* Anything except a regular INSN will stop our search. */ 9202 if (GET_CODE (floater) != INSN 9203 || GET_CODE (PATTERN (floater)) == ADDR_VEC 9204 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC) 9205 { 9206 floater = NULL_RTX; 9207 break; 9208 } 9209 9210 /* See if FLOATER is suitable for combination with the 9211 anchor. */ 9212 floater_attr = get_attr_pa_combine_type (floater); 9213 if ((anchor_attr == PA_COMBINE_TYPE_FMPY 9214 && floater_attr == PA_COMBINE_TYPE_FADDSUB) 9215 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9216 && floater_attr == PA_COMBINE_TYPE_FMPY)) 9217 { 9218 /* If ANCHOR and FLOATER can be combined, then we're 9219 done with this pass. */ 9220 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9221 SET_DEST (PATTERN (floater)), 9222 XEXP (SET_SRC (PATTERN (floater)), 0), 9223 XEXP (SET_SRC (PATTERN (floater)), 1))) 9224 break; 9225 } 9226 9227 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH 9228 && floater_attr == PA_COMBINE_TYPE_ADDMOVE) 9229 { 9230 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS) 9231 { 9232 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9233 SET_DEST (PATTERN (floater)), 9234 XEXP (SET_SRC (PATTERN (floater)), 0), 9235 XEXP (SET_SRC (PATTERN (floater)), 1))) 9236 break; 9237 } 9238 else 9239 { 9240 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9241 SET_DEST (PATTERN (floater)), 9242 SET_SRC (PATTERN (floater)), 9243 SET_SRC (PATTERN (floater)))) 9244 break; 9245 } 9246 } 9247 } 9248 9249 /* If we didn't find anything on the backwards scan try forwards. */ 9250 if (!floater 9251 && (anchor_attr == PA_COMBINE_TYPE_FMPY 9252 || anchor_attr == PA_COMBINE_TYPE_FADDSUB)) 9253 { 9254 for (floater = anchor; floater; floater = NEXT_INSN (floater)) 9255 { 9256 if (GET_CODE (floater) == NOTE 9257 || (GET_CODE (floater) == INSN 9258 && (GET_CODE (PATTERN (floater)) == USE 9259 || GET_CODE (PATTERN (floater)) == CLOBBER))) 9260 9261 continue; 9262 9263 /* Anything except a regular INSN will stop our search. 
*/ 9264 if (GET_CODE (floater) != INSN 9265 || GET_CODE (PATTERN (floater)) == ADDR_VEC 9266 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC) 9267 { 9268 floater = NULL_RTX; 9269 break; 9270 } 9271 9272 /* See if FLOATER is suitable for combination with the 9273 anchor. */ 9274 floater_attr = get_attr_pa_combine_type (floater); 9275 if ((anchor_attr == PA_COMBINE_TYPE_FMPY 9276 && floater_attr == PA_COMBINE_TYPE_FADDSUB) 9277 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9278 && floater_attr == PA_COMBINE_TYPE_FMPY)) 9279 { 9280 /* If ANCHOR and FLOATER can be combined, then we're 9281 done with this pass. */ 9282 if (pa_can_combine_p (new_rtx, anchor, floater, 1, 9283 SET_DEST (PATTERN (floater)), 9284 XEXP (SET_SRC (PATTERN (floater)), 9285 0), 9286 XEXP (SET_SRC (PATTERN (floater)), 9287 1))) 9288 break; 9289 } 9290 } 9291 } 9292 9293 /* FLOATER will be nonzero if we found a suitable floating 9294 insn for combination with ANCHOR. */ 9295 if (floater 9296 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9297 || anchor_attr == PA_COMBINE_TYPE_FMPY)) 9298 { 9299 /* Emit the new instruction and delete the old anchor. */ 9300 emit_insn_before (gen_rtx_PARALLEL 9301 (VOIDmode, 9302 gen_rtvec (2, PATTERN (anchor), 9303 PATTERN (floater))), 9304 anchor); 9305 9306 SET_INSN_DELETED (anchor); 9307 9308 /* Emit a special USE insn for FLOATER, then delete 9309 the floating insn. */ 9310 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); 9311 delete_insn (floater); 9312 9313 continue; 9314 } 9315 else if (floater 9316 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH) 9317 { 9318 rtx temp; 9319 /* Emit the new_jump instruction and delete the old anchor. */ 9320 temp 9321 = emit_jump_insn_before (gen_rtx_PARALLEL 9322 (VOIDmode, 9323 gen_rtvec (2, PATTERN (anchor), 9324 PATTERN (floater))), 9325 anchor); 9326 9327 JUMP_LABEL (temp) = JUMP_LABEL (anchor); 9328 SET_INSN_DELETED (anchor); 9329 9330 /* Emit a special USE insn for FLOATER, then delete 9331 the floating insn. */ 9332 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); 9333 delete_insn (floater); 9334 continue; 9335 } 9336 } 9337 } 9338 } 9339 9340 static int 9341 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest, 9342 rtx src1, rtx src2) 9343 { 9344 int insn_code_number; 9345 rtx start, end; 9346 9347 /* Create a PARALLEL with the patterns of ANCHOR and 9348 FLOATER, try to recognize it, then test constraints 9349 for the resulting pattern. 9350 9351 If the pattern doesn't match or the constraints 9352 aren't met keep searching for a suitable floater 9353 insn. */ 9354 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor); 9355 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater); 9356 INSN_CODE (new_rtx) = -1; 9357 insn_code_number = recog_memoized (new_rtx); 9358 if (insn_code_number < 0 9359 || (extract_insn (new_rtx), ! constrain_operands (1))) 9360 return 0; 9361 9362 if (reversed) 9363 { 9364 start = anchor; 9365 end = floater; 9366 } 9367 else 9368 { 9369 start = floater; 9370 end = anchor; 9371 } 9372 9373 /* There's up to three operands to consider. One 9374 output and two inputs. 9375 9376 The output must not be used between FLOATER & ANCHOR 9377 exclusive. The inputs must not be set between 9378 FLOATER and ANCHOR exclusive. */ 9379 9380 if (reg_used_between_p (dest, start, end)) 9381 return 0; 9382 9383 if (reg_set_between_p (src1, start, end)) 9384 return 0; 9385 9386 if (reg_set_between_p (src2, start, end)) 9387 return 0; 9388 9389 /* If we get here, then everything is good. 
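
     As a hypothetical illustration of why the checks above are needed:
     if the floater computed  r20 = r21 + r22  and some insn between the
     floater and the anchor read r20 or wrote r21 or r22, moving the
     computation to the anchor's location would change the value seen
     by that intervening insn, so the combination must be rejected.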
*/ 9390 return 1; 9391 } 9392 9393 /* Return nonzero if references for INSN are delayed. 9394 9395 Millicode insns are actually function calls with some special 9396 constraints on arguments and register usage. 9397 9398 Millicode calls always expect their arguments in the integer argument 9399 registers, and always return their result in %r29 (ret1). They 9400 are expected to clobber their arguments, %r1, %r29, and the return 9401 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else. 9402 9403 This function tells reorg that the references to arguments and 9404 millicode calls do not appear to happen until after the millicode call. 9405 This allows reorg to put insns which set the argument registers into the 9406 delay slot of the millicode call -- thus they act more like traditional 9407 CALL_INSNs. 9408 9409 Note we cannot consider side effects of the insn to be delayed because 9410 the branch and link insn will clobber the return pointer. If we happened 9411 to use the return pointer in the delay slot of the call, then we lose. 9412 9413 get_attr_type will try to recognize the given insn, so make sure to 9414 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns 9415 in particular. */ 9416 int 9417 pa_insn_refs_are_delayed (rtx insn) 9418 { 9419 return ((GET_CODE (insn) == INSN 9420 && GET_CODE (PATTERN (insn)) != SEQUENCE 9421 && GET_CODE (PATTERN (insn)) != USE 9422 && GET_CODE (PATTERN (insn)) != CLOBBER 9423 && get_attr_type (insn) == TYPE_MILLI)); 9424 } 9425 9426 /* Promote the return value, but not the arguments. */ 9427 9428 static enum machine_mode 9429 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, 9430 enum machine_mode mode, 9431 int *punsignedp ATTRIBUTE_UNUSED, 9432 const_tree fntype ATTRIBUTE_UNUSED, 9433 int for_return) 9434 { 9435 if (for_return == 0) 9436 return mode; 9437 return promote_mode (type, mode, punsignedp); 9438 } 9439 9440 /* On the HP-PA the value is found in register(s) 28(-29), unless 9441 the mode is SF or DF. Then the value is returned in fr4 (32). 9442 9443 This must perform the same promotions as PROMOTE_MODE, else promoting 9444 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly. 9445 9446 Small structures must be returned in a PARALLEL on PA64 in order 9447 to match the HP Compiler ABI. */ 9448 9449 static rtx 9450 pa_function_value (const_tree valtype, 9451 const_tree func ATTRIBUTE_UNUSED, 9452 bool outgoing ATTRIBUTE_UNUSED) 9453 { 9454 enum machine_mode valmode; 9455 9456 if (AGGREGATE_TYPE_P (valtype) 9457 || TREE_CODE (valtype) == COMPLEX_TYPE 9458 || TREE_CODE (valtype) == VECTOR_TYPE) 9459 { 9460 if (TARGET_64BIT) 9461 { 9462 /* Aggregates with a size less than or equal to 128 bits are 9463 returned in GR 28(-29). They are left justified. The pad 9464 bits are undefined. Larger aggregates are returned in 9465 memory. */ 9466 rtx loc[2]; 9467 int i, offset = 0; 9468 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2; 9469 9470 for (i = 0; i < ub; i++) 9471 { 9472 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 9473 gen_rtx_REG (DImode, 28 + i), 9474 GEN_INT (offset)); 9475 offset += 8; 9476 } 9477 9478 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc)); 9479 } 9480 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD) 9481 { 9482 /* Aggregates 5 to 8 bytes in size are returned in general 9483 registers r28-r29 in the same manner as other non 9484 floating-point objects. The data is right-justified and 9485 zero-extended to 64 bits. 
This is opposite to the normal
	     justification used on big endian targets and requires
	     special treatment.  */
	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
				       gen_rtx_REG (DImode, 28), const0_rtx);
	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	}
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}

/* Implement the TARGET_LIBCALL_VALUE hook.  */

static rtx
pa_libcall_value (enum machine_mode mode,
		  const_rtx fun ATTRIBUTE_UNUSED)
{
  if (! TARGET_SOFT_FLOAT
      && (mode == SFmode || mode == DFmode))
    return gen_rtx_REG (mode, 32);
  else
    return gen_rtx_REG (mode, 28);
}

/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */

static bool
pa_function_value_regno_p (const unsigned int regno)
{
  if (regno == 28
      || (! TARGET_SOFT_FLOAT && regno == 32))
    return true;

  return false;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
pa_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			 const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int arg_size = FUNCTION_ARG_SIZE (mode, type);

  cum->nargs_prototype--;
  cum->words += (arg_size
		 + ((cum->words & 01)
		    && type != NULL_TREE
		    && arg_size > 1));
}

/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
static rtx
pa_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
		 const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
	return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
	alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
	return NULL_RTX;
    }

  /* The 32-bit and 64-bit ABIs are rather different,
     particularly in their handling of FP registers.
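
     As an illustrative summary distilled from the code below (not a
     normative ABI statement): in the 32-bit ABI the four argument
     words map to r26, r25, r24 and r23, and when a scalar float must
     also be passed in an FP register, argument word N corresponds to
     fr4+N for single words, while doublewords in slots 0-1 and 2-3
     use fr5 and fr7 respectively.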
We might 9598 be able to cleverly share code between them, but I'm not 9599 going to bother in the hope that splitting them up results 9600 in code that is more easily understood. */ 9601 9602 if (TARGET_64BIT) 9603 { 9604 /* Advance the base registers to their current locations. 9605 9606 Remember, gprs grow towards smaller register numbers while 9607 fprs grow to higher register numbers. Also remember that 9608 although FP regs are 32-bit addressable, we pretend that 9609 the registers are 64-bits wide. */ 9610 gpr_reg_base = 26 - cum->words; 9611 fpr_reg_base = 32 + cum->words; 9612 9613 /* Arguments wider than one word and small aggregates need special 9614 treatment. */ 9615 if (arg_size > 1 9616 || mode == BLKmode 9617 || (type && (AGGREGATE_TYPE_P (type) 9618 || TREE_CODE (type) == COMPLEX_TYPE 9619 || TREE_CODE (type) == VECTOR_TYPE))) 9620 { 9621 /* Double-extended precision (80-bit), quad-precision (128-bit) 9622 and aggregates including complex numbers are aligned on 9623 128-bit boundaries. The first eight 64-bit argument slots 9624 are associated one-to-one, with general registers r26 9625 through r19, and also with floating-point registers fr4 9626 through fr11. Arguments larger than one word are always 9627 passed in general registers. 9628 9629 Using a PARALLEL with a word mode register results in left 9630 justified data on a big-endian target. */ 9631 9632 rtx loc[8]; 9633 int i, offset = 0, ub = arg_size; 9634 9635 /* Align the base register. */ 9636 gpr_reg_base -= alignment; 9637 9638 ub = MIN (ub, max_arg_words - cum->words - alignment); 9639 for (i = 0; i < ub; i++) 9640 { 9641 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 9642 gen_rtx_REG (DImode, gpr_reg_base), 9643 GEN_INT (offset)); 9644 gpr_reg_base -= 1; 9645 offset += 8; 9646 } 9647 9648 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc)); 9649 } 9650 } 9651 else 9652 { 9653 /* If the argument is larger than a word, then we know precisely 9654 which registers we must use. */ 9655 if (arg_size > 1) 9656 { 9657 if (cum->words) 9658 { 9659 gpr_reg_base = 23; 9660 fpr_reg_base = 38; 9661 } 9662 else 9663 { 9664 gpr_reg_base = 25; 9665 fpr_reg_base = 34; 9666 } 9667 9668 /* Structures 5 to 8 bytes in size are passed in the general 9669 registers in the same manner as other non floating-point 9670 objects. The data is right-justified and zero-extended 9671 to 64 bits. This is opposite to the normal justification 9672 used on big endian targets and requires special treatment. 9673 We now define BLOCK_REG_PADDING to pad these objects. 9674 Aggregates, complex and vector types are passed in the same 9675 manner as structures. */ 9676 if (mode == BLKmode 9677 || (type && (AGGREGATE_TYPE_P (type) 9678 || TREE_CODE (type) == COMPLEX_TYPE 9679 || TREE_CODE (type) == VECTOR_TYPE))) 9680 { 9681 rtx loc = gen_rtx_EXPR_LIST (VOIDmode, 9682 gen_rtx_REG (DImode, gpr_reg_base), 9683 const0_rtx); 9684 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc)); 9685 } 9686 } 9687 else 9688 { 9689 /* We have a single word (32 bits). A simple computation 9690 will get us the register #s we need. */ 9691 gpr_reg_base = 26 - cum->words; 9692 fpr_reg_base = 32 + 2 * cum->words; 9693 } 9694 } 9695 9696 /* Determine if the argument needs to be passed in both general and 9697 floating point registers. */ 9698 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32) 9699 /* If we are doing soft-float with portable runtime, then there 9700 is no need to worry about FP regs. 
*/
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
	  pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
	  registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
	  sibcall arguments set up with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
	 calls with the 32-bit ABI and the HP assembler since there is no
	 way to specify argument locations in static functions.  */
      || (!TARGET_64BIT
	  && !TARGET_GAS
	  && !cum->incoming
	  && cum->indirect
	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
	= gen_rtx_PARALLEL
	    (mode,
	     gen_rtvec (2,
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, fpr_reg_base),
					   const0_rtx),
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, gpr_reg_base),
					   const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
	  /* Indirect calls in the normal 32-bit ABI require all arguments
	     to be passed in general registers.  */
	  || (!TARGET_PORTABLE_RUNTIME
	      && !TARGET_64BIT
	      && !TARGET_ELF32
	      && cum->indirect)
	  /* If the parameter is not a scalar floating-point parameter,
	     then it belongs in GPRs.  */
	  || GET_MODE_CLASS (mode) != MODE_FLOAT
	  /* Structure with single SFmode field belongs in GPR.  */
	  || (type && AGGREGATE_TYPE_P (type)))
	retval = gen_rtx_REG (mode, gpr_reg_base);
      else
	retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}

/* Arguments larger than one word are double word aligned.  */

static unsigned int
pa_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  bool singleword = (type
		     ? (integer_zerop (TYPE_SIZE (type))
			|| !TREE_CONSTANT (TYPE_SIZE (type))
			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);

  return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
}

/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
		      tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}


/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.
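
   For example, switching to a new one-only (COMDAT) code subspace
   emits the directive pair built below:

     .SPACE $TEXT$
     .NSUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,SORT=24,COMDAT

   while the plain GAS and HP assembler cases fall back to the default
   $CODE$ subspace via .NSUBSPA or .SUBSPA respectively.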
*/ 9803 9804 static void 9805 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED) 9806 { 9807 gcc_assert (TARGET_SOM); 9808 if (TARGET_GAS) 9809 { 9810 if (cfun && cfun->machine && !cfun->machine->in_nsubspa) 9811 { 9812 /* We only want to emit a .nsubspa directive once at the 9813 start of the function. */ 9814 cfun->machine->in_nsubspa = 1; 9815 9816 /* Create a new subspace for the text. This provides 9817 better stub placement and one-only functions. */ 9818 if (cfun->decl 9819 && DECL_ONE_ONLY (cfun->decl) 9820 && !DECL_WEAK (cfun->decl)) 9821 { 9822 output_section_asm_op ("\t.SPACE $TEXT$\n" 9823 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8," 9824 "ACCESS=44,SORT=24,COMDAT"); 9825 return; 9826 } 9827 } 9828 else 9829 { 9830 /* There isn't a current function or the body of the current 9831 function has been completed. So, we are changing to the 9832 text section to output debugging information. Thus, we 9833 need to forget that we are in the text section so that 9834 varasm.c will call us when text_section is selected again. */ 9835 gcc_assert (!cfun || !cfun->machine 9836 || cfun->machine->in_nsubspa == 2); 9837 in_section = NULL; 9838 } 9839 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$"); 9840 return; 9841 } 9842 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$"); 9843 } 9844 9845 /* A get_unnamed_section callback for switching to comdat data 9846 sections. This function is only used with SOM. */ 9847 9848 static void 9849 som_output_comdat_data_section_asm_op (const void *data) 9850 { 9851 in_section = NULL; 9852 output_section_asm_op (data); 9853 } 9854 9855 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */ 9856 9857 static void 9858 pa_som_asm_init_sections (void) 9859 { 9860 text_section 9861 = get_unnamed_section (0, som_output_text_section_asm_op, NULL); 9862 9863 /* SOM puts readonly data in the default $LIT$ subspace when PIC code 9864 is not being generated. */ 9865 som_readonly_data_section 9866 = get_unnamed_section (0, output_section_asm_op, 9867 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$"); 9868 9869 /* When secondary definitions are not supported, SOM makes readonly 9870 data one-only by creating a new $LIT$ subspace in $TEXT$ with 9871 the comdat flag. */ 9872 som_one_only_readonly_data_section 9873 = get_unnamed_section (0, som_output_comdat_data_section_asm_op, 9874 "\t.SPACE $TEXT$\n" 9875 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8," 9876 "ACCESS=0x2c,SORT=16,COMDAT"); 9877 9878 9879 /* When secondary definitions are not supported, SOM makes data one-only 9880 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */ 9881 som_one_only_data_section 9882 = get_unnamed_section (SECTION_WRITE, 9883 som_output_comdat_data_section_asm_op, 9884 "\t.SPACE $PRIVATE$\n" 9885 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8," 9886 "ACCESS=31,SORT=24,COMDAT"); 9887 9888 if (flag_tm) 9889 som_tm_clone_table_section 9890 = get_unnamed_section (0, output_section_asm_op, 9891 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$"); 9892 9893 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups 9894 which reference data within the $TEXT$ space (for example constant 9895 strings in the $LIT$ subspace). 9896 9897 The assemblers (GAS and HP as) both have problems with handling 9898 the difference of two symbols which is the other correct way to 9899 reference constant data during PIC code generation. 9900 9901 So, there's no way to reference constant data which is in the 9902 $TEXT$ space during PIC generation. 
Instead place all constant 9903 data into the $PRIVATE$ subspace (this reduces sharing, but it 9904 works correctly). */ 9905 readonly_data_section = flag_pic ? data_section : som_readonly_data_section; 9906 9907 /* We must not have a reference to an external symbol defined in a 9908 shared library in a readonly section, else the SOM linker will 9909 complain. 9910 9911 So, we force exception information into the data section. */ 9912 exception_section = data_section; 9913 } 9914 9915 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */ 9916 9917 static section * 9918 pa_som_tm_clone_table_section (void) 9919 { 9920 return som_tm_clone_table_section; 9921 } 9922 9923 /* On hpux10, the linker will give an error if we have a reference 9924 in the read-only data section to a symbol defined in a shared 9925 library. Therefore, expressions that might require a reloc can 9926 not be placed in the read-only data section. */ 9927 9928 static section * 9929 pa_select_section (tree exp, int reloc, 9930 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) 9931 { 9932 if (TREE_CODE (exp) == VAR_DECL 9933 && TREE_READONLY (exp) 9934 && !TREE_THIS_VOLATILE (exp) 9935 && DECL_INITIAL (exp) 9936 && (DECL_INITIAL (exp) == error_mark_node 9937 || TREE_CONSTANT (DECL_INITIAL (exp))) 9938 && !reloc) 9939 { 9940 if (TARGET_SOM 9941 && DECL_ONE_ONLY (exp) 9942 && !DECL_WEAK (exp)) 9943 return som_one_only_readonly_data_section; 9944 else 9945 return readonly_data_section; 9946 } 9947 else if (CONSTANT_CLASS_P (exp) && !reloc) 9948 return readonly_data_section; 9949 else if (TARGET_SOM 9950 && TREE_CODE (exp) == VAR_DECL 9951 && DECL_ONE_ONLY (exp) 9952 && !DECL_WEAK (exp)) 9953 return som_one_only_data_section; 9954 else 9955 return data_section; 9956 } 9957 9958 static void 9959 pa_globalize_label (FILE *stream, const char *name) 9960 { 9961 /* We only handle DATA objects here, functions are globalized in 9962 ASM_DECLARE_FUNCTION_NAME. */ 9963 if (! FUNCTION_NAME_P (name)) 9964 { 9965 fputs ("\t.EXPORT ", stream); 9966 assemble_name (stream, name); 9967 fputs (",DATA\n", stream); 9968 } 9969 } 9970 9971 /* Worker function for TARGET_STRUCT_VALUE_RTX. */ 9972 9973 static rtx 9974 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, 9975 int incoming ATTRIBUTE_UNUSED) 9976 { 9977 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM); 9978 } 9979 9980 /* Worker function for TARGET_RETURN_IN_MEMORY. */ 9981 9982 bool 9983 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 9984 { 9985 /* SOM ABI says that objects larger than 64 bits are returned in memory. 9986 PA64 ABI says that objects larger than 128 bits are returned in memory. 9987 Note, int_size_in_bytes can return -1 if the size of the object is 9988 variable or larger than the maximum value that can be expressed as 9989 a HOST_WIDE_INT. It can also return zero for an empty type. The 9990 simplest way to handle variable and empty types is to pass them in 9991 memory. This avoids problems in defining the boundaries of argument 9992 slots, allocating registers, etc. */ 9993 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8) 9994 || int_size_in_bytes (type) <= 0); 9995 } 9996 9997 /* Structure to hold declaration and name of external symbols that are 9998 emitted by GCC. We generate a vector of these symbols and output them 9999 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true. 10000 This avoids putting out names that are never really used. 
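
   For instance (an illustrative case, not from the sources), a
   declaration such as

     extern int unused_helper (int);

   that is never referenced produces no output at all, while a
   referenced external is emitted at the end of the file through
   ASM_OUTPUT_EXTERNAL_REAL (an .IMPORT directive on SOM targets).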
*/ 10001 10002 typedef struct GTY(()) extern_symbol 10003 { 10004 tree decl; 10005 const char *name; 10006 } extern_symbol; 10007 10008 /* Define gc'd vector type for extern_symbol. */ 10009 10010 /* Vector of extern_symbol pointers. */ 10011 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols; 10012 10013 #ifdef ASM_OUTPUT_EXTERNAL_REAL 10014 /* Mark DECL (name NAME) as an external reference (assembler output 10015 file FILE). This saves the names to output at the end of the file 10016 if actually referenced. */ 10017 10018 void 10019 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name) 10020 { 10021 gcc_assert (file == asm_out_file); 10022 extern_symbol p = {decl, name}; 10023 vec_safe_push (extern_symbols, p); 10024 } 10025 10026 /* Output text required at the end of an assembler file. 10027 This includes deferred plabels and .import directives for 10028 all external symbols that were actually referenced. */ 10029 10030 static void 10031 pa_hpux_file_end (void) 10032 { 10033 unsigned int i; 10034 extern_symbol *p; 10035 10036 if (!NO_DEFERRED_PROFILE_COUNTERS) 10037 output_deferred_profile_counters (); 10038 10039 output_deferred_plabels (); 10040 10041 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++) 10042 { 10043 tree decl = p->decl; 10044 10045 if (!TREE_ASM_WRITTEN (decl) 10046 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0))) 10047 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name); 10048 } 10049 10050 vec_free (extern_symbols); 10051 } 10052 #endif 10053 10054 /* Return true if a change from mode FROM to mode TO for a register 10055 in register class RCLASS is invalid. */ 10056 10057 bool 10058 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, 10059 enum reg_class rclass) 10060 { 10061 if (from == to) 10062 return false; 10063 10064 /* Reject changes to/from complex and vector modes. */ 10065 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from) 10066 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to)) 10067 return true; 10068 10069 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)) 10070 return false; 10071 10072 /* There is no way to load QImode or HImode values directly from 10073 memory. SImode loads to the FP registers are not zero extended. 10074 On the 64-bit target, this conflicts with the definition of 10075 LOAD_EXTEND_OP. Thus, we can't allow changing between modes 10076 with different sizes in the floating-point registers. */ 10077 if (MAYBE_FP_REG_CLASS_P (rclass)) 10078 return true; 10079 10080 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word 10081 in specific sets of registers. Thus, we cannot allow changing 10082 to a larger mode when it's larger than a word. */ 10083 if (GET_MODE_SIZE (to) > UNITS_PER_WORD 10084 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from)) 10085 return true; 10086 10087 return false; 10088 } 10089 10090 /* Returns TRUE if it is a good idea to tie two pseudo registers 10091 when one has mode MODE1 and one has mode MODE2. 10092 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, 10093 for any hard reg, then this must be FALSE for correct output. 10094 10095 We should return FALSE for QImode and HImode because these modes 10096 are not ok in the floating-point registers. However, this prevents 10097 tieing these modes to SImode and DImode in the general registers. 10098 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and 10099 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used 10100 in the floating-point registers. 
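
   For example (illustrative): QImode and SImode are both MODE_INT and
   so are tied by the check below, even though QImode is not valid in
   the FP registers; SImode and SFmode are in different mode classes
   and are not tied.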
*/

bool
pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}


/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))


/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from the text segment to a stack
   location, patched by pa_trampoline_init to contain valid values,
   and finally entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      fputs ("\tldw 36(%r22),%r21\n", f);
      fputs ("\tbb,>=,n %r21,30,.+16\n", f);
      if (ASSEMBLER_DIALECT == 0)
	fputs ("\tdepi 0,31,2,%r21\n", f);
      else
	fputs ("\tdepwi 0,31,2,%r21\n", f);
      fputs ("\tldw 4(%r21),%r19\n", f);
      fputs ("\tldw 0(%r21),%r21\n", f);
      if (TARGET_PA_20)
	{
	  fputs ("\tbve (%r21)\n", f);
	  fputs ("\tldw 40(%r22),%r29\n", f);
	  fputs ("\t.word 0\n", f);
	  fputs ("\t.word 0\n", f);
	}
      else
	{
	  fputs ("\tldsid (%r21),%r1\n", f);
	  fputs ("\tmtsp %r1,%sr0\n", f);
	  fputs ("\tbe 0(%sr0,%r21)\n", f);
	  fputs ("\tldw 40(%r22),%r29\n", f);
	}
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r1\n", f);
      fputs ("\tldd 24(%r1),%r27\n", f);
      fputs ("\tldd 16(%r1),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\t.dword 0 ; fptr\n", f);
      fputs ("\t.dword 0 ; static link\n", f);
    }
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 36.
   Move the static chain value to trampoline template at offset 40.
   Move the trampoline address to trampoline template at offset 44.
   Move r19 to trampoline template at offset 48.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.
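
   As an illustrative summary, the initialized 32-bit trampoline looks
   like this (byte offsets on the left):

      0..    code template copied from pa_asm_trampoline_template
      36     address of the target function
      40     static chain value
      44     address of the trampoline itself   (these last two words
      48     global pointer, %r19                form the plabel)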
*/ 10197 10198 static void 10199 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) 10200 { 10201 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 10202 rtx start_addr = gen_reg_rtx (Pmode); 10203 rtx end_addr = gen_reg_rtx (Pmode); 10204 rtx line_length = gen_reg_rtx (Pmode); 10205 rtx r_tramp, tmp; 10206 10207 emit_block_move (m_tramp, assemble_trampoline_template (), 10208 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); 10209 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0)); 10210 10211 if (!TARGET_64BIT) 10212 { 10213 tmp = adjust_address (m_tramp, Pmode, 36); 10214 emit_move_insn (tmp, fnaddr); 10215 tmp = adjust_address (m_tramp, Pmode, 40); 10216 emit_move_insn (tmp, chain_value); 10217 10218 /* Create a fat pointer for the trampoline. */ 10219 tmp = adjust_address (m_tramp, Pmode, 44); 10220 emit_move_insn (tmp, r_tramp); 10221 tmp = adjust_address (m_tramp, Pmode, 48); 10222 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19)); 10223 10224 /* fdc and fic only use registers for the address to flush, 10225 they do not accept integer displacements. We align the 10226 start and end addresses to the beginning of their respective 10227 cache lines to minimize the number of lines flushed. */ 10228 emit_insn (gen_andsi3 (start_addr, r_tramp, 10229 GEN_INT (-MIN_CACHELINE_SIZE))); 10230 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 10231 TRAMPOLINE_CODE_SIZE-1)); 10232 emit_insn (gen_andsi3 (end_addr, tmp, 10233 GEN_INT (-MIN_CACHELINE_SIZE))); 10234 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); 10235 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length)); 10236 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length, 10237 gen_reg_rtx (Pmode), 10238 gen_reg_rtx (Pmode))); 10239 } 10240 else 10241 { 10242 tmp = adjust_address (m_tramp, Pmode, 56); 10243 emit_move_insn (tmp, fnaddr); 10244 tmp = adjust_address (m_tramp, Pmode, 64); 10245 emit_move_insn (tmp, chain_value); 10246 10247 /* Create a fat pointer for the trampoline. */ 10248 tmp = adjust_address (m_tramp, Pmode, 16); 10249 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode, 10250 r_tramp, 32))); 10251 tmp = adjust_address (m_tramp, Pmode, 24); 10252 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27)); 10253 10254 /* fdc and fic only use registers for the address to flush, 10255 they do not accept integer displacements. We align the 10256 start and end addresses to the beginning of their respective 10257 cache lines to minimize the number of lines flushed. */ 10258 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32)); 10259 emit_insn (gen_anddi3 (start_addr, tmp, 10260 GEN_INT (-MIN_CACHELINE_SIZE))); 10261 tmp = force_reg (Pmode, plus_constant (Pmode, tmp, 10262 TRAMPOLINE_CODE_SIZE - 1)); 10263 emit_insn (gen_anddi3 (end_addr, tmp, 10264 GEN_INT (-MIN_CACHELINE_SIZE))); 10265 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); 10266 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length)); 10267 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length, 10268 gen_reg_rtx (Pmode), 10269 gen_reg_rtx (Pmode))); 10270 } 10271 } 10272 10273 /* Perform any machine-specific adjustment in the address of the trampoline. 10274 ADDR contains the address that was passed to pa_trampoline_init. 10275 Adjust the trampoline address to point to the plabel at offset 44. 
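
   Note that the constant 46 used below is 44 plus 2: on the 32-bit
   port the plabel flag lives in the low-order bits of a function
   pointer, which is why the template above tests bit 30 of the target
   address with "bb,>=,n %r21,30,.+16" and clears the low two bits
   with "depi 0,31,2,%r21" before using it.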
*/ 10276 10277 static rtx 10278 pa_trampoline_adjust_address (rtx addr) 10279 { 10280 if (!TARGET_64BIT) 10281 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46)); 10282 return addr; 10283 } 10284 10285 static rtx 10286 pa_delegitimize_address (rtx orig_x) 10287 { 10288 rtx x = delegitimize_mem_from_attrs (orig_x); 10289 10290 if (GET_CODE (x) == LO_SUM 10291 && GET_CODE (XEXP (x, 1)) == UNSPEC 10292 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R) 10293 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0)); 10294 return x; 10295 } 10296 10297 static rtx 10298 pa_internal_arg_pointer (void) 10299 { 10300 /* The argument pointer and the hard frame pointer are the same in 10301 the 32-bit runtime, so we don't need a copy. */ 10302 if (TARGET_64BIT) 10303 return copy_to_reg (virtual_incoming_args_rtx); 10304 else 10305 return virtual_incoming_args_rtx; 10306 } 10307 10308 /* Given FROM and TO register numbers, say whether this elimination is allowed. 10309 Frame pointer elimination is automatically handled. */ 10310 10311 static bool 10312 pa_can_eliminate (const int from, const int to) 10313 { 10314 /* The argument cannot be eliminated in the 64-bit runtime. */ 10315 if (TARGET_64BIT && from == ARG_POINTER_REGNUM) 10316 return false; 10317 10318 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM 10319 ? ! frame_pointer_needed 10320 : true); 10321 } 10322 10323 /* Define the offset between two registers, FROM to be eliminated and its 10324 replacement TO, at the start of a routine. */ 10325 HOST_WIDE_INT 10326 pa_initial_elimination_offset (int from, int to) 10327 { 10328 HOST_WIDE_INT offset; 10329 10330 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM) 10331 && to == STACK_POINTER_REGNUM) 10332 offset = -pa_compute_frame_size (get_frame_size (), 0); 10333 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 10334 offset = 0; 10335 else 10336 gcc_unreachable (); 10337 10338 return offset; 10339 } 10340 10341 static void 10342 pa_conditional_register_usage (void) 10343 { 10344 int i; 10345 10346 if (!TARGET_64BIT && !TARGET_PA_11) 10347 { 10348 for (i = 56; i <= FP_REG_LAST; i++) 10349 fixed_regs[i] = call_used_regs[i] = 1; 10350 for (i = 33; i < 56; i += 2) 10351 fixed_regs[i] = call_used_regs[i] = 1; 10352 } 10353 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT) 10354 { 10355 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++) 10356 fixed_regs[i] = call_used_regs[i] = 1; 10357 } 10358 if (flag_pic) 10359 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 10360 } 10361 10362 /* Target hook for c_mode_for_suffix. */ 10363 10364 static enum machine_mode 10365 pa_c_mode_for_suffix (char suffix) 10366 { 10367 if (HPUX_LONG_DOUBLE_LIBRARY) 10368 { 10369 if (suffix == 'q') 10370 return TFmode; 10371 } 10372 10373 return VOIDmode; 10374 } 10375 10376 /* Target hook for function_section. */ 10377 10378 static section * 10379 pa_function_section (tree decl, enum node_frequency freq, 10380 bool startup, bool exit) 10381 { 10382 /* Put functions in text section if target doesn't have named sections. */ 10383 if (!targetm_common.have_named_sections) 10384 return text_section; 10385 10386 /* Force nested functions into the same section as the containing 10387 function. 
*/ 10388 if (decl 10389 && DECL_SECTION_NAME (decl) == NULL_TREE 10390 && DECL_CONTEXT (decl) != NULL_TREE 10391 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL 10392 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE) 10393 return function_section (DECL_CONTEXT (decl)); 10394 10395 /* Otherwise, use the default function section. */ 10396 return default_function_section (decl, freq, startup, exit); 10397 } 10398 10399 /* Implement TARGET_LEGITIMATE_CONSTANT_P. 10400 10401 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS 10402 that need more than three instructions to load prior to reload. This 10403 limit is somewhat arbitrary. It takes three instructions to load a 10404 CONST_INT from memory but two are memory accesses. It may be better 10405 to increase the allowed range for CONST_INTS. We may also be able 10406 to handle CONST_DOUBLES. */ 10407 10408 static bool 10409 pa_legitimate_constant_p (enum machine_mode mode, rtx x) 10410 { 10411 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode)) 10412 return false; 10413 10414 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF) 10415 return false; 10416 10417 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not 10418 legitimate constants. The other variants can't be handled by 10419 the move patterns after reload starts. */ 10420 if (pa_tls_referenced_p (x)) 10421 return false; 10422 10423 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE) 10424 return false; 10425 10426 if (TARGET_64BIT 10427 && HOST_BITS_PER_WIDE_INT > 32 10428 && GET_CODE (x) == CONST_INT 10429 && !reload_in_progress 10430 && !reload_completed 10431 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x)) 10432 && !pa_cint_ok_for_move (INTVAL (x))) 10433 return false; 10434 10435 if (function_label_operand (x, mode)) 10436 return false; 10437 10438 return true; 10439 } 10440 10441 /* Implement TARGET_SECTION_TYPE_FLAGS. */ 10442 10443 static unsigned int 10444 pa_section_type_flags (tree decl, const char *name, int reloc) 10445 { 10446 unsigned int flags; 10447 10448 flags = default_section_type_flags (decl, name, reloc); 10449 10450 /* Function labels are placed in the constant pool. This can 10451 cause a section conflict if decls are put in ".data.rel.ro" 10452 or ".data.rel.ro.local" using the __attribute__ construct. */ 10453 if (strcmp (name, ".data.rel.ro") == 0 10454 || strcmp (name, ".data.rel.ro.local") == 0) 10455 flags |= SECTION_WRITE | SECTION_RELRO; 10456 10457 return flags; 10458 } 10459 10460 /* pa_legitimate_address_p recognizes an RTL expression that is a 10461 valid memory address for an instruction. The MODE argument is the 10462 machine mode for the MEM expression that wants to use this address. 10463 10464 On HP PA-RISC, the legitimate address forms are REG+SMALLINT, 10465 REG+REG, and REG+(REG*SCALE). The indexed address forms are only 10466 available with floating point loads and stores, and integer loads. 10467 We get better code by allowing indexed addresses in the initial 10468 RTL generation. 10469 10470 The acceptance of indexed addresses as legitimate implies that we 10471 must provide patterns for doing indexed integer stores, or the move 10472 expanders must force the address of an indexed store to a register. 10473 We have adopted the latter approach. 10474 10475 Another function of pa_legitimate_address_p is to ensure that 10476 the base register is a valid pointer for indexed instructions. 
   On targets that have non-equivalent space registers, we have to
   know at the time of assembler output which register in a REG+REG
   pair is the base register.  The REG_POINTER flag is sometimes lost
   in reload and the following passes, so it can't be relied on during
   code generation.  Thus, we either have to canonicalize the order
   of the registers in REG+REG indexed addresses, or treat REG+REG
   addresses separately and provide patterns for both permutations.

   The latter approach requires several hundred additional lines of
   code in pa.md.  The downside to canonicalizing is that a PLUS
   in the wrong order can't combine to form a scaled indexed
   memory operand.  As we won't need to canonicalize the operands if
   the REG_POINTER lossage can be fixed, it seems better to canonicalize.

   We initially break out scaled indexed addresses in canonical order
   in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
   scaled indexed addresses during RTL generation.  However, fold_rtx
   has its own opinion on how the operands of a PLUS should be ordered.
   If one of the operands is equivalent to a constant, it will make
   that operand the second operand.  As the base register is likely to
   be equivalent to a SYMBOL_REF, we have made it the second operand.

   pa_legitimate_address_p accepts REG+REG as legitimate when the
   operands are in the order INDEX+BASE on targets with non-equivalent
   space registers, and in any order on targets with equivalent space
   registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.

   We treat a SYMBOL_REF as legitimate if it is part of the current
   function's constant-pool, because such addresses can actually be
   output as REG+SMALLINT.  */

static bool
pa_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
{
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
		  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
	  && REG_P (XEXP (x, 0))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  if (GET_CODE (x) == PLUS)
    {
      rtx base, index;

      /* For REG+REG, the base register should be in XEXP (x, 1),
	 so check it first.  */
      if (REG_P (XEXP (x, 1))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
	base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
	base = XEXP (x, 0), index = XEXP (x, 1);
      else
	return false;

      if (GET_CODE (index) == CONST_INT)
	{
	  if (INT_5_BITS (index))
	    return true;

	  /* When INT14_OK_STRICT is false, a secondary reload is needed
	     to adjust the displacement of SImode and DImode floating point
	     instructions but this may fail when the register also needs
	     reloading.  So, we return false when STRICT is true.  We
	     also reject long displacements for float mode addresses since
	     the majority of accesses will use floating point instructions
	     that don't support 14-bit offsets.
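
	     As an illustrative example (hypothetical registers): a
	     displacement of 100 fits the 14-bit field of an integer
	     "ldw 100(%r3),%r4", but not the 5-bit field of the short
	     "fldws" form, so an SFmode or DFmode address with such an
	     offset is rejected here unless INT14_OK_STRICT holds.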
*/ 10550 if (!INT14_OK_STRICT 10551 && (strict || !(reload_in_progress || reload_completed)) 10552 && mode != QImode 10553 && mode != HImode) 10554 return false; 10555 10556 return base14_operand (index, mode); 10557 } 10558 10559 if (!TARGET_DISABLE_INDEXING 10560 /* Only accept the "canonical" INDEX+BASE operand order 10561 on targets with non-equivalent space registers. */ 10562 && (TARGET_NO_SPACE_REGS 10563 ? REG_P (index) 10564 : (base == XEXP (x, 1) && REG_P (index) 10565 && (reload_completed 10566 || (reload_in_progress && HARD_REGISTER_P (base)) 10567 || REG_POINTER (base)) 10568 && (reload_completed 10569 || (reload_in_progress && HARD_REGISTER_P (index)) 10570 || !REG_POINTER (index)))) 10571 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode) 10572 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index) 10573 : REG_OK_FOR_INDEX_P (index)) 10574 && borx_reg_operand (base, Pmode) 10575 && borx_reg_operand (index, Pmode)) 10576 return true; 10577 10578 if (!TARGET_DISABLE_INDEXING 10579 && GET_CODE (index) == MULT 10580 && MODE_OK_FOR_SCALED_INDEXING_P (mode) 10581 && REG_P (XEXP (index, 0)) 10582 && GET_MODE (XEXP (index, 0)) == Pmode 10583 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0)) 10584 : REG_OK_FOR_INDEX_P (XEXP (index, 0))) 10585 && GET_CODE (XEXP (index, 1)) == CONST_INT 10586 && INTVAL (XEXP (index, 1)) 10587 == (HOST_WIDE_INT) GET_MODE_SIZE (mode) 10588 && borx_reg_operand (base, Pmode)) 10589 return true; 10590 10591 return false; 10592 } 10593 10594 if (GET_CODE (x) == LO_SUM) 10595 { 10596 rtx y = XEXP (x, 0); 10597 10598 if (GET_CODE (y) == SUBREG) 10599 y = SUBREG_REG (y); 10600 10601 if (REG_P (y) 10602 && (strict ? STRICT_REG_OK_FOR_BASE_P (y) 10603 : REG_OK_FOR_BASE_P (y))) 10604 { 10605 /* Needed for -fPIC */ 10606 if (mode == Pmode 10607 && GET_CODE (XEXP (x, 1)) == UNSPEC) 10608 return true; 10609 10610 if (!INT14_OK_STRICT 10611 && (strict || !(reload_in_progress || reload_completed)) 10612 && mode != QImode 10613 && mode != HImode) 10614 return false; 10615 10616 if (CONSTANT_P (XEXP (x, 1))) 10617 return true; 10618 } 10619 return false; 10620 } 10621 10622 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x)) 10623 return true; 10624 10625 return false; 10626 } 10627 10628 /* Look for machine dependent ways to make the invalid address AD a 10629 valid address. 10630 10631 For the PA, transform: 10632 10633 memory(X + <large int>) 10634 10635 into: 10636 10637 if (<large int> & mask) >= 16 10638 Y = (<large int> & ~mask) + mask + 1 Round up. 10639 else 10640 Y = (<large int> & ~mask) Round down. 10641 Z = X + Y 10642 memory (Z + (<large int> - Y)); 10643 10644 This makes reload inheritance and reload_cse work better since Z 10645 can be reused. 10646 10647 There may be more opportunities to improve code with this hook. */ 10648 10649 rtx 10650 pa_legitimize_reload_address (rtx ad, enum machine_mode mode, 10651 int opnum, int type, 10652 int ind_levels ATTRIBUTE_UNUSED) 10653 { 10654 long offset, newoffset, mask; 10655 rtx new_rtx, temp = NULL_RTX; 10656 10657 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT 10658 && !INT14_OK_STRICT ? 0x1f : 0x3fff); 10659 10660 if (optimize && GET_CODE (ad) == PLUS) 10661 temp = simplify_binary_operation (PLUS, Pmode, 10662 XEXP (ad, 0), XEXP (ad, 1)); 10663 10664 new_rtx = temp ? temp : ad; 10665 10666 if (optimize 10667 && GET_CODE (new_rtx) == PLUS 10668 && GET_CODE (XEXP (new_rtx, 0)) == REG 10669 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT) 10670 { 10671 offset = INTVAL (XEXP ((new_rtx), 1)); 10672 10673 /* Choose rounding direction. 
Round up if we are >= halfway. */ 10674 if ((offset & mask) >= ((mask + 1) / 2)) 10675 newoffset = (offset & ~mask) + mask + 1; 10676 else 10677 newoffset = offset & ~mask; 10678 10679 /* Ensure that long displacements are aligned. */ 10680 if (mask == 0x3fff 10681 && (GET_MODE_CLASS (mode) == MODE_FLOAT 10682 || (TARGET_64BIT && (mode) == DImode))) 10683 newoffset &= ~(GET_MODE_SIZE (mode) - 1); 10684 10685 if (newoffset != 0 && VAL_14_BITS_P (newoffset)) 10686 { 10687 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0), 10688 GEN_INT (newoffset)); 10689 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset)); 10690 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0, 10691 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, 10692 opnum, (enum reload_type) type); 10693 return ad; 10694 } 10695 } 10696 10697 return NULL_RTX; 10698 } 10699 10700 #include "gt-pa.h" 10701