/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "df.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_address_cost (rtx, bool);
static bool hppa_rtx_costs (rtx, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                 tree, bool);
static struct machine_function * pa_init_machine_status (void);
static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
                                           enum machine_mode,
                                           secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
                                                   enum machine_mode, int *,
                                                   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;

/* The UNIX standard to use for predefines and linking.  */
int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;


/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION pa_handle_option

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
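  /* For instance, -mfixed-range=%fr4-%fr31 reserves all of %fr4
     through %fr31, and several comma-separated ranges may be given,
     as in -mfixed-range=%r20-%r23,%fr12-%fr15.  (These strings are
     illustrative; registers may also be given by number.)  */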

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement TARGET_HANDLE_OPTION.  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mnosnake:
    case OPT_mpa_risc_1_0:
    case OPT_march_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    case OPT_msnake:
    case OPT_mpa_risc_1_1:
    case OPT_march_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    case OPT_mpa_risc_2_0:
    case OPT_march_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
        pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
        pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
        pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
        pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
        pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
        pa_cpu = PROCESSOR_7300;
      else
        return false;
      return true;

    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

#if TARGET_HPUX
    case OPT_munix_93:
      flag_pa_unix = 1993;
      return true;
#endif

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    default:
      return true;
    }
}

void
override_options (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
#if TARGET_HPUX_11
  if (built_in_decls [BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls [BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
#endif
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return GGC_CNEW (machine_function);
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || ldil_cint_p (ival)
          || zdepi_cint_p (ival));
}

/* Return truth value of whether OP can be used as an operand in an
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT
              && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}

/* True iff the operand OP can be used as the destination operand of
   an integer store.  This also implies the operand could be used as
   the source operand of an integer load.  Symbolic, lo_sum and indexed
   memory operands are not allowed.  We accept reloading pseudos and
   other memory operands.  */
int
integer_store_memory_operand (rtx op, enum machine_mode mode)
{
  return ((reload_in_progress
           && REG_P (op)
           && REGNO (op) >= FIRST_PSEUDO_REGISTER
           && reg_renumber [REGNO (op)] < 0)
          || (GET_CODE (op) == MEM
              && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
              && !symbolic_memory_operand (op, VOIDmode)
              && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
              && !IS_INDEX_ADDR_P (XEXP (op, 0))));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
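/* For example, 0x7f0 is accepted: it is the 5-bit value -1 (11111)
   sign-extended to a 7-bit field and deposited at bit 4.  0x42
   (1000010) is rejected: its set bits do not form a sign-extended
   5-bit field placed in zeros.  */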
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, mode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)
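
   For example, a MODE_INT reference to X + 0x6000 uses mask 0x3fff:
   0x6000 & 0x3fff = 0x2000, which is halfway to the next boundary, so
   we round up to Y = 0x8000 and generate Z = X + 0x8000 followed by
   memory (Z + -0x2000), whose displacement fits in the 14-bit field.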


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (This allows more shadd insns and shifted
   indexed addressing modes to be used.)

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      HOST_WIDE_INT newoffset;
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */
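  /* A shadd constant is 2, 4 or 8, corresponding to the sh1add, sh2add
     and sh3add instructions, which shift the index left by 1, 2 or 3
     bits before adding it to the base.  */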

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                             gen_rtx_MULT (Pmode,
                                                           reg2,
                                                           GEN_INT (val)),
                                             reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          orig_base = XEXP (XEXP (x, 0), 1);
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_MULT (Pmode,
                                            XEXP (XEXP (XEXP (x, 0), 0), 0),
                                            XEXP (XEXP (XEXP (x, 0), 0), 1)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          orig_base = XEXP (x, 1);
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          HOST_WIDE_INT val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
            (Pmode, gen_rtx_PLUS (Pmode,
                                  gen_rtx_MULT (Pmode, reg1,
                                                XEXP (XEXP (idx, 0), 1)),
                                  base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          HOST_WIDE_INT val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                                 gen_rtx_MULT (Pmode,
                                                               reg2,
                                                               GEN_INT (val)),
                                                 reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_MULT (Pmode, reg1,
                                                    XEXP (XEXP (idx, 0), 1)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
             (plus (mult (reg) (shadd_const))
                   (const (plus (symbol_ref) (const_int))))

             where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big but can be divided evenly by shadd_const,
             the quotient can be added to (reg) instead, which allows more
             scaled indexed addresses.  */
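          /* For instance, with 8-byte elements x[n-100000] appears as
             (plus (mult (reg) (8)) (const (plus (symbol_ref) (-800000)))):
             -800000 divides evenly by 8, so -100000 is added to the index
             register and the scaled index off the symbol is kept.  */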
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              gen_rtx_MULT (Pmode,
                                                            reg2,
                                                            GEN_INT (val)),
                                              reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode, regx2,
                                                       XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
                bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = COSTS_N_INSNS (8);
      else
        *total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else
        *total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
           && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
                                 XEXP (operand1, 0)))
          || ((GET_CODE (operand1) == SUBREG
               && GET_CODE (XEXP (operand1, 0)) == MEM
               && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                      ? SFmode : DFmode),
                                     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
        operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand1, 0), 0),
                                          scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
                              replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
           && fp_reg_operand (operand1, mode)
           && ((GET_CODE (operand0) == MEM
                && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                       ? SFmode : DFmode),
                                      XEXP (operand0, 0)))
               || ((GET_CODE (operand0) == SUBREG)
                   && GET_CODE (XEXP (operand0, 0)) == MEM
                   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                          ? SFmode : DFmode),
                                         XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
        operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
                                                                        0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand0, 0),
                                                             0),
                                                       scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
                              replace_equiv_address (operand0, scratch_reg),
                              operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (scratch_reg
           && CONSTANT_P (operand1)
           && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
         memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
                              replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory or a constant.  */
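  /* (SAR is the PA shift-amount register, %cr11.  It can only be set
     from a general register, so a load from memory or of a constant
     has to go through a general scratch register first.)  */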
  else if (scratch_reg
           && GET_CODE (operand0) == REG
           && REGNO (operand0) < FIRST_PSEUDO_REGISTER
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
           && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
    {
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (GET_CODE (operand1) == MEM
          && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
        {
          /* We are reloading the address into the scratch register, so we
             want to make sure the scratch register is a full register.  */
          scratch_reg = force_mode (word_mode, scratch_reg);

          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
                                                                        0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand1, 0),
                                                             0),
                                                       scratch_reg));

          /* Now we are going to load the scratch register from memory,
             we want to load it in the same width as the original MEM,
             which must be the same as the width of the ultimate destination,
             OPERAND0.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg,
                          replace_equiv_address (operand1, scratch_reg));
        }
      else
        {
          /* We want to load the scratch register using the same mode as
             the ultimate destination.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg, operand1);
        }

      /* And emit the insn to set the ultimate destination.  We know that
         the scratch register has the same mode as the destination at this
         point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      /* Legitimize TLS symbol references.  This happens for references
         that aren't a legitimate constant.  */
      if (PA_SYMBOL_REF_TLS_P (operand1))
        operand1 = legitimize_tls_address (operand1);

      if (register_operand (operand1, mode)
          || (GET_CODE (operand1) == CONST_INT
              && cint_ok_for_move (INTVAL (operand1)))
          || (operand1 == CONST0_RTX (mode))
          || (GET_CODE (operand1) == HIGH
              && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
          /* Only `general_operands' can come here, so MEM is ok.  */
          || GET_CODE (operand1) == MEM)
        {
          /* Various sets are created during RTL generation which don't
             have the REG_POINTER flag correctly set.  After the CSE pass,
             instruction recognition can fail if we don't consistently
             set this flag when performing register copies.  This should
             also improve the opportunities for creating insns that use
             unscaled indexing.  */
          if (REG_P (operand0) && REG_P (operand1))
            {
              if (REG_POINTER (operand1)
                  && !REG_POINTER (operand0)
                  && !HARD_REGISTER_P (operand0))
                copy_reg_pointer (operand0, operand1);
            }

          /* When MEMs are broken out, the REG_POINTER flag doesn't
             get set.  In some cases, we can set the REG_POINTER flag
             from the declaration for the MEM.  */
          if (REG_P (operand0)
              && GET_CODE (operand1) == MEM
              && !REG_POINTER (operand0))
            {
              tree decl = MEM_EXPR (operand1);

              /* Set the register pointer flag and register alignment
                 if the declaration for this memory reference is a
                 pointer type.  Fortran indirect argument references
                 are ignored.  */
              if (decl
                  && !(flag_argument_noalias > 1
                       && TREE_CODE (decl) == INDIRECT_REF
                       && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
                {
                  tree type;

                  /* If this is a COMPONENT_REF, use the FIELD_DECL from
                     tree operand 1.  */
                  if (TREE_CODE (decl) == COMPONENT_REF)
                    decl = TREE_OPERAND (decl, 1);

                  type = TREE_TYPE (decl);
                  type = strip_array_types (type);

                  if (POINTER_TYPE_P (type))
                    {
                      int align;

                      type = TREE_TYPE (type);
                      /* Using TYPE_ALIGN_OK is rather conservative as
                         only the Ada front end actually sets it.  */
                      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
                               : BITS_PER_UNIT);
                      mark_reg_pointer (operand0, align);
                    }
                }
            }

          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
          return 1;
        }
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
          && !(reload_in_progress || reload_completed))
        {
          rtx temp = gen_reg_rtx (DFmode);

          emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
          emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
          return 1;
        }
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
        {
          /* Run this case quickly.  */
          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
          return 1;
        }
      if (! (reload_in_progress || reload_completed))
        {
          operands[0] = validize_mem (operand0);
          operands[1] = operand1 = force_reg (mode, operand1);
        }
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx routines with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
          && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
        {
          ishighonly = 1;
          operand1 = XEXP (operand1, 0);
        }
      if (symbolic_operand (operand1, mode))
        {
          /* Argh.  The assembler and linker can't handle arithmetic
             involving plabels.

             So we force the plabel into memory, load operand0 from
             the memory location, then add in the constant part.  */
          if ((GET_CODE (operand1) == CONST
               && GET_CODE (XEXP (operand1, 0)) == PLUS
               && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
              || function_label_operand (operand1, mode))
            {
              rtx temp, const_part;

              /* Figure out what (if any) scratch register to use.  */
              if (reload_in_progress || reload_completed)
                {
                  scratch_reg = scratch_reg ? scratch_reg : operand0;
                  /* SCRATCH_REG will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode it
                     was originally given to us.  */
                  scratch_reg = force_mode (word_mode, scratch_reg);
                }
              else if (flag_pic)
                scratch_reg = gen_reg_rtx (Pmode);

              if (GET_CODE (operand1) == CONST)
                {
                  /* Save away the constant part of the expression.  */
                  const_part = XEXP (XEXP (operand1, 0), 1);
                  gcc_assert (GET_CODE (const_part) == CONST_INT);

                  /* Force the function label into memory.  */
                     */
                  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
                }
              else
                {
                  /* No constant part.  */
                  const_part = NULL_RTX;

                  /* Force the function label into memory.  */
                  temp = force_const_mem (mode, operand1);
                }

              /* Get the address of the memory location.  PIC-ify it if
                 necessary.  */
              temp = XEXP (temp, 0);
              if (flag_pic)
                temp = legitimize_pic_address (temp, mode, scratch_reg);

              /* Put the address of the memory location into our destination
                 register.  */
              operands[1] = temp;
              emit_move_sequence (operands, mode, scratch_reg);

              /* Now load from the memory location into our destination
                 register.  */
              operands[1] = gen_rtx_MEM (Pmode, operands[0]);
              emit_move_sequence (operands, mode, scratch_reg);

              /* And add back in the constant part.  */
              if (const_part != NULL_RTX)
                expand_inc (operand0, const_part);

              return 1;
            }

          if (flag_pic)
            {
              rtx temp;

              if (reload_in_progress || reload_completed)
                {
                  temp = scratch_reg ? scratch_reg : operand0;
                  /* TEMP will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode
                     it was originally given to us.  */
                  temp = force_mode (word_mode, temp);
                }
              else
                temp = gen_reg_rtx (Pmode);

              /* (const (plus (symbol) (const_int))) must be forced to
                 memory during/after reload if the const_int will not fit
                 in 14 bits.  */
              if (GET_CODE (operand1) == CONST
                  && GET_CODE (XEXP (operand1, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
                  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
                  && (reload_completed || reload_in_progress)
                  && flag_pic)
                {
                  rtx const_mem = force_const_mem (mode, operand1);
                  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
                                                        mode, temp);
                  operands[1] = replace_equiv_address (const_mem,
                                                       operands[1]);
                  emit_move_sequence (operands, mode, temp);
                }
              else
                {
                  operands[1] = legitimize_pic_address (operand1, mode, temp);
                  if (REG_P (operand0) && REG_P (operands[1]))
                    copy_reg_pointer (operand0, operands[1]);
                  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
                }
            }
          /* On the HPPA, references to data space are supposed to use dp,
             register 27, but showing it in the RTL inhibits various cse
             and loop optimizations.  */
          else
            {
              rtx temp, set;

              if (reload_in_progress || reload_completed)
                {
                  temp = scratch_reg ? scratch_reg : operand0;
                  /* TEMP will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode
                     it was originally given to us.  */
                  temp = force_mode (word_mode, temp);
                }
              else
                temp = gen_reg_rtx (mode);

              /* Loading a SYMBOL_REF into a register makes that register
                 safe to be used as the base in an indexed address.

                 Don't mark hard registers though.  That loses.
                 */
              if (GET_CODE (operand0) == REG
                  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
                mark_reg_pointer (operand0, BITS_PER_UNIT);
              if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
                mark_reg_pointer (temp, BITS_PER_UNIT);

              if (ishighonly)
                set = gen_rtx_SET (mode, operand0, temp);
              else
                set = gen_rtx_SET (VOIDmode,
                                   operand0,
                                   gen_rtx_LO_SUM (mode, temp, operand1));

              emit_insn (gen_rtx_SET (VOIDmode,
                                      temp,
                                      gen_rtx_HIGH (mode, operand1)));
              emit_insn (set);
            }
          return 1;
        }
      else if (pa_tls_referenced_p (operand1))
        {
          rtx tmp = operand1;
          rtx addend = NULL;

          if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
            {
              addend = XEXP (XEXP (tmp, 0), 1);
              tmp = XEXP (XEXP (tmp, 0), 0);
            }

          gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
          tmp = legitimize_tls_address (tmp);
          if (addend)
            {
              tmp = gen_rtx_PLUS (mode, tmp, addend);
              tmp = force_operand (tmp, operands[0]);
            }
          operands[1] = tmp;
        }
      else if (GET_CODE (operand1) != CONST_INT
               || !cint_ok_for_move (INTVAL (operand1)))
        {
          rtx insn, temp;
          rtx op1 = operand1;
          HOST_WIDE_INT value = 0;
          HOST_WIDE_INT insv = 0;
          int insert = 0;

          if (GET_CODE (operand1) == CONST_INT)
            value = INTVAL (operand1);

          if (TARGET_64BIT
              && GET_CODE (operand1) == CONST_INT
              && HOST_BITS_PER_WIDE_INT > 32
              && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
            {
              HOST_WIDE_INT nval;

              /* Extract the low order 32 bits of the value and sign extend.
                 If the new value is the same as the original value, we
                 can use the original value as-is.  If the new value is
                 different, we use it and insert the most-significant
                 32-bits of the original value into the final result.  */
              nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
                      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
              if (value != nval)
                {
#if HOST_BITS_PER_WIDE_INT > 32
                  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
#endif
                  insert = 1;
                  value = nval;
                  operand1 = GEN_INT (nval);
                }
            }

          if (reload_in_progress || reload_completed)
            temp = scratch_reg ? scratch_reg : operand0;
          else
            temp = gen_reg_rtx (mode);

          /* We don't directly split DImode constants on 32-bit targets
             because PLUS uses an 11-bit immediate and the insn sequence
             generated is not as efficient as the one using HIGH/LO_SUM.  */
          if (GET_CODE (operand1) == CONST_INT
              && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
              && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
              && !insert)
            {
              /* Directly break constant into high and low parts.  This
                 provides better optimization opportunities because various
                 passes recognize constants split with PLUS but not LO_SUM.
                 We use a 14-bit signed low part except when the addition
                 of 0x4000 to the high part might change the sign of the
                 high part.
                 */
              HOST_WIDE_INT low = value & 0x3fff;
              HOST_WIDE_INT high = value & ~ 0x3fff;

              if (low >= 0x2000)
                {
                  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
                    high += 0x2000;
                  else
                    high += 0x4000;
                }

              low = value - high;

              emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
              operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
            }
          else
            {
              emit_insn (gen_rtx_SET (VOIDmode, temp,
                                      gen_rtx_HIGH (mode, operand1)));
              operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
            }

          insn = emit_move_insn (operands[0], operands[1]);

          /* Now insert the most significant 32 bits of the value
             into the register.  When we don't have a second register
             available, it could take up to nine instructions to load
             a 64-bit integer constant.  Prior to reload, we force
             constants that would take more than three instructions
             to load to the constant pool.  During and after reload,
             we have to handle all possible values.  */
          if (insert)
            {
              /* Use a HIGH/LO_SUM/INSV sequence if we have a second
                 register and the value to be inserted is outside the
                 range that can be loaded with three depdi instructions.  */
              if (temp != operand0 && (insv >= 16384 || insv < -16384))
                {
                  operand1 = GEN_INT (insv);

                  emit_insn (gen_rtx_SET (VOIDmode, temp,
                                          gen_rtx_HIGH (mode, operand1)));
                  emit_move_insn (temp,
                                  gen_rtx_LO_SUM (mode, temp, operand1));
                  emit_insn (gen_insv (operand0, GEN_INT (32),
                                       const0_rtx, temp));
                }
              else
                {
                  int len = 5, pos = 27;

                  /* Insert the bits using the depdi instruction.  */
                  while (pos >= 0)
                    {
                      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
                      HOST_WIDE_INT sign = v5 < 0;

                      /* Left extend the insertion.  */
                      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
                      while (pos > 0 && (insv & 1) == sign)
                        {
                          insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
                          len += 1;
                          pos -= 1;
                        }

                      emit_insn (gen_insv (operand0, GEN_INT (len),
                                           GEN_INT (pos), GEN_INT (v5)));

                      len = pos > 0 && pos < 5 ? pos : 5;
                      pos -= len;
                    }
                }
            }

          set_unique_reg_note (insn, REG_EQUAL, op1);

          return 1;
        }
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}

/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */

int
reloc_needed (tree exp)
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
      break;

    CASE_CONVERT:
    case NON_LVALUE_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
        tree value;
        unsigned HOST_WIDE_INT ix;

        FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
          if (value)
            reloc |= reloc_needed (value);
      }
      break;

    case ERROR_MARK:
      break;

    default:
      break;
    }
  return reloc;
}

/* Does operand (which is a symbolic_operand) live in text space?
   If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
   will be true.
   */

int
read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (operand) == CONST)
    operand = XEXP (XEXP (operand, 0), 0);
  if (flag_pic)
    {
      if (GET_CODE (operand) == SYMBOL_REF)
        return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
    }
  else
    {
      if (GET_CODE (operand) == SYMBOL_REF)
        return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
    }
  return 1;
}

/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
const char *
singlemove_string (rtx *operands)
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      gcc_assert (GET_MODE (operands[1]) == SFmode);

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
         bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
        return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
        return "ldil L'%1,%0";
      else if (zdepi_cint_p (intval))
        return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
      else
        return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}

/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instructions.  Store the immediate value to insert in OP[0].  */
static void
compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
        break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32 - lsb; len++)
        {
          if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
            break;
        }

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb;
  op[2] = len;
}

/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the depdi,z
   instructions.  Store the immediate value to insert in OP[0].  */
void
compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  int lsb, len, maxlen;

  maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < maxlen; lsb++)
    {
      if ((imm & 1) != 0)
        break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.
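         (Illustrative example: imm = 0x3f0 gives lsb = 4 and a bitstring
         width of 6, i.e. a depdi,z -1,59,6 style deposit.)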
         */
      for (len = 5; len < maxlen - lsb; len++)
        {
          if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
            break;
        }

      /* Extend length if host is narrow and IMM is negative.  */
      if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
        len += 32;

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 63 - lsb;
  op[2] = len;
}

/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  */

const char *
output_move_double (rtx *operands)
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  */
  gcc_assert (optype0 == REGOP || optype1 == REGOP);

  /* Handle copies between general and floating registers.  */

  if (optype0 == REGOP && optype1 == REGOP
      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
    {
      if (FP_REG_P (operands[0]))
        {
          output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
          output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
          return "{fldds|fldd} -16(%%sp),%0";
        }
      else
        {
          output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
          output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
          return "{ldws|ldw} -12(%%sp),%R0";
        }
    }

  /* Handle auto-decrementing and auto-incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Improve this when this
     port is taught about the PA's general inc/dec addressing.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
         doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
        {
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

          operands[0] = XEXP (addr, 0);
          gcc_assert (GET_CODE (operands[1]) == REG
                      && GET_CODE (operands[0]) == REG);

          gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));

          /* No overlap between high target register and address
             register.
             (We do this in a non-obvious way to save a
             register file writeback.)  */
          if (GET_CODE (addr) == POST_INC)
            return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
          return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
        }
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
        {
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

          operands[0] = XEXP (addr, 0);
          gcc_assert (GET_CODE (operands[1]) == REG
                      && GET_CODE (operands[0]) == REG);

          gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
          /* No overlap between high target register and address
             register.  (We do this in a non-obvious way to save a
             register file writeback.)  */
          if (GET_CODE (addr) == PRE_INC)
            return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
          return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
        }
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
         doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
        {
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

          operands[1] = XEXP (addr, 0);
          gcc_assert (GET_CODE (operands[0]) == REG
                      && GET_CODE (operands[1]) == REG);

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback.)  */
              if (GET_CODE (addr) == POST_INC)
                return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
              return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
            }
          else
            {
              /* This is an undefined situation.  We should load into the
                 address register *and* update that register.  Probably
                 we don't need to handle this at all.  */
              if (GET_CODE (addr) == POST_INC)
                return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
              return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
            }
        }
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
        {
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

          operands[1] = XEXP (addr, 0);
          gcc_assert (GET_CODE (operands[0]) == REG
                      && GET_CODE (operands[1]) == REG);

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback.)  */
              if (GET_CODE (addr) == PRE_INC)
                return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
              return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
            }
          else
            {
              /* This is an undefined situation.  We should load into the
                 address register *and* update that register.  Probably
                 we don't need to handle this at all.
                 */
              if (GET_CODE (addr) == PRE_INC)
                return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
              return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
            }
        }
      else if (GET_CODE (addr) == PLUS
               && GET_CODE (XEXP (addr, 0)) == MULT)
        {
          rtx xoperands[4];
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              xoperands[0] = high_reg;
              xoperands[1] = XEXP (addr, 1);
              xoperands[2] = XEXP (XEXP (addr, 0), 0);
              xoperands[3] = XEXP (XEXP (addr, 0), 1);
              output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
                               xoperands);
              return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
            }
          else
            {
              xoperands[0] = high_reg;
              xoperands[1] = XEXP (addr, 1);
              xoperands[2] = XEXP (XEXP (addr, 0), 0);
              xoperands[3] = XEXP (XEXP (addr, 0), 1);
              output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
                               xoperands);
              return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
            }
        }
    }

  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adjust_address (operands[0], SImode, 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adjust_address (operands[1], SImode, 4);
  else if (optype1 == CNSTOP)
    split_double (operands[1], &operands[1], &latehalf[1]);
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

     mem -> register where the first half of the destination register
     is the same register used in the memory's address.  Reload
     can create such insns.

     mem in this case will be either register indirect or register
     indirect plus a valid offset.

     register -> register move where REGNO(dst) == REGNO(src + 1);
     someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
                            operands[1], 0))
    {
      /* Do the late half first.  */
      if (addreg1)
        output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
        output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return singlemove_string (operands);
    }

  /* Now handle register -> register case.
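     For instance (illustrative): copying the pair {%r3,%r4} into
     {%r4,%r5} must move %r4 into %r5 before %r3 clobbers %r4, which
     is why the late half goes first below.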
     */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (singlemove_string (latehalf), latehalf);
      return singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do that word.  */
  output_asm_insn (singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}

const char *
output_fp_move_double (rtx *operands)
{
  if (FP_REG_P (operands[0]))
    {
      if (FP_REG_P (operands[1])
          || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
        output_asm_insn ("fcpy,dbl %f1,%0", operands);
      else
        output_asm_insn ("fldd%F1 %1,%0", operands);
    }
  else if (FP_REG_P (operands[1]))
    {
      output_asm_insn ("fstd%F0 %1,%0", operands);
    }
  else
    {
      rtx xoperands[2];

      gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));

      /* This is a pain.  You have to be prepared to deal with an
         arbitrary address here including pre/post increment/decrement.

         So avoid this in the MD.  */
      gcc_assert (GET_CODE (operands[0]) == REG);

      xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
      xoperands[0] = operands[0];
      output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
    }
  return "";
}

/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.  */

static rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
        addr = XEXP (addr, 0);
      else if (GET_CODE (XEXP (addr, 1)) == REG)
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
        addr = XEXP (addr, 0);
      else
        gcc_unreachable ();
    }
  gcc_assert (GET_CODE (addr) == REG);
  return addr;
}

/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT.
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.  */

const char *
output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT align = INTVAL (operands[5]);
  unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]);

  /* We can't move more than a word at a time because the PA
     has no integer move insns longer than a word.  (Could use
     fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
    case 8:
      /* Pre-adjust the loop counter.
         */
      operands[4] = GEN_INT (n_bytes - 16);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("ldd,ma 8(%1),%3", operands);
      output_asm_insn ("ldd,ma 8(%1),%6", operands);
      output_asm_insn ("std,ma %3,8(%0)", operands);
      output_asm_insn ("addib,>= -16,%2,.-12", operands);
      output_asm_insn ("std,ma %6,8(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
         residual to copy!  */
      if (n_bytes % 16 != 0)
        {
          operands[4] = GEN_INT (n_bytes % 8);
          if (n_bytes % 16 >= 8)
            output_asm_insn ("ldd,ma 8(%1),%3", operands);
          if (n_bytes % 8 != 0)
            output_asm_insn ("ldd 0(%1),%6", operands);
          if (n_bytes % 16 >= 8)
            output_asm_insn ("std,ma %3,8(%0)", operands);
          if (n_bytes % 8 != 0)
            output_asm_insn ("stdby,e %6,%4(%0)", operands);
        }
      return "";

    case 4:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 8);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
      output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
      output_asm_insn ("addib,>= -8,%2,.-12", operands);
      output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
         residual to copy!  */
      if (n_bytes % 8 != 0)
        {
          operands[4] = GEN_INT (n_bytes % 4);
          if (n_bytes % 8 >= 4)
            output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
          if (n_bytes % 4 != 0)
            output_asm_insn ("ldw 0(%1),%6", operands);
          if (n_bytes % 8 >= 4)
            output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
          if (n_bytes % 4 != 0)
            output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
        }
      return "";

    case 2:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 4);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
      output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
      output_asm_insn ("addib,>= -4,%2,.-12", operands);
      output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 4 != 0)
        {
          if (n_bytes % 4 >= 2)
            output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
          if (n_bytes % 2 != 0)
            output_asm_insn ("ldb 0(%1),%6", operands);
          if (n_bytes % 4 >= 2)
            output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
          if (n_bytes % 2 != 0)
            output_asm_insn ("stb %6,0(%0)", operands);
        }
      return "";

    case 1:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 2);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
      output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
      output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
      output_asm_insn ("addib,>= -2,%2,.-12", operands);
      output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);

      /* Handle the residual.
         */
      if (n_bytes % 2 != 0)
        {
          output_asm_insn ("ldb 0(%1),%3", operands);
          output_asm_insn ("stb %3,0(%0)", operands);
        }
      return "";

    default:
      gcc_unreachable ();
    }
}

/* Count the number of insns necessary to handle this block move.

   Basic structure is the same as emit_block_move, except that we
   count insns rather than emit them.  */

static int
compute_movmem_length (rtx insn)
{
  rtx pat = PATTERN (insn);
  unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
  unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
  unsigned int n_insns = 0;

  /* We can't move more than a word at a time because the PA
     has no integer move insns longer than a word.  (Could use
     fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* The basic copying loop.  */
  n_insns = 6;

  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
        n_insns += 2;

      if ((n_bytes % align) != 0)
        n_insns += 2;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}

/* Emit code to perform a block clear.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is a register for temporary storage.
   OPERANDS[2] is the size as a CONST_INT.
   OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */

const char *
output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT align = INTVAL (operands[3]);
  unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]);

  /* We can't clear more than a word at a time because the PA
     has no integer move insns longer than a word.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the clear).  */
  switch (align)
    {
    case 8:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 16);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop.  */
      output_asm_insn ("std,ma %%r0,8(%0)", operands);
      output_asm_insn ("addib,>= -16,%1,.-4", operands);
      output_asm_insn ("std,ma %%r0,8(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
         residual to clear!  */
      if (n_bytes % 16 != 0)
        {
          operands[2] = GEN_INT (n_bytes % 8);
          if (n_bytes % 16 >= 8)
            output_asm_insn ("std,ma %%r0,8(%0)", operands);
          if (n_bytes % 8 != 0)
            output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
        }
      return "";

    case 4:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 8);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop.  */
      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
      output_asm_insn ("addib,>= -8,%1,.-4", operands);
      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
         residual to clear!
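         For example (illustrative): n_bytes = 23 leaves 23 % 8 = 7
         residual bytes; one stw,ma clears four of them and the stby,e
         with offset 23 % 4 = 3 clears the final three.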
         */
      if (n_bytes % 8 != 0)
        {
          operands[2] = GEN_INT (n_bytes % 4);
          if (n_bytes % 8 >= 4)
            output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
          if (n_bytes % 4 != 0)
            output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
        }
      return "";

    case 2:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 4);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop.  */
      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
      output_asm_insn ("addib,>= -4,%1,.-4", operands);
      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 4 != 0)
        {
          if (n_bytes % 4 >= 2)
            output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
          if (n_bytes % 2 != 0)
            output_asm_insn ("stb %%r0,0(%0)", operands);
        }
      return "";

    case 1:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 2);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop.  */
      output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
      output_asm_insn ("addib,>= -2,%1,.-4", operands);
      output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 2 != 0)
        output_asm_insn ("stb %%r0,0(%0)", operands);

      return "";

    default:
      gcc_unreachable ();
    }
}

/* Count the number of insns necessary to handle this block clear.

   Basic structure is the same as emit_block_move, except that we
   count insns rather than emit them.  */

static int
compute_clrmem_length (rtx insn)
{
  rtx pat = PATTERN (insn);
  unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
  unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
  unsigned int n_insns = 0;

  /* We can't clear more than a word at a time because the PA
     has no integer move insns longer than a word.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* The basic loop.  */
  n_insns = 4;

  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
        n_insns++;

      if ((n_bytes % align) != 0)
        n_insns++;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}

const char *
output_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;

      for (ls0 = 0; ls0 < 32; ls0++)
        if ((mask & (1 << ls0)) == 0)
          break;

      for (ls1 = ls0; ls1 < 32; ls1++)
        if ((mask & (1 << ls1)) != 0)
          break;

      for (ms0 = ls1; ms0 < 32; ms0++)
        if ((mask & (1 << ms0)) == 0)
          break;

      gcc_assert (ms0 == 32);

      if (ls1 == 32)
        {
          len = ls0;

          gcc_assert (len);

          operands[2] = GEN_INT (len);
          return "{extru|extrw,u} %1,31,%2,%0";
        }
      else
        {
          /* We could use this `depi' for the case above as well, but `depi'
             requires one more register file access than an `extru'.
             */

          p = 31 - ls0;
          len = ls1 - ls0;

          operands[2] = GEN_INT (p);
          operands[3] = GEN_INT (len);
          return "{depi|depwi} 0,%2,%3,%0";
        }
    }
  else
    return "and %1,%2,%0";
}

/* Return a string to perform a bitwise-and of operands[1] with operands[2]
   storing the result in operands[0].  */
const char *
output_64bit_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;

      for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
        if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
          break;

      for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
        if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
          break;

      for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
        if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
          break;

      gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);

      if (ls1 == HOST_BITS_PER_WIDE_INT)
        {
          len = ls0;

          gcc_assert (len);

          operands[2] = GEN_INT (len);
          return "extrd,u %1,63,%2,%0";
        }
      else
        {
          /* We could use this `depi' for the case above as well, but `depi'
             requires one more register file access than an `extru'.  */

          p = 63 - ls0;
          len = ls1 - ls0;

          operands[2] = GEN_INT (p);
          operands[3] = GEN_INT (len);
          return "depdi 0,%2,%3,%0";
        }
    }
  else
    return "and %1,%2,%0";
}

const char *
output_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < 32; bs0++)
    if ((mask & (1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < 32; bs1++)
    if ((mask & (1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 31 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "{depi|depwi} -1,%2,%3,%0";
}

/* Return a string to perform a bitwise-or of operands[1] with operands[2]
   storing the result in operands[0].  */
const char *
output_64bit_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
              || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 63 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "depdi -1,%2,%3,%0";
}

/* Target hook for assembling integer objects.  This code handles
   aligned SI and DI integers specially since function references
   must be preceded by P%.  */

static bool
pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == UNITS_PER_WORD
      && aligned_p
      && function_label_operand (x, VOIDmode))
    {
      fputs (size == 8
"\t.dword\tP%" : "\t.word\tP%", asm_out_file); 3122 output_addr_const (asm_out_file, x); 3123 fputc ('\n', asm_out_file); 3124 return true; 3125 } 3126 return default_assemble_integer (x, size, aligned_p); 3127 } 3128 3129 /* Output an ascii string. */ 3130 void 3131 output_ascii (FILE *file, const char *p, int size) 3132 { 3133 int i; 3134 int chars_output; 3135 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */ 3136 3137 /* The HP assembler can only take strings of 256 characters at one 3138 time. This is a limitation on input line length, *not* the 3139 length of the string. Sigh. Even worse, it seems that the 3140 restriction is in number of input characters (see \xnn & 3141 \whatever). So we have to do this very carefully. */ 3142 3143 fputs ("\t.STRING \"", file); 3144 3145 chars_output = 0; 3146 for (i = 0; i < size; i += 4) 3147 { 3148 int co = 0; 3149 int io = 0; 3150 for (io = 0, co = 0; io < MIN (4, size - i); io++) 3151 { 3152 register unsigned int c = (unsigned char) p[i + io]; 3153 3154 if (c == '\"' || c == '\\') 3155 partial_output[co++] = '\\'; 3156 if (c >= ' ' && c < 0177) 3157 partial_output[co++] = c; 3158 else 3159 { 3160 unsigned int hexd; 3161 partial_output[co++] = '\\'; 3162 partial_output[co++] = 'x'; 3163 hexd = c / 16 - 0 + '0'; 3164 if (hexd > '9') 3165 hexd -= '9' - 'a' + 1; 3166 partial_output[co++] = hexd; 3167 hexd = c % 16 - 0 + '0'; 3168 if (hexd > '9') 3169 hexd -= '9' - 'a' + 1; 3170 partial_output[co++] = hexd; 3171 } 3172 } 3173 if (chars_output + co > 243) 3174 { 3175 fputs ("\"\n\t.STRING \"", file); 3176 chars_output = 0; 3177 } 3178 fwrite (partial_output, 1, (size_t) co, file); 3179 chars_output += co; 3180 co = 0; 3181 } 3182 fputs ("\"\n", file); 3183 } 3184 3185 /* Try to rewrite floating point comparisons & branches to avoid 3186 useless add,tr insns. 3187 3188 CHECK_NOTES is nonzero if we should examine REG_DEAD notes 3189 to see if FPCC is dead. CHECK_NOTES is nonzero for the 3190 first attempt to remove useless add,tr insns. It is zero 3191 for the second pass as reorg sometimes leaves bogus REG_DEAD 3192 notes lying around. 3193 3194 When CHECK_NOTES is zero we can only eliminate add,tr insns 3195 when there's a 1:1 correspondence between fcmp and ftest/fbranch 3196 instructions. */ 3197 static void 3198 remove_useless_addtr_insns (int check_notes) 3199 { 3200 rtx insn; 3201 static int pass = 0; 3202 3203 /* This is fairly cheap, so always run it when optimizing. */ 3204 if (optimize > 0) 3205 { 3206 int fcmp_count = 0; 3207 int fbranch_count = 0; 3208 3209 /* Walk all the insns in this function looking for fcmp & fbranch 3210 instructions. Keep track of how many of each we find. */ 3211 for (insn = get_insns (); insn; insn = next_insn (insn)) 3212 { 3213 rtx tmp; 3214 3215 /* Ignore anything that isn't an INSN or a JUMP_INSN. */ 3216 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN) 3217 continue; 3218 3219 tmp = PATTERN (insn); 3220 3221 /* It must be a set. */ 3222 if (GET_CODE (tmp) != SET) 3223 continue; 3224 3225 /* If the destination is CCFP, then we've found an fcmp insn. */ 3226 tmp = SET_DEST (tmp); 3227 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0) 3228 { 3229 fcmp_count++; 3230 continue; 3231 } 3232 3233 tmp = PATTERN (insn); 3234 /* If this is an fbranch instruction, bump the fbranch counter. 
             */
          if (GET_CODE (tmp) == SET
              && SET_DEST (tmp) == pc_rtx
              && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
              && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
              && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
              && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
            {
              fbranch_count++;
              continue;
            }
        }

      /* Find all floating point compare + branch insns.  If possible,
         reverse the comparison & the branch to avoid add,tr insns.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
        {
          rtx tmp, next;

          /* Ignore anything that isn't an INSN.  */
          if (GET_CODE (insn) != INSN)
            continue;

          tmp = PATTERN (insn);

          /* It must be a set.  */
          if (GET_CODE (tmp) != SET)
            continue;

          /* The destination must be CCFP, which is register zero.  */
          tmp = SET_DEST (tmp);
          if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
            continue;

          /* INSN should be a set of CCFP.

             See if the result of this insn is used in a reversed FP
             conditional branch.  If so, reverse our condition and
             the branch.  Doing so avoids useless add,tr insns.  */
          next = next_insn (insn);
          while (next)
            {
              /* Jumps, calls and labels stop our search.  */
              if (GET_CODE (next) == JUMP_INSN
                  || GET_CODE (next) == CALL_INSN
                  || GET_CODE (next) == CODE_LABEL)
                break;

              /* As does another fcmp insn.  */
              if (GET_CODE (next) == INSN
                  && GET_CODE (PATTERN (next)) == SET
                  && GET_CODE (SET_DEST (PATTERN (next))) == REG
                  && REGNO (SET_DEST (PATTERN (next))) == 0)
                break;

              next = next_insn (next);
            }

          /* Is NEXT a branch?  */
          if (next
              && GET_CODE (next) == JUMP_INSN)
            {
              rtx pattern = PATTERN (next);

              /* If it is a reversed fp conditional branch (e.g. uses
                 add,tr) and CCFP dies, then reverse our conditional and
                 the branch to avoid the add,tr.  */
              if (GET_CODE (pattern) == SET
                  && SET_DEST (pattern) == pc_rtx
                  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
                  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
                  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
                  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
                  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
                  && (fcmp_count == fbranch_count
                      || (check_notes
                          && find_regno_note (next, REG_DEAD, 0))))
                {
                  /* Reverse the branch.  */
                  tmp = XEXP (SET_SRC (pattern), 1);
                  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
                  XEXP (SET_SRC (pattern), 2) = tmp;
                  INSN_CODE (next) = -1;

                  /* Reverse our condition.  */
                  tmp = PATTERN (insn);
                  PUT_CODE (XEXP (tmp, 1),
                            (reverse_condition_maybe_unordered
                             (GET_CODE (XEXP (tmp, 1)))));
                }
            }
        }
    }

  pass = !pass;
}

/* You may have trouble believing this, but this is the 32 bit HP-PA
   stack layout.  Wow.

   Offset		Contents

   Variable arguments (optional; any number may be allocated)

   SP-(4*(N+9))		arg word N
   :			:
   SP-56		arg word 5
   SP-52		arg word 4

   Fixed arguments (must be allocated; may remain unused)

   SP-48		arg word 3
   SP-44		arg word 2
   SP-40		arg word 1
   SP-36		arg word 0

   Frame Marker

   SP-32		External Data Pointer (DP)
   SP-28		External sr4
   SP-24		External/stub RP (RP')
   SP-20		Current RP
   SP-16		Static Link
   SP-12		Clean up
   SP-8			Calling Stub RP (RP'')
   SP-4			Previous SP

   Top of Frame

   SP-0			Stack Pointer (points to next available address)

*/

/* This function saves registers as follows.  Registers marked with ' are
   this function's registers (as opposed to the previous function's).
   If a frame_pointer isn't needed, r4 is saved as a general register;
   the space for the frame pointer is still allocated, though, to keep
   things simple.

   Top of Frame

   SP (FP')		Previous FP
   SP + 4		Alignment filler (sigh)
   SP + 8		Space for locals reserved here.
   .			.
   .			.
   .			.
   SP + n		All call saved registers used.
   .			.
   .			.
   .			.
   SP + o		All call saved fp registers used.
   .			.
   .			.
   .			.
   SP + p (SP')		points to next available address.

*/

/* Global variables set by output_function_prologue().  */
/* Size of frame.  Need to know this to emit return insns from
   leaf procedures.  */
static HOST_WIDE_INT actual_fsize, local_fsize;
static int save_fregs;

/* Emit RTL to store REG at the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.

   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows
   this.  */

static void
store_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx insn, dest, src, basereg;

  src = gen_rtx_REG (word_mode, reg);
  basereg = gen_rtx_REG (Pmode, base);
  if (VAL_14_BITS_P (disp))
    {
      dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
      insn = emit_move_insn (dest, src);
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
        {
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_SET (VOIDmode, tmpreg,
                                     gen_rtx_PLUS (Pmode, basereg, delta)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      dest = gen_rtx_MEM (word_mode, tmpreg);
      insn = emit_move_insn (dest, src);
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
      insn = emit_move_insn (dest, src);
      if (DO_FRAME_NOTES)
        add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (word_mode,
                                                gen_rtx_PLUS (word_mode,
                                                              basereg,
                                                              delta)),
                                   src));
    }

  if (DO_FRAME_NOTES)
    RTX_FRAME_RELATED_P (insn) = 1;
}

/* Emit RTL to store REG at the memory location specified by BASE and then
   add MOD to BASE.
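   On 32-bit targets this is expected to assemble to a single
   store-with-modify (stwm/stw,ma) instruction; that reading of the
   gen_post_store pattern is an assumption, not spelled out here.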
   MOD must be <= 8k.  */

static void
store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
{
  rtx insn, basereg, srcreg, delta;

  gcc_assert (VAL_14_BITS_P (mod));

  basereg = gen_rtx_REG (Pmode, base);
  srcreg = gen_rtx_REG (word_mode, reg);
  delta = GEN_INT (mod);

  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
  if (DO_FRAME_NOTES)
    {
      RTX_FRAME_RELATED_P (insn) = 1;

      /* RTX_FRAME_RELATED_P must be set on each frame related set
         in a parallel with more than one element.  */
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
    }
}

/* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
   where DISP > 8k by using the add_high_const patterns.  NOTE indicates
   whether to add a frame note or not.

   In the DISP > 8k case, we leave the high part of the address in %r1.
   There is code in expand_hppa_{prologue,epilogue} that knows about
   this.  */

static void
set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
{
  rtx insn;

  if (VAL_14_BITS_P (disp))
    {
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
                             plus_constant (gen_rtx_REG (Pmode, base), disp));
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
                             gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
        add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                      gen_rtx_SET (VOIDmode, tmpreg,
                                   gen_rtx_PLUS (Pmode, basereg, delta)));
    }
  else
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg,
                      gen_rtx_PLUS (Pmode, basereg,
                                    gen_rtx_HIGH (Pmode, delta)));
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
                             gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  if (DO_FRAME_NOTES && note)
    RTX_FRAME_RELATED_P (insn) = 1;
}

HOST_WIDE_INT
compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
{
  int freg_saved = 0;
  int i, j;

  /* The code in hppa_expand_prologue and hppa_expand_epilogue must
     be consistent with the rounding and size calculation done here.
     Change them at the same time.  */

  /* We do our own stack alignment.  First, round the size of the
     stack locals up to a word boundary.  */
  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);

  /* Space for previous frame pointer + filler.  If any frame is
     allocated, we need to add in the STARTING_FRAME_OFFSET.  We
     waste some space here for the sake of HP compatibility.  The
     first slot is only used when the frame pointer is needed.  */
  if (size || frame_pointer_needed)
    size += STARTING_FRAME_OFFSET;

  /* If the current function calls __builtin_eh_return, then we need
     to allocate stack space for registers that will hold data for
     the exception handler.  */
  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
        continue;
      size += i * UNITS_PER_WORD;
    }

  /* Account for space used by the callee general register saves.
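     As an illustration: a 32-bit function with all of %r4-%r18 live
     and a frame pointer in use adds 15 words here.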
     */
  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
    if (df_regs_ever_live_p (i))
      size += UNITS_PER_WORD;

  /* Account for space used by the callee floating point register saves.  */
  for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
    if (df_regs_ever_live_p (i)
        || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
      {
        freg_saved = 1;

        /* We always save both halves of the FP register, so always
           increment the frame size by 8 bytes.  */
        size += 8;
      }

  /* If any of the floating registers are saved, account for the
     alignment needed for the floating point register save block.  */
  if (freg_saved)
    {
      size = (size + 7) & ~7;
      if (fregs_live)
        *fregs_live = 1;
    }

  /* The various ABIs include space for the outgoing parameters in the
     size of the current function's stack frame.  We don't need to align
     for the outgoing arguments as their alignment is set by the final
     rounding for the frame as a whole.  */
  size += crtl->outgoing_args_size;

  /* Allocate space for the fixed frame marker.  This space must be
     allocated for any function that makes calls or allocates
     stack space.  */
  if (!current_function_is_leaf || size)
    size += TARGET_64BIT ? 48 : 32;

  /* Finally, round to the preferred stack boundary.  */
  return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
          & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
}

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.  */

/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
   of memory.  If any fpu reg is used in the function, we allocate
   such a block here, at the bottom of the frame, just in case it's needed.

   If this function is a leaf procedure, then we may choose not
   to do a "save" insn.  The decision about whether or not
   to do this is made in regclass.c.  */

static void
pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* The function's label and associated .PROC must never be
     separated and must be output *after* any profiling declarations
     to avoid changing spaces/subspaces within a procedure.  */
  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
  fputs ("\t.PROC\n", file);

  /* hppa_expand_prologue does the dirty work now.  We just need
     to output the assembler directives which denote the start
     of a function.  */
  fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
  if (current_function_is_leaf)
    fputs (",NO_CALLS", file);
  else
    fputs (",CALLS", file);
  if (rp_saved)
    fputs (",SAVE_RP", file);

  /* The SAVE_SP flag is used to indicate that register %r3 is stored
     at the beginning of the frame and that it is used as the frame
     pointer for the frame.  We do this because our current frame
We do this because our current frame 3650 layout doesn't conform to that specified in the HP runtime 3651 documentation and we need a way to indicate to programs such as 3652 GDB where %r3 is saved. The SAVE_SP flag was chosen because it 3653 isn't used by HP compilers but is supported by the assembler. 3654 However, SAVE_SP is supposed to indicate that the previous stack 3655 pointer has been saved in the frame marker. */ 3656 if (frame_pointer_needed) 3657 fputs (",SAVE_SP", file); 3658 3659 /* Pass on information about the number of callee register saves 3660 performed in the prologue. 3661 3662 The compiler is supposed to pass the highest register number 3663 saved, the assembler then has to adjust that number before 3664 entering it into the unwind descriptor (to account for any 3665 caller saved registers with lower register numbers than the 3666 first callee saved register). */ 3667 if (gr_saved) 3668 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2); 3669 3670 if (fr_saved) 3671 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11); 3672 3673 fputs ("\n\t.ENTRY\n", file); 3674 3675 remove_useless_addtr_insns (0); 3676 } 3677 3678 void 3679 hppa_expand_prologue (void) 3680 { 3681 int merge_sp_adjust_with_store = 0; 3682 HOST_WIDE_INT size = get_frame_size (); 3683 HOST_WIDE_INT offset; 3684 int i; 3685 rtx insn, tmpreg; 3686 3687 gr_saved = 0; 3688 fr_saved = 0; 3689 save_fregs = 0; 3690 3691 /* Compute total size for frame pointer, filler, locals and rounding to 3692 the next word boundary. Similar code appears in compute_frame_size 3693 and must be changed in tandem with this code. */ 3694 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); 3695 if (local_fsize || frame_pointer_needed) 3696 local_fsize += STARTING_FRAME_OFFSET; 3697 3698 actual_fsize = compute_frame_size (size, &save_fregs); 3699 3700 /* Compute a few things we will use often. */ 3701 tmpreg = gen_rtx_REG (word_mode, 1); 3702 3703 /* Save RP first. The calling conventions manual states RP will 3704 always be stored into the caller's frame at sp - 20 or sp - 16 3705 depending on which ABI is in use. */ 3706 if (df_regs_ever_live_p (2) || crtl->calls_eh_return) 3707 { 3708 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM); 3709 rp_saved = true; 3710 } 3711 else 3712 rp_saved = false; 3713 3714 /* Allocate the local frame and set up the frame pointer if needed. */ 3715 if (actual_fsize != 0) 3716 { 3717 if (frame_pointer_needed) 3718 { 3719 /* Copy the old frame pointer temporarily into %r1. Set up the 3720 new stack pointer, then store away the saved old frame pointer 3721 into the stack at sp and at the same time update the stack 3722 pointer by actual_fsize bytes. Two versions, first 3723 handles small (<8k) frames. The second handles large (>=8k) 3724 frames. */ 3725 insn = emit_move_insn (tmpreg, frame_pointer_rtx); 3726 if (DO_FRAME_NOTES) 3727 RTX_FRAME_RELATED_P (insn) = 1; 3728 3729 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); 3730 if (DO_FRAME_NOTES) 3731 RTX_FRAME_RELATED_P (insn) = 1; 3732 3733 if (VAL_14_BITS_P (actual_fsize)) 3734 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize); 3735 else 3736 { 3737 /* It is incorrect to store the saved frame pointer at *sp, 3738 then increment sp (writes beyond the current stack boundary). 3739 3740 So instead use stwm to store at *sp and post-increment the 3741 stack pointer as an atomic operation. Then increment sp to 3742 finish allocating the new frame. 
*/
3743 HOST_WIDE_INT adjust1 = 8192 - 64;
3744 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3745
3746 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3747 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3748 adjust2, 1);
3749 }
3750
3751 /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3752 we need to store the previous stack pointer (frame pointer)
3753 into the frame marker on targets that use the HP unwind
3754 library.  This allows the HP unwind library to be used to
3755 unwind GCC frames.  However, we are not fully compatible
3756 with the HP library because our frame layout differs from
3757 that specified in the HP runtime specification.
3758
3759 We don't want a frame note on this instruction as the frame
3760 marker moves during dynamic stack allocation.
3761
3762 This instruction also serves as a blockage to prevent
3763 register spills from being scheduled before the stack
3764 pointer is raised.  This is necessary as we store
3765 registers using the frame pointer as a base register,
3766 and the frame pointer is set before sp is raised.  */
3767 if (TARGET_HPUX_UNWIND_LIBRARY)
3768 {
3769 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3770 GEN_INT (TARGET_64BIT ? -8 : -4));
3771
3772 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3773 frame_pointer_rtx);
3774 }
3775 else
3776 emit_insn (gen_blockage ());
3777 }
3778 /* No frame pointer needed.  */
3779 else
3780 {
3781 /* In some cases we can perform the first callee register save
3782 and allocate the stack frame at the same time.  If so, just
3783 make a note of it and defer allocating the frame until saving
3784 the callee registers.  */
3785 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3786 merge_sp_adjust_with_store = 1;
3787 /* Cannot optimize.  Adjust the stack frame by actual_fsize
3788 bytes.  */
3789 else
3790 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3791 actual_fsize, 1);
3792 }
3793 }
3794
3795 /* Normal register save.
3796
3797 Do not save the frame pointer in the frame_pointer_needed case.  It
3798 was done earlier.  */
3799 if (frame_pointer_needed)
3800 {
3801 offset = local_fsize;
3802
3803 /* Saving the EH return data registers in the frame is the simplest
3804 way to get the frame unwind information emitted.  We put them
3805 just before the general registers.  */
3806 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3807 {
3808 unsigned int i, regno;
3809
3810 for (i = 0; ; ++i)
3811 {
3812 regno = EH_RETURN_DATA_REGNO (i);
3813 if (regno == INVALID_REGNUM)
3814 break;
3815
3816 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3817 offset += UNITS_PER_WORD;
3818 }
3819 }
3820
3821 for (i = 18; i >= 4; i--)
3822 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3823 {
3824 store_reg (i, offset, FRAME_POINTER_REGNUM);
3825 offset += UNITS_PER_WORD;
3826 gr_saved++;
3827 }
3828 /* Account for %r3 which is saved in a special place.  */
3829 gr_saved++;
3830 }
3831 /* No frame pointer needed.  */
3832 else
3833 {
3834 offset = local_fsize - actual_fsize;
3835
3836 /* Saving the EH return data registers in the frame is the simplest
3837 way to get the frame unwind information emitted.  */
3838 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3839 {
3840 unsigned int i, regno;
3841
3842 for (i = 0; ; ++i)
3843 {
3844 regno = EH_RETURN_DATA_REGNO (i);
3845 if (regno == INVALID_REGNUM)
3846 break;
3847
3848 /* If merge_sp_adjust_with_store is nonzero, then we can
3849 optimize the first save.
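The merged save goes through store_reg_modify, which emits a
single store-with-modify instruction (such as stwm) that writes
the register and performs the sp adjustment in one step.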
*/ 3850 if (merge_sp_adjust_with_store) 3851 { 3852 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset); 3853 merge_sp_adjust_with_store = 0; 3854 } 3855 else 3856 store_reg (regno, offset, STACK_POINTER_REGNUM); 3857 offset += UNITS_PER_WORD; 3858 } 3859 } 3860 3861 for (i = 18; i >= 3; i--) 3862 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) 3863 { 3864 /* If merge_sp_adjust_with_store is nonzero, then we can 3865 optimize the first GR save. */ 3866 if (merge_sp_adjust_with_store) 3867 { 3868 store_reg_modify (STACK_POINTER_REGNUM, i, -offset); 3869 merge_sp_adjust_with_store = 0; 3870 } 3871 else 3872 store_reg (i, offset, STACK_POINTER_REGNUM); 3873 offset += UNITS_PER_WORD; 3874 gr_saved++; 3875 } 3876 3877 /* If we wanted to merge the SP adjustment with a GR save, but we never 3878 did any GR saves, then just emit the adjustment here. */ 3879 if (merge_sp_adjust_with_store) 3880 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 3881 actual_fsize, 1); 3882 } 3883 3884 /* The hppa calling conventions say that %r19, the pic offset 3885 register, is saved at sp - 32 (in this function's frame) 3886 when generating PIC code. FIXME: What is the correct thing 3887 to do for functions which make no calls and allocate no 3888 frame? Do we need to allocate a frame, or can we just omit 3889 the save? For now we'll just omit the save. 3890 3891 We don't want a note on this insn as the frame marker can 3892 move if there is a dynamic stack allocation. */ 3893 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT) 3894 { 3895 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32)); 3896 3897 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx); 3898 3899 } 3900 3901 /* Align pointer properly (doubleword boundary). */ 3902 offset = (offset + 7) & ~7; 3903 3904 /* Floating point register store. */ 3905 if (save_fregs) 3906 { 3907 rtx base; 3908 3909 /* First get the frame or stack pointer to the start of the FP register 3910 save area. */ 3911 if (frame_pointer_needed) 3912 { 3913 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0); 3914 base = frame_pointer_rtx; 3915 } 3916 else 3917 { 3918 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); 3919 base = stack_pointer_rtx; 3920 } 3921 3922 /* Now actually save the FP registers. */ 3923 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 3924 { 3925 if (df_regs_ever_live_p (i) 3926 || (! 
TARGET_64BIT && df_regs_ever_live_p (i + 1))) 3927 { 3928 rtx addr, insn, reg; 3929 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg)); 3930 reg = gen_rtx_REG (DFmode, i); 3931 insn = emit_move_insn (addr, reg); 3932 if (DO_FRAME_NOTES) 3933 { 3934 RTX_FRAME_RELATED_P (insn) = 1; 3935 if (TARGET_64BIT) 3936 { 3937 rtx mem = gen_rtx_MEM (DFmode, 3938 plus_constant (base, offset)); 3939 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3940 gen_rtx_SET (VOIDmode, mem, reg)); 3941 } 3942 else 3943 { 3944 rtx meml = gen_rtx_MEM (SFmode, 3945 plus_constant (base, offset)); 3946 rtx memr = gen_rtx_MEM (SFmode, 3947 plus_constant (base, offset + 4)); 3948 rtx regl = gen_rtx_REG (SFmode, i); 3949 rtx regr = gen_rtx_REG (SFmode, i + 1); 3950 rtx setl = gen_rtx_SET (VOIDmode, meml, regl); 3951 rtx setr = gen_rtx_SET (VOIDmode, memr, regr); 3952 rtvec vec; 3953 3954 RTX_FRAME_RELATED_P (setl) = 1; 3955 RTX_FRAME_RELATED_P (setr) = 1; 3956 vec = gen_rtvec (2, setl, setr); 3957 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3958 gen_rtx_SEQUENCE (VOIDmode, vec)); 3959 } 3960 } 3961 offset += GET_MODE_SIZE (DFmode); 3962 fr_saved++; 3963 } 3964 } 3965 } 3966 } 3967 3968 /* Emit RTL to load REG from the memory location specified by BASE+DISP. 3969 Handle case where DISP > 8k by using the add_high_const patterns. */ 3970 3971 static void 3972 load_reg (int reg, HOST_WIDE_INT disp, int base) 3973 { 3974 rtx dest = gen_rtx_REG (word_mode, reg); 3975 rtx basereg = gen_rtx_REG (Pmode, base); 3976 rtx src; 3977 3978 if (VAL_14_BITS_P (disp)) 3979 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp)); 3980 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 3981 { 3982 rtx delta = GEN_INT (disp); 3983 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3984 3985 emit_move_insn (tmpreg, delta); 3986 if (TARGET_DISABLE_INDEXING) 3987 { 3988 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 3989 src = gen_rtx_MEM (word_mode, tmpreg); 3990 } 3991 else 3992 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 3993 } 3994 else 3995 { 3996 rtx delta = GEN_INT (disp); 3997 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); 3998 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3999 4000 emit_move_insn (tmpreg, high); 4001 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 4002 } 4003 4004 emit_move_insn (dest, src); 4005 } 4006 4007 /* Update the total code bytes output to the text section. */ 4008 4009 static void 4010 update_total_code_bytes (unsigned int nbytes) 4011 { 4012 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM) 4013 && !IN_NAMED_SECTION_P (cfun->decl)) 4014 { 4015 unsigned int old_total = total_code_bytes; 4016 4017 total_code_bytes += nbytes; 4018 4019 /* Be prepared to handle overflows. */ 4020 if (old_total > total_code_bytes) 4021 total_code_bytes = UINT_MAX; 4022 } 4023 } 4024 4025 /* This function generates the assembly code for function exit. 4026 Args are as for output_function_prologue (). 4027 4028 The function epilogue should not depend on the current stack 4029 pointer! It should use the frame pointer only. This is mandatory 4030 because of alloca; we also take advantage of it to omit stack 4031 adjustments before returning. */ 4032 4033 static void 4034 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 4035 { 4036 rtx insn = get_last_insn (); 4037 4038 last_address = 0; 4039 4040 /* hppa_expand_epilogue does the dirty work now. 
We just need
4041 to output the assembler directives which denote the end
4042 of a function.
4043
4044 To make debuggers happy, emit a nop if the epilogue was completely
4045 eliminated due to a volatile call being the last insn in the
4046 current function.  That way the return address (in %r2) will
4047 always point to a valid instruction in the current function.  */
4048
4049 /* Get the last real insn.  */
4050 if (GET_CODE (insn) == NOTE)
4051 insn = prev_real_insn (insn);
4052
4053 /* If it is a sequence, then look inside.  */
4054 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4055 insn = XVECEXP (PATTERN (insn), 0, 0);
4056
4057 /* If insn is a CALL_INSN, then it must be a call to a volatile
4058 function (otherwise there would be epilogue insns).  */
4059 if (insn && GET_CODE (insn) == CALL_INSN)
4060 {
4061 fputs ("\tnop\n", file);
4062 last_address += 4;
4063 }
4064
4065 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4066
4067 if (TARGET_SOM && TARGET_GAS)
4068 {
4069 /* We're done with this subspace except possibly for some additional
4070 debug information.  Forget that we are in this subspace to ensure
4071 that the next function is output in its own subspace.  */
4072 in_section = NULL;
4073 cfun->machine->in_nsubspa = 2;
4074 }
4075
4076 if (INSN_ADDRESSES_SET_P ())
4077 {
4078 insn = get_last_nonnote_insn ();
4079 last_address += INSN_ADDRESSES (INSN_UID (insn));
4080 if (INSN_P (insn))
4081 last_address += insn_default_length (insn);
4082 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4083 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4084 }
4085 else
4086 last_address = UINT_MAX;
4087
4088 /* Finally, update the total number of code bytes output so far.  */
4089 update_total_code_bytes (last_address);
4090 }
4091
4092 void
4093 hppa_expand_epilogue (void)
4094 {
4095 rtx tmpreg;
4096 HOST_WIDE_INT offset;
4097 HOST_WIDE_INT ret_off = 0;
4098 int i;
4099 int merge_sp_adjust_with_load = 0;
4100
4101 /* We will use this often.  */
4102 tmpreg = gen_rtx_REG (word_mode, 1);
4103
4104 /* Try to restore RP early to avoid load/use interlocks when
4105 RP gets used in the return (bv) instruction.  This appears to still
4106 be necessary even when we schedule the prologue and epilogue.  */
4107 if (rp_saved)
4108 {
4109 ret_off = TARGET_64BIT ? -16 : -20;
4110 if (frame_pointer_needed)
4111 {
4112 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4113 ret_off = 0;
4114 }
4115 else
4116 {
4117 /* No frame pointer, and stack is smaller than 8k.  */
4118 if (VAL_14_BITS_P (ret_off - actual_fsize))
4119 {
4120 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4121 ret_off = 0;
4122 }
4123 }
4124 }
4125
4126 /* General register restores.  */
4127 if (frame_pointer_needed)
4128 {
4129 offset = local_fsize;
4130
4131 /* If the current function calls __builtin_eh_return, then we need
4132 to restore the saved EH data registers.  */
4133 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4134 {
4135 unsigned int i, regno;
4136
4137 for (i = 0; ; ++i)
4138 {
4139 regno = EH_RETURN_DATA_REGNO (i);
4140 if (regno == INVALID_REGNUM)
4141 break;
4142
4143 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4144 offset += UNITS_PER_WORD;
4145 }
4146 }
4147
4148 for (i = 18; i >= 4; i--)
4149 if (df_regs_ever_live_p (i) && !
call_used_regs[i]) 4150 { 4151 load_reg (i, offset, FRAME_POINTER_REGNUM); 4152 offset += UNITS_PER_WORD; 4153 } 4154 } 4155 else 4156 { 4157 offset = local_fsize - actual_fsize; 4158 4159 /* If the current function calls __builtin_eh_return, then we need 4160 to restore the saved EH data registers. */ 4161 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4162 { 4163 unsigned int i, regno; 4164 4165 for (i = 0; ; ++i) 4166 { 4167 regno = EH_RETURN_DATA_REGNO (i); 4168 if (regno == INVALID_REGNUM) 4169 break; 4170 4171 /* Only for the first load. 4172 merge_sp_adjust_with_load holds the register load 4173 with which we will merge the sp adjustment. */ 4174 if (merge_sp_adjust_with_load == 0 4175 && local_fsize == 0 4176 && VAL_14_BITS_P (-actual_fsize)) 4177 merge_sp_adjust_with_load = regno; 4178 else 4179 load_reg (regno, offset, STACK_POINTER_REGNUM); 4180 offset += UNITS_PER_WORD; 4181 } 4182 } 4183 4184 for (i = 18; i >= 3; i--) 4185 { 4186 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) 4187 { 4188 /* Only for the first load. 4189 merge_sp_adjust_with_load holds the register load 4190 with which we will merge the sp adjustment. */ 4191 if (merge_sp_adjust_with_load == 0 4192 && local_fsize == 0 4193 && VAL_14_BITS_P (-actual_fsize)) 4194 merge_sp_adjust_with_load = i; 4195 else 4196 load_reg (i, offset, STACK_POINTER_REGNUM); 4197 offset += UNITS_PER_WORD; 4198 } 4199 } 4200 } 4201 4202 /* Align pointer properly (doubleword boundary). */ 4203 offset = (offset + 7) & ~7; 4204 4205 /* FP register restores. */ 4206 if (save_fregs) 4207 { 4208 /* Adjust the register to index off of. */ 4209 if (frame_pointer_needed) 4210 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0); 4211 else 4212 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); 4213 4214 /* Actually do the restores now. */ 4215 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 4216 if (df_regs_ever_live_p (i) 4217 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1))) 4218 { 4219 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg)); 4220 rtx dest = gen_rtx_REG (DFmode, i); 4221 emit_move_insn (dest, src); 4222 } 4223 } 4224 4225 /* Emit a blockage insn here to keep these insns from being moved to 4226 an earlier spot in the epilogue, or into the main instruction stream. 4227 4228 This is necessary as we must not cut the stack back before all the 4229 restores are finished. */ 4230 emit_insn (gen_blockage ()); 4231 4232 /* Reset stack pointer (and possibly frame pointer). The stack 4233 pointer is initially set to fp + 64 to avoid a race condition. */ 4234 if (frame_pointer_needed) 4235 { 4236 rtx delta = GEN_INT (-64); 4237 4238 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0); 4239 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta)); 4240 } 4241 /* If we were deferring a callee register restore, do it now. */ 4242 else if (merge_sp_adjust_with_load) 4243 { 4244 rtx delta = GEN_INT (-actual_fsize); 4245 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load); 4246 4247 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta)); 4248 } 4249 else if (actual_fsize != 0) 4250 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4251 - actual_fsize, 0); 4252 4253 /* If we haven't restored %r2 yet (no frame pointer, and a stack 4254 frame greater than 8k), do so now. 
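The frame has been deallocated at this point, so ret_off (-20,
or -16 in the 64-bit runtime) is once again the correct offset
of the RP save slot from the stack pointer.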
*/ 4255 if (ret_off != 0) 4256 load_reg (2, ret_off, STACK_POINTER_REGNUM); 4257 4258 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4259 { 4260 rtx sa = EH_RETURN_STACKADJ_RTX; 4261 4262 emit_insn (gen_blockage ()); 4263 emit_insn (TARGET_64BIT 4264 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa) 4265 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa)); 4266 } 4267 } 4268 4269 rtx 4270 hppa_pic_save_rtx (void) 4271 { 4272 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM); 4273 } 4274 4275 #ifndef NO_DEFERRED_PROFILE_COUNTERS 4276 #define NO_DEFERRED_PROFILE_COUNTERS 0 4277 #endif 4278 4279 4280 /* Vector of funcdef numbers. */ 4281 static VEC(int,heap) *funcdef_nos; 4282 4283 /* Output deferred profile counters. */ 4284 static void 4285 output_deferred_profile_counters (void) 4286 { 4287 unsigned int i; 4288 int align, n; 4289 4290 if (VEC_empty (int, funcdef_nos)) 4291 return; 4292 4293 switch_to_section (data_section); 4294 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE); 4295 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); 4296 4297 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++) 4298 { 4299 targetm.asm_out.internal_label (asm_out_file, "LP", n); 4300 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1); 4301 } 4302 4303 VEC_free (int, heap, funcdef_nos); 4304 } 4305 4306 void 4307 hppa_profile_hook (int label_no) 4308 { 4309 /* We use SImode for the address of the function in both 32 and 4310 64-bit code to avoid having to provide DImode versions of the 4311 lcla2 and load_offset_label_address insn patterns. */ 4312 rtx reg = gen_reg_rtx (SImode); 4313 rtx label_rtx = gen_label_rtx (); 4314 rtx begin_label_rtx, call_insn; 4315 char begin_label_name[16]; 4316 4317 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL, 4318 label_no); 4319 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name)); 4320 4321 if (TARGET_64BIT) 4322 emit_move_insn (arg_pointer_rtx, 4323 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, 4324 GEN_INT (64))); 4325 4326 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2)); 4327 4328 /* The address of the function is loaded into %r25 with an instruction- 4329 relative sequence that avoids the use of relocations. The sequence 4330 is split so that the load_offset_label_address instruction can 4331 occupy the delay slot of the call to _mcount. */ 4332 if (TARGET_PA_20) 4333 emit_insn (gen_lcla2 (reg, label_rtx)); 4334 else 4335 emit_insn (gen_lcla1 (reg, label_rtx)); 4336 4337 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25), 4338 reg, begin_label_rtx, label_rtx)); 4339 4340 #if !NO_DEFERRED_PROFILE_COUNTERS 4341 { 4342 rtx count_label_rtx, addr, r24; 4343 char count_label_name[16]; 4344 4345 VEC_safe_push (int, heap, funcdef_nos, label_no); 4346 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no); 4347 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name)); 4348 4349 addr = force_reg (Pmode, count_label_rtx); 4350 r24 = gen_rtx_REG (Pmode, 24); 4351 emit_move_insn (r24, addr); 4352 4353 call_insn = 4354 emit_call_insn (gen_call (gen_rtx_MEM (Pmode, 4355 gen_rtx_SYMBOL_REF (Pmode, 4356 "_mcount")), 4357 GEN_INT (TARGET_64BIT ? 24 : 12))); 4358 4359 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24); 4360 } 4361 #else 4362 4363 call_insn = 4364 emit_call_insn (gen_call (gen_rtx_MEM (Pmode, 4365 gen_rtx_SYMBOL_REF (Pmode, 4366 "_mcount")), 4367 GEN_INT (TARGET_64BIT ? 
16 : 8)));
4368
4369 #endif
4370
4371 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4372 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4373
4374 /* Indicate the _mcount call cannot throw, nor will it execute a
4375 non-local goto.  */
4376 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4377 }
4378
4379 /* Fetch the return address for the frame COUNT steps up from
4380 the current frame, after the prologue.  FRAMEADDR is the
4381 frame pointer of the COUNT frame.
4382
4383 We want to ignore any export stub remnants here.  To handle this,
4384 we examine the code at the return address, and if it is an export
4385 stub, we return a memory rtx for the stub return address stored
4386 at frame-24.
4387
4388 The value returned is used in two different ways:
4389
4390 1. To find a function's caller.
4391
4392 2. To change the return address for a function.
4393
4394 This function handles most instances of case 1; however, it will
4395 fail if there are two levels of stubs to execute on the return
4396 path.  The only way I believe that can happen is if the return value
4397 needs a parameter relocation, which never happens for C code.
4398
4399 This function handles most instances of case 2; however, it will
4400 fail if we did not originally have stub code on the return path
4401 but will need stub code on the new return path.  This can happen if
4402 the caller & callee are both in the main program, but the new
4403 return location is in a shared library.  */
4404
4405 rtx
4406 return_addr_rtx (int count, rtx frameaddr)
4407 {
4408 rtx label;
4409 rtx rp;
4410 rtx saved_rp;
4411 rtx ins;
4412
4413 /* Instruction stream at the normal return address for the export stub:
4414
4415 0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4416 0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4417 0x00011820 | stub+16:  mtsp r1,sr0
4418 0xe0400002 | stub+20:  be,n 0(sr0,rp)
4419
4420 0xe0400002 must be specified as -532676606 so that it won't be
4421 rejected as an invalid immediate operand on 64-bit hosts.  */
4422
4423 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4424 int i;
4425
4426 if (count != 0)
4427 return NULL_RTX;
4428
4429 rp = get_hard_reg_initial_val (Pmode, 2);
4430
4431 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4432 return rp;
4433
4434 /* If there is no export stub then just use the value saved from
4435 the return pointer register.  */
4436
4437 saved_rp = gen_reg_rtx (Pmode);
4438 emit_move_insn (saved_rp, rp);
4439
4440 /* Get pointer to the instruction stream.  We have to mask out the
4441 privilege level from the two low order bits of the return address
4442 pointer here so that ins will point to the start of the first
4443 instruction that would have been executed if we returned.  */
4444 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4445 label = gen_label_rtx ();
4446
4447 /* Check the instruction stream at the normal return address for the
4448 export stub.  If it is an export stub, then our return address is
4449 really in -24[frameaddr].  */
4450
4451 for (i = 0; i < 3; i++)
4452 {
4453 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4454 rtx op1 = GEN_INT (insns[i]);
4455 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4456 }
4457
4458 /* Here we know that our return address points to an export
4459 stub.  We don't want to return the address of the export stub,
4460 but rather the return address of the export stub.  That return
4461 address is stored at -24[frameaddr].
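Note that only the first three words of the four-instruction stub
are compared; if any of them differs, the branch to LABEL skips
the load below and the value copied from %r2 is used unchanged.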
*/ 4462 4463 emit_move_insn (saved_rp, 4464 gen_rtx_MEM (Pmode, 4465 memory_address (Pmode, 4466 plus_constant (frameaddr, 4467 -24)))); 4468 4469 emit_label (label); 4470 4471 return saved_rp; 4472 } 4473 4474 void 4475 emit_bcond_fp (rtx operands[]) 4476 { 4477 enum rtx_code code = GET_CODE (operands[0]); 4478 rtx operand0 = operands[1]; 4479 rtx operand1 = operands[2]; 4480 rtx label = operands[3]; 4481 4482 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0), 4483 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1))); 4484 4485 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, 4486 gen_rtx_IF_THEN_ELSE (VOIDmode, 4487 gen_rtx_fmt_ee (NE, 4488 VOIDmode, 4489 gen_rtx_REG (CCFPmode, 0), 4490 const0_rtx), 4491 gen_rtx_LABEL_REF (VOIDmode, label), 4492 pc_rtx))); 4493 4494 } 4495 4496 /* Adjust the cost of a scheduling dependency. Return the new cost of 4497 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ 4498 4499 static int 4500 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) 4501 { 4502 enum attr_type attr_type; 4503 4504 /* Don't adjust costs for a pa8000 chip, also do not adjust any 4505 true dependencies as they are described with bypasses now. */ 4506 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0) 4507 return cost; 4508 4509 if (! recog_memoized (insn)) 4510 return 0; 4511 4512 attr_type = get_attr_type (insn); 4513 4514 switch (REG_NOTE_KIND (link)) 4515 { 4516 case REG_DEP_ANTI: 4517 /* Anti dependency; DEP_INSN reads a register that INSN writes some 4518 cycles later. */ 4519 4520 if (attr_type == TYPE_FPLOAD) 4521 { 4522 rtx pat = PATTERN (insn); 4523 rtx dep_pat = PATTERN (dep_insn); 4524 if (GET_CODE (pat) == PARALLEL) 4525 { 4526 /* This happens for the fldXs,mb patterns. */ 4527 pat = XVECEXP (pat, 0, 0); 4528 } 4529 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4530 /* If this happens, we have to extend this to schedule 4531 optimally. Return 0 for now. */ 4532 return 0; 4533 4534 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) 4535 { 4536 if (! recog_memoized (dep_insn)) 4537 return 0; 4538 switch (get_attr_type (dep_insn)) 4539 { 4540 case TYPE_FPALU: 4541 case TYPE_FPMULSGL: 4542 case TYPE_FPMULDBL: 4543 case TYPE_FPDIVSGL: 4544 case TYPE_FPDIVDBL: 4545 case TYPE_FPSQRTSGL: 4546 case TYPE_FPSQRTDBL: 4547 /* A fpload can't be issued until one cycle before a 4548 preceding arithmetic operation has finished if 4549 the target of the fpload is any of the sources 4550 (or destination) of the arithmetic operation. */ 4551 return insn_default_latency (dep_insn) - 1; 4552 4553 default: 4554 return 0; 4555 } 4556 } 4557 } 4558 else if (attr_type == TYPE_FPALU) 4559 { 4560 rtx pat = PATTERN (insn); 4561 rtx dep_pat = PATTERN (dep_insn); 4562 if (GET_CODE (pat) == PARALLEL) 4563 { 4564 /* This happens for the fldXs,mb patterns. */ 4565 pat = XVECEXP (pat, 0, 0); 4566 } 4567 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4568 /* If this happens, we have to extend this to schedule 4569 optimally. Return 0 for now. */ 4570 return 0; 4571 4572 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) 4573 { 4574 if (! 
recog_memoized (dep_insn)) 4575 return 0; 4576 switch (get_attr_type (dep_insn)) 4577 { 4578 case TYPE_FPDIVSGL: 4579 case TYPE_FPDIVDBL: 4580 case TYPE_FPSQRTSGL: 4581 case TYPE_FPSQRTDBL: 4582 /* An ALU flop can't be issued until two cycles before a 4583 preceding divide or sqrt operation has finished if 4584 the target of the ALU flop is any of the sources 4585 (or destination) of the divide or sqrt operation. */ 4586 return insn_default_latency (dep_insn) - 2; 4587 4588 default: 4589 return 0; 4590 } 4591 } 4592 } 4593 4594 /* For other anti dependencies, the cost is 0. */ 4595 return 0; 4596 4597 case REG_DEP_OUTPUT: 4598 /* Output dependency; DEP_INSN writes a register that INSN writes some 4599 cycles later. */ 4600 if (attr_type == TYPE_FPLOAD) 4601 { 4602 rtx pat = PATTERN (insn); 4603 rtx dep_pat = PATTERN (dep_insn); 4604 if (GET_CODE (pat) == PARALLEL) 4605 { 4606 /* This happens for the fldXs,mb patterns. */ 4607 pat = XVECEXP (pat, 0, 0); 4608 } 4609 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4610 /* If this happens, we have to extend this to schedule 4611 optimally. Return 0 for now. */ 4612 return 0; 4613 4614 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) 4615 { 4616 if (! recog_memoized (dep_insn)) 4617 return 0; 4618 switch (get_attr_type (dep_insn)) 4619 { 4620 case TYPE_FPALU: 4621 case TYPE_FPMULSGL: 4622 case TYPE_FPMULDBL: 4623 case TYPE_FPDIVSGL: 4624 case TYPE_FPDIVDBL: 4625 case TYPE_FPSQRTSGL: 4626 case TYPE_FPSQRTDBL: 4627 /* A fpload can't be issued until one cycle before a 4628 preceding arithmetic operation has finished if 4629 the target of the fpload is the destination of the 4630 arithmetic operation. 4631 4632 Exception: For PA7100LC, PA7200 and PA7300, the cost 4633 is 3 cycles, unless they bundle together. We also 4634 pay the penalty if the second insn is a fpload. */ 4635 return insn_default_latency (dep_insn) - 1; 4636 4637 default: 4638 return 0; 4639 } 4640 } 4641 } 4642 else if (attr_type == TYPE_FPALU) 4643 { 4644 rtx pat = PATTERN (insn); 4645 rtx dep_pat = PATTERN (dep_insn); 4646 if (GET_CODE (pat) == PARALLEL) 4647 { 4648 /* This happens for the fldXs,mb patterns. */ 4649 pat = XVECEXP (pat, 0, 0); 4650 } 4651 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4652 /* If this happens, we have to extend this to schedule 4653 optimally. Return 0 for now. */ 4654 return 0; 4655 4656 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) 4657 { 4658 if (! recog_memoized (dep_insn)) 4659 return 0; 4660 switch (get_attr_type (dep_insn)) 4661 { 4662 case TYPE_FPDIVSGL: 4663 case TYPE_FPDIVDBL: 4664 case TYPE_FPSQRTSGL: 4665 case TYPE_FPSQRTDBL: 4666 /* An ALU flop can't be issued until two cycles before a 4667 preceding divide or sqrt operation has finished if 4668 the target of the ALU flop is also the target of 4669 the divide or sqrt operation. */ 4670 return insn_default_latency (dep_insn) - 2; 4671 4672 default: 4673 return 0; 4674 } 4675 } 4676 } 4677 4678 /* For other output dependencies, the cost is 0. */ 4679 return 0; 4680 4681 default: 4682 gcc_unreachable (); 4683 } 4684 } 4685 4686 /* Adjust scheduling priorities. We use this to try and keep addil 4687 and the next use of %r1 close together. */ 4688 static int 4689 pa_adjust_priority (rtx insn, int priority) 4690 { 4691 rtx set = single_set (insn); 4692 rtx src, dest; 4693 if (set) 4694 { 4695 src = SET_SRC (set); 4696 dest = SET_DEST (set); 4697 if (GET_CODE (src) == LO_SUM 4698 && symbolic_operand (XEXP (src, 1), VOIDmode) 4699 && ! 
read_only_operand (XEXP (src, 1), VOIDmode)) 4700 priority >>= 3; 4701 4702 else if (GET_CODE (src) == MEM 4703 && GET_CODE (XEXP (src, 0)) == LO_SUM 4704 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode) 4705 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode)) 4706 priority >>= 1; 4707 4708 else if (GET_CODE (dest) == MEM 4709 && GET_CODE (XEXP (dest, 0)) == LO_SUM 4710 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode) 4711 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)) 4712 priority >>= 3; 4713 } 4714 return priority; 4715 } 4716 4717 /* The 700 can only issue a single insn at a time. 4718 The 7XXX processors can issue two insns at a time. 4719 The 8000 can issue 4 insns at a time. */ 4720 static int 4721 pa_issue_rate (void) 4722 { 4723 switch (pa_cpu) 4724 { 4725 case PROCESSOR_700: return 1; 4726 case PROCESSOR_7100: return 2; 4727 case PROCESSOR_7100LC: return 2; 4728 case PROCESSOR_7200: return 2; 4729 case PROCESSOR_7300: return 2; 4730 case PROCESSOR_8000: return 4; 4731 4732 default: 4733 gcc_unreachable (); 4734 } 4735 } 4736 4737 4738 4739 /* Return any length adjustment needed by INSN which already has its length 4740 computed as LENGTH. Return zero if no adjustment is necessary. 4741 4742 For the PA: function calls, millicode calls, and backwards short 4743 conditional branches with unfilled delay slots need an adjustment by +1 4744 (to account for the NOP which will be inserted into the instruction stream). 4745 4746 Also compute the length of an inline block move here as it is too 4747 complicated to express as a length attribute in pa.md. */ 4748 int 4749 pa_adjust_insn_length (rtx insn, int length) 4750 { 4751 rtx pat = PATTERN (insn); 4752 4753 /* Jumps inside switch tables which have unfilled delay slots need 4754 adjustment. */ 4755 if (GET_CODE (insn) == JUMP_INSN 4756 && GET_CODE (pat) == PARALLEL 4757 && get_attr_type (insn) == TYPE_BTABLE_BRANCH) 4758 return 4; 4759 /* Millicode insn with an unfilled delay slot. */ 4760 else if (GET_CODE (insn) == INSN 4761 && GET_CODE (pat) != SEQUENCE 4762 && GET_CODE (pat) != USE 4763 && GET_CODE (pat) != CLOBBER 4764 && get_attr_type (insn) == TYPE_MILLI) 4765 return 4; 4766 /* Block move pattern. */ 4767 else if (GET_CODE (insn) == INSN 4768 && GET_CODE (pat) == PARALLEL 4769 && GET_CODE (XVECEXP (pat, 0, 0)) == SET 4770 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM 4771 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM 4772 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode 4773 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) 4774 return compute_movmem_length (insn) - 4; 4775 /* Block clear pattern. */ 4776 else if (GET_CODE (insn) == INSN 4777 && GET_CODE (pat) == PARALLEL 4778 && GET_CODE (XVECEXP (pat, 0, 0)) == SET 4779 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM 4780 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx 4781 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode) 4782 return compute_clrmem_length (insn) - 4; 4783 /* Conditional branch with an unfilled delay slot. */ 4784 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn)) 4785 { 4786 /* Adjust a short backwards conditional with an unfilled delay slot. */ 4787 if (GET_CODE (pat) == SET 4788 && length == 4 4789 && JUMP_LABEL (insn) != NULL_RTX 4790 && ! 
forward_branch_p (insn))
4791 return 4;
4792 else if (GET_CODE (pat) == PARALLEL
4793 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4794 && length == 4)
4795 return 4;
4796 /* Adjust dbra insn with short backwards conditional branch with
4797 unfilled delay slot -- only for the case where the counter is
4798 in a general register.  */
4799 else if (GET_CODE (pat) == PARALLEL
4800 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4801 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4802 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4803 && length == 4
4804 && ! forward_branch_p (insn))
4805 return 4;
4806 else
4807 return 0;
4808 }
4809 return 0;
4810 }
4811
4812 /* Print operand X (an rtx) in assembler syntax to file FILE.
4813 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4814 For `%' followed by punctuation, CODE is the punctuation and X is null.  */
4815
4816 void
4817 print_operand (FILE *file, rtx x, int code)
4818 {
4819 switch (code)
4820 {
4821 case '#':
4822 /* Output a 'nop' if there's nothing for the delay slot.  */
4823 if (dbr_sequence_length () == 0)
4824 fputs ("\n\tnop", file);
4825 return;
4826 case '*':
4827 /* Output a nullification completer if there's nothing for the
4828 delay slot or nullification is requested.  */
4829 if (dbr_sequence_length () == 0 ||
4830 (final_sequence &&
4831 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4832 fputs (",n", file);
4833 return;
4834 case 'R':
4835 /* Print out the second register name of a register pair.
4836 I.e., R (6) => 7.  */
4837 fputs (reg_names[REGNO (x) + 1], file);
4838 return;
4839 case 'r':
4840 /* A register or zero.  */
4841 if (x == const0_rtx
4842 || (x == CONST0_RTX (DFmode))
4843 || (x == CONST0_RTX (SFmode)))
4844 {
4845 fputs ("%r0", file);
4846 return;
4847 }
4848 else
4849 break;
4850 case 'f':
4851 /* A register or zero (floating point).
*/ 4852 if (x == const0_rtx 4853 || (x == CONST0_RTX (DFmode)) 4854 || (x == CONST0_RTX (SFmode))) 4855 { 4856 fputs ("%fr0", file); 4857 return; 4858 } 4859 else 4860 break; 4861 case 'A': 4862 { 4863 rtx xoperands[2]; 4864 4865 xoperands[0] = XEXP (XEXP (x, 0), 0); 4866 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0); 4867 output_global_address (file, xoperands[1], 0); 4868 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]); 4869 return; 4870 } 4871 4872 case 'C': /* Plain (C)ondition */ 4873 case 'X': 4874 switch (GET_CODE (x)) 4875 { 4876 case EQ: 4877 fputs ("=", file); break; 4878 case NE: 4879 fputs ("<>", file); break; 4880 case GT: 4881 fputs (">", file); break; 4882 case GE: 4883 fputs (">=", file); break; 4884 case GEU: 4885 fputs (">>=", file); break; 4886 case GTU: 4887 fputs (">>", file); break; 4888 case LT: 4889 fputs ("<", file); break; 4890 case LE: 4891 fputs ("<=", file); break; 4892 case LEU: 4893 fputs ("<<=", file); break; 4894 case LTU: 4895 fputs ("<<", file); break; 4896 default: 4897 gcc_unreachable (); 4898 } 4899 return; 4900 case 'N': /* Condition, (N)egated */ 4901 switch (GET_CODE (x)) 4902 { 4903 case EQ: 4904 fputs ("<>", file); break; 4905 case NE: 4906 fputs ("=", file); break; 4907 case GT: 4908 fputs ("<=", file); break; 4909 case GE: 4910 fputs ("<", file); break; 4911 case GEU: 4912 fputs ("<<", file); break; 4913 case GTU: 4914 fputs ("<<=", file); break; 4915 case LT: 4916 fputs (">=", file); break; 4917 case LE: 4918 fputs (">", file); break; 4919 case LEU: 4920 fputs (">>", file); break; 4921 case LTU: 4922 fputs (">>=", file); break; 4923 default: 4924 gcc_unreachable (); 4925 } 4926 return; 4927 /* For floating point comparisons. Note that the output 4928 predicates are the complement of the desired mode. The 4929 conditions for GT, GE, LT, LE and LTGT cause an invalid 4930 operation exception if the result is unordered and this 4931 exception is enabled in the floating-point status register. */ 4932 case 'Y': 4933 switch (GET_CODE (x)) 4934 { 4935 case EQ: 4936 fputs ("!=", file); break; 4937 case NE: 4938 fputs ("=", file); break; 4939 case GT: 4940 fputs ("!>", file); break; 4941 case GE: 4942 fputs ("!>=", file); break; 4943 case LT: 4944 fputs ("!<", file); break; 4945 case LE: 4946 fputs ("!<=", file); break; 4947 case LTGT: 4948 fputs ("!<>", file); break; 4949 case UNLE: 4950 fputs ("!?<=", file); break; 4951 case UNLT: 4952 fputs ("!?<", file); break; 4953 case UNGE: 4954 fputs ("!?>=", file); break; 4955 case UNGT: 4956 fputs ("!?>", file); break; 4957 case UNEQ: 4958 fputs ("!?=", file); break; 4959 case UNORDERED: 4960 fputs ("!?", file); break; 4961 case ORDERED: 4962 fputs ("?", file); break; 4963 default: 4964 gcc_unreachable (); 4965 } 4966 return; 4967 case 'S': /* Condition, operands are (S)wapped. */ 4968 switch (GET_CODE (x)) 4969 { 4970 case EQ: 4971 fputs ("=", file); break; 4972 case NE: 4973 fputs ("<>", file); break; 4974 case GT: 4975 fputs ("<", file); break; 4976 case GE: 4977 fputs ("<=", file); break; 4978 case GEU: 4979 fputs ("<<=", file); break; 4980 case GTU: 4981 fputs ("<<", file); break; 4982 case LT: 4983 fputs (">", file); break; 4984 case LE: 4985 fputs (">=", file); break; 4986 case LEU: 4987 fputs (">>=", file); break; 4988 case LTU: 4989 fputs (">>", file); break; 4990 default: 4991 gcc_unreachable (); 4992 } 4993 return; 4994 case 'B': /* Condition, (B)oth swapped and negate. 
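For example, %B of GT prints `>=': swapping the operands of GT
gives LT, and negating LT gives GE.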
*/ 4995 switch (GET_CODE (x)) 4996 { 4997 case EQ: 4998 fputs ("<>", file); break; 4999 case NE: 5000 fputs ("=", file); break; 5001 case GT: 5002 fputs (">=", file); break; 5003 case GE: 5004 fputs (">", file); break; 5005 case GEU: 5006 fputs (">>", file); break; 5007 case GTU: 5008 fputs (">>=", file); break; 5009 case LT: 5010 fputs ("<=", file); break; 5011 case LE: 5012 fputs ("<", file); break; 5013 case LEU: 5014 fputs ("<<", file); break; 5015 case LTU: 5016 fputs ("<<=", file); break; 5017 default: 5018 gcc_unreachable (); 5019 } 5020 return; 5021 case 'k': 5022 gcc_assert (GET_CODE (x) == CONST_INT); 5023 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x)); 5024 return; 5025 case 'Q': 5026 gcc_assert (GET_CODE (x) == CONST_INT); 5027 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63)); 5028 return; 5029 case 'L': 5030 gcc_assert (GET_CODE (x) == CONST_INT); 5031 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31)); 5032 return; 5033 case 'O': 5034 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0); 5035 fprintf (file, "%d", exact_log2 (INTVAL (x))); 5036 return; 5037 case 'p': 5038 gcc_assert (GET_CODE (x) == CONST_INT); 5039 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63)); 5040 return; 5041 case 'P': 5042 gcc_assert (GET_CODE (x) == CONST_INT); 5043 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31)); 5044 return; 5045 case 'I': 5046 if (GET_CODE (x) == CONST_INT) 5047 fputs ("i", file); 5048 return; 5049 case 'M': 5050 case 'F': 5051 switch (GET_CODE (XEXP (x, 0))) 5052 { 5053 case PRE_DEC: 5054 case PRE_INC: 5055 if (ASSEMBLER_DIALECT == 0) 5056 fputs ("s,mb", file); 5057 else 5058 fputs (",mb", file); 5059 break; 5060 case POST_DEC: 5061 case POST_INC: 5062 if (ASSEMBLER_DIALECT == 0) 5063 fputs ("s,ma", file); 5064 else 5065 fputs (",ma", file); 5066 break; 5067 case PLUS: 5068 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG 5069 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) 5070 { 5071 if (ASSEMBLER_DIALECT == 0) 5072 fputs ("x", file); 5073 } 5074 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 5075 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) 5076 { 5077 if (ASSEMBLER_DIALECT == 0) 5078 fputs ("x,s", file); 5079 else 5080 fputs (",s", file); 5081 } 5082 else if (code == 'F' && ASSEMBLER_DIALECT == 0) 5083 fputs ("s", file); 5084 break; 5085 default: 5086 if (code == 'F' && ASSEMBLER_DIALECT == 0) 5087 fputs ("s", file); 5088 break; 5089 } 5090 return; 5091 case 'G': 5092 output_global_address (file, x, 0); 5093 return; 5094 case 'H': 5095 output_global_address (file, x, 1); 5096 return; 5097 case 0: /* Don't do anything special */ 5098 break; 5099 case 'Z': 5100 { 5101 unsigned op[3]; 5102 compute_zdepwi_operands (INTVAL (x), op); 5103 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); 5104 return; 5105 } 5106 case 'z': 5107 { 5108 unsigned op[3]; 5109 compute_zdepdi_operands (INTVAL (x), op); 5110 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); 5111 return; 5112 } 5113 case 'c': 5114 /* We can get here from a .vtable_inherit due to our 5115 CONSTANT_ADDRESS_P rejecting perfectly good constant 5116 addresses. 
*/ 5117 break; 5118 default: 5119 gcc_unreachable (); 5120 } 5121 if (GET_CODE (x) == REG) 5122 { 5123 fputs (reg_names [REGNO (x)], file); 5124 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4) 5125 { 5126 fputs ("R", file); 5127 return; 5128 } 5129 if (FP_REG_P (x) 5130 && GET_MODE_SIZE (GET_MODE (x)) <= 4 5131 && (REGNO (x) & 1) == 0) 5132 fputs ("L", file); 5133 } 5134 else if (GET_CODE (x) == MEM) 5135 { 5136 int size = GET_MODE_SIZE (GET_MODE (x)); 5137 rtx base = NULL_RTX; 5138 switch (GET_CODE (XEXP (x, 0))) 5139 { 5140 case PRE_DEC: 5141 case POST_DEC: 5142 base = XEXP (XEXP (x, 0), 0); 5143 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]); 5144 break; 5145 case PRE_INC: 5146 case POST_INC: 5147 base = XEXP (XEXP (x, 0), 0); 5148 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]); 5149 break; 5150 case PLUS: 5151 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) 5152 fprintf (file, "%s(%s)", 5153 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))], 5154 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]); 5155 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) 5156 fprintf (file, "%s(%s)", 5157 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))], 5158 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]); 5159 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG 5160 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) 5161 { 5162 /* Because the REG_POINTER flag can get lost during reload, 5163 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the 5164 index and base registers in the combined move patterns. */ 5165 rtx base = XEXP (XEXP (x, 0), 1); 5166 rtx index = XEXP (XEXP (x, 0), 0); 5167 5168 fprintf (file, "%s(%s)", 5169 reg_names [REGNO (index)], reg_names [REGNO (base)]); 5170 } 5171 else 5172 output_address (XEXP (x, 0)); 5173 break; 5174 default: 5175 output_address (XEXP (x, 0)); 5176 break; 5177 } 5178 } 5179 else 5180 output_addr_const (file, x); 5181 } 5182 5183 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */ 5184 5185 void 5186 output_global_address (FILE *file, rtx x, int round_constant) 5187 { 5188 5189 /* Imagine (high (const (plus ...))). */ 5190 if (GET_CODE (x) == HIGH) 5191 x = XEXP (x, 0); 5192 5193 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode)) 5194 output_addr_const (file, x); 5195 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic) 5196 { 5197 output_addr_const (file, x); 5198 fputs ("-$global$", file); 5199 } 5200 else if (GET_CODE (x) == CONST) 5201 { 5202 const char *sep = ""; 5203 int offset = 0; /* assembler wants -$global$ at end */ 5204 rtx base = NULL_RTX; 5205 5206 switch (GET_CODE (XEXP (XEXP (x, 0), 0))) 5207 { 5208 case SYMBOL_REF: 5209 base = XEXP (XEXP (x, 0), 0); 5210 output_addr_const (file, base); 5211 break; 5212 case CONST_INT: 5213 offset = INTVAL (XEXP (XEXP (x, 0), 0)); 5214 break; 5215 default: 5216 gcc_unreachable (); 5217 } 5218 5219 switch (GET_CODE (XEXP (XEXP (x, 0), 1))) 5220 { 5221 case SYMBOL_REF: 5222 base = XEXP (XEXP (x, 0), 1); 5223 output_addr_const (file, base); 5224 break; 5225 case CONST_INT: 5226 offset = INTVAL (XEXP (XEXP (x, 0), 1)); 5227 break; 5228 default: 5229 gcc_unreachable (); 5230 } 5231 5232 /* How bogus. The compiler is apparently responsible for 5233 rounding the constant if it uses an LR field selector. 5234 5235 The linker and/or assembler seem a better place since 5236 they have to do this kind of thing already. 
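(For instance, the rounding below maps an offset of 0x2fff to
(0x2fff + 0x1000) & ~0x1fff == 0x2000, the nearest multiple
of 0x2000.)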
5237 5238 If we fail to do this, HP's optimizing linker may eliminate 5239 an addil, but not update the ldw/stw/ldo instruction that 5240 uses the result of the addil. */ 5241 if (round_constant) 5242 offset = ((offset + 0x1000) & ~0x1fff); 5243 5244 switch (GET_CODE (XEXP (x, 0))) 5245 { 5246 case PLUS: 5247 if (offset < 0) 5248 { 5249 offset = -offset; 5250 sep = "-"; 5251 } 5252 else 5253 sep = "+"; 5254 break; 5255 5256 case MINUS: 5257 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF); 5258 sep = "-"; 5259 break; 5260 5261 default: 5262 gcc_unreachable (); 5263 } 5264 5265 if (!read_only_operand (base, VOIDmode) && !flag_pic) 5266 fputs ("-$global$", file); 5267 if (offset) 5268 fprintf (file, "%s%d", sep, offset); 5269 } 5270 else 5271 output_addr_const (file, x); 5272 } 5273 5274 /* Output boilerplate text to appear at the beginning of the file. 5275 There are several possible versions. */ 5276 #define aputs(x) fputs(x, asm_out_file) 5277 static inline void 5278 pa_file_start_level (void) 5279 { 5280 if (TARGET_64BIT) 5281 aputs ("\t.LEVEL 2.0w\n"); 5282 else if (TARGET_PA_20) 5283 aputs ("\t.LEVEL 2.0\n"); 5284 else if (TARGET_PA_11) 5285 aputs ("\t.LEVEL 1.1\n"); 5286 else 5287 aputs ("\t.LEVEL 1.0\n"); 5288 } 5289 5290 static inline void 5291 pa_file_start_space (int sortspace) 5292 { 5293 aputs ("\t.SPACE $PRIVATE$"); 5294 if (sortspace) 5295 aputs (",SORT=16"); 5296 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31" 5297 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82" 5298 "\n\t.SPACE $TEXT$"); 5299 if (sortspace) 5300 aputs (",SORT=8"); 5301 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44" 5302 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n"); 5303 } 5304 5305 static inline void 5306 pa_file_start_file (int want_version) 5307 { 5308 if (write_symbols != NO_DEBUG) 5309 { 5310 output_file_directive (asm_out_file, main_input_filename); 5311 if (want_version) 5312 aputs ("\t.version\t\"01.01\"\n"); 5313 } 5314 } 5315 5316 static inline void 5317 pa_file_start_mcount (const char *aswhat) 5318 { 5319 if (profile_flag) 5320 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat); 5321 } 5322 5323 static void 5324 pa_elf_file_start (void) 5325 { 5326 pa_file_start_level (); 5327 pa_file_start_mcount ("ENTRY"); 5328 pa_file_start_file (0); 5329 } 5330 5331 static void 5332 pa_som_file_start (void) 5333 { 5334 pa_file_start_level (); 5335 pa_file_start_space (0); 5336 aputs ("\t.IMPORT $global$,DATA\n" 5337 "\t.IMPORT $$dyncall,MILLICODE\n"); 5338 pa_file_start_mcount ("CODE"); 5339 pa_file_start_file (0); 5340 } 5341 5342 static void 5343 pa_linux_file_start (void) 5344 { 5345 pa_file_start_file (0); 5346 pa_file_start_level (); 5347 pa_file_start_mcount ("CODE"); 5348 } 5349 5350 static void 5351 pa_hpux64_gas_file_start (void) 5352 { 5353 pa_file_start_level (); 5354 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE 5355 if (profile_flag) 5356 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function"); 5357 #endif 5358 pa_file_start_file (1); 5359 } 5360 5361 static void 5362 pa_hpux64_hpas_file_start (void) 5363 { 5364 pa_file_start_level (); 5365 pa_file_start_space (1); 5366 pa_file_start_mcount ("CODE"); 5367 pa_file_start_file (0); 5368 } 5369 #undef aputs 5370 5371 /* Search the deferred plabel list for SYMBOL and return its internal 5372 label. If an entry for SYMBOL is not found, a new entry is created. 
*/
5373
5374 rtx
5375 get_deferred_plabel (rtx symbol)
5376 {
5377 const char *fname = XSTR (symbol, 0);
5378 size_t i;
5379
5380 /* See if we have already put this function on the list of deferred
5381 plabels.  This list is generally small, so a linear search is not
5382 too ugly.  If it proves too slow, replace it with something faster.  */
5383 for (i = 0; i < n_deferred_plabels; i++)
5384 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5385 break;
5386
5387 /* If the deferred plabel list is empty, or this entry was not found
5388 on the list, create a new entry on the list.  */
5389 if (deferred_plabels == NULL || i == n_deferred_plabels)
5390 {
5391 tree id;
5392
5393 if (deferred_plabels == 0)
5394 deferred_plabels = (struct deferred_plabel *)
5395 ggc_alloc (sizeof (struct deferred_plabel));
5396 else
5397 deferred_plabels = (struct deferred_plabel *)
5398 ggc_realloc (deferred_plabels,
5399 ((n_deferred_plabels + 1)
5400 * sizeof (struct deferred_plabel)));
5401
5402 i = n_deferred_plabels++;
5403 deferred_plabels[i].internal_label = gen_label_rtx ();
5404 deferred_plabels[i].symbol = symbol;
5405
5406 /* Gross.  We have just implicitly taken the address of this
5407 function.  Mark it in the same manner as assemble_name.  */
5408 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5409 if (id)
5410 mark_referenced (id);
5411 }
5412
5413 return deferred_plabels[i].internal_label;
5414 }
5415
5416 static void
5417 output_deferred_plabels (void)
5418 {
5419 size_t i;
5420
5421 /* If we have some deferred plabels, then we need to switch into the
5422 data or readonly data section, and align it to a 4 byte (8 byte
5423 for 64-bit) boundary before outputting the deferred plabels.  */
5424 if (n_deferred_plabels)
5425 {
5426 switch_to_section (flag_pic ? data_section : readonly_data_section);
5427 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5428 }
5429
5430 /* Now output the deferred plabels.  */
5431 for (i = 0; i < n_deferred_plabels; i++)
5432 {
5433 targetm.asm_out.internal_label (asm_out_file, "L",
5434 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5435 assemble_integer (deferred_plabels[i].symbol,
5436 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5437 }
5438 }
5439
5440 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5441 /* Initialize optabs to point to HPUX long double emulation routines.
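Once registered, TFmode arithmetic, comparisons and conversions
are compiled to calls to these _U_Qf* routines instead of
inline code.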
*/ 5442 static void 5443 pa_hpux_init_libfuncs (void) 5444 { 5445 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd"); 5446 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub"); 5447 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy"); 5448 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv"); 5449 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin"); 5450 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax"); 5451 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt"); 5452 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs"); 5453 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg"); 5454 5455 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq"); 5456 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne"); 5457 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt"); 5458 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge"); 5459 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt"); 5460 set_optab_libfunc (le_optab, TFmode, "_U_Qfle"); 5461 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord"); 5462 5463 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad"); 5464 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad"); 5465 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl"); 5466 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl"); 5467 5468 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT 5469 ? "__U_Qfcnvfxt_quad_to_sgl" 5470 : "_U_Qfcnvfxt_quad_to_sgl"); 5471 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl"); 5472 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl"); 5473 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl"); 5474 5475 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad"); 5476 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad"); 5477 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad"); 5478 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad"); 5479 } 5480 #endif 5481 5482 /* HP's millicode routines mean something special to the assembler. 5483 Keep track of which ones we have used. */ 5484 5485 enum millicodes { remI, remU, divI, divU, mulI, end1000 }; 5486 static void import_milli (enum millicodes); 5487 static char imported[(int) end1000]; 5488 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"}; 5489 static const char import_string[] = ".IMPORT $$....,MILLICODE"; 5490 #define MILLI_START 10 5491 5492 static void 5493 import_milli (enum millicodes code) 5494 { 5495 char str[sizeof (import_string)]; 5496 5497 if (!imported[(int) code]) 5498 { 5499 imported[(int) code] = 1; 5500 strcpy (str, import_string); 5501 strncpy (str + MILLI_START, milli_names[(int) code], 4); 5502 output_asm_insn (str, 0); 5503 } 5504 } 5505 5506 /* The register constraints have put the operands and return value in 5507 the proper registers. */ 5508 5509 const char * 5510 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn) 5511 { 5512 import_milli (mulI); 5513 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI")); 5514 } 5515 5516 /* Emit the rtl for doing a division by a constant. */ 5517 5518 /* Do magic division millicodes exist for this value? */ 5519 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1}; 5520 5521 /* We'll use an array to keep track of the magic millicodes and 5522 whether or not we've used them already. [n][0] is signed, [n][1] is 5523 unsigned. 
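For example, the first signed division by 7 sets div_milli[7][0]
and outputs .IMPORT $$divI_7,MILLICODE; subsequent divisions by 7
reuse the import.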
*/
5524
5525 static int div_milli[16][2];
5526
5527 int
5528 emit_hpdiv_const (rtx *operands, int unsignedp)
5529 {
5530 if (GET_CODE (operands[2]) == CONST_INT
5531 && INTVAL (operands[2]) > 0
5532 && INTVAL (operands[2]) < 16
5533 && magic_milli[INTVAL (operands[2])])
5534 {
5535 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5536
5537 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5538 emit
5539 (gen_rtx_PARALLEL
5540 (VOIDmode,
5541 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5542 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5543 SImode,
5544 gen_rtx_REG (SImode, 26),
5545 operands[2])),
5546 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5547 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5548 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5549 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5550 gen_rtx_CLOBBER (VOIDmode, ret))));
5551 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5552 return 1;
5553 }
5554 return 0;
5555 }
5556
5557 const char *
5558 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5559 {
5560 HOST_WIDE_INT divisor;
5561
5562 /* If the divisor is a constant, try to use one of the special
5563 opcodes.  */
5564 if (GET_CODE (operands[0]) == CONST_INT)
5565 {
5566 static char buf[100];
5567 divisor = INTVAL (operands[0]);
5568 if (!div_milli[divisor][unsignedp])
5569 {
5570 div_milli[divisor][unsignedp] = 1;
5571 if (unsignedp)
5572 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5573 else
5574 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5575 }
5576 if (unsignedp)
5577 {
5578 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5579 INTVAL (operands[0]));
5580 return output_millicode_call (insn,
5581 gen_rtx_SYMBOL_REF (SImode, buf));
5582 }
5583 else
5584 {
5585 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5586 INTVAL (operands[0]));
5587 return output_millicode_call (insn,
5588 gen_rtx_SYMBOL_REF (SImode, buf));
5589 }
5590 }
5591 /* Divisor isn't a special constant.  */
5592 else
5593 {
5594 if (unsignedp)
5595 {
5596 import_milli (divU);
5597 return output_millicode_call (insn,
5598 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5599 }
5600 else
5601 {
5602 import_milli (divI);
5603 return output_millicode_call (insn,
5604 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5605 }
5606 }
5607 }
5608
5609 /* Output a $$rem millicode to do mod.  */
5610
5611 const char *
5612 output_mod_insn (int unsignedp, rtx insn)
5613 {
5614 if (unsignedp)
5615 {
5616 import_milli (remU);
5617 return output_millicode_call (insn,
5618 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5619 }
5620 else
5621 {
5622 import_milli (remI);
5623 return output_millicode_call (insn,
5624 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5625 }
5626 }
5627
5628 void
5629 output_arg_descriptor (rtx call_insn)
5630 {
5631 const char *arg_regs[4];
5632 enum machine_mode arg_mode;
5633 rtx link;
5634 int i, output_flag = 0;
5635 int regno;
5636
5637 /* We neither need nor want argument location descriptors for the
5638 64-bit runtime environment or the ELF32 environment.  */
5639 if (TARGET_64BIT || TARGET_ELF32)
5640 return;
5641
5642 for (i = 0; i < 4; i++)
5643 arg_regs[i] = 0;
5644
5645 /* Specify explicitly that no argument relocations should take place
5646 if using the portable runtime calling conventions.
*/
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
             asm_out_file);
      return;
    }

  gcc_assert (GET_CODE (call_insn) == CALL_INSN);
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
       link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
             && GET_CODE (XEXP (use, 0)) == REG
             && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
        continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      if (regno >= 23 && regno <= 26)
        {
          arg_regs[26 - regno] = "GR";
          if (arg_mode == DImode)
            arg_regs[25 - regno] = "GR";
        }
      else if (regno >= 32 && regno <= 39)
        {
          if (arg_mode == SFmode)
            arg_regs[(regno - 32) / 2] = "FR";
          else
            {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
              arg_regs[(regno - 34) / 2] = "FR";
              arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
              arg_regs[(regno - 34) / 2] = "FU";
              arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
            }
        }
    }
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
        {
          if (output_flag++)
            fputc (',', asm_out_file);
          fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
        }
    }
  fputc ('\n', asm_out_file);
}

/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.  */

static enum reg_class
pa_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
                     enum machine_mode mode, secondary_reload_info *sri)
{
  int regno;

  /* Handle the easy stuff first.  */
  if (rclass == R1_REGS)
    return NO_REGS;

  if (REG_P (x))
    {
      regno = REGNO (x);
      if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
        return NO_REGS;
    }
  else
    regno = -1;

  /* If we have something like (mem (mem (...))), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
    return NO_REGS;

  /* Trying to load a constant into a FP register during PIC code
     generation requires %r1 as a scratch register.  */
  if (flag_pic
      && (mode == SImode || mode == DImode)
      && FP_REG_CLASS_P (rclass)
      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
    {
      sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
                    : CODE_FOR_reload_indi_r1);
      return NO_REGS;
    }

  /* Secondary reloads of symbolic operands require %r1 as a scratch
     register when we're generating PIC code and when the operand isn't
     readonly.  */
  if (symbolic_expression_p (x))
    {
      if (GET_CODE (x) == HIGH)
        x = XEXP (x, 0);

      if (flag_pic || !read_only_operand (x, VOIDmode))
        {
          gcc_assert (mode == SImode || mode == DImode);
          sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
                        : CODE_FOR_reload_indi_r1);
          return NO_REGS;
        }
    }

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside pa_secondary_reload_class.
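     To keep that cost down, true_regnum is only called below when X
     might be a pseudo or a SUBREG, the only cases where the raw REGNO
     isn't already the answer.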
*/
  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  /* In order to allow 14-bit displacements in integer loads and stores,
     we need to prevent reload from generating out of range integer mode
     loads and stores to the floating point registers.  Previously, we
     used to call for a secondary reload and have emit_move_sequence()
     fix the instruction sequence.  However, reload occasionally wouldn't
     generate the reload and we would end up with an invalid REG+D memory
     address.  So, now we use an intermediate general register for most
     memory loads and stores.  */
  if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
      && GET_MODE_CLASS (mode) == MODE_INT
      && FP_REG_CLASS_P (rclass))
    {
      /* Reload passes (mem:SI (reg/f:DI 30 %r30)) when it wants to check
         the secondary reload needed for a pseudo.  It never passes a
         REG+D address.  */
      if (GET_CODE (x) == MEM)
        {
          x = XEXP (x, 0);

          /* We don't need an intermediate for indexed and LO_SUM DLT
             memory addresses.  When INT14_OK_STRICT is true, it might
             appear that we could directly allow register indirect
             memory addresses.  However, this doesn't work because we
             don't support SUBREGs in floating-point register copies
             and reload doesn't tell us when it's going to use a SUBREG.  */
          if (IS_INDEX_ADDR_P (x)
              || IS_LO_SUM_DLT_ADDR_P (x))
            return NO_REGS;

          /* Otherwise, we need an intermediate general register.  */
          return GENERAL_REGS;
        }

      /* Request a secondary reload with a general scratch register
         for everything else.  ??? Could symbolic operands be handled
         directly when generating non-pic PA 2.0 code?  */
      sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
      return NO_REGS;
    }

  /* A SAR<->FP register copy requires an intermediate general register
     and secondary memory.  We need a secondary reload with a general
     scratch register for spills.  */
  if (rclass == SHIFT_REGS)
    {
      /* Handle spill.  */
      if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
        {
          sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
          return NO_REGS;
        }

      /* Handle FP copy.  */
      if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
        return GENERAL_REGS;
    }

  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
      && REGNO_REG_CLASS (regno) == SHIFT_REGS
      && FP_REG_CLASS_P (rclass))
    return GENERAL_REGS;

  return NO_REGS;
}

/* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
   is only marked as live on entry by df-scan when it is a fixed
   register.  It isn't a fixed register in the 64-bit runtime,
   so we need to mark it here.  */

static void
pa_extra_live_on_entry (bitmap regs)
{
  if (TARGET_64BIT)
    bitmap_set_bit (regs, ARG_POINTER_REGNUM);
}

/* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
   to prevent it from being deleted.  */

rtx
pa_eh_return_handler_rtx (void)
{
  rtx tmp;

  tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
                      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
  tmp = gen_rtx_MEM (word_mode, tmp);
  tmp->volatil = 1;
  return tmp;
}

/* In the 32-bit runtime, arguments larger than eight bytes are passed
   by invisible reference.
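   (For example, a 16-byte struct argument is passed as the address of
   a caller-allocated copy rather than in argument registers or stack
   words.)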
   As a GCC extension, we also pass anything
   with a zero or variable size by reference.

   The 64-bit runtime does not describe passing any types by invisible
   reference.  The internals of GCC can't currently handle passing
   empty structures, and zero or variable length arrays when they are
   not passed entirely on the stack or by reference.  Thus, as a GCC
   extension, we pass these types by reference.  The HP compiler doesn't
   support these types, so hopefully there shouldn't be any compatibility
   issues.  This may have to be revisited when HP releases a C99 compiler
   or updates the ABI.  */

static bool
pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
                      enum machine_mode mode, const_tree type,
                      bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;

  if (type)
    size = int_size_in_bytes (type);
  else
    size = GET_MODE_SIZE (mode);

  if (TARGET_64BIT)
    return size <= 0;
  else
    return size <= 0 || size > 8;
}

enum direction
function_arg_padding (enum machine_mode mode, const_tree type)
{
  if (mode == BLKmode
      || (TARGET_64BIT
          && type
          && (AGGREGATE_TYPE_P (type)
              || TREE_CODE (type) == COMPLEX_TYPE
              || TREE_CODE (type) == VECTOR_TYPE)))
    {
      /* Return none if justification is not required.  */
      if (type
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
        return none;

      /* The directions set here are ignored when a BLKmode argument larger
         than a word is placed in a register.  Different code is used for
         the stack and registers.  This makes it difficult to have a
         consistent data representation for both the stack and registers.
         For both runtimes, the justification and padding for arguments on
         the stack and in registers should be identical.  */
      if (TARGET_64BIT)
        /* The 64-bit runtime specifies left justification for aggregates.  */
        return upward;
      else
        /* The 32-bit runtime architecture specifies right justification.
           When the argument is passed on the stack, the argument is padded
           with garbage on the left.  The HP compiler pads with zeros.  */
        return downward;
    }

  if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
    return downward;
  else
    return none;
}


/* Do what is necessary for `va_start'.  We look at the current function
   to determine if stdargs or varargs is used and fill in an initial
   va_list.  A pointer to this constructor is returned.  */

static rtx
hppa_builtin_saveregs (void)
{
  rtx offset, dest;
  tree fntype = TREE_TYPE (current_function_decl);
  int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
                   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
                       != void_type_node)))
                ? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
  else
    offset = crtl->args.arg_offset_rtx;

  if (TARGET_64BIT)
    {
      int i, off;

      /* Adjust for varargs/stdarg differences.  */
      if (argadj)
        offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
      else
        offset = crtl->args.arg_offset_rtx;

      /* We need to save %r26 .. %r19 inclusive starting at offset -64
         from the incoming arg pointer and growing to larger addresses.
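         That is, %r26 is stored at AP-64, %r25 at AP-56, and so on
         through %r19 at AP-8, which is exactly the layout the loop
         below produces.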
*/
      for (i = 26, off = -64; i >= 19; i--, off += 8)
        emit_move_insn (gen_rtx_MEM (word_mode,
                                     plus_constant (arg_pointer_rtx, off)),
                        gen_rtx_REG (word_mode, i));

      /* The incoming args pointer points just beyond the flushback area;
         normally this is not a serious concern.  However, when we are doing
         varargs/stdargs we want to make the arg pointer point to the start
         of the incoming argument area.  */
      emit_move_insn (virtual_incoming_args_rtx,
                      plus_constant (arg_pointer_rtx, -64));

      /* Now return a pointer to the first anonymous argument.  */
      return copy_to_reg (expand_binop (Pmode, add_optab,
                                        virtual_incoming_args_rtx,
                                        offset, 0, 0, OPTAB_LIB_WIDEN));
    }

  /* Store general registers on the stack.  */
  dest = gen_rtx_MEM (BLKmode,
                      plus_constant (crtl->args.internal_arg_pointer,
                                     -16));
  set_mem_alias_set (dest, get_varargs_alias_set ());
  set_mem_align (dest, BITS_PER_WORD);
  move_block_from_reg (23, dest, 4);

  /* move_block_from_reg will emit code to store the argument registers
     individually as scalar stores.

     However, other insns may later load from the same addresses for
     a structure load (passing a struct to a varargs routine).

     The alias code assumes that such aliasing can never happen, so we
     have to keep memory referencing insns from moving up beyond the
     last argument register store.  So we emit a blockage insn here.  */
  emit_insn (gen_blockage ());

  return copy_to_reg (expand_binop (Pmode, add_optab,
                                    crtl->args.internal_arg_pointer,
                                    offset, 0, 0, OPTAB_LIB_WIDEN));
}

static void
hppa_va_start (tree valist, rtx nextarg)
{
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}

static tree
hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                           gimple_seq *post_p)
{
  if (TARGET_64BIT)
    {
      /* Args grow upward.  We can use the generic routines.  */
      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    }
  else /* !TARGET_64BIT */
    {
      tree ptr = build_pointer_type (type);
      tree valist_type;
      tree t, u;
      unsigned int size, ofs;
      bool indirect;

      indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
      if (indirect)
        {
          type = ptr;
          ptr = build_pointer_type (type);
        }
      size = int_size_in_bytes (type);
      valist_type = TREE_TYPE (valist);

      /* Args grow down.  Not handled by generic routines.  */

      u = fold_convert (sizetype, size_in_bytes (type));
      u = fold_build1 (NEGATE_EXPR, sizetype, u);
      t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);

      /* Copied from va-pa.h, but we probably don't need to align to
         word size, since we generate and preserve that invariant.  */
      u = size_int (size > 4 ? -8 : -4);
      t = fold_convert (sizetype, t);
      t = build2 (BIT_AND_EXPR, sizetype, t, u);
      t = fold_convert (valist_type, t);

      t = build2 (MODIFY_EXPR, valist_type, valist, t);

      ofs = (8 - size) % 4;
      if (ofs != 0)
        {
          u = size_int (ofs);
          t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
        }

      t = fold_convert (ptr, t);
      t = build_va_arg_indirect_ref (t);

      if (indirect)
        t = build_va_arg_indirect_ref (t);

      return t;
    }
}

/* True if MODE is valid for the target.
By "valid", we mean able to 6066 be manipulated in non-trivial ways. In particular, this means all 6067 the arithmetic is supported. 6068 6069 Currently, TImode is not valid as the HP 64-bit runtime documentation 6070 doesn't document the alignment and calling conventions for this type. 6071 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and 6072 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */ 6073 6074 static bool 6075 pa_scalar_mode_supported_p (enum machine_mode mode) 6076 { 6077 int precision = GET_MODE_PRECISION (mode); 6078 6079 switch (GET_MODE_CLASS (mode)) 6080 { 6081 case MODE_PARTIAL_INT: 6082 case MODE_INT: 6083 if (precision == CHAR_TYPE_SIZE) 6084 return true; 6085 if (precision == SHORT_TYPE_SIZE) 6086 return true; 6087 if (precision == INT_TYPE_SIZE) 6088 return true; 6089 if (precision == LONG_TYPE_SIZE) 6090 return true; 6091 if (precision == LONG_LONG_TYPE_SIZE) 6092 return true; 6093 return false; 6094 6095 case MODE_FLOAT: 6096 if (precision == FLOAT_TYPE_SIZE) 6097 return true; 6098 if (precision == DOUBLE_TYPE_SIZE) 6099 return true; 6100 if (precision == LONG_DOUBLE_TYPE_SIZE) 6101 return true; 6102 return false; 6103 6104 case MODE_DECIMAL_FLOAT: 6105 return false; 6106 6107 default: 6108 gcc_unreachable (); 6109 } 6110 } 6111 6112 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and 6113 it branches into the delay slot. Otherwise, return FALSE. */ 6114 6115 static bool 6116 branch_to_delay_slot_p (rtx insn) 6117 { 6118 rtx jump_insn; 6119 6120 if (dbr_sequence_length ()) 6121 return FALSE; 6122 6123 jump_insn = next_active_insn (JUMP_LABEL (insn)); 6124 while (insn) 6125 { 6126 insn = next_active_insn (insn); 6127 if (jump_insn == insn) 6128 return TRUE; 6129 6130 /* We can't rely on the length of asms. So, we return FALSE when 6131 the branch is followed by an asm. */ 6132 if (!insn 6133 || GET_CODE (PATTERN (insn)) == ASM_INPUT 6134 || extract_asm_operands (PATTERN (insn)) != NULL_RTX 6135 || get_attr_length (insn) > 0) 6136 break; 6137 } 6138 6139 return FALSE; 6140 } 6141 6142 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot. 6143 6144 This occurs when INSN has an unfilled delay slot and is followed 6145 by an asm. Disaster can occur if the asm is empty and the jump 6146 branches into the delay slot. So, we add a nop in the delay slot 6147 when this occurs. */ 6148 6149 static bool 6150 branch_needs_nop_p (rtx insn) 6151 { 6152 rtx jump_insn; 6153 6154 if (dbr_sequence_length ()) 6155 return FALSE; 6156 6157 jump_insn = next_active_insn (JUMP_LABEL (insn)); 6158 while (insn) 6159 { 6160 insn = next_active_insn (insn); 6161 if (!insn || jump_insn == insn) 6162 return TRUE; 6163 6164 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT 6165 || extract_asm_operands (PATTERN (insn)) != NULL_RTX) 6166 && get_attr_length (insn) > 0) 6167 break; 6168 } 6169 6170 return FALSE; 6171 } 6172 6173 /* Return TRUE if INSN, a forward jump insn, can use nullification 6174 to skip the following instruction. This avoids an extra cycle due 6175 to a mis-predicted branch when we fall through. */ 6176 6177 static bool 6178 use_skip_p (rtx insn) 6179 { 6180 rtx jump_insn = next_active_insn (JUMP_LABEL (insn)); 6181 6182 while (insn) 6183 { 6184 insn = next_active_insn (insn); 6185 6186 /* We can't rely on the length of asms, so we can't skip asms. 
*/
      if (!insn
          || GET_CODE (PATTERN (insn)) == ASM_INPUT
          || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
        break;
      if (get_attr_length (insn) == 4
          && jump_insn == next_active_insn (insn))
        return TRUE;
      if (get_attr_length (insn) > 0)
        break;
    }

  return FALSE;
}

/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.  */

const char *
output_cbranch (rtx *operands, int negated, rtx insn)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot)
     is asking for a disaster.  This can happen when not optimizing and
     when jump optimization fails.

     While it is usually safe to emit nothing, this can fail if the
     preceding instruction is a nullified branch with an empty delay
     slot and the same branch target as this branch.  We could check
     for this but jump optimization should eliminate nop jumps.  It
     is always safe to emit a nop.  */
  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* The doubleword form of the cmpib instruction doesn't have the LEU
     and GTU conditions while the cmpb instruction does.  Since we accept
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
    operands[2] = gen_rtx_REG (DImode, 0);
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
    operands[1] = gen_rtx_REG (DImode, 0);

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
    /* All short conditional branches except backwards with an unfilled
       delay slot.  */
    case 4:
      if (useskip)
        strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
      else
        strcpy (buf, "{com%I2b,|cmp%I2b,}");
      if (GET_MODE (operands[1]) == DImode)
        strcat (buf, "*");
      if (negated)
        strcat (buf, "%B3");
      else
        strcat (buf, "%S3");
      if (useskip)
        strcat (buf, " %2,%r1,%%r0");
      else if (nullify)
        {
          if (branch_needs_nop_p (insn))
            strcat (buf, ",n %2,%r1,%0%#");
          else
            strcat (buf, ",n %2,%r1,%0");
        }
      else
        strcat (buf, " %2,%r1,%0");
      break;

    /* All long conditionals.
       Note a short backward branch with an
       unfilled delay slot is treated just like a long backward branch
       with an unfilled delay slot.  */
    case 8:
      /* Handle weird backwards branch with a filled delay slot
         which is nullified.  */
      if (dbr_sequence_length () != 0
          && ! forward_branch_p (insn)
          && nullify)
        {
          strcpy (buf, "{com%I2b,|cmp%I2b,}");
          if (GET_MODE (operands[1]) == DImode)
            strcat (buf, "*");
          if (negated)
            strcat (buf, "%S3");
          else
            strcat (buf, "%B3");
          strcat (buf, ",n %2,%r1,.+12\n\tb %0");
        }
      /* Handle short backwards branch with an unfilled delay slot.
         Using a comb;nop rather than comiclr;bl saves 1 cycle for both
         taken and untaken branches.  */
      else if (dbr_sequence_length () == 0
               && ! forward_branch_p (insn)
               && INSN_ADDRESSES_SET_P ()
               && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
        {
          strcpy (buf, "{com%I2b,|cmp%I2b,}");
          if (GET_MODE (operands[1]) == DImode)
            strcat (buf, "*");
          if (negated)
            strcat (buf, "%B3 %2,%r1,%0%#");
          else
            strcat (buf, "%S3 %2,%r1,%0%#");
        }
      else
        {
          strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
          if (GET_MODE (operands[1]) == DImode)
            strcat (buf, "*");
          if (negated)
            strcat (buf, "%S3");
          else
            strcat (buf, "%B3");
          if (nullify)
            strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
          else
            strcat (buf, " %2,%r1,%%r0\n\tb %0");
        }
      break;

    default:
      /* The reversed conditional branch must branch over one additional
         instruction if the delay slot is filled and needs to be extracted
         by output_lbranch.  If the delay slot is empty or this is a
         nullified forward branch, the instruction after the reversed
         condition branch must be nullified.  */
      if (dbr_sequence_length () == 0
          || (nullify && forward_branch_p (insn)))
        {
          nullify = 1;
          xdelay = 0;
          operands[4] = GEN_INT (length);
        }
      else
        {
          xdelay = 1;
          operands[4] = GEN_INT (length + 4);
        }

      /* Create a reversed conditional branch which branches around
         the following insns.  */
      if (GET_MODE (operands[1]) != DImode)
        {
          if (nullify)
            {
              if (negated)
                strcpy (buf,
                        "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
              else
                strcpy (buf,
                        "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
            }
          else
            {
              if (negated)
                strcpy (buf,
                        "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
              else
                strcpy (buf,
                        "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
            }
        }
      else
        {
          if (nullify)
            {
              if (negated)
                strcpy (buf,
                        "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
              else
                strcpy (buf,
                        "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
            }
          else
            {
              if (negated)
                strcpy (buf,
                        "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
              else
                strcpy (buf,
                        "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
            }
        }

      output_asm_insn (buf, operands);
      return output_lbranch (operands[0], insn, xdelay);
    }
  return buf;
}

/* This routine handles output of long unconditional branches that
   exceed the maximum range of a simple branch instruction.
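   In the simplest case (non-PIC code), the branch boils down to an
   ldil/be pair through %r1, as emitted near the end of this function.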
   Since we don't have a register available for the branch, we save
   register %r1 in the frame marker, load the branch destination DEST
   into %r1, execute the branch, and restore %r1 in the delay slot of
   the branch.

   Since long branches may have an insn in the delay slot and the
   delay slot is used to restore %r1, we in general need to extract
   this insn and execute it before the branch.  However, to facilitate
   use of this function by conditional branches, we also provide an
   option to not extract the delay insn so that it will be emitted
   after the long branch.  So, if there is an insn in the delay slot,
   it is extracted if XDELAY is nonzero.

   The lengths of the various long-branch sequences are 20, 16 and 24
   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */

const char *
output_lbranch (rtx dest, rtx insn, int xdelay)
{
  rtx xoperands[2];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);

      final_scan_insn (NEXT_INSN (insn), asm_out_file,
                       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      SET_INSN_DELETED (NEXT_INSN (insn));
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the caller's frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; goto
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        /* Use the return pointer slot in the frame marker.  */
        output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
        /* Use the slot at -40 in the frame marker since HP builtin
           alloca doesn't copy it.  */
        output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        /* Use the return pointer slot in the frame marker.  */
        output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
        /* Use the "Clean Up" slot in the frame marker.  In GCC,
           the only other use of this location is for copying a
           floating point double argument from a floating-point
           register to two general registers.  The copy is done
           as an "atomic" operation when outputting a call, so it
           won't interfere with our using the location here.
*/
        output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
      if (TARGET_SOM || !TARGET_GAS)
        {
          xoperands[1] = gen_label_rtx ();
          output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (xoperands[1]));
          output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
        }
      else
        {
          output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
          output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
        }
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        return "ldd -16(%%r30),%%r1";
      else
        return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        return "ldw -20(%%r30),%%r1";
      else
        return "ldw -12(%%r30),%%r1";
    }
}

/* This routine handles all the branch-on-bit conditional branch sequences we
   might need to generate.  It handles nullification of delay slots,
   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.  */

const char *
output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

    /* All short conditional branches except backwards with an unfilled
       delay slot.
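       As an illustrative example (the actual operands and condition
       come from the %-escapes below), a taken form might look like
       "bb,< %r26,5,L$7": branch to L$7 if bit 5 of %r26 is set.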
*/
    case 4:
      if (useskip)
        strcpy (buf, "{extrs,|extrw,s,}");
      else
        strcpy (buf, "bb,");
      if (useskip && GET_MODE (operands[0]) == DImode)
        strcpy (buf, "extrd,s,*");
      else if (GET_MODE (operands[0]) == DImode)
        strcpy (buf, "bb,*");
      if ((which == 0 && negated)
          || (which == 1 && ! negated))
        strcat (buf, ">=");
      else
        strcat (buf, "<");
      if (useskip)
        strcat (buf, " %0,%1,1,%%r0");
      else if (nullify && negated)
        {
          if (branch_needs_nop_p (insn))
            strcat (buf, ",n %0,%1,%3%#");
          else
            strcat (buf, ",n %0,%1,%3");
        }
      else if (nullify && ! negated)
        {
          if (branch_needs_nop_p (insn))
            strcat (buf, ",n %0,%1,%2%#");
          else
            strcat (buf, ",n %0,%1,%2");
        }
      else if (! nullify && negated)
        strcat (buf, " %0,%1,%3");
      else if (! nullify && ! negated)
        strcat (buf, " %0,%1,%2");
      break;

    /* All long conditionals.  Note a short backward branch with an
       unfilled delay slot is treated just like a long backward branch
       with an unfilled delay slot.  */
    case 8:
      /* Handle weird backwards branch with a filled delay slot
         which is nullified.  */
      if (dbr_sequence_length () != 0
          && ! forward_branch_p (insn)
          && nullify)
        {
          strcpy (buf, "bb,");
          if (GET_MODE (operands[0]) == DImode)
            strcat (buf, "*");
          if ((which == 0 && negated)
              || (which == 1 && ! negated))
            strcat (buf, "<");
          else
            strcat (buf, ">=");
          if (negated)
            strcat (buf, ",n %0,%1,.+12\n\tb %3");
          else
            strcat (buf, ",n %0,%1,.+12\n\tb %2");
        }
      /* Handle short backwards branch with an unfilled delay slot.
         Using a bb;nop rather than extrs;bl saves 1 cycle for both
         taken and untaken branches.  */
      else if (dbr_sequence_length () == 0
               && ! forward_branch_p (insn)
               && INSN_ADDRESSES_SET_P ()
               && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
        {
          strcpy (buf, "bb,");
          if (GET_MODE (operands[0]) == DImode)
            strcat (buf, "*");
          if ((which == 0 && negated)
              || (which == 1 && ! negated))
            strcat (buf, ">=");
          else
            strcat (buf, "<");
          if (negated)
            strcat (buf, " %0,%1,%3%#");
          else
            strcat (buf, " %0,%1,%2%#");
        }
      else
        {
          if (GET_MODE (operands[0]) == DImode)
            strcpy (buf, "extrd,s,*");
          else
            strcpy (buf, "{extrs,|extrw,s,}");
          if ((which == 0 && negated)
              || (which == 1 && ! negated))
            strcat (buf, "<");
          else
            strcat (buf, ">=");
          if (nullify && negated)
            strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
          else if (nullify && ! negated)
            strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
          else if (negated)
            strcat (buf, " %0,%1,1,%%r0\n\tb %3");
          else
            strcat (buf, " %0,%1,1,%%r0\n\tb %2");
        }
      break;

    default:
      /* The reversed conditional branch must branch over one additional
         instruction if the delay slot is filled and needs to be extracted
         by output_lbranch.  If the delay slot is empty or this is a
         nullified forward branch, the instruction after the reversed
         condition branch must be nullified.
*/
      if (dbr_sequence_length () == 0
          || (nullify && forward_branch_p (insn)))
        {
          nullify = 1;
          xdelay = 0;
          operands[4] = GEN_INT (length);
        }
      else
        {
          xdelay = 1;
          operands[4] = GEN_INT (length + 4);
        }

      if (GET_MODE (operands[0]) == DImode)
        strcpy (buf, "bb,*");
      else
        strcpy (buf, "bb,");
      if ((which == 0 && negated)
          || (which == 1 && !negated))
        strcat (buf, "<");
      else
        strcat (buf, ">=");
      if (nullify)
        strcat (buf, ",n %0,%1,.+%4");
      else
        strcat (buf, " %0,%1,.+%4");
      output_asm_insn (buf, operands);
      return output_lbranch (negated ? operands[3] : operands[2],
                             insn, xdelay);
    }
  return buf;
}

/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.  */

const char *
output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

    /* All short conditional branches except backwards with an unfilled
       delay slot.  */
    case 4:
      if (useskip)
        strcpy (buf, "{vextrs,|extrw,s,}");
      else
        strcpy (buf, "{bvb,|bb,}");
      if (useskip && GET_MODE (operands[0]) == DImode)
        strcpy (buf, "extrd,s,*");
      else if (GET_MODE (operands[0]) == DImode)
        strcpy (buf, "bb,*");
      if ((which == 0 && negated)
          || (which == 1 && ! negated))
        strcat (buf, ">=");
      else
        strcat (buf, "<");
      if (useskip)
        strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
      else if (nullify && negated)
        {
          if (branch_needs_nop_p (insn))
            strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
          else
            strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
        }
      else if (nullify && ! negated)
        {
          if (branch_needs_nop_p (insn))
            strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
          else
            strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
        }
      else if (! nullify && negated)
        strcat (buf, "{ %0,%3| %0,%%sar,%3}");
      else if (! nullify && ! negated)
        strcat (buf, "{ %0,%2| %0,%%sar,%2}");
      break;

    /* All long conditionals.  Note a short backward branch with an
       unfilled delay slot is treated just like a long backward branch
       with an unfilled delay slot.  */
    case 8:
      /* Handle weird backwards branch with a filled delay slot
         which is nullified.  */
      if (dbr_sequence_length () != 0
          && ! forward_branch_p (insn)
          && nullify)
        {
          strcpy (buf, "{bvb,|bb,}");
          if (GET_MODE (operands[0]) == DImode)
            strcat (buf, "*");
          if ((which == 0 && negated)
              || (which == 1 && ! negated))
            strcat (buf, "<");
          else
            strcat (buf, ">=");
          if (negated)
            strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
          else
            strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
        }
      /* Handle short backwards branch with an unfilled delay slot.
         Using a bb;nop rather than extrs;bl saves 1 cycle for both
         taken and untaken branches.  */
      else if (dbr_sequence_length () == 0
               && ! forward_branch_p (insn)
               && INSN_ADDRESSES_SET_P ()
               && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
        {
          strcpy (buf, "{bvb,|bb,}");
          if (GET_MODE (operands[0]) == DImode)
            strcat (buf, "*");
          if ((which == 0 && negated)
              || (which == 1 && ! negated))
            strcat (buf, ">=");
          else
            strcat (buf, "<");
          if (negated)
            strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
          else
            strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
        }
      else
        {
          strcpy (buf, "{vextrs,|extrw,s,}");
          if (GET_MODE (operands[0]) == DImode)
            strcpy (buf, "extrd,s,*");
          if ((which == 0 && negated)
              || (which == 1 && ! negated))
            strcat (buf, "<");
          else
            strcat (buf, ">=");
          if (nullify && negated)
            strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
          else if (nullify && ! negated)
            strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
          else if (negated)
            strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
          else
            strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
        }
      break;

    default:
      /* The reversed conditional branch must branch over one additional
         instruction if the delay slot is filled and needs to be extracted
         by output_lbranch.  If the delay slot is empty or this is a
         nullified forward branch, the instruction after the reversed
         condition branch must be nullified.  */
      if (dbr_sequence_length () == 0
          || (nullify && forward_branch_p (insn)))
        {
          nullify = 1;
          xdelay = 0;
          operands[4] = GEN_INT (length);
        }
      else
        {
          xdelay = 1;
          operands[4] = GEN_INT (length + 4);
        }

      if (GET_MODE (operands[0]) == DImode)
        strcpy (buf, "bb,*");
      else
        strcpy (buf, "{bvb,|bb,}");
      if ((which == 0 && negated)
          || (which == 1 && !negated))
        strcat (buf, "<");
      else
        strcat (buf, ">=");
      if (nullify)
        strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
      else
        strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
      output_asm_insn (buf, operands);
      return output_lbranch (negated ? operands[3] : operands[2],
                             insn, xdelay);
    }
  return buf;
}

/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.
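   ("dbra" is the classic decrement-counter-and-branch idiom; on PA it
   normally comes out as a single addib that adds the increment to the
   loop counter and branches on the resulting condition.)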
*/
const char *
output_dbra (rtx *operands, rtx insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
        return "ldo %1(%0),%0";
      else if (which_alternative == 1)
        {
          output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
          output_asm_insn ("ldw -16(%%r30),%4", operands);
          output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
          return "{fldws|fldw} -16(%%r30),%0";
        }
      else
        {
          output_asm_insn ("ldw %0,%4", operands);
          return "ldo %1(%4),%4\n\tstw %4,%0";
        }
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
         as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
        nullify = 1;

      /* If this is a short forward conditional branch which did not get
         its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
        nullify = forward_branch_p (insn);

      switch (length)
        {
        case 4:
          if (nullify)
            {
              if (branch_needs_nop_p (insn))
                return "addib,%C2,n %1,%0,%3%#";
              else
                return "addib,%C2,n %1,%0,%3";
            }
          else
            return "addib,%C2 %1,%0,%3";

        case 8:
          /* Handle weird backwards branch with a filled delay slot
             which is nullified.  */
          if (dbr_sequence_length () != 0
              && ! forward_branch_p (insn)
              && nullify)
            return "addib,%N2,n %1,%0,.+12\n\tb %3";
          /* Handle short backwards branch with an unfilled delay slot.
             Using an addb;nop rather than addi;bl saves 1 cycle for both
             taken and untaken branches.  */
          else if (dbr_sequence_length () == 0
                   && ! forward_branch_p (insn)
                   && INSN_ADDRESSES_SET_P ()
                   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                     - INSN_ADDRESSES (INSN_UID (insn)) - 8))
            return "addib,%C2 %1,%0,%3%#";

          /* Handle normal cases.  */
          if (nullify)
            return "addi,%N2 %1,%0,%0\n\tb,n %3";
          else
            return "addi,%N2 %1,%0,%0\n\tb %3";

        default:
          /* The reversed conditional branch must branch over one additional
             instruction if the delay slot is filled and needs to be extracted
             by output_lbranch.  If the delay slot is empty or this is a
             nullified forward branch, the instruction after the reversed
             condition branch must be nullified.  */
          if (dbr_sequence_length () == 0
              || (nullify && forward_branch_p (insn)))
            {
              nullify = 1;
              xdelay = 0;
              operands[4] = GEN_INT (length);
            }
          else
            {
              xdelay = 1;
              operands[4] = GEN_INT (length + 4);
            }

          if (nullify)
            output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
          else
            output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

          return output_lbranch (operands[3], insn, xdelay);
        }

    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
         increment the GR, store the GR into MEM, and finally reload
         the FP register from MEM from within the branch's delay slot.
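         The word at -16(%r30) in the frame marker serves as the
         bounce buffer for all of these copies.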
*/
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
                       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
        return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
        return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
        {
          operands[5] = GEN_INT (length - 16);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
          output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory; the store back to memory
         happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
        return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
        return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
        {
          operands[5] = GEN_INT (length - 4);
          output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
}

/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
const char *
output_movb (rtx *operands, rtx insn, int which_alternative,
             int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
        return "copy %1,%0";
      else if (which_alternative == 1)
        {
          output_asm_insn ("stw %1,-16(%%r30)", operands);
          return "{fldws|fldw} -16(%%r30),%0";
        }
      else if (which_alternative == 2)
        return "stw %1,%0";
      else
        return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
         as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
        nullify = 1;

      /* If this is a short forward conditional branch which did not get
         its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
        nullify = forward_branch_p (insn);

      switch (length)
        {
        case 4:
          if (nullify)
            {
              if (branch_needs_nop_p (insn))
                return "movb,%C2,n %1,%0,%3%#";
              else
                return "movb,%C2,n %1,%0,%3";
            }
          else
            return "movb,%C2 %1,%0,%3";

        case 8:
          /* Handle weird backwards branch with a filled delay slot
             which is nullified.  */
          if (dbr_sequence_length () != 0
              && ! forward_branch_p (insn)
              && nullify)
            return "movb,%N2,n %1,%0,.+12\n\tb %3";

          /* Handle short backwards branch with an unfilled delay slot.
             Using a movb;nop rather than or;bl saves 1 cycle for both
             taken and untaken branches.  */
          else if (dbr_sequence_length () == 0
                   && ! forward_branch_p (insn)
                   && INSN_ADDRESSES_SET_P ()
                   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                     - INSN_ADDRESSES (INSN_UID (insn)) - 8))
            return "movb,%C2 %1,%0,%3%#";
          /* Handle normal cases.  */
          if (nullify)
            return "or,%N2 %1,%%r0,%0\n\tb,n %3";
          else
            return "or,%N2 %1,%%r0,%0\n\tb %3";

        default:
          /* The reversed conditional branch must branch over one additional
             instruction if the delay slot is filled and needs to be extracted
             by output_lbranch.  If the delay slot is empty or this is a
             nullified forward branch, the instruction after the reversed
             condition branch must be nullified.  */
          if (dbr_sequence_length () == 0
              || (nullify && forward_branch_p (insn)))
            {
              nullify = 1;
              xdelay = 0;
              operands[4] = GEN_INT (length);
            }
          else
            {
              xdelay = 1;
              operands[4] = GEN_INT (length + 4);
            }

          if (nullify)
            output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
          else
            output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

          return output_lbranch (operands[3], insn, xdelay);
        }
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
         finally load the FP register from MEM from within the branch's
         delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
        {
          operands[4] = GEN_INT (length - 4);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
          output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory; the store back to memory
         happens in the branch's delay slot.  */
      if (length == 8)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
        {
          operands[4] = GEN_INT (length);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
                           operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
        {
          operands[4] = GEN_INT (length);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
                           operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
}

/* Copy any FP arguments in INSN into integer registers.  */
static void
copy_fp_args (rtx insn)
{
  rtx link;
  rtx xoperands[2];

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
             && GET_CODE (XEXP (use, 0)) == REG
             && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
        continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?
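         If so, bounce it through the -16(%sr0,%r30) scratch word:
         two insns for an SFmode value and three for DFmode, which
         length_fp_args below counts as 8 and 12 bytes respectively.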
*/
      if (regno >= 32 && regno <= 39)
        {
          /* Copy the FP register into an integer register via memory.  */
          if (arg_mode == SFmode)
            {
              xoperands[0] = XEXP (use, 0);
              xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
              output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
              output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
            }
          else
            {
              xoperands[0] = XEXP (use, 0);
              xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
              output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
              output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
              output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
            }
        }
    }
}

/* Compute length of the FP argument copy sequence for INSN.  */
static int
length_fp_args (rtx insn)
{
  int length = 0;
  rtx link;

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
             && GET_CODE (XEXP (use, 0)) == REG
             && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
        continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
        {
          if (arg_mode == SFmode)
            length += 8;
          else
            length += 12;
        }
    }

  return length;
}

/* Return the attribute length for the millicode call instruction INSN.
   The length must match the code generated by output_millicode_call.
   We include the delay slot in the returned length as it is better to
   overestimate the length than to underestimate it.  */

int
attr_length_millicode_call (rtx insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
        distance = -1;
    }

  if (TARGET_64BIT)
    {
      if (!TARGET_LONG_CALLS && distance < 7600000)
        return 8;

      return 20;
    }
  else if (TARGET_PORTABLE_RUNTIME)
    return 24;
  else
    {
      if (!TARGET_LONG_CALLS && distance < 240000)
        return 8;

      if (TARGET_LONG_ABS_CALL && !flag_pic)
        return 12;

      return 24;
    }
}

/* INSN is a function call.  It may have an unconditional jump
   in its delay slot.

   CALL_DEST is the routine we are calling.  */

const char *
output_millicode_call (rtx insn, rtx call_dest)
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  int distance;
  rtx seq_insn;
  rtx xoperands[3];

  xoperands[0] = call_dest;
  xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within reach
     form of the $$sh_func_adrs call has a length of 28.  Because
     it has an attribute type of multi, it never has a nonzero
     sequence length.  The length of the $$sh_func_adrs is the same
     as certain out of reach PIC calls to other routines.
*/
  if (!TARGET_LONG_CALLS
      && ((seq_length == 0
           && (attr_length == 12
               || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
          || (seq_length != 0 && attr_length == 8)))
    {
      output_asm_insn ("{bl|b,l} %0,%2", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
        {
          /* It might seem that one insn could be saved by accessing
             the millicode function using the linkage table.  However,
             this doesn't work in shared libraries and other dynamically
             loaded objects.  Using a pc-relative sequence also avoids
             problems related to the implicit use of the gp register.  */
          output_asm_insn ("b,l .+8,%%r1", xoperands);

          if (TARGET_GAS)
            {
              output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
              output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
            }
          else
            {
              xoperands[1] = gen_label_rtx ();
              output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (xoperands[1]));
              output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
            }

          output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
        }
      else if (TARGET_PORTABLE_RUNTIME)
        {
          /* Pure portable runtime doesn't allow be/ble; we also don't
             have PIC support in the assembler/linker, so this sequence
             is needed.  */

          /* Get the address of our target into %r1.  */
          output_asm_insn ("ldil L'%0,%%r1", xoperands);
          output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

          /* Get our return address into %r31.  */
          output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
          output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

          /* Jump to our target address in %r1.  */
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
        }
      else if (!flag_pic)
        {
          output_asm_insn ("ldil L'%0,%%r1", xoperands);
          if (TARGET_PA_20)
            output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
          else
            output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
        }
      else
        {
          output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
          output_asm_insn ("addi 16,%%r1,%%r31", xoperands);

          if (TARGET_SOM || !TARGET_GAS)
            {
              /* The HP assembler can generate relocations for the
                 difference of two symbols.  GAS can do this for a
                 millicode symbol but not an arbitrary external
                 symbol when generating SOM output.  */
              xoperands[1] = gen_label_rtx ();
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (xoperands[1]));
              output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
              output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
            }
          else
            {
              output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
              output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
                               xoperands);
            }

          /* Jump to our target address in %r1.  */
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
        }
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  /* We are done if there isn't a jump in the delay slot.  */
  if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
    return "";

  /* This call has an unconditional jump in its delay slot.  */
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);

  /* See if the return address can be adjusted.  Use the containing
     sequence insn's address.
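     When the jump target is within reach, a single ldo biases the
     return pointer so the millicode routine returns directly to the
     jump's target, and the jump itself can then be deleted.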
*/ 7448 if (INSN_ADDRESSES_SET_P ()) 7449 { 7450 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); 7451 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))) 7452 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8); 7453 7454 if (VAL_14_BITS_P (distance)) 7455 { 7456 xoperands[1] = gen_label_rtx (); 7457 output_asm_insn ("ldo %0-%1(%2),%2", xoperands); 7458 targetm.asm_out.internal_label (asm_out_file, "L", 7459 CODE_LABEL_NUMBER (xoperands[1])); 7460 } 7461 else 7462 /* ??? This branch may not reach its target. */ 7463 output_asm_insn ("nop\n\tb,n %0", xoperands); 7464 } 7465 else 7466 /* ??? This branch may not reach its target. */ 7467 output_asm_insn ("nop\n\tb,n %0", xoperands); 7468 7469 /* Delete the jump. */ 7470 SET_INSN_DELETED (NEXT_INSN (insn)); 7471 7472 return ""; 7473 } 7474 7475 /* Return the attribute length of the call instruction INSN. The SIBCALL 7476 flag indicates whether INSN is a regular call or a sibling call. The 7477 length returned must be longer than the code actually generated by 7478 output_call. Since branch shortening is done before delay branch 7479 sequencing, there is no way to determine whether or not the delay 7480 slot will be filled during branch shortening. Even when the delay 7481 slot is filled, we may have to add a nop if the delay slot contains 7482 a branch that can't reach its target. Thus, we always have to include 7483 the delay slot in the length estimate. This used to be done in 7484 pa_adjust_insn_length but we do it here now as some sequences always 7485 fill the delay slot and we can save four bytes in the estimate for 7486 these sequences. */ 7487 7488 int 7489 attr_length_call (rtx insn, int sibcall) 7490 { 7491 int local_call; 7492 rtx call, call_dest; 7493 tree call_decl; 7494 int length = 0; 7495 rtx pat = PATTERN (insn); 7496 unsigned long distance = -1; 7497 7498 gcc_assert (GET_CODE (insn) == CALL_INSN); 7499 7500 if (INSN_ADDRESSES_SET_P ()) 7501 { 7502 unsigned long total; 7503 7504 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 7505 distance = (total + insn_current_reference_address (insn)); 7506 if (distance < total) 7507 distance = -1; 7508 } 7509 7510 gcc_assert (GET_CODE (pat) == PARALLEL); 7511 7512 /* Get the call rtx. */ 7513 call = XVECEXP (pat, 0, 0); 7514 if (GET_CODE (call) == SET) 7515 call = SET_SRC (call); 7516 7517 gcc_assert (GET_CODE (call) == CALL); 7518 7519 /* Determine if this is a local call. */ 7520 call_dest = XEXP (XEXP (call, 0), 0); 7521 call_decl = SYMBOL_REF_DECL (call_dest); 7522 local_call = call_decl && targetm.binds_local_p (call_decl); 7523 7524 /* pc-relative branch. */ 7525 if (!TARGET_LONG_CALLS 7526 && ((TARGET_PA_20 && !sibcall && distance < 7600000) 7527 || distance < 240000)) 7528 length += 8; 7529 7530 /* 64-bit plabel sequence. */ 7531 else if (TARGET_64BIT && !local_call) 7532 length += sibcall ? 28 : 24; 7533 7534 /* non-pic long absolute branch sequence. */ 7535 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) 7536 length += 12; 7537 7538 /* long pc-relative branch sequence. */ 7539 else if (TARGET_LONG_PIC_SDIFF_CALL 7540 || (TARGET_GAS && !TARGET_SOM 7541 && (TARGET_LONG_PIC_PCREL_CALL || local_call))) 7542 { 7543 length += 20; 7544 7545 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 7546 length += 8; 7547 } 7548 7549 /* 32-bit plabel sequence. 
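  An informal accounting of the base 32 bytes reserved below, matching
   the sequence emitted by output_call but illustrative only: two loads
   to fetch the function address from the plabel (a third under
   flag_pic, hence the extra 4), four instructions for the
   bb,>=,n/depi/ldw/ldw dance in case the plabel is still unbound, and
   the branch plus its delay slot.  The SOM FP argument copies and the
   pre-2.0 space register and return pointer fixups are added on top.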
*/ 7550 else 7551 { 7552 length += 32; 7553 7554 if (TARGET_SOM) 7555 length += length_fp_args (insn); 7556 7557 if (flag_pic) 7558 length += 4; 7559 7560 if (!TARGET_PA_20) 7561 { 7562 if (!sibcall) 7563 length += 8; 7564 7565 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 7566 length += 8; 7567 } 7568 } 7569 7570 return length; 7571 } 7572 7573 /* INSN is a function call. It may have an unconditional jump 7574 in its delay slot. 7575 7576 CALL_DEST is the routine we are calling. */ 7577 7578 const char * 7579 output_call (rtx insn, rtx call_dest, int sibcall) 7580 { 7581 int delay_insn_deleted = 0; 7582 int delay_slot_filled = 0; 7583 int seq_length = dbr_sequence_length (); 7584 tree call_decl = SYMBOL_REF_DECL (call_dest); 7585 int local_call = call_decl && targetm.binds_local_p (call_decl); 7586 rtx xoperands[2]; 7587 7588 xoperands[0] = call_dest; 7589 7590 /* Handle the common case where we're sure that the branch will reach 7591 the beginning of the "$CODE$" subspace. This is the beginning of 7592 the current function if we are in a named section. */ 7593 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8) 7594 { 7595 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2); 7596 output_asm_insn ("{bl|b,l} %0,%1", xoperands); 7597 } 7598 else 7599 { 7600 if (TARGET_64BIT && !local_call) 7601 { 7602 /* ??? As far as I can tell, the HP linker doesn't support the 7603 long pc-relative sequence described in the 64-bit runtime 7604 architecture. So, we use a slightly longer indirect call. */ 7605 xoperands[0] = get_deferred_plabel (call_dest); 7606 xoperands[1] = gen_label_rtx (); 7607 7608 /* If this isn't a sibcall, we put the load of %r27 into the 7609 delay slot. We can't do this in a sibcall as we don't 7610 have a second call-clobbered scratch register available. */ 7611 if (seq_length != 0 7612 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN 7613 && !sibcall) 7614 { 7615 final_scan_insn (NEXT_INSN (insn), asm_out_file, 7616 optimize, 0, NULL); 7617 7618 /* Now delete the delay insn. */ 7619 SET_INSN_DELETED (NEXT_INSN (insn)); 7620 delay_insn_deleted = 1; 7621 } 7622 7623 output_asm_insn ("addil LT'%0,%%r27", xoperands); 7624 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands); 7625 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands); 7626 7627 if (sibcall) 7628 { 7629 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); 7630 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands); 7631 output_asm_insn ("bve (%%r1)", xoperands); 7632 } 7633 else 7634 { 7635 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands); 7636 output_asm_insn ("bve,l (%%r2),%%r2", xoperands); 7637 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); 7638 delay_slot_filled = 1; 7639 } 7640 } 7641 else 7642 { 7643 int indirect_call = 0; 7644 7645 /* Emit a long call. There are several different sequences 7646 of increasing length and complexity. In most cases, 7647 they don't allow an instruction in the delay slot. */ 7648 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) 7649 && !TARGET_LONG_PIC_SDIFF_CALL 7650 && !(TARGET_GAS && !TARGET_SOM 7651 && (TARGET_LONG_PIC_PCREL_CALL || local_call)) 7652 && !TARGET_64BIT) 7653 indirect_call = 1; 7654 7655 if (seq_length != 0 7656 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN 7657 && !sibcall 7658 && (!TARGET_PA_20 7659 || indirect_call 7660 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic))) 7661 { 7662 /* A non-jump insn in the delay slot. By definition we can 7663 emit this insn before the call (and in fact before argument 7664 relocating. 
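)  To illustrate (example invented): if the slot holds ldi 5,%r24,
   that ldi can simply be printed ahead of the entire long-call
   sequence, which is what the final_scan_insn call below does, since
   a non-jump insn chosen for a call delay slot can never depend on
   the call itself.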
*/ 7665 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 7666 NULL); 7667 7668 /* Now delete the delay insn. */ 7669 SET_INSN_DELETED (NEXT_INSN (insn)); 7670 delay_insn_deleted = 1; 7671 } 7672 7673 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) 7674 { 7675 /* This is the best sequence for making long calls in 7676 non-pic code. Unfortunately, GNU ld doesn't provide 7677 the stub needed for external calls, and GAS's support 7678 for this with the SOM linker is buggy. It is safe 7679 to use this for local calls. */ 7680 output_asm_insn ("ldil L'%0,%%r1", xoperands); 7681 if (sibcall) 7682 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands); 7683 else 7684 { 7685 if (TARGET_PA_20) 7686 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", 7687 xoperands); 7688 else 7689 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands); 7690 7691 output_asm_insn ("copy %%r31,%%r2", xoperands); 7692 delay_slot_filled = 1; 7693 } 7694 } 7695 else 7696 { 7697 if (TARGET_LONG_PIC_SDIFF_CALL) 7698 { 7699 /* The HP assembler and linker can handle relocations 7700 for the difference of two symbols. The HP assembler 7701 recognizes the sequence as a pc-relative call and 7702 the linker provides stubs when needed. */ 7703 xoperands[1] = gen_label_rtx (); 7704 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 7705 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); 7706 targetm.asm_out.internal_label (asm_out_file, "L", 7707 CODE_LABEL_NUMBER (xoperands[1])); 7708 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); 7709 } 7710 else if (TARGET_GAS && !TARGET_SOM 7711 && (TARGET_LONG_PIC_PCREL_CALL || local_call)) 7712 { 7713 /* GAS currently can't generate the relocations that 7714 are needed for the SOM linker under HP-UX using this 7715 sequence. The GNU linker doesn't generate the stubs 7716 that are needed for external calls on TARGET_ELF32 7717 with this sequence. For now, we have to use a 7718 longer plabel sequence when using GAS. */ 7719 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 7720 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", 7721 xoperands); 7722 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", 7723 xoperands); 7724 } 7725 else 7726 { 7727 /* Emit a long plabel-based call sequence. This is 7728 essentially an inline implementation of $$dyncall. 7729 We don't actually try to call $$dyncall as this is 7730 as difficult as calling the function itself. */ 7731 xoperands[0] = get_deferred_plabel (call_dest); 7732 xoperands[1] = gen_label_rtx (); 7733 7734 /* Since the call is indirect, FP arguments in registers 7735 need to be copied to the general registers. Then, the 7736 argument relocation stub will copy them back. 
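
   A sketch of one such copy (it mirrors copy_fp_args earlier in this
   file; the register pairing shown is illustrative): an SFmode
   argument in %fr4 is bounced through the stack into its
   corresponding GR,

	fstw %fr4,-16(%sr0,%r30)
	ldw -16(%sr0,%r30),%r26

   and the relocation stub performs the reverse move when the callee
   actually wants the value in an FP register.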
*/ 7737 if (TARGET_SOM) 7738 copy_fp_args (insn); 7739 7740 if (flag_pic) 7741 { 7742 output_asm_insn ("addil LT'%0,%%r19", xoperands); 7743 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands); 7744 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands); 7745 } 7746 else 7747 { 7748 output_asm_insn ("addil LR'%0-$global$,%%r27", 7749 xoperands); 7750 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1", 7751 xoperands); 7752 } 7753 7754 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands); 7755 output_asm_insn ("depi 0,31,2,%%r1", xoperands); 7756 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands); 7757 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands); 7758 7759 if (!sibcall && !TARGET_PA_20) 7760 { 7761 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands); 7762 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 7763 output_asm_insn ("addi 8,%%r2,%%r2", xoperands); 7764 else 7765 output_asm_insn ("addi 16,%%r2,%%r2", xoperands); 7766 } 7767 } 7768 7769 if (TARGET_PA_20) 7770 { 7771 if (sibcall) 7772 output_asm_insn ("bve (%%r1)", xoperands); 7773 else 7774 { 7775 if (indirect_call) 7776 { 7777 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 7778 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands); 7779 delay_slot_filled = 1; 7780 } 7781 else 7782 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 7783 } 7784 } 7785 else 7786 { 7787 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 7788 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0", 7789 xoperands); 7790 7791 if (sibcall) 7792 { 7793 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 7794 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands); 7795 else 7796 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands); 7797 } 7798 else 7799 { 7800 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic)) 7801 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands); 7802 else 7803 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands); 7804 7805 if (indirect_call) 7806 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands); 7807 else 7808 output_asm_insn ("copy %%r31,%%r2", xoperands); 7809 delay_slot_filled = 1; 7810 } 7811 } 7812 } 7813 } 7814 } 7815 7816 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted)) 7817 output_asm_insn ("nop", xoperands); 7818 7819 /* We are done if there isn't a jump in the delay slot. */ 7820 if (seq_length == 0 7821 || delay_insn_deleted 7822 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) 7823 return ""; 7824 7825 /* A sibcall should never have a branch in the delay slot. */ 7826 gcc_assert (!sibcall); 7827 7828 /* This call has an unconditional jump in its delay slot. */ 7829 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); 7830 7831 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ()) 7832 { 7833 /* See if the return address can be adjusted. Use the containing 7834 sequence insn's address. This would break the regular call/return@ 7835 relationship assumed by the table based eh unwinder, so only do that 7836 if the call is not possibly throwing. 
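
   An illustration of the hazard (labels invented): once

	ldo L$24-L$25(%r2),%r2
   L$25:

   has been planted in the slot, %r2 no longer points just past the
   call, so an unwinder keyed on return addresses would attribute the
   frame to the wrong EH region.  Hence the can_throw_internal and
   can_throw_external tests below.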
*/ 7837 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); 7838 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))) 7839 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8); 7840 7841 if (VAL_14_BITS_P (distance) 7842 && !(can_throw_internal (insn) || can_throw_external (insn))) 7843 { 7844 xoperands[1] = gen_label_rtx (); 7845 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands); 7846 targetm.asm_out.internal_label (asm_out_file, "L", 7847 CODE_LABEL_NUMBER (xoperands[1])); 7848 } 7849 else 7850 output_asm_insn ("nop\n\tb,n %0", xoperands); 7851 } 7852 else 7853 output_asm_insn ("b,n %0", xoperands); 7854 7855 /* Delete the jump. */ 7856 SET_INSN_DELETED (NEXT_INSN (insn)); 7857 7858 return ""; 7859 } 7860 7861 /* Return the attribute length of the indirect call instruction INSN. 7862 The length must match the code generated by output_indirect call. 7863 The returned length includes the delay slot. Currently, the delay 7864 slot of an indirect call sequence is not exposed and it is used by 7865 the sequence itself. */ 7866 7867 int 7868 attr_length_indirect_call (rtx insn) 7869 { 7870 unsigned long distance = -1; 7871 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 7872 7873 if (INSN_ADDRESSES_SET_P ()) 7874 { 7875 distance = (total + insn_current_reference_address (insn)); 7876 if (distance < total) 7877 distance = -1; 7878 } 7879 7880 if (TARGET_64BIT) 7881 return 12; 7882 7883 if (TARGET_FAST_INDIRECT_CALLS 7884 || (!TARGET_PORTABLE_RUNTIME 7885 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000) 7886 || distance < 240000))) 7887 return 8; 7888 7889 if (flag_pic) 7890 return 24; 7891 7892 if (TARGET_PORTABLE_RUNTIME) 7893 return 20; 7894 7895 /* Out of reach, can use ble. */ 7896 return 12; 7897 } 7898 7899 const char * 7900 output_indirect_call (rtx insn, rtx call_dest) 7901 { 7902 rtx xoperands[1]; 7903 7904 if (TARGET_64BIT) 7905 { 7906 xoperands[0] = call_dest; 7907 output_asm_insn ("ldd 16(%0),%%r2", xoperands); 7908 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands); 7909 return ""; 7910 } 7911 7912 /* First the special case for kernels, level 0 systems, etc. */ 7913 if (TARGET_FAST_INDIRECT_CALLS) 7914 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; 7915 7916 /* Now the normal case -- we can reach $$dyncall directly or 7917 we're sure that we can get there via a long-branch stub. 7918 7919 No need to check target flags as the length uniquely identifies 7920 the remaining cases. */ 7921 if (attr_length_indirect_call (insn) == 8) 7922 { 7923 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to 7924 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit 7925 variant of the B,L instruction can't be used on the SOM target. */ 7926 if (TARGET_PA_20 && !TARGET_SOM) 7927 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31"; 7928 else 7929 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2"; 7930 } 7931 7932 /* Long millicode call, but we are not generating PIC or portable runtime 7933 code. */ 7934 if (attr_length_indirect_call (insn) == 12) 7935 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2"; 7936 7937 /* Long millicode call for portable runtime. */ 7938 if (attr_length_indirect_call (insn) == 20) 7939 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop"; 7940 7941 /* We need a long PIC call to $$dyncall. 
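  With GAS this comes out roughly as (illustrative):

	bl .+8,%r1
	addil L'$$dyncall-$PIC_pcrel$0+4,%r1
	ldo R'$$dyncall-$PIC_pcrel$0+8(%r1),%r1
	blr %r0,%r2
	bv,n %r0(%r1)
	nop

   six words, in agreement with the 24 bytes attr_length_indirect_call
   reserves for the flag_pic case.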
*/ 7942 xoperands[0] = NULL_RTX; 7943 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 7944 if (TARGET_SOM || !TARGET_GAS) 7945 { 7946 xoperands[0] = gen_label_rtx (); 7947 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands); 7948 targetm.asm_out.internal_label (asm_out_file, "L", 7949 CODE_LABEL_NUMBER (xoperands[0])); 7950 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands); 7951 } 7952 else 7953 { 7954 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands); 7955 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1", 7956 xoperands); 7957 } 7958 output_asm_insn ("blr %%r0,%%r2", xoperands); 7959 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands); 7960 return ""; 7961 } 7962 7963 /* Return the total length of the save and restore instructions needed for 7964 the data linkage table pointer (i.e., the PIC register) across the call 7965 instruction INSN. No-return calls do not require a save and restore. 7966 In addition, we may be able to avoid the save and restore for calls 7967 within the same translation unit. */ 7968 7969 int 7970 attr_length_save_restore_dltp (rtx insn) 7971 { 7972 if (find_reg_note (insn, REG_NORETURN, NULL_RTX)) 7973 return 0; 7974 7975 return 8; 7976 } 7977 7978 /* In HPUX 8.0's shared library scheme, special relocations are needed 7979 for function labels if they might be passed to a function 7980 in a shared library (because shared libraries don't live in code 7981 space), and special magic is needed to construct their address. */ 7982 7983 void 7984 hppa_encode_label (rtx sym) 7985 { 7986 const char *str = XSTR (sym, 0); 7987 int len = strlen (str) + 1; 7988 char *newstr, *p; 7989 7990 p = newstr = XALLOCAVEC (char, len + 1); 7991 *p++ = '@'; 7992 strcpy (p, str); 7993 7994 XSTR (sym, 0) = ggc_alloc_string (newstr, len); 7995 } 7996 7997 static void 7998 pa_encode_section_info (tree decl, rtx rtl, int first) 7999 { 8000 int old_referenced = 0; 8001 8002 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF) 8003 old_referenced 8004 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED; 8005 8006 default_encode_section_info (decl, rtl, first); 8007 8008 if (first && TEXT_SPACE_P (decl)) 8009 { 8010 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1; 8011 if (TREE_CODE (decl) == FUNCTION_DECL) 8012 hppa_encode_label (XEXP (rtl, 0)); 8013 } 8014 else if (old_referenced) 8015 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced; 8016 } 8017 8018 /* This is sort of inverse to pa_encode_section_info. */ 8019 8020 static const char * 8021 pa_strip_name_encoding (const char *str) 8022 { 8023 str += (*str == '@'); 8024 str += (*str == '*'); 8025 return str; 8026 } 8027 8028 int 8029 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 8030 { 8031 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0)); 8032 } 8033 8034 /* Returns 1 if OP is a function label involved in a simple addition 8035 with a constant. Used to keep certain patterns from matching 8036 during instruction combination. */ 8037 int 8038 is_function_label_plus_const (rtx op) 8039 { 8040 /* Strip off any CONST. */ 8041 if (GET_CODE (op) == CONST) 8042 op = XEXP (op, 0); 8043 8044 return (GET_CODE (op) == PLUS 8045 && function_label_operand (XEXP (op, 0), Pmode) 8046 && GET_CODE (XEXP (op, 1)) == CONST_INT); 8047 } 8048 8049 /* Output assembly code for a thunk to FUNCTION. 
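  In the easiest case below (short reach, DELTA fitting in 14 bits)
   the body is only two instructions, sketched here for orientation:

	b function
	ldo delta(%r26),%r26

   i.e., adjust the hidden this pointer in %r26 from the delay slot
   and branch to the real method.  The other cases differ only in how
   they reach FUNCTION and in where the DELTA addition lands.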
*/ 8050 8051 static void 8052 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta, 8053 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, 8054 tree function) 8055 { 8056 static unsigned int current_thunk_number; 8057 int val_14 = VAL_14_BITS_P (delta); 8058 unsigned int old_last_address = last_address, nbytes = 0; 8059 char label[16]; 8060 rtx xoperands[4]; 8061 8062 xoperands[0] = XEXP (DECL_RTL (function), 0); 8063 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0); 8064 xoperands[2] = GEN_INT (delta); 8065 8066 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0)); 8067 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n"); 8068 8069 /* Output the thunk. We know that the function is in the same 8070 translation unit (i.e., the same space) as the thunk, and that 8071 thunks are output after their method. Thus, we don't need an 8072 external branch to reach the function. With SOM and GAS, 8073 functions and thunks are effectively in different sections. 8074 Thus, we can always use a IA-relative branch and the linker 8075 will add a long branch stub if necessary. 8076 8077 However, we have to be careful when generating PIC code on the 8078 SOM port to ensure that the sequence does not transfer to an 8079 import stub for the target function as this could clobber the 8080 return value saved at SP-24. This would also apply to the 8081 32-bit linux port if the multi-space model is implemented. */ 8082 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME 8083 && !(flag_pic && TREE_PUBLIC (function)) 8084 && (TARGET_GAS || last_address < 262132)) 8085 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME 8086 && ((targetm.have_named_sections 8087 && DECL_SECTION_NAME (thunk_fndecl) != NULL 8088 /* The GNU 64-bit linker has rather poor stub management. 8089 So, we use a long branch from thunks that aren't in 8090 the same section as the target function. */ 8091 && ((!TARGET_64BIT 8092 && (DECL_SECTION_NAME (thunk_fndecl) 8093 != DECL_SECTION_NAME (function))) 8094 || ((DECL_SECTION_NAME (thunk_fndecl) 8095 == DECL_SECTION_NAME (function)) 8096 && last_address < 262132))) 8097 || (targetm.have_named_sections 8098 && DECL_SECTION_NAME (thunk_fndecl) == NULL 8099 && DECL_SECTION_NAME (function) == NULL 8100 && last_address < 262132) 8101 || (!targetm.have_named_sections && last_address < 262132)))) 8102 { 8103 if (!val_14) 8104 output_asm_insn ("addil L'%2,%%r26", xoperands); 8105 8106 output_asm_insn ("b %0", xoperands); 8107 8108 if (val_14) 8109 { 8110 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8111 nbytes += 8; 8112 } 8113 else 8114 { 8115 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8116 nbytes += 12; 8117 } 8118 } 8119 else if (TARGET_64BIT) 8120 { 8121 /* We only have one call-clobbered scratch register, so we can't 8122 make use of the delay slot if delta doesn't fit in 14 bits. */ 8123 if (!val_14) 8124 { 8125 output_asm_insn ("addil L'%2,%%r26", xoperands); 8126 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8127 } 8128 8129 output_asm_insn ("b,l .+8,%%r1", xoperands); 8130 8131 if (TARGET_GAS) 8132 { 8133 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); 8134 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands); 8135 } 8136 else 8137 { 8138 xoperands[3] = GEN_INT (val_14 ? 
8 : 16); 8139 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands); 8140 } 8141 8142 if (val_14) 8143 { 8144 output_asm_insn ("bv %%r0(%%r1)", xoperands); 8145 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8146 nbytes += 20; 8147 } 8148 else 8149 { 8150 output_asm_insn ("bv,n %%r0(%%r1)", xoperands); 8151 nbytes += 24; 8152 } 8153 } 8154 else if (TARGET_PORTABLE_RUNTIME) 8155 { 8156 output_asm_insn ("ldil L'%0,%%r1", xoperands); 8157 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands); 8158 8159 if (!val_14) 8160 output_asm_insn ("addil L'%2,%%r26", xoperands); 8161 8162 output_asm_insn ("bv %%r0(%%r22)", xoperands); 8163 8164 if (val_14) 8165 { 8166 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8167 nbytes += 16; 8168 } 8169 else 8170 { 8171 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8172 nbytes += 20; 8173 } 8174 } 8175 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) 8176 { 8177 /* The function is accessible from outside this module. The only 8178 way to avoid an import stub between the thunk and function is to 8179 call the function directly with an indirect sequence similar to 8180 that used by $$dyncall. This is possible because $$dyncall acts 8181 as the import stub in an indirect call. */ 8182 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number); 8183 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label); 8184 output_asm_insn ("addil LT'%3,%%r19", xoperands); 8185 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands); 8186 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8187 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); 8188 output_asm_insn ("depi 0,31,2,%%r22", xoperands); 8189 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands); 8190 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8191 8192 if (!val_14) 8193 { 8194 output_asm_insn ("addil L'%2,%%r26", xoperands); 8195 nbytes += 4; 8196 } 8197 8198 if (TARGET_PA_20) 8199 { 8200 output_asm_insn ("bve (%%r22)", xoperands); 8201 nbytes += 36; 8202 } 8203 else if (TARGET_NO_SPACE_REGS) 8204 { 8205 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands); 8206 nbytes += 36; 8207 } 8208 else 8209 { 8210 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands); 8211 output_asm_insn ("mtsp %%r21,%%sr0", xoperands); 8212 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands); 8213 nbytes += 44; 8214 } 8215 8216 if (val_14) 8217 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8218 else 8219 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8220 } 8221 else if (flag_pic) 8222 { 8223 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 8224 8225 if (TARGET_SOM || !TARGET_GAS) 8226 { 8227 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands); 8228 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands); 8229 } 8230 else 8231 { 8232 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); 8233 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands); 8234 } 8235 8236 if (!val_14) 8237 output_asm_insn ("addil L'%2,%%r26", xoperands); 8238 8239 output_asm_insn ("bv %%r0(%%r22)", xoperands); 8240 8241 if (val_14) 8242 { 8243 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8244 nbytes += 20; 8245 } 8246 else 8247 { 8248 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8249 nbytes += 24; 8250 } 8251 } 8252 else 8253 { 8254 if (!val_14) 8255 output_asm_insn ("addil L'%2,%%r26", xoperands); 8256 8257 output_asm_insn ("ldil L'%0,%%r22", xoperands); 8258 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands); 8259 8260 if (val_14) 8261 { 8262 output_asm_insn ("ldo 
%2(%%r26),%%r26", xoperands); 8263 nbytes += 12; 8264 } 8265 else 8266 { 8267 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8268 nbytes += 16; 8269 } 8270 } 8271 8272 fprintf (file, "\t.EXIT\n\t.PROCEND\n"); 8273 8274 if (TARGET_SOM && TARGET_GAS) 8275 { 8276 /* We're done with this subspace except possibly for some additional 8277 debug information. Forget that we are in this subspace to ensure 8278 that the next function is output in its own subspace. */ 8279 in_section = NULL; 8280 cfun->machine->in_nsubspa = 2; 8281 } 8282 8283 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) 8284 { 8285 switch_to_section (data_section); 8286 output_asm_insn (".align 4", xoperands); 8287 ASM_OUTPUT_LABEL (file, label); 8288 output_asm_insn (".word P'%0", xoperands); 8289 } 8290 8291 current_thunk_number++; 8292 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) 8293 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); 8294 last_address += nbytes; 8295 if (old_last_address > last_address) 8296 last_address = UINT_MAX; 8297 update_total_code_bytes (nbytes); 8298 } 8299 8300 /* Only direct calls to static functions are allowed to be sibling (tail) 8301 call optimized. 8302 8303 This restriction is necessary because some linker generated stubs will 8304 store return pointers into rp' in some cases which might clobber a 8305 live value already in rp'. 8306 8307 In a sibcall the current function and the target function share stack 8308 space. Thus if the path to the current function and the path to the 8309 target function save a value in rp', they save the value into the 8310 same stack slot, which has undesirable consequences. 8311 8312 Because of the deferred binding nature of shared libraries any function 8313 with external scope could be in a different load module and thus require 8314 rp' to be saved when calling that function. So sibcall optimizations 8315 can only be safe for static functions. 8316 8317 Note that GCC never needs return value relocations, so we don't have to 8318 worry about static calls with return value relocations (which require 8319 saving rp'). 8320 8321 It is safe to perform a sibcall optimization when the target function 8322 will never return. */ 8323 static bool 8324 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 8325 { 8326 if (TARGET_PORTABLE_RUNTIME) 8327 return false; 8328 8329 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in 8330 single subspace mode and the call is not indirect. As far as I know, 8331 there is no operating system support for the multiple subspace mode. 8332 It might be possible to support indirect calls if we didn't use 8333 $$dyncall (see the indirect sequence generated in output_call). */ 8334 if (TARGET_ELF32) 8335 return (decl != NULL_TREE); 8336 8337 /* Sibcalls are not ok because the arg pointer register is not a fixed 8338 register. This prevents the sibcall optimization from occurring. In 8339 addition, there are problems with stub placement using GNU ld. This 8340 is because a normal sibcall branch uses a 17-bit relocation while 8341 a regular call branch uses a 22-bit relocation. As a result, more 8342 care needs to be taken in the placement of long-branch stubs. */ 8343 if (TARGET_64BIT) 8344 return false; 8345 8346 /* Sibcalls are only ok within a translation unit. */ 8347 return (decl && !TREE_PUBLIC (decl)); 8348 } 8349 8350 /* ??? Addition is not commutative on the PA due to the weird implicit 8351 space register selection rules for memory addresses.
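  (For a REG+REG address the space register is selected from the high
   order bits of the base operand, so which operand serves as the base
   matters; as an informal example, %r5 + %r6 and %r6 + %r5 can
   reference different spaces even though the sums are equal.)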
Therefore, we 8352 don't consider a + b == b + a, as this might be inside a MEM. */ 8353 static bool 8354 pa_commutative_p (const_rtx x, int outer_code) 8355 { 8356 return (COMMUTATIVE_P (x) 8357 && (TARGET_NO_SPACE_REGS 8358 || (outer_code != UNKNOWN && outer_code != MEM) 8359 || GET_CODE (x) != PLUS)); 8360 } 8361 8362 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for 8363 use in fmpyadd instructions. */ 8364 int 8365 fmpyaddoperands (rtx *operands) 8366 { 8367 enum machine_mode mode = GET_MODE (operands[0]); 8368 8369 /* Must be a floating point mode. */ 8370 if (mode != SFmode && mode != DFmode) 8371 return 0; 8372 8373 /* All modes must be the same. */ 8374 if (! (mode == GET_MODE (operands[1]) 8375 && mode == GET_MODE (operands[2]) 8376 && mode == GET_MODE (operands[3]) 8377 && mode == GET_MODE (operands[4]) 8378 && mode == GET_MODE (operands[5]))) 8379 return 0; 8380 8381 /* All operands must be registers. */ 8382 if (! (GET_CODE (operands[1]) == REG 8383 && GET_CODE (operands[2]) == REG 8384 && GET_CODE (operands[3]) == REG 8385 && GET_CODE (operands[4]) == REG 8386 && GET_CODE (operands[5]) == REG)) 8387 return 0; 8388 8389 /* Only 2 real operands to the addition. One of the input operands must 8390 be the same as the output operand. */ 8391 if (! rtx_equal_p (operands[3], operands[4]) 8392 && ! rtx_equal_p (operands[3], operands[5])) 8393 return 0; 8394 8395 /* Inout operand of add cannot conflict with any operands from multiply. */ 8396 if (rtx_equal_p (operands[3], operands[0]) 8397 || rtx_equal_p (operands[3], operands[1]) 8398 || rtx_equal_p (operands[3], operands[2])) 8399 return 0; 8400 8401 /* multiply cannot feed into addition operands. */ 8402 if (rtx_equal_p (operands[4], operands[0]) 8403 || rtx_equal_p (operands[5], operands[0])) 8404 return 0; 8405 8406 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ 8407 if (mode == SFmode 8408 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS 8409 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS 8410 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS 8411 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS 8412 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS 8413 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) 8414 return 0; 8415 8416 /* Passed. Operands are suitable for fmpyadd. */ 8417 return 1; 8418 } 8419 8420 #if !defined(USE_COLLECT2) 8421 static void 8422 pa_asm_out_constructor (rtx symbol, int priority) 8423 { 8424 if (!function_label_operand (symbol, VOIDmode)) 8425 hppa_encode_label (symbol); 8426 8427 #ifdef CTORS_SECTION_ASM_OP 8428 default_ctor_section_asm_out_constructor (symbol, priority); 8429 #else 8430 # ifdef TARGET_ASM_NAMED_SECTION 8431 default_named_section_asm_out_constructor (symbol, priority); 8432 # else 8433 default_stabs_asm_out_constructor (symbol, priority); 8434 # endif 8435 #endif 8436 } 8437 8438 static void 8439 pa_asm_out_destructor (rtx symbol, int priority) 8440 { 8441 if (!function_label_operand (symbol, VOIDmode)) 8442 hppa_encode_label (symbol); 8443 8444 #ifdef DTORS_SECTION_ASM_OP 8445 default_dtor_section_asm_out_destructor (symbol, priority); 8446 #else 8447 # ifdef TARGET_ASM_NAMED_SECTION 8448 default_named_section_asm_out_destructor (symbol, priority); 8449 # else 8450 default_stabs_asm_out_destructor (symbol, priority); 8451 # endif 8452 #endif 8453 } 8454 #endif 8455 8456 /* This function places uninitialized global data in the bss section. 
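  For a 16 byte object with 8 byte (64-bit) alignment the output is
   roughly (illustrative; the exact type and size directives depend on
   the configured macros):

	.align 8
   var:
	.block 16
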
8457 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this 8458 function on the SOM port to prevent uninitialized global data from 8459 being placed in the data section. */ 8460 8461 void 8462 pa_asm_output_aligned_bss (FILE *stream, 8463 const char *name, 8464 unsigned HOST_WIDE_INT size, 8465 unsigned int align) 8466 { 8467 switch_to_section (bss_section); 8468 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8469 8470 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE 8471 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); 8472 #endif 8473 8474 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE 8475 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); 8476 #endif 8477 8478 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8479 ASM_OUTPUT_LABEL (stream, name); 8480 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); 8481 } 8482 8483 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive 8484 that doesn't allow the alignment of global common storage to be directly 8485 specified. The SOM linker aligns common storage based on the rounded 8486 value of the NUM_BYTES parameter in the .comm directive. It's not 8487 possible to use the .align directive as it doesn't affect the alignment 8488 of the label associated with a .comm directive. */ 8489 8490 void 8491 pa_asm_output_aligned_common (FILE *stream, 8492 const char *name, 8493 unsigned HOST_WIDE_INT size, 8494 unsigned int align) 8495 { 8496 unsigned int max_common_align; 8497 8498 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64); 8499 if (align > max_common_align) 8500 { 8501 warning (0, "alignment (%u) for %s exceeds maximum alignment " 8502 "for global common data. Using %u", 8503 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT); 8504 align = max_common_align; 8505 } 8506 8507 switch_to_section (bss_section); 8508 8509 assemble_name (stream, name); 8510 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n", 8511 MAX (size, align / BITS_PER_UNIT)); 8512 } 8513 8514 /* We can't use .comm for local common storage as the SOM linker effectively 8515 treats the symbol as universal and uses the same storage for local symbols 8516 with the same name in different object files. The .block directive 8517 reserves an uninitialized block of storage. However, it's not common 8518 storage. Fortunately, GCC never requests common storage with the same 8519 name in any given translation unit. */ 8520 8521 void 8522 pa_asm_output_aligned_local (FILE *stream, 8523 const char *name, 8524 unsigned HOST_WIDE_INT size, 8525 unsigned int align) 8526 { 8527 switch_to_section (bss_section); 8528 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8529 8530 #ifdef LOCAL_ASM_OP 8531 fprintf (stream, "%s", LOCAL_ASM_OP); 8532 assemble_name (stream, name); 8533 fprintf (stream, "\n"); 8534 #endif 8535 8536 ASM_OUTPUT_LABEL (stream, name); 8537 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); 8538 } 8539 8540 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for 8541 use in fmpysub instructions. */ 8542 int 8543 fmpysuboperands (rtx *operands) 8544 { 8545 enum machine_mode mode = GET_MODE (operands[0]); 8546 8547 /* Must be a floating point mode. */ 8548 if (mode != SFmode && mode != DFmode) 8549 return 0; 8550 8551 /* All modes must be the same. */ 8552 if (! 
(mode == GET_MODE (operands[1]) 8553 && mode == GET_MODE (operands[2]) 8554 && mode == GET_MODE (operands[3]) 8555 && mode == GET_MODE (operands[4]) 8556 && mode == GET_MODE (operands[5]))) 8557 return 0; 8558 8559 /* All operands must be registers. */ 8560 if (! (GET_CODE (operands[1]) == REG 8561 && GET_CODE (operands[2]) == REG 8562 && GET_CODE (operands[3]) == REG 8563 && GET_CODE (operands[4]) == REG 8564 && GET_CODE (operands[5]) == REG)) 8565 return 0; 8566 8567 /* Only 2 real operands to the subtraction. Subtraction is not a commutative 8568 operation, so operands[4] must be the same as operand[3]. */ 8569 if (! rtx_equal_p (operands[3], operands[4])) 8570 return 0; 8571 8572 /* multiply cannot feed into subtraction. */ 8573 if (rtx_equal_p (operands[5], operands[0])) 8574 return 0; 8575 8576 /* Inout operand of sub cannot conflict with any operands from multiply. */ 8577 if (rtx_equal_p (operands[3], operands[0]) 8578 || rtx_equal_p (operands[3], operands[1]) 8579 || rtx_equal_p (operands[3], operands[2])) 8580 return 0; 8581 8582 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ 8583 if (mode == SFmode 8584 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS 8585 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS 8586 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS 8587 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS 8588 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS 8589 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) 8590 return 0; 8591 8592 /* Passed. Operands are suitable for fmpysub. */ 8593 return 1; 8594 } 8595 8596 /* Return 1 if the given constant is 2, 4, or 8. These are the valid 8597 constants for shadd instructions. */ 8598 int 8599 shadd_constant_p (int val) 8600 { 8601 if (val == 2 || val == 4 || val == 8) 8602 return 1; 8603 else 8604 return 0; 8605 } 8606 8607 /* Return 1 if OP is valid as a base or index register in a 8608 REG+REG address. */ 8609 8610 int 8611 borx_reg_operand (rtx op, enum machine_mode mode) 8612 { 8613 if (GET_CODE (op) != REG) 8614 return 0; 8615 8616 /* We must reject virtual registers as the only expressions that 8617 can be instantiated are REG and REG+CONST. */ 8618 if (op == virtual_incoming_args_rtx 8619 || op == virtual_stack_vars_rtx 8620 || op == virtual_stack_dynamic_rtx 8621 || op == virtual_outgoing_args_rtx 8622 || op == virtual_cfa_rtx) 8623 return 0; 8624 8625 /* While it's always safe to index off the frame pointer, it's not 8626 profitable to do so when the frame pointer is being eliminated. */ 8627 if (!reload_completed 8628 && flag_omit_frame_pointer 8629 && !cfun->calls_alloca 8630 && op == frame_pointer_rtx) 8631 return 0; 8632 8633 return register_operand (op, mode); 8634 } 8635 8636 /* Return 1 if this operand is anything other than a hard register. */ 8637 8638 int 8639 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 8640 { 8641 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER); 8642 } 8643 8644 /* Return TRUE if INSN branches forward. */ 8645 8646 static bool 8647 forward_branch_p (rtx insn) 8648 { 8649 rtx lab = JUMP_LABEL (insn); 8650 8651 /* The INSN must have a jump label. 
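  When insn addresses have been computed we simply compare them;
   otherwise the fallback loop walks forward from INSN, so a label
   that never turns up downstream classifies the branch as backward.
   (A one-line reading of the code below, recorded here for clarity.)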
*/ 8652 gcc_assert (lab != NULL_RTX); 8653 8654 if (INSN_ADDRESSES_SET_P ()) 8655 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn)); 8656 8657 while (insn) 8658 { 8659 if (insn == lab) 8660 return true; 8661 else 8662 insn = NEXT_INSN (insn); 8663 } 8664 8665 return false; 8666 } 8667 8668 /* Return 1 if OP is an equality comparison, else return 0. */ 8669 int 8670 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 8671 { 8672 return (GET_CODE (op) == EQ || GET_CODE (op) == NE); 8673 } 8674 8675 /* Return 1 if INSN is in the delay slot of a call instruction. */ 8676 int 8677 jump_in_call_delay (rtx insn) 8678 { 8679 8680 if (GET_CODE (insn) != JUMP_INSN) 8681 return 0; 8682 8683 if (PREV_INSN (insn) 8684 && PREV_INSN (PREV_INSN (insn)) 8685 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN) 8686 { 8687 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn))); 8688 8689 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE 8690 && XVECEXP (PATTERN (test_insn), 0, 1) == insn); 8691 8692 } 8693 else 8694 return 0; 8695 } 8696 8697 /* Output an unconditional move and branch insn. */ 8698 8699 const char * 8700 output_parallel_movb (rtx *operands, rtx insn) 8701 { 8702 int length = get_attr_length (insn); 8703 8704 /* These are the cases in which we win. */ 8705 if (length == 4) 8706 return "mov%I1b,tr %1,%0,%2"; 8707 8708 /* None of the following cases win, but they don't lose either. */ 8709 if (length == 8) 8710 { 8711 if (dbr_sequence_length () == 0) 8712 { 8713 /* Nothing in the delay slot, fake it by putting the combined 8714 insn (the copy or add) in the delay slot of a bl. */ 8715 if (GET_CODE (operands[1]) == CONST_INT) 8716 return "b %2\n\tldi %1,%0"; 8717 else 8718 return "b %2\n\tcopy %1,%0"; 8719 } 8720 else 8721 { 8722 /* Something in the delay slot, but we've got a long branch. */ 8723 if (GET_CODE (operands[1]) == CONST_INT) 8724 return "ldi %1,%0\n\tb %2"; 8725 else 8726 return "copy %1,%0\n\tb %2"; 8727 } 8728 } 8729 8730 if (GET_CODE (operands[1]) == CONST_INT) 8731 output_asm_insn ("ldi %1,%0", operands); 8732 else 8733 output_asm_insn ("copy %1,%0", operands); 8734 return output_lbranch (operands[2], insn, 1); 8735 } 8736 8737 /* Output an unconditional add and branch insn. */ 8738 8739 const char * 8740 output_parallel_addb (rtx *operands, rtx insn) 8741 { 8742 int length = get_attr_length (insn); 8743 8744 /* To make life easy we want operand0 to be the shared input/output 8745 operand and operand1 to be the readonly operand. */ 8746 if (operands[0] == operands[1]) 8747 operands[1] = operands[2]; 8748 8749 /* These are the cases in which we win. */ 8750 if (length == 4) 8751 return "add%I1b,tr %1,%0,%3"; 8752 8753 /* None of the following cases win, but they don't lose either. */ 8754 if (length == 8) 8755 { 8756 if (dbr_sequence_length () == 0) 8757 /* Nothing in the delay slot, fake it by putting the combined 8758 insn (the copy or add) in the delay slot of a bl. */ 8759 return "b %3\n\tadd%I1 %1,%0,%0"; 8760 else 8761 /* Something in the delay slot, but we've got a long branch. */ 8762 return "add%I1 %1,%0,%0\n\tb %3"; 8763 } 8764 8765 output_asm_insn ("add%I1 %1,%0,%0", operands); 8766 return output_lbranch (operands[3], insn, 1); 8767 } 8768 8769 /* Return nonzero if INSN (a jump insn) immediately follows a call 8770 to a named function. This is used to avoid filling the delay slot 8771 of the jump since it can usually be eliminated by modifying RP in 8772 the delay slot of the call. 
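
   Illustration (labels invented): given

	bl foo,%r2
	nop
	b L$5

   it is usually better to leave the jump's slot alone and let
   output_call retarget the return address from the call's own slot
   with an ldo L$5-L$6(%r2),%r2 style adjustment, after which the jump
   disappears entirely; filling the jump's slot here would block that.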
*/ 8773 8774 int 8775 following_call (rtx insn) 8776 { 8777 if (! TARGET_JUMP_IN_DELAY) 8778 return 0; 8779 8780 /* Find the previous real insn, skipping NOTEs. */ 8781 insn = PREV_INSN (insn); 8782 while (insn && GET_CODE (insn) == NOTE) 8783 insn = PREV_INSN (insn); 8784 8785 /* Check for CALL_INSNs and millicode calls. */ 8786 if (insn 8787 && ((GET_CODE (insn) == CALL_INSN 8788 && get_attr_type (insn) != TYPE_DYNCALL) 8789 || (GET_CODE (insn) == INSN 8790 && GET_CODE (PATTERN (insn)) != SEQUENCE 8791 && GET_CODE (PATTERN (insn)) != USE 8792 && GET_CODE (PATTERN (insn)) != CLOBBER 8793 && get_attr_type (insn) == TYPE_MILLI))) 8794 return 1; 8795 8796 return 0; 8797 } 8798 8799 /* We use this hook to perform a PA specific optimization which is difficult 8800 to do in earlier passes. 8801 8802 We want the delay slots of branches within jump tables to be filled. 8803 None of the compiler passes at the moment even has the notion that a 8804 PA jump table doesn't contain addresses, but instead contains actual 8805 instructions! 8806 8807 Because we actually jump into the table, the addresses of each entry 8808 must stay constant in relation to the beginning of the table (which 8809 itself must stay constant relative to the instruction to jump into 8810 it). I don't believe we can guarantee earlier passes of the compiler 8811 will adhere to those rules. 8812 8813 So, late in the compilation process we find all the jump tables, and 8814 expand them into real code -- e.g. each entry in the jump table vector 8815 will get an appropriate label followed by a jump to the final target. 8816 8817 Reorg and the final jump pass can then optimize these branches and 8818 fill their delay slots. We end up with smaller, more efficient code. 8819 8820 The jump instructions within the table are special; we must be able 8821 to identify them during assembly output (if the jumps don't get filled 8822 we need to emit a nop rather than nullifying the delay slot)). We 8823 identify jumps in switch tables by using insns with the attribute 8824 type TYPE_BTABLE_BRANCH. 8825 8826 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB 8827 insns. This serves two purposes, first it prevents jump.c from 8828 noticing that the last N entries in the table jump to the instruction 8829 immediately after the table and deleting the jumps. Second, those 8830 insns mark where we should emit .begin_brtab and .end_brtab directives 8831 when using GAS (allows for better link time optimizations). */ 8832 8833 static void 8834 pa_reorg (void) 8835 { 8836 rtx insn; 8837 8838 remove_useless_addtr_insns (1); 8839 8840 if (pa_cpu < PROCESSOR_8000) 8841 pa_combine_instructions (); 8842 8843 8844 /* This is fairly cheap, so always run it if optimizing. */ 8845 if (optimize > 0 && !TARGET_BIG_SWITCH) 8846 { 8847 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */ 8848 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 8849 { 8850 rtx pattern, tmp, location, label; 8851 unsigned int length, i; 8852 8853 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */ 8854 if (GET_CODE (insn) != JUMP_INSN 8855 || (GET_CODE (PATTERN (insn)) != ADDR_VEC 8856 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) 8857 continue; 8858 8859 /* Emit marker for the beginning of the branch table. 
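  After the explosion performed by this loop, each vector element has
   become an internal label followed by a fixed-size branch to the
   original target, e.g. (sketch, labels invented)

   L$0801:
	b L$0023
	nop

   rather than a bare address word, which is what lets the delay slots
   be filled later.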
*/ 8860 emit_insn_before (gen_begin_brtab (), insn); 8861 8862 pattern = PATTERN (insn); 8863 location = PREV_INSN (insn); 8864 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC); 8865 8866 for (i = 0; i < length; i++) 8867 { 8868 /* Emit a label before each jump to keep jump.c from 8869 removing this code. */ 8870 tmp = gen_label_rtx (); 8871 LABEL_NUSES (tmp) = 1; 8872 emit_label_after (tmp, location); 8873 location = NEXT_INSN (location); 8874 8875 if (GET_CODE (pattern) == ADDR_VEC) 8876 label = XEXP (XVECEXP (pattern, 0, i), 0); 8877 else 8878 label = XEXP (XVECEXP (pattern, 1, i), 0); 8879 8880 tmp = gen_short_jump (label); 8881 8882 /* Emit the jump itself. */ 8883 tmp = emit_jump_insn_after (tmp, location); 8884 JUMP_LABEL (tmp) = label; 8885 LABEL_NUSES (label)++; 8886 location = NEXT_INSN (location); 8887 8888 /* Emit a BARRIER after the jump. */ 8889 emit_barrier_after (location); 8890 location = NEXT_INSN (location); 8891 } 8892 8893 /* Emit marker for the end of the branch table. */ 8894 emit_insn_before (gen_end_brtab (), location); 8895 location = NEXT_INSN (location); 8896 emit_barrier_after (location); 8897 8898 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */ 8899 delete_insn (insn); 8900 } 8901 } 8902 else 8903 { 8904 /* Still need brtab marker insns. FIXME: the presence of these 8905 markers disables output of the branch table to readonly memory, 8906 and any alignment directives that might be needed. Possibly, 8907 the begin_brtab insn should be output before the label for the 8908 table. This doesn't matter at the moment since the tables are 8909 always output in the text section. */ 8910 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 8911 { 8912 /* Find an ADDR_VEC insn. */ 8913 if (GET_CODE (insn) != JUMP_INSN 8914 || (GET_CODE (PATTERN (insn)) != ADDR_VEC 8915 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) 8916 continue; 8917 8918 /* Now generate markers for the beginning and end of the 8919 branch table. */ 8920 emit_insn_before (gen_begin_brtab (), insn); 8921 emit_insn_after (gen_end_brtab (), insn); 8922 } 8923 } 8924 } 8925 8926 /* The PA has a number of odd instructions which can perform multiple 8927 tasks at once. On first generation PA machines (PA1.0 and PA1.1) 8928 it may be profitable to combine two instructions into one instruction 8929 with two outputs. It's not profitable on PA2.0 machines because the 8930 two outputs would take two slots in the reorder buffers. 8931 8932 This routine finds instructions which can be combined and combines 8933 them. We only support some of the potential combinations, and we 8934 only try common ways to find suitable instructions. 8935 8936 * addb can add two registers or a register and a small integer 8937 and jump to a nearby (+-8k) location. Normally the jump to the 8938 nearby location is conditional on the result of the add, but by 8939 using the "true" condition we can make the jump unconditional. 8940 Thus addb can perform two independent operations in one insn. 8941 8942 * movb is similar to addb in that it can perform a reg->reg 8943 or small immediate->reg copy and jump to a nearby (+-8k) location. 8944 8945 * fmpyadd and fmpysub can perform an FP multiply and either an 8946 FP add or FP sub if the operands of the multiply and add/sub are 8947 independent (there are other minor restrictions). Note both 8948 the fmpy and fadd/fsub can in theory move to better spots according 8949 to data dependencies, but for now we require the fmpy stay at a
8951 8952 * Many of the memory operations can perform pre & post updates 8953 of index registers. GCC's pre/post increment/decrement addressing 8954 is far too simple to take advantage of all the possibilities. This 8955 pass may not be suitable since those insns may not be independent. 8956 8957 * comclr can compare two ints or an int and a register, nullify 8958 the following instruction and zero some other register. This 8959 is more difficult to use as it's harder to find an insn which 8960 will generate a comclr than finding something like an unconditional 8961 branch. (conditional moves & long branches create comclr insns). 8962 8963 * Most arithmetic operations can conditionally skip the next 8964 instruction. They can be viewed as "perform this operation 8965 and conditionally jump to this nearby location" (where nearby 8966 is an insns away). These are difficult to use due to the 8967 branch length restrictions. */ 8968 8969 static void 8970 pa_combine_instructions (void) 8971 { 8972 rtx anchor, new_rtx; 8973 8974 /* This can get expensive since the basic algorithm is on the 8975 order of O(n^2) (or worse). Only do it for -O2 or higher 8976 levels of optimization. */ 8977 if (optimize < 2) 8978 return; 8979 8980 /* Walk down the list of insns looking for "anchor" insns which 8981 may be combined with "floating" insns. As the name implies, 8982 "anchor" instructions don't move, while "floating" insns may 8983 move around. */ 8984 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX)); 8985 new_rtx = make_insn_raw (new_rtx); 8986 8987 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor)) 8988 { 8989 enum attr_pa_combine_type anchor_attr; 8990 enum attr_pa_combine_type floater_attr; 8991 8992 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs. 8993 Also ignore any special USE insns. */ 8994 if ((GET_CODE (anchor) != INSN 8995 && GET_CODE (anchor) != JUMP_INSN 8996 && GET_CODE (anchor) != CALL_INSN) 8997 || GET_CODE (PATTERN (anchor)) == USE 8998 || GET_CODE (PATTERN (anchor)) == CLOBBER 8999 || GET_CODE (PATTERN (anchor)) == ADDR_VEC 9000 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC) 9001 continue; 9002 9003 anchor_attr = get_attr_pa_combine_type (anchor); 9004 /* See if anchor is an insn suitable for combination. */ 9005 if (anchor_attr == PA_COMBINE_TYPE_FMPY 9006 || anchor_attr == PA_COMBINE_TYPE_FADDSUB 9007 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH 9008 && ! forward_branch_p (anchor))) 9009 { 9010 rtx floater; 9011 9012 for (floater = PREV_INSN (anchor); 9013 floater; 9014 floater = PREV_INSN (floater)) 9015 { 9016 if (GET_CODE (floater) == NOTE 9017 || (GET_CODE (floater) == INSN 9018 && (GET_CODE (PATTERN (floater)) == USE 9019 || GET_CODE (PATTERN (floater)) == CLOBBER))) 9020 continue; 9021 9022 /* Anything except a regular INSN will stop our search. */ 9023 if (GET_CODE (floater) != INSN 9024 || GET_CODE (PATTERN (floater)) == ADDR_VEC 9025 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC) 9026 { 9027 floater = NULL_RTX; 9028 break; 9029 } 9030 9031 /* See if FLOATER is suitable for combination with the 9032 anchor. */ 9033 floater_attr = get_attr_pa_combine_type (floater); 9034 if ((anchor_attr == PA_COMBINE_TYPE_FMPY 9035 && floater_attr == PA_COMBINE_TYPE_FADDSUB) 9036 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9037 && floater_attr == PA_COMBINE_TYPE_FMPY)) 9038 { 9039 /* If ANCHOR and FLOATER can be combined, then we're 9040 done with this pass. 
*/ 9041 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9042 SET_DEST (PATTERN (floater)), 9043 XEXP (SET_SRC (PATTERN (floater)), 0), 9044 XEXP (SET_SRC (PATTERN (floater)), 1))) 9045 break; 9046 } 9047 9048 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH 9049 && floater_attr == PA_COMBINE_TYPE_ADDMOVE) 9050 { 9051 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS) 9052 { 9053 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9054 SET_DEST (PATTERN (floater)), 9055 XEXP (SET_SRC (PATTERN (floater)), 0), 9056 XEXP (SET_SRC (PATTERN (floater)), 1))) 9057 break; 9058 } 9059 else 9060 { 9061 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9062 SET_DEST (PATTERN (floater)), 9063 SET_SRC (PATTERN (floater)), 9064 SET_SRC (PATTERN (floater)))) 9065 break; 9066 } 9067 } 9068 } 9069 9070 /* If we didn't find anything on the backwards scan try forwards. */ 9071 if (!floater 9072 && (anchor_attr == PA_COMBINE_TYPE_FMPY 9073 || anchor_attr == PA_COMBINE_TYPE_FADDSUB)) 9074 { 9075 for (floater = anchor; floater; floater = NEXT_INSN (floater)) 9076 { 9077 if (GET_CODE (floater) == NOTE 9078 || (GET_CODE (floater) == INSN 9079 && (GET_CODE (PATTERN (floater)) == USE 9080 || GET_CODE (PATTERN (floater)) == CLOBBER))) 9081 9082 continue; 9083 9084 /* Anything except a regular INSN will stop our search. */ 9085 if (GET_CODE (floater) != INSN 9086 || GET_CODE (PATTERN (floater)) == ADDR_VEC 9087 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC) 9088 { 9089 floater = NULL_RTX; 9090 break; 9091 } 9092 9093 /* See if FLOATER is suitable for combination with the 9094 anchor. */ 9095 floater_attr = get_attr_pa_combine_type (floater); 9096 if ((anchor_attr == PA_COMBINE_TYPE_FMPY 9097 && floater_attr == PA_COMBINE_TYPE_FADDSUB) 9098 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9099 && floater_attr == PA_COMBINE_TYPE_FMPY)) 9100 { 9101 /* If ANCHOR and FLOATER can be combined, then we're 9102 done with this pass. */ 9103 if (pa_can_combine_p (new_rtx, anchor, floater, 1, 9104 SET_DEST (PATTERN (floater)), 9105 XEXP (SET_SRC (PATTERN (floater)), 9106 0), 9107 XEXP (SET_SRC (PATTERN (floater)), 9108 1))) 9109 break; 9110 } 9111 } 9112 } 9113 9114 /* FLOATER will be nonzero if we found a suitable floating 9115 insn for combination with ANCHOR. */ 9116 if (floater 9117 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9118 || anchor_attr == PA_COMBINE_TYPE_FMPY)) 9119 { 9120 /* Emit the new instruction and delete the old anchor. */ 9121 emit_insn_before (gen_rtx_PARALLEL 9122 (VOIDmode, 9123 gen_rtvec (2, PATTERN (anchor), 9124 PATTERN (floater))), 9125 anchor); 9126 9127 SET_INSN_DELETED (anchor); 9128 9129 /* Emit a special USE insn for FLOATER, then delete 9130 the floating insn. */ 9131 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); 9132 delete_insn (floater); 9133 9134 continue; 9135 } 9136 else if (floater 9137 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH) 9138 { 9139 rtx temp; 9140 /* Emit the new_jump instruction and delete the old anchor. */ 9141 temp 9142 = emit_jump_insn_before (gen_rtx_PARALLEL 9143 (VOIDmode, 9144 gen_rtvec (2, PATTERN (anchor), 9145 PATTERN (floater))), 9146 anchor); 9147 9148 JUMP_LABEL (temp) = JUMP_LABEL (anchor); 9149 SET_INSN_DELETED (anchor); 9150 9151 /* Emit a special USE insn for FLOATER, then delete 9152 the floating insn. 
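  (These are the special USE insns the anchor scan above deliberately
   ignores: they leave an inert placeholder where the absorbed insn
   stood and generate no code of their own.)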

static int
pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
		  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx start, end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
  INSN_CODE (new_rtx) = -1;
  insn_code_number = recog_memoized (new_rtx);
  if (insn_code_number < 0
      || (extract_insn (new_rtx), ! constrain_operands (1)))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There are up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}
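
/* For example (schematic): if FLOATER is "r9 = r5 + r7", DEST is r9 and
   SRC1/SRC2 are r5 and r7.  Any insn strictly between FLOATER and ANCHOR
   that reads r9 or sets r5 or r7 makes the checks above fail, and the
   caller keeps scanning for another floater.  */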

/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments made by
   millicode calls do not appear to happen until after the millicode
   call.  This allows reorg to put insns which set the argument
   registers into the delay slot of the millicode call -- thus they
   act more like traditional CALL_INSNs.

   Note we cannot consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */
int
insn_refs_are_delayed (rtx insn)
{
  return ((GET_CODE (insn) == INSN
	   && GET_CODE (PATTERN (insn)) != SEQUENCE
	   && GET_CODE (PATTERN (insn)) != USE
	   && GET_CODE (PATTERN (insn)) != CLOBBER
	   && get_attr_type (insn) == TYPE_MILLI));
}
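
/* Schematically, reporting argument references as delayed lets reorg turn

	ldi 7,%r26		; set up millicode argument
	bl $$mulI,%r31		; millicode call
	nop

   into

	bl $$mulI,%r31
	ldi 7,%r26		; argument moved into the delay slot

   (the call syntax above is illustrative only; see output_millicode_call
   for the sequences actually emitted).  */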

/* Promote the return value, but not the arguments.  */

static enum machine_mode
pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			  enum machine_mode mode,
			  int *punsignedp ATTRIBUTE_UNUSED,
			  const_tree fntype ATTRIBUTE_UNUSED,
			  int for_return)
{
  if (for_return == 0)
    return mode;
  return promote_mode (type, mode, punsignedp);
}

/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else promoting
   return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */

rtx
pa_function_value (const_tree valtype,
		   const_tree func ATTRIBUTE_UNUSED,
		   bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      if (TARGET_64BIT)
	{
	  /* Aggregates with a size less than or equal to 128 bits are
	     returned in GR 28(-29).  They are left justified.  The pad
	     bits are undefined.  Larger aggregates are returned in
	     memory.  */
	  rtx loc[2];
	  int i, offset = 0;
	  int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;

	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, 28 + i),
					  GEN_INT (offset));
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
	}
      else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
	{
	  /* Aggregates 5 to 8 bytes in size are returned in general
	     registers r28-r29 in the same manner as other non
	     floating-point objects.  The data is right-justified and
	     zero-extended to 64 bits.  This is opposite to the normal
	     justification used on big endian targets and requires
	     special treatment.  */
	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
				       gen_rtx_REG (DImode, 28), const0_rtx);
	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	}
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}
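
/* For instance, with TARGET_64BIT a 16-byte aggregate return value is
   described by the code above as

	(parallel:BLK [(expr_list (reg:DI 28) (const_int 0))
		       (expr_list (reg:DI 29) (const_int 8))])

   i.e., left justified in GR 28 and GR 29 (worked example only).  */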

/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
	      int named ATTRIBUTE_UNUSED)
{
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
	return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
	alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
	return NULL_RTX;
    }

  /* The 32-bit ABIs and the 64-bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

	 Remember, gprs grow towards smaller register numbers while
	 fprs grow to higher register numbers.  Also remember that
	 although FP regs are 32-bit addressable, we pretend that
	 the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
	 treatment.  */
      if (arg_size > 1
	  || mode == BLKmode
	  || (type && (AGGREGATE_TYPE_P (type)
		       || TREE_CODE (type) == COMPLEX_TYPE
		       || TREE_CODE (type) == VECTOR_TYPE)))
	{
	  /* Double-extended precision (80-bit), quad-precision (128-bit)
	     and aggregates including complex numbers are aligned on
	     128-bit boundaries.  The first eight 64-bit argument slots
	     are associated one-to-one, with general registers r26
	     through r19, and also with floating-point registers fr4
	     through fr11.  Arguments larger than one word are always
	     passed in general registers.

	     Using a PARALLEL with a word mode register results in left
	     justified data on a big-endian target.  */

	  rtx loc[8];
	  int i, offset = 0, ub = arg_size;

	  /* Align the base register.  */
	  gpr_reg_base -= alignment;

	  ub = MIN (ub, max_arg_words - cum->words - alignment);
	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, gpr_reg_base),
					  GEN_INT (offset));
	      gpr_reg_base -= 1;
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
	}
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
	 which registers we must use.  */
      if (arg_size > 1)
	{
	  if (cum->words)
	    {
	      gpr_reg_base = 23;
	      fpr_reg_base = 38;
	    }
	  else
	    {
	      gpr_reg_base = 25;
	      fpr_reg_base = 34;
	    }

	  /* Structures 5 to 8 bytes in size are passed in the general
	     registers in the same manner as other non floating-point
	     objects.  The data is right-justified and zero-extended
	     to 64 bits.  This is opposite to the normal justification
	     used on big endian targets and requires special treatment.
	     We now define BLOCK_REG_PADDING to pad these objects.
	     Aggregates, complex and vector types are passed in the same
	     manner as structures.  */
	  if (mode == BLKmode
	      || (type && (AGGREGATE_TYPE_P (type)
			   || TREE_CODE (type) == COMPLEX_TYPE
			   || TREE_CODE (type) == VECTOR_TYPE)))
	    {
	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (DImode, gpr_reg_base),
					   const0_rtx);
	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	    }
	}
      else
	{
	  /* We have a single word (32 bits).  A simple computation
	     will get us the register #s we need.  */
	  gpr_reg_base = 26 - cum->words;
	  fpr_reg_base = 32 + 2 * cum->words;
	}
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
	  is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
	  pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
	  registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
	  sibcall arguments set up with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
	 calls with the 32-bit ABI and the HP assembler since there is no
	 way to specify the argument locations in static functions.  */
      || (!TARGET_64BIT
	  && !TARGET_GAS
	  && !cum->incoming
	  && cum->indirect
	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
	= gen_rtx_PARALLEL
	    (mode,
	     gen_rtvec (2,
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, fpr_reg_base),
					   const0_rtx),
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, gpr_reg_base),
					   const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
	  /* Indirect calls in the normal 32-bit ABI require all arguments
	     to be passed in general registers.  */
	  || (!TARGET_PORTABLE_RUNTIME
	      && !TARGET_64BIT
	      && !TARGET_ELF32
	      && cum->indirect)
	  /* If the parameter is not a scalar floating-point parameter,
	     then it belongs in GPRs.  */
	  || GET_MODE_CLASS (mode) != MODE_FLOAT
	  /* Structure with single SFmode field belongs in GPR.  */
	  || (type && AGGREGATE_TYPE_P (type)))
	retval = gen_rtx_REG (mode, gpr_reg_base);
      else
	retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}
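
/* Worked example for the 32-bit ABI above: with cum->words == 1 and a
   single-word scalar float, the code computes gpr_reg_base == 25 (%r25)
   and fpr_reg_base == 34 (%fr5 under the 32-bit register numbering in
   pa.h).  Illustrative arithmetic only.  */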

/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, bool named ATTRIBUTE_UNUSED)
{
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
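
/* Worked example: on TARGET_64BIT with cum->words == 6 and a 4-word
   argument, neither early return fires (6 + 0 + 4 > 8, and 6 < 8), so
   (8 - 6 - 0) * UNITS_PER_WORD == 16 bytes of the argument are passed
   in registers and the remainder goes on the stack.  */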

/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
	{
	  /* We only want to emit a .nsubspa directive once at the
	     start of the function.  */
	  cfun->machine->in_nsubspa = 1;

	  /* Create a new subspace for the text.  This provides
	     better stub placement and one-only functions.  */
	  if (cfun->decl
	      && DECL_ONE_ONLY (cfun->decl)
	      && !DECL_WEAK (cfun->decl))
	    {
	      output_section_asm_op ("\t.SPACE $TEXT$\n"
				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
				     "ACCESS=44,SORT=24,COMDAT");
	      return;
	    }
	}
      else
	{
	  /* There isn't a current function or the body of the current
	     function has been completed.  So, we are changing to the
	     text section to output debugging information.  Thus, we
	     need to forget that we are in the text section so that
	     varasm.c will call us when text_section is selected again.  */
	  gcc_assert (!cfun || !cfun->machine
		      || cfun->machine->in_nsubspa == 2);
	  in_section = NULL;
	}
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}

/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
			   "\t.SPACE $TEXT$\n"
			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
			   "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
			   som_output_comdat_data_section_asm_op,
			   "\t.SPACE $PRIVATE$\n"
			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
			   "ACCESS=31,SORT=24,COMDAT");

  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols, which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}

/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc cannot
   be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
	  || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !reloc)
    {
      if (TARGET_SOM
	  && DECL_ONE_ONLY (exp)
	  && !DECL_WEAK (exp))
	return som_one_only_readonly_data_section;
      else
	return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp) && !reloc)
    return readonly_data_section;
  else if (TARGET_SOM
	   && TREE_CODE (exp) == VAR_DECL
	   && DECL_ONE_ONLY (exp)
	   && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}

static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here; functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* The SOM ABI says that objects larger than 64 bits are returned in
     memory.  The PA64 ABI says that objects larger than 128 bits are
     returned in memory.  Note, int_size_in_bytes can return -1 if the
     size of the object is variable or larger than the maximum value
     that can be expressed as a HOST_WIDE_INT.  It can also return zero
     for an empty type.  The simplest way to handle variable and empty
     types is to pass them in memory.  This avoids problems in defining
     the boundaries of argument slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
	  || int_size_in_bytes (type) <= 0);
}
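
/* For example, a 12-byte structure is returned in memory on the 32-bit
   port (12 > 8) but in registers on PA64 (12 <= 16); a variable-sized
   type (int_size_in_bytes == -1) or an empty type (size 0) is always
   returned in memory.  */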

/* Structure to hold the declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */
DEF_VEC_O(extern_symbol);
DEF_VEC_ALLOC_O(extern_symbol,gc);

/* Vector of extern_symbol structures.  */
static GTY(()) VEC(extern_symbol,gc) *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  extern_symbol *p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);

  gcc_assert (file == asm_out_file);
  p->decl = decl;
  p->name = name;
}

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  VEC_free (extern_symbol, gc, extern_symbols);
}
#endif

/* Return true if a change from mode FROM to mode TO for a register
   in register class RCLASS is invalid.  */

bool
pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			     enum reg_class rclass)
{
  if (from == to)
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return true;

  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return false;

  /* There is no way to load QImode or HImode values directly from
     memory.  SImode loads to the FP registers are not zero extended.
     On the 64-bit target, this conflicts with the definition of
     LOAD_EXTEND_OP.  Thus, we can't allow changing between modes
     with different sizes in the floating-point registers.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return true;

  /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return true;

  return false;
}
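
/* For instance, accessing a DFmode floating-point register as
   (subreg:SI (reg:DF ...) 0) is rejected for the floating-point
   classes by the MAYBE_FP_REG_CLASS_P test above, precisely because
   SImode loads to the FP registers are not extended.  */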

/* Returns TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tying these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
   CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

bool
pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}

/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))

/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from the text segment to a stack
   location, patched by pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      fputs ("\tldw 36(%r22),%r21\n", f);
      fputs ("\tbb,>=,n %r21,30,.+16\n", f);
      if (ASSEMBLER_DIALECT == 0)
	fputs ("\tdepi 0,31,2,%r21\n", f);
      else
	fputs ("\tdepwi 0,31,2,%r21\n", f);
      fputs ("\tldw 4(%r21),%r19\n", f);
      fputs ("\tldw 0(%r21),%r21\n", f);
      if (TARGET_PA_20)
	{
	  fputs ("\tbve (%r21)\n", f);
	  fputs ("\tldw 40(%r22),%r29\n", f);
	  fputs ("\t.word 0\n", f);
	  fputs ("\t.word 0\n", f);
	}
      else
	{
	  fputs ("\tldsid (%r21),%r1\n", f);
	  fputs ("\tmtsp %r1,%sr0\n", f);
	  fputs ("\tbe 0(%sr0,%r21)\n", f);
	  fputs ("\tldw 40(%r22),%r29\n", f);
	}
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r1\n", f);
      fputs ("\tldd 24(%r1),%r27\n", f);
      fputs ("\tldd 16(%r1),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\t.dword 0  ; fptr\n", f);
      fputs ("\t.dword 0  ; static link\n", f);
    }
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 36.
   Move the static chain value to trampoline template at offset 40.
   Move the trampoline address to trampoline template at offset 44.
   Move r19 to trampoline template at offset 48.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 36);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 40);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 44);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush;
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode,
		       plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush;
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }
}
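
/* Byte-offset recap of the 64-bit trampoline initialized above
   (a summary of the code, not a separate specification):

	 0-15	zero dwords from the template
	16	address of the trampoline code (plabel word 1)
	24	%r27, the global pointer (plabel word 2)
	32	start of the code from the template
	56	function address (FNADDR)
	64	static chain value  */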

/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 44
   (the additional 2 sets the plabel bit tested by the trampoline code
   above).  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (addr, 46));
  return addr;
}

static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}

#include "gt-pa.h"