/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2020 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
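
/* Illustrative note (added commentary, not from the original sources):
   the size check above is used, e.g., to allow the result of an 8-byte
   fmpy to be forwarded directly to an fstd of that result -- both
   operate on values of the same width, so the bypasses defined in the
   machine description can apply.  */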


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t,
				  const function_arg_info &);
static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
static void pa_function_arg_advance (cumulative_args_t,
				     const function_arg_info &);
static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
					reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
					      machine_mode, int *,
					      const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section *pa_elf_select_rtx_section (machine_mode, rtx,
					   unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}
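
/* Illustrative examples (added commentary, not from the original source):
   5 fits in 14 bits and loads with a single ldo; 0x12345000 has its low
   11 bits clear, so ldil can load it; 0x00ff8000 is a contiguous bit
   field that zdepi can deposit into a zeroed register.  */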

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
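
/* Worked example (added commentary, assuming a 64-bit HOST_WIDE_INT):
   IVAL = 0x80000000 masks to 0x80000000, which is neither 0 nor
   (unsigned HOST_WIDE_INT) -1 << 31, so it is rejected -- the value
   would change sign when extended from 32 to 64 bits.  IVAL =
   0xffffffff80001000 masks to exactly -1 << 31 and is accepted.  */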

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
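
/* Worked example (added commentary): for X = 0x00ff8000, lsb_mask is
   0x8000 and t becomes 0x00100000, a power of two, so zdepi can build
   the value (nine 1-bits deposited starting at bit 15).  For
   X = 0x00ff8001 the set bits do not form one sign-extended field and
   t is not a power of two, so it is rejected.  */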

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
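
/* Worked example (added commentary, treating MASK as a sign-extended
   32-bit constant): MASK = 0xfffff00f (pattern 1..10..01..1)
   complements to 0xff0; adding its lowest set bit (0x10) gives 0x1000,
   a power of two, so the AND is doable with depi/extru.  MASK =
   0xff00ff00 has two zero runs and fails the test.  */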

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
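
/* Worked example (added commentary): MASK = 0x000ff000 plus its lowest
   set bit yields 0x00100000, a power of two, so one depi of -1 bits can
   set that contiguous run.  MASK = 0x000ff001 is not a single run and
   is rejected.  */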

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
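
/* Illustrative RTL (added commentary): both (ashift (reg) (const_int 2))
   and its scaled-memory twin (mult (reg) (const_int 4)) satisfy the
   predicate above; the callers below convert the MULT form with
   exact_log2 when they rebuild the address in ASHIFT form.  */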

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      HOST_WIDE_INT newoffset;
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);
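      /* Worked example (added commentary): with a MODE_INT reference,
	 MASK is 0x3fff; for OFFSET 0x6000 the low bits (0x2000) reach
	 the halfway point, so we round up to NEWOFFSET 0x8000, and the
	 residual displacement 0x6000 - 0x8000 = -0x2000 fits in
	 14 bits.  */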

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  HOST_WIDE_INT val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG, REG+CONST and REG+REG addresses cost 1,
   a bare HIGH (symbolic constant) costs 2, and all other forms cost 4.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Return true if X represents a (possibly non-canonical) shNadd pattern.
   The machine mode of X is known to be SImode or DImode.  */

static bool
hppa_rtx_costs_shadd_p (rtx x)
{
  if (GET_CODE (x) != PLUS
      || !REG_P (XEXP (x, 1)))
    return false;
  rtx op0 = XEXP (x, 0);
  if (GET_CODE (op0) == ASHIFT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 1 || x == 2 || x == 3;
    }
  if (GET_CODE (op0) == MULT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 2 || x == 4 || x == 8;
    }
  return false;
}
1521
1522 /* Compute a (partial) cost for rtx X. Return true if the complete
1523 cost has been computed, and false if subexpressions should be
1524 scanned. In either case, *TOTAL contains the cost result. */
1525
1526 static bool
hppa_rtx_costs(rtx x,machine_mode mode,int outer_code,int opno ATTRIBUTE_UNUSED,int * total,bool speed)1527 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1528 int opno ATTRIBUTE_UNUSED,
1529 int *total, bool speed)
1530 {
1531 int code = GET_CODE (x);
1532
1533 switch (code)
1534 {
1535 case CONST_INT:
1536 if (outer_code == SET)
1537 *total = COSTS_N_INSNS (1);
1538 else if (INTVAL (x) == 0)
1539 *total = 0;
1540 else if (INT_14_BITS (x))
1541 *total = 1;
1542 else
1543 *total = 2;
1544 return true;
1545
1546 case HIGH:
1547 *total = 2;
1548 return true;
1549
1550 case CONST:
1551 case LABEL_REF:
1552 case SYMBOL_REF:
1553 *total = 4;
1554 return true;
1555
1556 case CONST_DOUBLE:
1557 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1558 && outer_code != SET)
1559 *total = 0;
1560 else
1561 *total = 8;
1562 return true;
1563
1564 case MULT:
1565 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1566 {
1567 *total = COSTS_N_INSNS (3);
1568 }
1569 else if (mode == DImode)
1570 {
1571 if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1572 *total = COSTS_N_INSNS (32);
1573 else
1574 *total = COSTS_N_INSNS (80);
1575 }
1576 else
1577 {
1578 if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1579 *total = COSTS_N_INSNS (8);
1580 else
1581 *total = COSTS_N_INSNS (20);
1582 }
1583 return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1584
1585 case DIV:
1586 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1587 {
1588 *total = COSTS_N_INSNS (14);
1589 return false;
1590 }
1591 /* FALLTHRU */
1592
1593 case UDIV:
1594 case MOD:
1595 case UMOD:
1596 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1597 if (mode == DImode)
1598 *total = COSTS_N_INSNS (240);
1599 else
1600 *total = COSTS_N_INSNS (60);
1601 return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1602
1603 case PLUS: /* this includes shNadd insns */
1604 case MINUS:
1605 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1606 *total = COSTS_N_INSNS (3);
1607 else if (mode == DImode)
1608 {
1609 if (TARGET_64BIT)
1610 {
1611 *total = COSTS_N_INSNS (1);
1612 /* Handle shladd,l instructions. */
1613 if (hppa_rtx_costs_shadd_p (x))
1614 return true;
1615 }
1616 else
1617 *total = COSTS_N_INSNS (2);
1618 }
1619 else
1620 {
1621 *total = COSTS_N_INSNS (1);
1622 /* Handle shNadd instructions. */
1623 if (hppa_rtx_costs_shadd_p (x))
1624 return true;
1625 }
1626 return REG_P (XEXP (x, 0))
1627 && (REG_P (XEXP (x, 1))
1628 || CONST_INT_P (XEXP (x, 1)));
1629
1630 case ASHIFT:
1631 if (mode == DImode)
1632 {
1633 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1634 {
1635 if (TARGET_64BIT)
1636 *total = COSTS_N_INSNS (1);
1637 else
1638 *total = COSTS_N_INSNS (2);
1639 return true;
1640 }
1641 else if (TARGET_64BIT)
1642 *total = COSTS_N_INSNS (3);
1643 else if (speed)
1644 *total = COSTS_N_INSNS (13);
1645 else
1646 *total = COSTS_N_INSNS (18);
1647 }
1648 else if (TARGET_64BIT)
1649 *total = COSTS_N_INSNS (4);
1650 else
1651 *total = COSTS_N_INSNS (2);
1652 return REG_P (XEXP (x, 0))
1653 && (REG_P (XEXP (x, 1))
1654 || CONST_INT_P (XEXP (x, 1)));
1655
1656 case ASHIFTRT:
1657 if (mode == DImode)
1658 {
1659 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1660 {
1661 if (TARGET_64BIT)
1662 *total = COSTS_N_INSNS (1);
1663 else
1664 *total = COSTS_N_INSNS (2);
1665 return true;
1666 }
1667 else if (TARGET_64BIT)
1668 *total = COSTS_N_INSNS (3);
1669 else if (speed)
1670 *total = COSTS_N_INSNS (14);
1671 else
1672 *total = COSTS_N_INSNS (19);
1673 }
1674 else if (TARGET_64BIT)
1675 *total = COSTS_N_INSNS (4);
1676 else
1677 *total = COSTS_N_INSNS (2);
1678 return REG_P (XEXP (x, 0))
1679 && (REG_P (XEXP (x, 1))
1680 || CONST_INT_P (XEXP (x, 1)));
1681
1682 case LSHIFTRT:
1683 if (mode == DImode)
1684 {
1685 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1686 {
1687 if (TARGET_64BIT)
1688 *total = COSTS_N_INSNS (1);
1689 else
1690 *total = COSTS_N_INSNS (2);
1691 return true;
1692 }
1693 else if (TARGET_64BIT)
1694 *total = COSTS_N_INSNS (2);
1695 else if (speed)
1696 *total = COSTS_N_INSNS (12);
1697 else
1698 *total = COSTS_N_INSNS (15);
1699 }
1700 else if (TARGET_64BIT)
1701 *total = COSTS_N_INSNS (3);
1702 else
1703 *total = COSTS_N_INSNS (2);
1704 return REG_P (XEXP (x, 0))
1705 && (REG_P (XEXP (x, 1))
1706 || CONST_INT_P (XEXP (x, 1)));
1707
1708 default:
1709 return false;
1710 }
1711 }
1712
1713 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1714 new rtx with the correct mode. */
1715 static inline rtx
force_mode(machine_mode mode,rtx orig)1716 force_mode (machine_mode mode, rtx orig)
1717 {
1718 if (mode == GET_MODE (orig))
1719 return orig;
1720
1721 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1722
1723 return gen_rtx_REG (mode, REGNO (orig));
1724 }
1725
1726 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1727
1728 static bool
pa_cannot_force_const_mem(machine_mode mode ATTRIBUTE_UNUSED,rtx x)1729 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1730 {
1731 return tls_referenced_p (x);
1732 }
1733
1734 /* Emit insns to move operands[1] into operands[0].
1735
1736 Return 1 if we have written out everything that needs to be done to
1737 do the move. Otherwise, return 0 and the caller will emit the move
1738 normally.
1739
1740 Note SCRATCH_REG may not be in the proper mode depending on how it
1741 will be used. This routine is responsible for creating a new copy
1742 of SCRATCH_REG in the proper mode. */
1743
1744 int
1745 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1746 {
1747 register rtx operand0 = operands[0];
1748 register rtx operand1 = operands[1];
1749 register rtx tem;
1750
1751 /* We can only handle indexed addresses in the destination operand
1752 of floating point stores. Thus, we need to break out indexed
1753 addresses from the destination operand. */
1754 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1755 {
1756 gcc_assert (can_create_pseudo_p ());
1757
1758 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1759 operand0 = replace_equiv_address (operand0, tem);
1760 }
1761
1762 /* On targets with non-equivalent space registers, break out unscaled
1763 indexed addresses from the source operand before the final CSE.
1764 We have to do this because the REG_POINTER flag is not correctly
1765 carried through various optimization passes and CSE may substitute
1766 a pseudo without the pointer set for one with the pointer set. As
1767 a result, we lose various opportunities to create insns with
1768 unscaled indexed addresses. */
1769 if (!TARGET_NO_SPACE_REGS
1770 && !cse_not_expected
1771 && GET_CODE (operand1) == MEM
1772 && GET_CODE (XEXP (operand1, 0)) == PLUS
1773 && REG_P (XEXP (XEXP (operand1, 0), 0))
1774 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1775 operand1
1776 = replace_equiv_address (operand1,
1777 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1778
1779 if (scratch_reg
1780 && reload_in_progress && GET_CODE (operand0) == REG
1781 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1782 operand0 = reg_equiv_mem (REGNO (operand0));
1783 else if (scratch_reg
1784 && reload_in_progress && GET_CODE (operand0) == SUBREG
1785 && GET_CODE (SUBREG_REG (operand0)) == REG
1786 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1787 {
1788 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1789 the code which tracks sets/uses for delete_output_reload. */
1790 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1791 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1792 SUBREG_BYTE (operand0));
1793 operand0 = alter_subreg (&temp, true);
1794 }
1795
1796 if (scratch_reg
1797 && reload_in_progress && GET_CODE (operand1) == REG
1798 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1799 operand1 = reg_equiv_mem (REGNO (operand1));
1800 else if (scratch_reg
1801 && reload_in_progress && GET_CODE (operand1) == SUBREG
1802 && GET_CODE (SUBREG_REG (operand1)) == REG
1803 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1804 {
1805 /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1806 the code which tracks sets/uses for delete_output_reload. */
1807 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1808 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1809 SUBREG_BYTE (operand1));
1810 operand1 = alter_subreg (&temp, true);
1811 }
1812
1813 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1814 && ((tem = find_replacement (&XEXP (operand0, 0)))
1815 != XEXP (operand0, 0)))
1816 operand0 = replace_equiv_address (operand0, tem);
1817
1818 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1819 && ((tem = find_replacement (&XEXP (operand1, 0)))
1820 != XEXP (operand1, 0)))
1821 operand1 = replace_equiv_address (operand1, tem);
1822
1823 /* Handle secondary reloads for loads/stores of FP registers from
1824 REG+D addresses where D does not fit in 5 or 14 bits, including
1825 (subreg (mem (addr))) cases, and reloads for other unsupported
1826 memory operands. */
1827 if (scratch_reg
1828 && FP_REG_P (operand0)
1829 && (MEM_P (operand1)
1830 || (GET_CODE (operand1) == SUBREG
1831 && MEM_P (XEXP (operand1, 0)))))
1832 {
1833 rtx op1 = operand1;
1834
1835 if (GET_CODE (op1) == SUBREG)
1836 op1 = XEXP (op1, 0);
1837
1838 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1839 {
1840 if (!(TARGET_PA_20
1841 && !TARGET_ELF32
1842 && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1843 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1844 {
1845 /* SCRATCH_REG will hold an address and maybe the actual data.
1846 We want it in WORD_MODE regardless of what mode it was
1847 originally given to us. */
1848 scratch_reg = force_mode (word_mode, scratch_reg);
1849
1850 /* D might not fit in 14 bits either; for such cases load D
1851 into scratch reg. */
1852 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1853 {
1854 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1855 emit_move_insn (scratch_reg,
1856 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1857 Pmode,
1858 XEXP (XEXP (op1, 0), 0),
1859 scratch_reg));
1860 }
1861 else
1862 emit_move_insn (scratch_reg, XEXP (op1, 0));
1863 op1 = replace_equiv_address (op1, scratch_reg);
1864 }
1865 }
1866 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1867 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1868 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1869 {
1870 /* Load memory address into SCRATCH_REG. */
1871 scratch_reg = force_mode (word_mode, scratch_reg);
1872 emit_move_insn (scratch_reg, XEXP (op1, 0));
1873 op1 = replace_equiv_address (op1, scratch_reg);
1874 }
1875 emit_insn (gen_rtx_SET (operand0, op1));
1876 return 1;
1877 }
1878 else if (scratch_reg
1879 && FP_REG_P (operand1)
1880 && (MEM_P (operand0)
1881 || (GET_CODE (operand0) == SUBREG
1882 && MEM_P (XEXP (operand0, 0)))))
1883 {
1884 rtx op0 = operand0;
1885
1886 if (GET_CODE (op0) == SUBREG)
1887 op0 = XEXP (op0, 0);
1888
1889 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1890 {
1891 if (!(TARGET_PA_20
1892 && !TARGET_ELF32
1893 && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1894 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1895 {
1896 /* SCRATCH_REG will hold an address and maybe the actual data.
1897 We want it in WORD_MODE regardless of what mode it was
1898 originally given to us. */
1899 scratch_reg = force_mode (word_mode, scratch_reg);
1900
1901 /* D might not fit in 14 bits either; for such cases load D
1902 into scratch reg. */
1903 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1904 {
1905 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1906 emit_move_insn (scratch_reg,
1907 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1908 Pmode,
1909 XEXP (XEXP (op0, 0), 0),
1910 scratch_reg));
1911 }
1912 else
1913 emit_move_insn (scratch_reg, XEXP (op0, 0));
1914 op0 = replace_equiv_address (op0, scratch_reg);
1915 }
1916 }
1917 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1918 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1919 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1920 {
1921 /* Load memory address into SCRATCH_REG. */
1922 scratch_reg = force_mode (word_mode, scratch_reg);
1923 emit_move_insn (scratch_reg, XEXP (op0, 0));
1924 op0 = replace_equiv_address (op0, scratch_reg);
1925 }
1926 emit_insn (gen_rtx_SET (op0, operand1));
1927 return 1;
1928 }
1929 /* Handle secondary reloads for loads of FP registers from constant
1930 expressions by forcing the constant into memory. For the most part,
1931 this is only necessary for SImode and DImode.
1932
1933 Use scratch_reg to hold the address of the memory location. */
1934 else if (scratch_reg
1935 && CONSTANT_P (operand1)
1936 && FP_REG_P (operand0))
1937 {
1938 rtx const_mem, xoperands[2];
1939
1940 if (operand1 == CONST0_RTX (mode))
1941 {
1942 emit_insn (gen_rtx_SET (operand0, operand1));
1943 return 1;
1944 }
1945
1946 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1947 it in WORD_MODE regardless of what mode it was originally given
1948 to us. */
1949 scratch_reg = force_mode (word_mode, scratch_reg);
1950
1951 /* Force the constant into memory and put the address of the
1952 memory location into scratch_reg. */
1953 const_mem = force_const_mem (mode, operand1);
1954 xoperands[0] = scratch_reg;
1955 xoperands[1] = XEXP (const_mem, 0);
1956 pa_emit_move_sequence (xoperands, Pmode, 0);
1957
1958 /* Now load the destination register. */
1959 emit_insn (gen_rtx_SET (operand0,
1960 replace_equiv_address (const_mem, scratch_reg)));
1961 return 1;
1962 }
1963 /* Handle secondary reloads for SAR. These occur when trying to load
1964 the SAR from memory or a constant. */
1965 else if (scratch_reg
1966 && GET_CODE (operand0) == REG
1967 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1968 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1969 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1970 {
1971 /* D might not fit in 14 bits either; for such cases load D into
1972 scratch reg. */
1973 if (GET_CODE (operand1) == MEM
1974 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1975 {
1976 /* We are reloading the address into the scratch register, so we
1977 want to make sure the scratch register is a full register. */
1978 scratch_reg = force_mode (word_mode, scratch_reg);
1979
1980 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1981 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1982 0)),
1983 Pmode,
1984 XEXP (XEXP (operand1, 0),
1985 0),
1986 scratch_reg));
1987
1988 /* Now we are going to load the scratch register from memory,
1989 we want to load it in the same width as the original MEM,
1990 which must be the same as the width of the ultimate destination,
1991 OPERAND0. */
1992 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1993
1994 emit_move_insn (scratch_reg,
1995 replace_equiv_address (operand1, scratch_reg));
1996 }
1997 else
1998 {
1999 /* We want to load the scratch register using the same mode as
2000 the ultimate destination. */
2001 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2002
2003 emit_move_insn (scratch_reg, operand1);
2004 }
2005
2006 /* And emit the insn to set the ultimate destination. We know that
2007 the scratch register has the same mode as the destination at this
2008 point. */
2009 emit_move_insn (operand0, scratch_reg);
2010 return 1;
2011 }
2012
2013 /* Handle the most common case: storing into a register. */
2014 if (register_operand (operand0, mode))
2015 {
2016 /* Legitimize TLS symbol references. This happens for references
2017 that aren't a legitimate constant. */
2018 if (PA_SYMBOL_REF_TLS_P (operand1))
2019 operand1 = legitimize_tls_address (operand1);
2020
2021 if (register_operand (operand1, mode)
2022 || (GET_CODE (operand1) == CONST_INT
2023 && pa_cint_ok_for_move (UINTVAL (operand1)))
2024 || (operand1 == CONST0_RTX (mode))
2025 || (GET_CODE (operand1) == HIGH
2026 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2027 /* Only `general_operands' can come here, so MEM is ok. */
2028 || GET_CODE (operand1) == MEM)
2029 {
2030 /* Various sets are created during RTL generation which don't
2031 have the REG_POINTER flag correctly set. After the CSE pass,
2032 instruction recognition can fail if we don't consistently
2033 set this flag when performing register copies. This should
2034 also improve the opportunities for creating insns that use
2035 unscaled indexing. */
2036 if (REG_P (operand0) && REG_P (operand1))
2037 {
2038 if (REG_POINTER (operand1)
2039 && !REG_POINTER (operand0)
2040 && !HARD_REGISTER_P (operand0))
2041 copy_reg_pointer (operand0, operand1);
2042 }
2043
2044 /* When MEMs are broken out, the REG_POINTER flag doesn't
2045 get set. In some cases, we can set the REG_POINTER flag
2046 from the declaration for the MEM. */
2047 if (REG_P (operand0)
2048 && GET_CODE (operand1) == MEM
2049 && !REG_POINTER (operand0))
2050 {
2051 tree decl = MEM_EXPR (operand1);
2052
2053 /* Set the register pointer flag and register alignment
2054 if the declaration for this memory reference is a
2055 pointer type. */
2056 if (decl)
2057 {
2058 tree type;
2059
2060 /* If this is a COMPONENT_REF, use the FIELD_DECL from
2061 tree operand 1. */
2062 if (TREE_CODE (decl) == COMPONENT_REF)
2063 decl = TREE_OPERAND (decl, 1);
2064
2065 type = TREE_TYPE (decl);
2066 type = strip_array_types (type);
2067
2068 if (POINTER_TYPE_P (type))
2069 mark_reg_pointer (operand0, BITS_PER_UNIT);
2070 }
2071 }
2072
2073 emit_insn (gen_rtx_SET (operand0, operand1));
2074 return 1;
2075 }
2076 }
2077 else if (GET_CODE (operand0) == MEM)
2078 {
2079 if (mode == DFmode && operand1 == CONST0_RTX (mode)
2080 && !(reload_in_progress || reload_completed))
2081 {
2082 rtx temp = gen_reg_rtx (DFmode);
2083
2084 emit_insn (gen_rtx_SET (temp, operand1));
2085 emit_insn (gen_rtx_SET (operand0, temp));
2086 return 1;
2087 }
2088 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2089 {
2090 /* Run this case quickly. */
2091 emit_insn (gen_rtx_SET (operand0, operand1));
2092 return 1;
2093 }
2094 if (! (reload_in_progress || reload_completed))
2095 {
2096 operands[0] = validize_mem (operand0);
2097 operands[1] = operand1 = force_reg (mode, operand1);
2098 }
2099 }
2100
2101 /* Simplify the source if we need to.
2102 Note we do have to handle function labels here, even though we do
2103 not consider them legitimate constants. Loop optimizations can
2104 call the emit_move_xxx routines with one as a source. */
2105 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2106 || (GET_CODE (operand1) == HIGH
2107 && symbolic_operand (XEXP (operand1, 0), mode))
2108 || function_label_operand (operand1, VOIDmode)
2109 || tls_referenced_p (operand1))
2110 {
2111 int ishighonly = 0;
2112
2113 if (GET_CODE (operand1) == HIGH)
2114 {
2115 ishighonly = 1;
2116 operand1 = XEXP (operand1, 0);
2117 }
2118 if (symbolic_operand (operand1, mode))
2119 {
2120 /* Argh. The assembler and linker can't handle arithmetic
2121 involving plabels.
2122
2123 So we force the plabel into memory, load operand0 from
2124 the memory location, then add in the constant part. */
2125 if ((GET_CODE (operand1) == CONST
2126 && GET_CODE (XEXP (operand1, 0)) == PLUS
2127 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2128 VOIDmode))
2129 || function_label_operand (operand1, VOIDmode))
2130 {
2131 rtx temp, const_part;
2132
2133 /* Figure out what (if any) scratch register to use. */
2134 if (reload_in_progress || reload_completed)
2135 {
2136 scratch_reg = scratch_reg ? scratch_reg : operand0;
2137 /* SCRATCH_REG will hold an address and maybe the actual
2138 data. We want it in WORD_MODE regardless of what mode it
2139 was originally given to us. */
2140 scratch_reg = force_mode (word_mode, scratch_reg);
2141 }
2142 else if (flag_pic)
2143 scratch_reg = gen_reg_rtx (Pmode);
2144
2145 if (GET_CODE (operand1) == CONST)
2146 {
2147 /* Save away the constant part of the expression. */
2148 const_part = XEXP (XEXP (operand1, 0), 1);
2149 gcc_assert (GET_CODE (const_part) == CONST_INT);
2150
2151 /* Force the function label into memory. */
2152 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2153 }
2154 else
2155 {
2156 /* No constant part. */
2157 const_part = NULL_RTX;
2158
2159 /* Force the function label into memory. */
2160 temp = force_const_mem (mode, operand1);
2161 }
2162
2163
2164 /* Get the address of the memory location. PIC-ify it if
2165 necessary. */
2166 temp = XEXP (temp, 0);
2167 if (flag_pic)
2168 temp = legitimize_pic_address (temp, mode, scratch_reg);
2169
2170 /* Put the address of the memory location into our destination
2171 register. */
2172 operands[1] = temp;
2173 pa_emit_move_sequence (operands, mode, scratch_reg);
2174
2175 /* Now load from the memory location into our destination
2176 register. */
2177 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2178 pa_emit_move_sequence (operands, mode, scratch_reg);
2179
2180 /* And add back in the constant part. */
2181 if (const_part != NULL_RTX)
2182 expand_inc (operand0, const_part);
2183
2184 return 1;
2185 }
2186
2187 if (flag_pic)
2188 {
2189 rtx_insn *insn;
2190 rtx temp;
2191
2192 if (reload_in_progress || reload_completed)
2193 {
2194 temp = scratch_reg ? scratch_reg : operand0;
2195 /* TEMP will hold an address and maybe the actual
2196 data. We want it in WORD_MODE regardless of what mode it
2197 was originally given to us. */
2198 temp = force_mode (word_mode, temp);
2199 }
2200 else
2201 temp = gen_reg_rtx (Pmode);
2202
2203 /* Force (const (plus (symbol) (const_int))) to memory
2204 if the const_int will not fit in 14 bits. Although
2205 this requires a relocation, the instruction sequence
2206 needed to load the value is shorter. */
2207 if (GET_CODE (operand1) == CONST
2208 && GET_CODE (XEXP (operand1, 0)) == PLUS
2209 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2210 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2211 {
2212 rtx x, m = force_const_mem (mode, operand1);
2213
2214 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2215 x = replace_equiv_address (m, x);
2216 insn = emit_move_insn (operand0, x);
2217 }
2218 else
2219 {
2220 operands[1] = legitimize_pic_address (operand1, mode, temp);
2221 if (REG_P (operand0) && REG_P (operands[1]))
2222 copy_reg_pointer (operand0, operands[1]);
2223 insn = emit_move_insn (operand0, operands[1]);
2224 }
2225
2226 /* Put a REG_EQUAL note on this insn. */
2227 set_unique_reg_note (insn, REG_EQUAL, operand1);
2228 }
2229 /* On the HPPA, references to data space are supposed to use dp,
2230 register 27, but showing it in the RTL inhibits various cse
2231 and loop optimizations. */
2232 else
2233 {
2234 rtx temp, set;
2235
2236 if (reload_in_progress || reload_completed)
2237 {
2238 temp = scratch_reg ? scratch_reg : operand0;
2239 /* TEMP will hold an address and maybe the actual
2240 data. We want it in WORD_MODE regardless of what mode it
2241 was originally given to us. */
2242 temp = force_mode (word_mode, temp);
2243 }
2244 else
2245 temp = gen_reg_rtx (mode);
2246
2247 /* Loading a SYMBOL_REF into a register makes that register
2248 safe to be used as the base in an indexed address.
2249
2250 Don't mark hard registers though. That loses. */
2251 if (GET_CODE (operand0) == REG
2252 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2253 mark_reg_pointer (operand0, BITS_PER_UNIT);
2254 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2255 mark_reg_pointer (temp, BITS_PER_UNIT);
2256
2257 if (ishighonly)
2258 set = gen_rtx_SET (operand0, temp);
2259 else
2260 set = gen_rtx_SET (operand0,
2261 gen_rtx_LO_SUM (mode, temp, operand1));
2262
2263 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2264 emit_insn (set);
2265
2266 }
2267 return 1;
2268 }
2269 else if (tls_referenced_p (operand1))
2270 {
2271 rtx tmp = operand1;
2272 rtx addend = NULL;
2273
2274 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2275 {
2276 addend = XEXP (XEXP (tmp, 0), 1);
2277 tmp = XEXP (XEXP (tmp, 0), 0);
2278 }
2279
2280 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2281 tmp = legitimize_tls_address (tmp);
2282 if (addend)
2283 {
2284 tmp = gen_rtx_PLUS (mode, tmp, addend);
2285 tmp = force_operand (tmp, operands[0]);
2286 }
2287 operands[1] = tmp;
2288 }
2289 else if (GET_CODE (operand1) != CONST_INT
2290 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2291 {
2292 rtx temp;
2293 rtx_insn *insn;
2294 rtx op1 = operand1;
2295 HOST_WIDE_INT value = 0;
2296 HOST_WIDE_INT insv = 0;
2297 int insert = 0;
2298
2299 if (GET_CODE (operand1) == CONST_INT)
2300 value = INTVAL (operand1);
2301
2302 if (TARGET_64BIT
2303 && GET_CODE (operand1) == CONST_INT
2304 && HOST_BITS_PER_WIDE_INT > 32
2305 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2306 {
2307 HOST_WIDE_INT nval;
2308
2309 /* Extract the low order 32 bits of the value and sign extend.
2310 If the new value is the same as the original value, we can
2311 use the original value as-is. If the new value is
2312 different, we use it and insert the most-significant 32 bits
2313 of the original value into the final result. */
2314 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2315 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2316 if (value != nval)
2317 {
2318 #if HOST_BITS_PER_WIDE_INT > 32
2319 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2320 #endif
2321 insert = 1;
2322 value = nval;
2323 operand1 = GEN_INT (nval);
2324 }
2325 }
2326
2327 if (reload_in_progress || reload_completed)
2328 temp = scratch_reg ? scratch_reg : operand0;
2329 else
2330 temp = gen_reg_rtx (mode);
2331
2332 /* We don't directly split DImode constants on 32-bit targets
2333 because PLUS uses an 11-bit immediate and the insn sequence
2334 generated is not as efficient as the one using HIGH/LO_SUM. */
2335 if (GET_CODE (operand1) == CONST_INT
2336 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2337 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2338 && !insert)
2339 {
2340 /* Directly break constant into high and low parts. This
2341 provides better optimization opportunities because various
2342 passes recognize constants split with PLUS but not LO_SUM.
2343 We use a 14-bit signed low part except when the addition
2344 of 0x4000 to the high part might change the sign of the
2345 high part. */
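     /* Illustrative arithmetic (not from the original sources): for
	value == 0x12347000 the 14-bit low field is 0x3000 >= 0x2000, so
	high becomes 0x12344000 + 0x4000 == 0x12348000 and
	low == value - high == -0x1000, which still fits in a signed
	14-bit immediate:  ldil L'0x12348000,%r ; ldo -4096(%r),%r.  */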
2346 HOST_WIDE_INT low = value & 0x3fff;
2347 HOST_WIDE_INT high = value & ~ 0x3fff;
2348
2349 if (low >= 0x2000)
2350 {
2351 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2352 high += 0x2000;
2353 else
2354 high += 0x4000;
2355 }
2356
2357 low = value - high;
2358
2359 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2360 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2361 }
2362 else
2363 {
2364 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2365 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2366 }
2367
2368 insn = emit_move_insn (operands[0], operands[1]);
2369
2370 /* Now insert the most significant 32 bits of the value
2371 into the register. When we don't have a second register
2372 available, it could take up to nine instructions to load
2373 a 64-bit integer constant. Prior to reload, we force
2374 constants that would take more than three instructions
2375 to load to the constant pool. During and after reload,
2376 we have to handle all possible values. */
2377 if (insert)
2378 {
2379 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2380 register and the value to be inserted is outside the
2381 range that can be loaded with three depdi instructions. */
2382 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2383 {
2384 operand1 = GEN_INT (insv);
2385
2386 emit_insn (gen_rtx_SET (temp,
2387 gen_rtx_HIGH (mode, operand1)));
2388 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2389 if (mode == DImode)
2390 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2391 const0_rtx, temp));
2392 else
2393 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2394 const0_rtx, temp));
2395 }
2396 else
2397 {
2398 int len = 5, pos = 27;
2399
2400 /* Insert the bits using the depdi instruction. */
2401 while (pos >= 0)
2402 {
2403 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2404 HOST_WIDE_INT sign = v5 < 0;
2405
2406 /* Left extend the insertion. */
2407 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2408 while (pos > 0 && (insv & 1) == sign)
2409 {
2410 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2411 len += 1;
2412 pos -= 1;
2413 }
2414
2415 if (mode == DImode)
2416 insn = emit_insn (gen_insvdi (operand0,
2417 GEN_INT (len),
2418 GEN_INT (pos),
2419 GEN_INT (v5)));
2420 else
2421 insn = emit_insn (gen_insvsi (operand0,
2422 GEN_INT (len),
2423 GEN_INT (pos),
2424 GEN_INT (v5)));
2425
2426 len = pos > 0 && pos < 5 ? pos : 5;
2427 pos -= len;
2428 }
2429 }
2430 }
2431
2432 set_unique_reg_note (insn, REG_EQUAL, op1);
2433
2434 return 1;
2435 }
2436 }
2437 /* Now have insn-emit do whatever it normally does. */
2438 return 0;
2439 }
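/* A sketch of typical use (assumed, not copied from the machine
   description): a move expander in pa.md calls this routine first and
   only falls back to the default move when it returns zero:

     if (pa_emit_move_sequence (operands, SImode, 0))
       DONE;
*/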
2440
2441 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2442 it will need a link/runtime reloc). */
2443
2444 int
2445 pa_reloc_needed (tree exp)
2446 {
2447 int reloc = 0;
2448
2449 switch (TREE_CODE (exp))
2450 {
2451 case ADDR_EXPR:
2452 return 1;
2453
2454 case POINTER_PLUS_EXPR:
2455 case PLUS_EXPR:
2456 case MINUS_EXPR:
2457 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2458 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2459 break;
2460
2461 CASE_CONVERT:
2462 case NON_LVALUE_EXPR:
2463 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2464 break;
2465
2466 case CONSTRUCTOR:
2467 {
2468 tree value;
2469 unsigned HOST_WIDE_INT ix;
2470
2471 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2472 if (value)
2473 reloc |= pa_reloc_needed (value);
2474 }
2475 break;
2476
2477 case ERROR_MARK:
2478 break;
2479
2480 default:
2481 break;
2482 }
2483 return reloc;
2484 }
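/* For illustration (not from the original sources): an initializer
   such as `&x + 4' is a POINTER_PLUS_EXPR over an ADDR_EXPR, so
   pa_reloc_needed returns 1 for it, while a plain integer constant
   initializer yields 0.  */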
2485
2486
2487 /* Return the best assembler insn template
2488 for moving operands[1] into operands[0] as a fullword. */
2489 const char *
2490 pa_singlemove_string (rtx *operands)
2491 {
2492 HOST_WIDE_INT intval;
2493
2494 if (GET_CODE (operands[0]) == MEM)
2495 return "stw %r1,%0";
2496 if (GET_CODE (operands[1]) == MEM)
2497 return "ldw %1,%0";
2498 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2499 {
2500 long i;
2501
2502 gcc_assert (GET_MODE (operands[1]) == SFmode);
2503
2504 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2505 bit pattern. */
2506 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2507
2508 operands[1] = GEN_INT (i);
2509 /* Fall through to CONST_INT case. */
2510 }
2511 if (GET_CODE (operands[1]) == CONST_INT)
2512 {
2513 intval = INTVAL (operands[1]);
2514
2515 if (VAL_14_BITS_P (intval))
2516 return "ldi %1,%0";
2517 else if ((intval & 0x7ff) == 0)
2518 return "ldil L'%1,%0";
2519 else if (pa_zdepi_cint_p (intval))
2520 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2521 else
2522 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2523 }
2524 return "copy %1,%0";
2525 }
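/* Illustrative mappings (a sketch, not from the original sources):
       123	  -> "ldi 123,%0"	  (fits in 14 bits)
       0x12345000 -> "ldil L'...,%0"	  (low 11 bits zero)
       0x00ff0000 -> "zdepi -1,15,8,%0"   (assuming %Z1 prints the
					   operands computed below)
   any other constant takes the two-insn ldil/ldo sequence.  */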
2526
2527
2528 /* Compute position (in OP[1]) and width (in OP[2])
2529 useful for copying IMM to a register using the zdepi
2530 instructions. Store the immediate value to insert in OP[0]. */
2531 static void
2532 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2533 {
2534 int lsb, len;
2535
2536 /* Find the least significant set bit in IMM. */
2537 for (lsb = 0; lsb < 32; lsb++)
2538 {
2539 if ((imm & 1) != 0)
2540 break;
2541 imm >>= 1;
2542 }
2543
2544 /* Choose variants based on *sign* of the 5-bit field. */
2545 if ((imm & 0x10) == 0)
2546 len = (lsb <= 28) ? 4 : 32 - lsb;
2547 else
2548 {
2549 /* Find the width of the bitstring in IMM. */
2550 for (len = 5; len < 32 - lsb; len++)
2551 {
2552 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2553 break;
2554 }
2555
2556 /* Sign extend IMM as a 5-bit value. */
2557 imm = (imm & 0xf) - 0x10;
2558 }
2559
2560 op[0] = imm;
2561 op[1] = 31 - lsb;
2562 op[2] = len;
2563 }
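/* Worked example (illustrative, not from the original sources): for
   IMM == 0x00ff0000 the least significant set bit is 16 and the
   bitstring is 8 bits wide with bit 4 of the shifted value set, so
   OP[0] == -1, OP[1] == 31 - 16 == 15 and OP[2] == 8, i.e. a
   "zdepi -1,15,8" that deposits eight ones ending at PA bit 15.  */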
2564
2565 /* Compute position (in OP[1]) and width (in OP[2])
2566 useful for copying IMM to a register using the depdi,z
2567 instructions. Store the immediate value to insert in OP[0]. */
2568
2569 static void
2570 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2571 {
2572 int lsb, len, maxlen;
2573
2574 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2575
2576 /* Find the least significant set bit in IMM. */
2577 for (lsb = 0; lsb < maxlen; lsb++)
2578 {
2579 if ((imm & 1) != 0)
2580 break;
2581 imm >>= 1;
2582 }
2583
2584 /* Choose variants based on *sign* of the 5-bit field. */
2585 if ((imm & 0x10) == 0)
2586 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2587 else
2588 {
2589 /* Find the width of the bitstring in IMM. */
2590 for (len = 5; len < maxlen - lsb; len++)
2591 {
2592 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2593 break;
2594 }
2595
2596 /* Extend length if host is narrow and IMM is negative. */
2597 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2598 len += 32;
2599
2600 /* Sign extend IMM as a 5-bit value. */
2601 imm = (imm & 0xf) - 0x10;
2602 }
2603
2604 op[0] = imm;
2605 op[1] = 63 - lsb;
2606 op[2] = len;
2607 }
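/* Similarly (illustrative): for IMM == 0xff0000000000 the least
   significant set bit is 40, giving OP[0] == -1,
   OP[1] == 63 - 40 == 23 and OP[2] == 8, which matches a
   "depdi,z -1,23,8" instruction.  */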
2608
2609 /* Output assembler code to perform a doubleword move insn
2610 with operands OPERANDS. */
2611
2612 const char *
2613 pa_output_move_double (rtx *operands)
2614 {
2615 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2616 rtx latehalf[2];
2617 rtx addreg0 = 0, addreg1 = 0;
2618 int highonly = 0;
2619
2620 /* First classify both operands. */
2621
2622 if (REG_P (operands[0]))
2623 optype0 = REGOP;
2624 else if (offsettable_memref_p (operands[0]))
2625 optype0 = OFFSOP;
2626 else if (GET_CODE (operands[0]) == MEM)
2627 optype0 = MEMOP;
2628 else
2629 optype0 = RNDOP;
2630
2631 if (REG_P (operands[1]))
2632 optype1 = REGOP;
2633 else if (CONSTANT_P (operands[1]))
2634 optype1 = CNSTOP;
2635 else if (offsettable_memref_p (operands[1]))
2636 optype1 = OFFSOP;
2637 else if (GET_CODE (operands[1]) == MEM)
2638 optype1 = MEMOP;
2639 else
2640 optype1 = RNDOP;
2641
2642 /* Check for cases that the operand constraints are not
2643 supposed to allow. */
2644 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2645
2646 /* Handle copies between general and floating registers. */
2647
2648 if (optype0 == REGOP && optype1 == REGOP
2649 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2650 {
2651 if (FP_REG_P (operands[0]))
2652 {
2653 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2654 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2655 return "{fldds|fldd} -16(%%sp),%0";
2656 }
2657 else
2658 {
2659 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2660 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2661 return "{ldws|ldw} -12(%%sp),%R0";
2662 }
2663 }
2664
2665 /* Handle auto decrementing and incrementing loads and stores
2666 specifically, since the structure of the function doesn't work
2667 for them without major modification. Handle them better once this
2668 port learns about the general inc/dec addressing of the PA.
2669 (This was written by tege. Chide him if it doesn't work.) */
2670
2671 if (optype0 == MEMOP)
2672 {
2673 /* We have to output the address syntax ourselves, since print_operand
2674 doesn't deal with the addresses we want to use. Fix this later. */
2675
2676 rtx addr = XEXP (operands[0], 0);
2677 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2678 {
2679 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2680
2681 operands[0] = XEXP (addr, 0);
2682 gcc_assert (GET_CODE (operands[1]) == REG
2683 && GET_CODE (operands[0]) == REG);
2684
2685 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2686
2687 /* No overlap between high target register and address
2688 register. (We do this in a non-obvious way to
2689 save a register file writeback) */
2690 if (GET_CODE (addr) == POST_INC)
2691 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2692 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2693 }
2694 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2695 {
2696 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2697
2698 operands[0] = XEXP (addr, 0);
2699 gcc_assert (GET_CODE (operands[1]) == REG
2700 && GET_CODE (operands[0]) == REG);
2701
2702 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2703 /* No overlap between high target register and address
2704 register. (We do this in a non-obvious way to save a
2705 register file writeback) */
2706 if (GET_CODE (addr) == PRE_INC)
2707 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2708 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2709 }
2710 }
2711 if (optype1 == MEMOP)
2712 {
2713 /* We have to output the address syntax ourselves, since print_operand
2714 doesn't deal with the addresses we want to use. Fix this later. */
2715
2716 rtx addr = XEXP (operands[1], 0);
2717 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2718 {
2719 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2720
2721 operands[1] = XEXP (addr, 0);
2722 gcc_assert (GET_CODE (operands[0]) == REG
2723 && GET_CODE (operands[1]) == REG);
2724
2725 if (!reg_overlap_mentioned_p (high_reg, addr))
2726 {
2727 /* No overlap between high target register and address
2728 register. (We do this in a non-obvious way to
2729 save a register file writeback) */
2730 if (GET_CODE (addr) == POST_INC)
2731 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2732 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2733 }
2734 else
2735 {
2736 /* This is an undefined situation. We should load into the
2737 address register *and* update that register. Probably
2738 we don't need to handle this at all. */
2739 if (GET_CODE (addr) == POST_INC)
2740 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2741 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2742 }
2743 }
2744 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2745 {
2746 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2747
2748 operands[1] = XEXP (addr, 0);
2749 gcc_assert (GET_CODE (operands[0]) == REG
2750 && GET_CODE (operands[1]) == REG);
2751
2752 if (!reg_overlap_mentioned_p (high_reg, addr))
2753 {
2754 /* No overlap between high target register and address
2755 register. (We do this in a non-obvious way to
2756 save a register file writeback) */
2757 if (GET_CODE (addr) == PRE_INC)
2758 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2759 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2760 }
2761 else
2762 {
2763 /* This is an undefined situation. We should load into the
2764 address register *and* update that register. Probably
2765 we don't need to handle this at all. */
2766 if (GET_CODE (addr) == PRE_INC)
2767 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2768 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2769 }
2770 }
2771 else if (GET_CODE (addr) == PLUS
2772 && GET_CODE (XEXP (addr, 0)) == MULT)
2773 {
2774 rtx xoperands[4];
2775
2776 /* Load address into left half of destination register. */
2777 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2778 xoperands[1] = XEXP (addr, 1);
2779 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2780 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2781 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2782 xoperands);
2783 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2784 }
2785 else if (GET_CODE (addr) == PLUS
2786 && REG_P (XEXP (addr, 0))
2787 && REG_P (XEXP (addr, 1)))
2788 {
2789 rtx xoperands[3];
2790
2791 /* Load address into left half of destination register. */
2792 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2793 xoperands[1] = XEXP (addr, 0);
2794 xoperands[2] = XEXP (addr, 1);
2795 output_asm_insn ("{addl|add,l} %1,%2,%0",
2796 xoperands);
2797 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2798 }
2799 }
2800
2801 /* If an operand is an unoffsettable memory ref, find a register
2802 we can increment temporarily to make it refer to the second word. */
2803
2804 if (optype0 == MEMOP)
2805 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2806
2807 if (optype1 == MEMOP)
2808 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2809
2810 /* Ok, we can do one word at a time.
2811 Normally we do the low-numbered word first.
2812
2813 In either case, set up in LATEHALF the operands to use
2814 for the high-numbered word and in some cases alter the
2815 operands in OPERANDS to be suitable for the low-numbered word. */
2816
2817 if (optype0 == REGOP)
2818 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2819 else if (optype0 == OFFSOP)
2820 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2821 else
2822 latehalf[0] = operands[0];
2823
2824 if (optype1 == REGOP)
2825 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2826 else if (optype1 == OFFSOP)
2827 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2828 else if (optype1 == CNSTOP)
2829 {
2830 if (GET_CODE (operands[1]) == HIGH)
2831 {
2832 operands[1] = XEXP (operands[1], 0);
2833 highonly = 1;
2834 }
2835 split_double (operands[1], &operands[1], &latehalf[1]);
2836 }
2837 else
2838 latehalf[1] = operands[1];
2839
2840 /* If the first move would clobber the source of the second one,
2841 do them in the other order.
2842
2843 This can happen in two cases:
2844
2845 mem -> register where the first half of the destination register
2846 is the same register used in the memory's address. Reload
2847 can create such insns.
2848
2849 mem in this case will be either register indirect or register
2850 indirect plus a valid offset.
2851
2852 register -> register move where REGNO(dst) == REGNO(src + 1)
2853 someone (Tim/Tege?) claimed this can happen for parameter loads.
2854
2855 Handle mem -> register case first. */
2856 if (optype0 == REGOP
2857 && (optype1 == MEMOP || optype1 == OFFSOP)
2858 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2859 {
2860 /* Do the late half first. */
2861 if (addreg1)
2862 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2863 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2864
2865 /* Then clobber. */
2866 if (addreg1)
2867 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2868 return pa_singlemove_string (operands);
2869 }
2870
2871 /* Now handle register -> register case. */
2872 if (optype0 == REGOP && optype1 == REGOP
2873 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2874 {
2875 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2876 return pa_singlemove_string (operands);
2877 }
2878
2879 /* Normal case: do the two words, low-numbered first. */
2880
2881 output_asm_insn (pa_singlemove_string (operands), operands);
2882
2883 /* Make any unoffsettable addresses point at high-numbered word. */
2884 if (addreg0)
2885 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2886 if (addreg1)
2887 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2888
2889 /* Do high-numbered word. */
2890 if (highonly)
2891 output_asm_insn ("ldil L'%1,%0", latehalf);
2892 else
2893 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2894
2895 /* Undo the adds we just did. */
2896 if (addreg0)
2897 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2898 if (addreg1)
2899 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2900
2901 return "";
2902 }
2903
2904 const char *
2905 pa_output_fp_move_double (rtx *operands)
2906 {
2907 if (FP_REG_P (operands[0]))
2908 {
2909 if (FP_REG_P (operands[1])
2910 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2911 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2912 else
2913 output_asm_insn ("fldd%F1 %1,%0", operands);
2914 }
2915 else if (FP_REG_P (operands[1]))
2916 {
2917 output_asm_insn ("fstd%F0 %1,%0", operands);
2918 }
2919 else
2920 {
2921 rtx xoperands[2];
2922
2923 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2924
2925 /* This is a pain. You have to be prepared to deal with an
2926 arbitrary address here including pre/post increment/decrement.
2927
2928 So we avoid such addresses in the MD. */
2929 gcc_assert (GET_CODE (operands[0]) == REG);
2930
2931 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2932 xoperands[0] = operands[0];
2933 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2934 }
2935 return "";
2936 }
2937
2938 /* Return a REG that occurs in ADDR with coefficient 1.
2939 ADDR can be effectively incremented by incrementing REG. */
2940
2941 static rtx
2942 find_addr_reg (rtx addr)
2943 {
2944 while (GET_CODE (addr) == PLUS)
2945 {
2946 if (GET_CODE (XEXP (addr, 0)) == REG)
2947 addr = XEXP (addr, 0);
2948 else if (GET_CODE (XEXP (addr, 1)) == REG)
2949 addr = XEXP (addr, 1);
2950 else if (CONSTANT_P (XEXP (addr, 0)))
2951 addr = XEXP (addr, 1);
2952 else if (CONSTANT_P (XEXP (addr, 1)))
2953 addr = XEXP (addr, 0);
2954 else
2955 gcc_unreachable ();
2956 }
2957 gcc_assert (GET_CODE (addr) == REG);
2958 return addr;
2959 }
2960
2961 /* Emit code to perform a block move.
2962
2963 OPERANDS[0] is the destination pointer as a REG, clobbered.
2964 OPERANDS[1] is the source pointer as a REG, clobbered.
2965 OPERANDS[2] is a register for temporary storage.
2966 OPERANDS[3] is a register for temporary storage.
2967 OPERANDS[4] is the size as a CONST_INT
2968 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2969 OPERANDS[6] is another temporary register. */
2970
2971 const char *
2972 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2973 {
2974 HOST_WIDE_INT align = INTVAL (operands[5]);
2975 unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]);
2976
2977 /* We can't move more than a word at a time because the PA
2978 has no integer move insns wider than a word. (Could use fp mem ops?) */
2979 if (align > (TARGET_64BIT ? 8 : 4))
2980 align = (TARGET_64BIT ? 8 : 4);
2981
2982 /* Note that we know each loop below will execute at least twice
2983 (else we would have open-coded the copy). */
2984 switch (align)
2985 {
2986 case 8:
2987 /* Pre-adjust the loop counter. */
2988 operands[4] = GEN_INT (n_bytes - 16);
2989 output_asm_insn ("ldi %4,%2", operands);
2990
2991 /* Copying loop. */
2992 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2993 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2994 output_asm_insn ("std,ma %3,8(%0)", operands);
2995 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2996 output_asm_insn ("std,ma %6,8(%0)", operands);
2997
2998 /* Handle the residual. There could be up to 7 bytes of
2999 residual to copy! */
3000 if (n_bytes % 16 != 0)
3001 {
3002 operands[4] = GEN_INT (n_bytes % 8);
3003 if (n_bytes % 16 >= 8)
3004 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3005 if (n_bytes % 8 != 0)
3006 output_asm_insn ("ldd 0(%1),%6", operands);
3007 if (n_bytes % 16 >= 8)
3008 output_asm_insn ("std,ma %3,8(%0)", operands);
3009 if (n_bytes % 8 != 0)
3010 output_asm_insn ("stdby,e %6,%4(%0)", operands);
3011 }
3012 return "";
3013
3014 case 4:
3015 /* Pre-adjust the loop counter. */
3016 operands[4] = GEN_INT (n_bytes - 8);
3017 output_asm_insn ("ldi %4,%2", operands);
3018
3019 /* Copying loop. */
3020 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3021 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
3022 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3023 output_asm_insn ("addib,>= -8,%2,.-12", operands);
3024 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
3025
3026 /* Handle the residual. There could be up to 7 bytes of
3027 residual to copy! */
3028 if (n_bytes % 8 != 0)
3029 {
3030 operands[4] = GEN_INT (n_bytes % 4);
3031 if (n_bytes % 8 >= 4)
3032 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3033 if (n_bytes % 4 != 0)
3034 output_asm_insn ("ldw 0(%1),%6", operands);
3035 if (n_bytes % 8 >= 4)
3036 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3037 if (n_bytes % 4 != 0)
3038 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
3039 }
3040 return "";
3041
3042 case 2:
3043 /* Pre-adjust the loop counter. */
3044 operands[4] = GEN_INT (n_bytes - 4);
3045 output_asm_insn ("ldi %4,%2", operands);
3046
3047 /* Copying loop. */
3048 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3049 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
3050 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3051 output_asm_insn ("addib,>= -4,%2,.-12", operands);
3052 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
3053
3054 /* Handle the residual. */
3055 if (n_bytes % 4 != 0)
3056 {
3057 if (n_bytes % 4 >= 2)
3058 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3059 if (n_bytes % 2 != 0)
3060 output_asm_insn ("ldb 0(%1),%6", operands);
3061 if (n_bytes % 4 >= 2)
3062 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3063 if (n_bytes % 2 != 0)
3064 output_asm_insn ("stb %6,0(%0)", operands);
3065 }
3066 return "";
3067
3068 case 1:
3069 /* Pre-adjust the loop counter. */
3070 operands[4] = GEN_INT (n_bytes - 2);
3071 output_asm_insn ("ldi %4,%2", operands);
3072
3073 /* Copying loop. */
3074 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
3075 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
3076 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
3077 output_asm_insn ("addib,>= -2,%2,.-12", operands);
3078 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
3079
3080 /* Handle the residual. */
3081 if (n_bytes % 2 != 0)
3082 {
3083 output_asm_insn ("ldb 0(%1),%3", operands);
3084 output_asm_insn ("stb %3,0(%0)", operands);
3085 }
3086 return "";
3087
3088 default:
3089 gcc_unreachable ();
3090 }
3091 }
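/* Illustrative sketch (not from the original sources): with align == 4
   and n_bytes == 23 the routine above emits, in PA 2.0 syntax and with
   %0-%6 standing for the operand registers,

       ldi 15,%2
       ldw,ma 4(%1),%3
       ldw,ma 4(%1),%6
       stw,ma %3,4(%0)
       addib,>= -8,%2,.-12
       stw,ma %6,4(%0)

   followed by residual code for the trailing 7 bytes:

       ldw,ma 4(%1),%3
       ldw 0(%1),%6
       stw,ma %3,4(%0)
       stby,e %6,3(%0)  */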
3092
3093 /* Count the number of insns necessary to handle this block move.
3094
3095 Basic structure is the same as pa_output_block_move, except that we
3096 count insns rather than emit them. */
3097
3098 static int
3099 compute_cpymem_length (rtx_insn *insn)
3100 {
3101 rtx pat = PATTERN (insn);
3102 unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3103 unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3104 unsigned int n_insns = 0;
3105
3106 /* We can't move more than a word at a time because the PA
3107 has no integer move insns wider than a word. (Could use fp mem ops?) */
3108 if (align > (TARGET_64BIT ? 8 : 4))
3109 align = (TARGET_64BIT ? 8 : 4);
3110
3111 /* The basic copying loop. */
3112 n_insns = 6;
3113
3114 /* Residuals. */
3115 if (n_bytes % (2 * align) != 0)
3116 {
3117 if ((n_bytes % (2 * align)) >= align)
3118 n_insns += 2;
3119
3120 if ((n_bytes % align) != 0)
3121 n_insns += 2;
3122 }
3123
3124 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3125 return n_insns * 4;
3126 }
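/* For the align == 4, n_bytes == 23 example above this computes
   6 + 2 + 2 == 10 insns, i.e. a length of 40 bytes (illustrative
   arithmetic, not from the original sources).  */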
3127
3128 /* Emit code to perform a block clear.
3129
3130 OPERANDS[0] is the destination pointer as a REG, clobbered.
3131 OPERANDS[1] is a register for temporary storage.
3132 OPERANDS[2] is the size as a CONST_INT
3133 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3134
3135 const char *
3136 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3137 {
3138 HOST_WIDE_INT align = INTVAL (operands[3]);
3139 unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]);
3140
3141 /* We can't clear more than a word at a time because the PA
3142 has no integer move insns wider than a word. */
3143 if (align > (TARGET_64BIT ? 8 : 4))
3144 align = (TARGET_64BIT ? 8 : 4);
3145
3146 /* Note that we know each loop below will execute at least twice
3147 (else we would have open-coded the copy). */
3148 switch (align)
3149 {
3150 case 8:
3151 /* Pre-adjust the loop counter. */
3152 operands[2] = GEN_INT (n_bytes - 16);
3153 output_asm_insn ("ldi %2,%1", operands);
3154
3155 /* Loop. */
3156 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3157 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3158 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3159
3160 /* Handle the residual. There could be up to 7 bytes of
3161 residual to copy! */
3162 if (n_bytes % 16 != 0)
3163 {
3164 operands[2] = GEN_INT (n_bytes % 8);
3165 if (n_bytes % 16 >= 8)
3166 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3167 if (n_bytes % 8 != 0)
3168 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3169 }
3170 return "";
3171
3172 case 4:
3173 /* Pre-adjust the loop counter. */
3174 operands[2] = GEN_INT (n_bytes - 8);
3175 output_asm_insn ("ldi %2,%1", operands);
3176
3177 /* Loop. */
3178 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3179 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3180 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3181
3182 /* Handle the residual. There could be up to 7 bytes of
3183 residual to copy! */
3184 if (n_bytes % 8 != 0)
3185 {
3186 operands[2] = GEN_INT (n_bytes % 4);
3187 if (n_bytes % 8 >= 4)
3188 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3189 if (n_bytes % 4 != 0)
3190 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3191 }
3192 return "";
3193
3194 case 2:
3195 /* Pre-adjust the loop counter. */
3196 operands[2] = GEN_INT (n_bytes - 4);
3197 output_asm_insn ("ldi %2,%1", operands);
3198
3199 /* Loop. */
3200 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3201 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3202 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3203
3204 /* Handle the residual. */
3205 if (n_bytes % 4 != 0)
3206 {
3207 if (n_bytes % 4 >= 2)
3208 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3209 if (n_bytes % 2 != 0)
3210 output_asm_insn ("stb %%r0,0(%0)", operands);
3211 }
3212 return "";
3213
3214 case 1:
3215 /* Pre-adjust the loop counter. */
3216 operands[2] = GEN_INT (n_bytes - 2);
3217 output_asm_insn ("ldi %2,%1", operands);
3218
3219 /* Loop. */
3220 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3221 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3222 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3223
3224 /* Handle the residual. */
3225 if (n_bytes % 2 != 0)
3226 output_asm_insn ("stb %%r0,0(%0)", operands);
3227
3228 return "";
3229
3230 default:
3231 gcc_unreachable ();
3232 }
3233 }
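/* Illustrative sketch (not from the original sources): with align == 4
   and n_bytes == 23 this emits, in PA 2.0 syntax and with %0/%1
   standing for the operand registers,

       ldi 15,%1
       stw,ma %r0,4(%0)
       addib,>= -8,%1,.-4
       stw,ma %r0,4(%0)

   plus the residual "stw,ma %r0,4(%0)" and "stby,e %r0,3(%0)" for the
   trailing 7 bytes.  */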
3234
3235 /* Count the number of insns necessary to handle this block clear.
3236
3237 Basic structure is the same as pa_output_block_clear, except that we
3238 count insns rather than emit them. */
3239
3240 static int
3241 compute_clrmem_length (rtx_insn *insn)
3242 {
3243 rtx pat = PATTERN (insn);
3244 unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3245 unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3246 unsigned int n_insns = 0;
3247
3248 /* We can't clear more than a word at a time because the PA
3249 has no integer move insns wider than a word. */
3250 if (align > (TARGET_64BIT ? 8 : 4))
3251 align = (TARGET_64BIT ? 8 : 4);
3252
3253 /* The basic loop. */
3254 n_insns = 4;
3255
3256 /* Residuals. */
3257 if (n_bytes % (2 * align) != 0)
3258 {
3259 if ((n_bytes % (2 * align)) >= align)
3260 n_insns++;
3261
3262 if ((n_bytes % align) != 0)
3263 n_insns++;
3264 }
3265
3266 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3267 return n_insns * 4;
3268 }
3269
3270
3271 const char *
3272 pa_output_and (rtx *operands)
3273 {
3274 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3275 {
3276 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3277 int ls0, ls1, ms0, p, len;
3278
3279 for (ls0 = 0; ls0 < 32; ls0++)
3280 if ((mask & (1 << ls0)) == 0)
3281 break;
3282
3283 for (ls1 = ls0; ls1 < 32; ls1++)
3284 if ((mask & (1 << ls1)) != 0)
3285 break;
3286
3287 for (ms0 = ls1; ms0 < 32; ms0++)
3288 if ((mask & (1 << ms0)) == 0)
3289 break;
3290
3291 gcc_assert (ms0 == 32);
3292
3293 if (ls1 == 32)
3294 {
3295 len = ls0;
3296
3297 gcc_assert (len);
3298
3299 operands[2] = GEN_INT (len);
3300 return "{extru|extrw,u} %1,31,%2,%0";
3301 }
3302 else
3303 {
3304 /* We could use this `depi' for the case above as well, but `depi'
3305 requires one more register file access than an `extru'. */
3306
3307 p = 31 - ls0;
3308 len = ls1 - ls0;
3309
3310 operands[2] = GEN_INT (p);
3311 operands[3] = GEN_INT (len);
3312 return "{depi|depwi} 0,%2,%3,%0";
3313 }
3314 }
3315 else
3316 return "and %1,%2,%0";
3317 }
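/* Worked examples (illustrative, not from the original sources):
   operands[2] == 0x000000ff gives ls0 == 8 and ls1 == 32, a low-order
   bitstring, so "extru %1,31,8,%0" is used; operands[2] == 0xffff00ff
   clears only bits 8..15, giving "depi 0,23,8,%0".  */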
3318
3319 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3320 storing the result in operands[0]. */
3321 const char *
3322 pa_output_64bit_and (rtx *operands)
3323 {
3324 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3325 {
3326 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3327 int ls0, ls1, ms0, p, len;
3328
3329 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3330 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3331 break;
3332
3333 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3334 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3335 break;
3336
3337 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3338 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3339 break;
3340
3341 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3342
3343 if (ls1 == HOST_BITS_PER_WIDE_INT)
3344 {
3345 len = ls0;
3346
3347 gcc_assert (len);
3348
3349 operands[2] = GEN_INT (len);
3350 return "extrd,u %1,63,%2,%0";
3351 }
3352 else
3353 {
3354 /* We could use `depdi' for the case above as well, but `depdi'
3355 requires one more register file access than an `extrd,u'. */
3356
3357 p = 63 - ls0;
3358 len = ls1 - ls0;
3359
3360 operands[2] = GEN_INT (p);
3361 operands[3] = GEN_INT (len);
3362 return "depdi 0,%2,%3,%0";
3363 }
3364 }
3365 else
3366 return "and %1,%2,%0";
3367 }
3368
3369 const char *
3370 pa_output_ior (rtx *operands)
3371 {
3372 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3373 int bs0, bs1, p, len;
3374
3375 if (INTVAL (operands[2]) == 0)
3376 return "copy %1,%0";
3377
3378 for (bs0 = 0; bs0 < 32; bs0++)
3379 if ((mask & (1 << bs0)) != 0)
3380 break;
3381
3382 for (bs1 = bs0; bs1 < 32; bs1++)
3383 if ((mask & (1 << bs1)) == 0)
3384 break;
3385
3386 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3387
3388 p = 31 - bs0;
3389 len = bs1 - bs0;
3390
3391 operands[2] = GEN_INT (p);
3392 operands[3] = GEN_INT (len);
3393 return "{depi|depwi} -1,%2,%3,%0";
3394 }
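/* Worked example (illustrative, not from the original sources): for
   operands[2] == 0x00ff0000, bs0 == 16 and bs1 == 24, so p == 15 and
   len == 8, and "depi -1,15,8,%0" sets exactly those eight bits.  */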
3395
3396 /* Return a string to perform a bitwise-or of operands[1] with operands[2],
3397 storing the result in operands[0]. */
3398 const char *
3399 pa_output_64bit_ior (rtx *operands)
3400 {
3401 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3402 int bs0, bs1, p, len;
3403
3404 if (INTVAL (operands[2]) == 0)
3405 return "copy %1,%0";
3406
3407 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3408 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3409 break;
3410
3411 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3412 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3413 break;
3414
3415 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3416 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3417
3418 p = 63 - bs0;
3419 len = bs1 - bs0;
3420
3421 operands[2] = GEN_INT (p);
3422 operands[3] = GEN_INT (len);
3423 return "depdi -1,%2,%3,%0";
3424 }
3425
3426 /* Target hook for assembling integer objects. This code handles
3427 aligned SI and DI integers specially since function references
3428 must be preceded by P%. */
3429
3430 static bool
3431 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3432 {
3433 bool result;
3434 tree decl = NULL;
3435
3436 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to
3437 call assemble_external and set the SYMBOL_REF_DECL to NULL before
3438 calling output_addr_const. Otherwise, it may call assemble_external
3439 in the midst of outputting the assembler code for the SYMBOL_REF.
3440 We restore the SYMBOL_REF_DECL after the output is done. */
3441 if (GET_CODE (x) == SYMBOL_REF)
3442 {
3443 decl = SYMBOL_REF_DECL (x);
3444 if (decl)
3445 {
3446 assemble_external (decl);
3447 SET_SYMBOL_REF_DECL (x, NULL);
3448 }
3449 }
3450
3451 if (size == UNITS_PER_WORD
3452 && aligned_p
3453 && function_label_operand (x, VOIDmode))
3454 {
3455 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3456
3457 /* We don't want an OPD when generating fast indirect calls. */
3458 if (!TARGET_FAST_INDIRECT_CALLS)
3459 fputs ("P%", asm_out_file);
3460
3461 output_addr_const (asm_out_file, x);
3462 fputc ('\n', asm_out_file);
3463 result = true;
3464 }
3465 else
3466 result = default_assemble_integer (x, size, aligned_p);
3467
3468 if (decl)
3469 SET_SYMBOL_REF_DECL (x, decl);
3470
3471 return result;
3472 }
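/* For illustration (not from the original sources): an aligned
   word-sized reference to function foo is emitted as "\t.word\tP%foo"
   (".dword" when the word size is 8 bytes), and the P% prefix is
   omitted when TARGET_FAST_INDIRECT_CALLS is set.  */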
3473
3474 /* Output an ascii string. */
3475 void
3476 pa_output_ascii (FILE *file, const char *p, int size)
3477 {
3478 int i;
3479 int chars_output;
3480 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3481
3482 /* The HP assembler can only take strings of 256 characters at one
3483 time. This is a limitation on input line length, *not* the
3484 length of the string. Sigh. Even worse, it seems that the
3485 restriction is in number of input characters (see \xnn &
3486 \whatever). So we have to do this very carefully. */
3487
3488 fputs ("\t.STRING \"", file);
3489
3490 chars_output = 0;
3491 for (i = 0; i < size; i += 4)
3492 {
3493 int co = 0;
3494 int io = 0;
3495 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3496 {
3497 register unsigned int c = (unsigned char) p[i + io];
3498
3499 if (c == '\"' || c == '\\')
3500 partial_output[co++] = '\\';
3501 if (c >= ' ' && c < 0177)
3502 partial_output[co++] = c;
3503 else
3504 {
3505 unsigned int hexd;
3506 partial_output[co++] = '\\';
3507 partial_output[co++] = 'x';
3508 hexd = c / 16 - 0 + '0';
3509 if (hexd > '9')
3510 hexd -= '9' - 'a' + 1;
3511 partial_output[co++] = hexd;
3512 hexd = c % 16 - 0 + '0';
3513 if (hexd > '9')
3514 hexd -= '9' - 'a' + 1;
3515 partial_output[co++] = hexd;
3516 }
3517 }
3518 if (chars_output + co > 243)
3519 {
3520 fputs ("\"\n\t.STRING \"", file);
3521 chars_output = 0;
3522 }
3523 fwrite (partial_output, 1, (size_t) co, file);
3524 chars_output += co;
3525 co = 0;
3526 }
3527 fputs ("\"\n", file);
3528 }
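
/* The hex-escape arithmetic above is compact but easy to misread.
   The same nibble-to-ASCII conversion as a stand-alone sketch
   (illustration only; not used by the compiler):  */
#if 0
static char
nibble_to_hex (unsigned int n)
{
  char c = n + '0';		/* '0'..'9' for 0..9; ':'..'?' for 10..15.  */
  if (c > '9')
    c -= '9' - 'a' + 1;		/* Map ':'..'?' up to 'a'..'f'.  */
  return c;
}
#endif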
3529
3530 /* Try to rewrite floating point comparisons & branches to avoid
3531 useless add,tr insns.
3532
3533 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3534 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3535 first attempt to remove useless add,tr insns. It is zero
3536 for the second pass as reorg sometimes leaves bogus REG_DEAD
3537 notes lying around.
3538
3539 When CHECK_NOTES is zero we can only eliminate add,tr insns
3540 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3541 instructions. */
3542 static void
3543 remove_useless_addtr_insns (int check_notes)
3544 {
3545 rtx_insn *insn;
3546 static int pass = 0;
3547
3548 /* This is fairly cheap, so always run it when optimizing. */
3549 if (optimize > 0)
3550 {
3551 int fcmp_count = 0;
3552 int fbranch_count = 0;
3553
3554 /* Walk all the insns in this function looking for fcmp & fbranch
3555 instructions. Keep track of how many of each we find. */
3556 for (insn = get_insns (); insn; insn = next_insn (insn))
3557 {
3558 rtx tmp;
3559
3560 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3561 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3562 continue;
3563
3564 tmp = PATTERN (insn);
3565
3566 /* It must be a set. */
3567 if (GET_CODE (tmp) != SET)
3568 continue;
3569
3570 /* If the destination is CCFP, then we've found an fcmp insn. */
3571 tmp = SET_DEST (tmp);
3572 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3573 {
3574 fcmp_count++;
3575 continue;
3576 }
3577
3578 tmp = PATTERN (insn);
3579 /* If this is an fbranch instruction, bump the fbranch counter. */
3580 if (GET_CODE (tmp) == SET
3581 && SET_DEST (tmp) == pc_rtx
3582 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3583 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3584 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3585 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3586 {
3587 fbranch_count++;
3588 continue;
3589 }
3590 }
3591
3592
3593 /* Find all floating point compare + branch insns. If possible,
3594 reverse the comparison & the branch to avoid add,tr insns. */
3595 for (insn = get_insns (); insn; insn = next_insn (insn))
3596 {
3597 rtx tmp;
3598 rtx_insn *next;
3599
3600 /* Ignore anything that isn't an INSN. */
3601 if (! NONJUMP_INSN_P (insn))
3602 continue;
3603
3604 tmp = PATTERN (insn);
3605
3606 /* It must be a set. */
3607 if (GET_CODE (tmp) != SET)
3608 continue;
3609
3610 /* The destination must be CCFP, which is register zero. */
3611 tmp = SET_DEST (tmp);
3612 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3613 continue;
3614
3615 /* INSN should be a set of CCFP.
3616
3617 See if the result of this insn is used in a reversed FP
3618 conditional branch. If so, reverse our condition and
3619 the branch. Doing so avoids useless add,tr insns. */
3620 next = next_insn (insn);
3621 while (next)
3622 {
3623 /* Jumps, calls and labels stop our search. */
3624 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3625 break;
3626
3627 /* As does another fcmp insn. */
3628 if (NONJUMP_INSN_P (next)
3629 && GET_CODE (PATTERN (next)) == SET
3630 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3631 && REGNO (SET_DEST (PATTERN (next))) == 0)
3632 break;
3633
3634 next = next_insn (next);
3635 }
3636
3637 	  /* Is NEXT a branch?  */
3638 if (next && JUMP_P (next))
3639 {
3640 rtx pattern = PATTERN (next);
3641
3642 	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3643 and CCFP dies, then reverse our conditional and the branch
3644 to avoid the add,tr. */
3645 if (GET_CODE (pattern) == SET
3646 && SET_DEST (pattern) == pc_rtx
3647 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3648 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3649 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3650 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3651 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3652 && (fcmp_count == fbranch_count
3653 || (check_notes
3654 && find_regno_note (next, REG_DEAD, 0))))
3655 {
3656 /* Reverse the branch. */
3657 tmp = XEXP (SET_SRC (pattern), 1);
3658 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3659 XEXP (SET_SRC (pattern), 2) = tmp;
3660 INSN_CODE (next) = -1;
3661
3662 /* Reverse our condition. */
3663 tmp = PATTERN (insn);
3664 PUT_CODE (XEXP (tmp, 1),
3665 (reverse_condition_maybe_unordered
3666 (GET_CODE (XEXP (tmp, 1)))));
3667 }
3668 }
3669 }
3670 }
3671
3672 pass = !pass;
3673
3674 }
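
/* Schematically (RTL level, illustration only): a compare

     (set (reg:CCFP 0) (lt:CCFP (reg a) (reg b)))

   followed by a branch that is taken when CCFP is *not* set becomes

     (set (reg:CCFP 0) (unge:CCFP (reg a) (reg b)))

   with the branch arms exchanged, so the branch can test CCFP
   directly and no add,tr filler is needed.  */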
3675
3676 /* You may have trouble believing this, but this is the 32 bit HP-PA
3677 stack layout. Wow.
3678
3679 Offset Contents
3680
3681 Variable arguments (optional; any number may be allocated)
3682
3683 SP-(4*(N+9)) arg word N
3684 : :
3685 SP-56 arg word 5
3686 SP-52 arg word 4
3687
3688 Fixed arguments (must be allocated; may remain unused)
3689
3690 SP-48 arg word 3
3691 SP-44 arg word 2
3692 SP-40 arg word 1
3693 SP-36 arg word 0
3694
3695 Frame Marker
3696
3697 SP-32 External Data Pointer (DP)
3698 SP-28 External sr4
3699 SP-24 External/stub RP (RP')
3700 SP-20 Current RP
3701 SP-16 Static Link
3702 SP-12 Clean up
3703 SP-8 Calling Stub RP (RP'')
3704 SP-4 Previous SP
3705
3706 Top of Frame
3707
3708 SP-0 Stack Pointer (points to next available address)
3709
3710 */
3711
3712 /* This function saves registers as follows. Registers marked with ' are
3713 this function's registers (as opposed to the previous function's).
3714 If a frame_pointer isn't needed, r4 is saved as a general register;
3715 the space for the frame pointer is still allocated, though, to keep
3716 things simple.
3717
3718
3719 Top of Frame
3720
3721 SP (FP') Previous FP
3722 SP + 4 Alignment filler (sigh)
3723 SP + 8 Space for locals reserved here.
3724 .
3725 .
3726 .
3727 	SP + n          All call saved registers used.
3728 .
3729 .
3730 .
3731 SP + o All call saved fp registers used.
3732 .
3733 .
3734 .
3735 SP + p (SP') points to next available address.
3736
3737 */
3738
3739 /* Global variables set by output_function_prologue(). */
3740 /* Size of frame. Need to know this to emit return insns from
3741 leaf procedures. */
3742 static HOST_WIDE_INT actual_fsize, local_fsize;
3743 static int save_fregs;
3744
3745 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3746 Handle case where DISP > 8k by using the add_high_const patterns.
3747
3748 Note in DISP > 8k case, we will leave the high part of the address
3749    in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.  */
3750
3751 static void
3752 store_reg (int reg, HOST_WIDE_INT disp, int base)
3753 {
3754 rtx dest, src, basereg;
3755 rtx_insn *insn;
3756
3757 src = gen_rtx_REG (word_mode, reg);
3758 basereg = gen_rtx_REG (Pmode, base);
3759 if (VAL_14_BITS_P (disp))
3760 {
3761 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3762 insn = emit_move_insn (dest, src);
3763 }
3764 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3765 {
3766 rtx delta = GEN_INT (disp);
3767 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3768
3769 emit_move_insn (tmpreg, delta);
3770 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3771 if (DO_FRAME_NOTES)
3772 {
3773 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3774 gen_rtx_SET (tmpreg,
3775 gen_rtx_PLUS (Pmode, basereg, delta)));
3776 RTX_FRAME_RELATED_P (insn) = 1;
3777 }
3778 dest = gen_rtx_MEM (word_mode, tmpreg);
3779 insn = emit_move_insn (dest, src);
3780 }
3781 else
3782 {
3783 rtx delta = GEN_INT (disp);
3784 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3785 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3786
3787 emit_move_insn (tmpreg, high);
3788 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3789 insn = emit_move_insn (dest, src);
3790 if (DO_FRAME_NOTES)
3791 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3792 gen_rtx_SET (gen_rtx_MEM (word_mode,
3793 gen_rtx_PLUS (word_mode,
3794 basereg,
3795 delta)),
3796 src));
3797 }
3798
3799 if (DO_FRAME_NOTES)
3800 RTX_FRAME_RELATED_P (insn) = 1;
3801 }
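
/* Roughly (32-bit case, illustration only), the DISP > 8k path above
   corresponds to the two-instruction sequence

	addil	L'disp,%base		; %r1 = %base + left part of disp
	stw	%reg,R'disp(%r1)	; store at %r1 + right part of disp

   which is why %r1 is clobbered and why the prologue/epilogue code
   mentioned above can reuse the high part left in %r1.  */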
3802
3803 /* Emit RTL to store REG at the memory location specified by BASE and then
3804 add MOD to BASE. MOD must be <= 8k. */
3805
3806 static void
3807 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3808 {
3809 rtx basereg, srcreg, delta;
3810 rtx_insn *insn;
3811
3812 gcc_assert (VAL_14_BITS_P (mod));
3813
3814 basereg = gen_rtx_REG (Pmode, base);
3815 srcreg = gen_rtx_REG (word_mode, reg);
3816 delta = GEN_INT (mod);
3817
3818 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3819 if (DO_FRAME_NOTES)
3820 {
3821 RTX_FRAME_RELATED_P (insn) = 1;
3822
3823 /* RTX_FRAME_RELATED_P must be set on each frame related set
3824 in a parallel with more than one element. */
3825 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3826 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3827 }
3828 }
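
/* On 32-bit targets this typically assembles to a single post-modify
   store such as "stwm %r3,64(%sp)" (illustrative operands): store %r3
   at *%sp, then add 64 to %sp, as one atomic instruction.  */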
3829
3830 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3831 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3832 whether to add a frame note or not.
3833
3834 In the DISP > 8k case, we leave the high part of the address in %r1.
3835 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3836
3837 static void
3838 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3839 {
3840 rtx_insn *insn;
3841
3842 if (VAL_14_BITS_P (disp))
3843 {
3844 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3845 plus_constant (Pmode,
3846 gen_rtx_REG (Pmode, base), disp));
3847 }
3848 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3849 {
3850 rtx basereg = gen_rtx_REG (Pmode, base);
3851 rtx delta = GEN_INT (disp);
3852 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3853
3854 emit_move_insn (tmpreg, delta);
3855 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3856 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3857 if (DO_FRAME_NOTES)
3858 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3859 gen_rtx_SET (tmpreg,
3860 gen_rtx_PLUS (Pmode, basereg, delta)));
3861 }
3862 else
3863 {
3864 rtx basereg = gen_rtx_REG (Pmode, base);
3865 rtx delta = GEN_INT (disp);
3866 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3867
3868 emit_move_insn (tmpreg,
3869 gen_rtx_PLUS (Pmode, basereg,
3870 gen_rtx_HIGH (Pmode, delta)));
3871 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3872 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3873 }
3874
3875 if (DO_FRAME_NOTES && note)
3876 RTX_FRAME_RELATED_P (insn) = 1;
3877 }
3878
3879 HOST_WIDE_INT
3880 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3881 {
3882 int freg_saved = 0;
3883 int i, j;
3884
3885 /* The code in pa_expand_prologue and pa_expand_epilogue must
3886 be consistent with the rounding and size calculation done here.
3887 Change them at the same time. */
3888
3889 /* We do our own stack alignment. First, round the size of the
3890 stack locals up to a word boundary. */
3891 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3892
3893 /* Space for previous frame pointer + filler. If any frame is
3894 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We
3895 waste some space here for the sake of HP compatibility. The
3896 first slot is only used when the frame pointer is needed. */
3897 if (size || frame_pointer_needed)
3898 size += pa_starting_frame_offset ();
3899
3900 /* If the current function calls __builtin_eh_return, then we need
3901 to allocate stack space for registers that will hold data for
3902 the exception handler. */
3903 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3904 {
3905 unsigned int i;
3906
3907 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3908 continue;
3909 size += i * UNITS_PER_WORD;
3910 }
3911
3912 /* Account for space used by the callee general register saves. */
3913 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3914 if (df_regs_ever_live_p (i))
3915 size += UNITS_PER_WORD;
3916
3917 /* Account for space used by the callee floating point register saves. */
3918 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3919 if (df_regs_ever_live_p (i)
3920 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3921 {
3922 freg_saved = 1;
3923
3924 /* We always save both halves of the FP register, so always
3925 increment the frame size by 8 bytes. */
3926 size += 8;
3927 }
3928
3929 /* If any of the floating registers are saved, account for the
3930 alignment needed for the floating point register save block. */
3931 if (freg_saved)
3932 {
3933 size = (size + 7) & ~7;
3934 if (fregs_live)
3935 *fregs_live = 1;
3936 }
3937
3938 /* The various ABIs include space for the outgoing parameters in the
3939 size of the current function's stack frame. We don't need to align
3940 for the outgoing arguments as their alignment is set by the final
3941 rounding for the frame as a whole. */
3942 size += crtl->outgoing_args_size;
3943
3944 /* Allocate space for the fixed frame marker. This space must be
3945 allocated for any function that makes calls or allocates
3946 stack space. */
3947 if (!crtl->is_leaf || size)
3948 size += TARGET_64BIT ? 48 : 32;
3949
3950 /* Finally, round to the preferred stack boundary. */
3951 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3952 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3953 }
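
/* The round-to-boundary idiom used above recurs throughout this file.
   As a stand-alone sketch (illustration only), for any power-of-two
   ALIGN:  */
#if 0
static HOST_WIDE_INT
round_up_hwi (HOST_WIDE_INT size, HOST_WIDE_INT align)
{
  /* E.g. round_up_hwi (100, 64) == 128.  */
  return (size + align - 1) & ~(align - 1);
}
#endif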
3954
3955 /* Output function label, and associated .PROC and .CALLINFO statements. */
3956
3957 void
3958 pa_output_function_label (FILE *file)
3959 {
3960 /* The function's label and associated .PROC must never be
3961 separated and must be output *after* any profiling declarations
3962 to avoid changing spaces/subspaces within a procedure. */
3963 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3964 fputs ("\t.PROC\n", file);
3965
3966 /* pa_expand_prologue does the dirty work now. We just need
3967 to output the assembler directives which denote the start
3968 of a function. */
3969 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3970 if (crtl->is_leaf)
3971 fputs (",NO_CALLS", file);
3972 else
3973 fputs (",CALLS", file);
3974 if (rp_saved)
3975 fputs (",SAVE_RP", file);
3976
3977 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3978 at the beginning of the frame and that it is used as the frame
3979 pointer for the frame. We do this because our current frame
3980 layout doesn't conform to that specified in the HP runtime
3981 documentation and we need a way to indicate to programs such as
3982 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3983 isn't used by HP compilers but is supported by the assembler.
3984 However, SAVE_SP is supposed to indicate that the previous stack
3985 pointer has been saved in the frame marker. */
3986 if (frame_pointer_needed)
3987 fputs (",SAVE_SP", file);
3988
3989 /* Pass on information about the number of callee register saves
3990 performed in the prologue.
3991
3992 The compiler is supposed to pass the highest register number
3993 saved, the assembler then has to adjust that number before
3994 entering it into the unwind descriptor (to account for any
3995 caller saved registers with lower register numbers than the
3996 first callee saved register). */
3997 if (gr_saved)
3998 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3999
4000 if (fr_saved)
4001 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4002
4003 fputs ("\n\t.ENTRY\n", file);
4004 }
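
/* A typical result (values illustrative only):

	.CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=5,ENTRY_FR=13
	.ENTRY
*/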
4005
4006 /* Output function prologue. */
4007
4008 static void
4009 pa_output_function_prologue (FILE *file)
4010 {
4011 pa_output_function_label (file);
4012 remove_useless_addtr_insns (0);
4013 }
4014
4015 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */
4016
4017 static void
4018 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
4019 {
4020 remove_useless_addtr_insns (0);
4021 }
4022
4023 void
4024 pa_expand_prologue (void)
4025 {
4026 int merge_sp_adjust_with_store = 0;
4027 HOST_WIDE_INT size = get_frame_size ();
4028 HOST_WIDE_INT offset;
4029 int i;
4030 rtx tmpreg;
4031 rtx_insn *insn;
4032
4033 gr_saved = 0;
4034 fr_saved = 0;
4035 save_fregs = 0;
4036
4037 /* Compute total size for frame pointer, filler, locals and rounding to
4038 the next word boundary. Similar code appears in pa_compute_frame_size
4039 and must be changed in tandem with this code. */
4040 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4041 if (local_fsize || frame_pointer_needed)
4042 local_fsize += pa_starting_frame_offset ();
4043
4044 actual_fsize = pa_compute_frame_size (size, &save_fregs);
4045 if (flag_stack_usage_info)
4046 current_function_static_stack_size = actual_fsize;
4047
4048 /* Compute a few things we will use often. */
4049 tmpreg = gen_rtx_REG (word_mode, 1);
4050
4051 /* Save RP first. The calling conventions manual states RP will
4052 always be stored into the caller's frame at sp - 20 or sp - 16
4053 depending on which ABI is in use. */
4054 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
4055 {
4056 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
4057 rp_saved = true;
4058 }
4059 else
4060 rp_saved = false;
4061
4062 /* Allocate the local frame and set up the frame pointer if needed. */
4063 if (actual_fsize != 0)
4064 {
4065 if (frame_pointer_needed)
4066 {
4067 /* Copy the old frame pointer temporarily into %r1. Set up the
4068 new stack pointer, then store away the saved old frame pointer
4069 into the stack at sp and at the same time update the stack
4070 	     pointer by actual_fsize bytes.  There are two versions: the
4071 	     first handles small (< 8k) frames, the second large (>= 8k)
4072 	     frames.  */
4073 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
4074 if (DO_FRAME_NOTES)
4075 RTX_FRAME_RELATED_P (insn) = 1;
4076
4077 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4078 if (DO_FRAME_NOTES)
4079 RTX_FRAME_RELATED_P (insn) = 1;
4080
4081 if (VAL_14_BITS_P (actual_fsize))
4082 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
4083 else
4084 {
4085 /* It is incorrect to store the saved frame pointer at *sp,
4086 then increment sp (writes beyond the current stack boundary).
4087
4088 So instead use stwm to store at *sp and post-increment the
4089 stack pointer as an atomic operation. Then increment sp to
4090 finish allocating the new frame. */
4091 HOST_WIDE_INT adjust1 = 8192 - 64;
4092 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
4093
4094 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
4095 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4096 adjust2, 1);
4097 }
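	  /* For example (illustration): with actual_fsize == 12288,
	     adjust1 == 8128 and adjust2 == 4160; the stwm covers the
	     first 8128 bytes and set_reg_plus_d allocates the rest.  */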
4098
4099 /* We set SAVE_SP in frames that need a frame pointer. Thus,
4100 we need to store the previous stack pointer (frame pointer)
4101 into the frame marker on targets that use the HP unwind
4102 library. This allows the HP unwind library to be used to
4103 unwind GCC frames. However, we are not fully compatible
4104 with the HP library because our frame layout differs from
4105 that specified in the HP runtime specification.
4106
4107 We don't want a frame note on this instruction as the frame
4108 marker moves during dynamic stack allocation.
4109
4110 This instruction also serves as a blockage to prevent
4111 register spills from being scheduled before the stack
4112 pointer is raised. This is necessary as we store
4113 registers using the frame pointer as a base register,
4114 and the frame pointer is set before sp is raised. */
4115 if (TARGET_HPUX_UNWIND_LIBRARY)
4116 {
4117 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4118 GEN_INT (TARGET_64BIT ? -8 : -4));
4119
4120 emit_move_insn (gen_rtx_MEM (word_mode, addr),
4121 hard_frame_pointer_rtx);
4122 }
4123 else
4124 emit_insn (gen_blockage ());
4125 }
4126 /* no frame pointer needed. */
4127 else
4128 {
4129 /* In some cases we can perform the first callee register save
4130 	     and allocate the stack frame at the same time.  If so, just
4131 make a note of it and defer allocating the frame until saving
4132 the callee registers. */
4133 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4134 merge_sp_adjust_with_store = 1;
4135 /* Cannot optimize. Adjust the stack frame by actual_fsize
4136 bytes. */
4137 else
4138 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4139 actual_fsize, 1);
4140 }
4141 }
4142
4143 /* Normal register save.
4144
4145 Do not save the frame pointer in the frame_pointer_needed case. It
4146 was done earlier. */
4147 if (frame_pointer_needed)
4148 {
4149 offset = local_fsize;
4150
4151 /* Saving the EH return data registers in the frame is the simplest
4152 way to get the frame unwind information emitted. We put them
4153 just before the general registers. */
4154 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4155 {
4156 unsigned int i, regno;
4157
4158 for (i = 0; ; ++i)
4159 {
4160 regno = EH_RETURN_DATA_REGNO (i);
4161 if (regno == INVALID_REGNUM)
4162 break;
4163
4164 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4165 offset += UNITS_PER_WORD;
4166 }
4167 }
4168
4169 for (i = 18; i >= 4; i--)
4170 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4171 {
4172 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4173 offset += UNITS_PER_WORD;
4174 gr_saved++;
4175 }
4176 /* Account for %r3 which is saved in a special place. */
4177 gr_saved++;
4178 }
4179 /* No frame pointer needed. */
4180 else
4181 {
4182 offset = local_fsize - actual_fsize;
4183
4184 /* Saving the EH return data registers in the frame is the simplest
4185 way to get the frame unwind information emitted. */
4186 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4187 {
4188 unsigned int i, regno;
4189
4190 for (i = 0; ; ++i)
4191 {
4192 regno = EH_RETURN_DATA_REGNO (i);
4193 if (regno == INVALID_REGNUM)
4194 break;
4195
4196 /* If merge_sp_adjust_with_store is nonzero, then we can
4197 optimize the first save. */
4198 if (merge_sp_adjust_with_store)
4199 {
4200 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4201 merge_sp_adjust_with_store = 0;
4202 }
4203 else
4204 store_reg (regno, offset, STACK_POINTER_REGNUM);
4205 offset += UNITS_PER_WORD;
4206 }
4207 }
4208
4209 for (i = 18; i >= 3; i--)
4210 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4211 {
4212 /* If merge_sp_adjust_with_store is nonzero, then we can
4213 optimize the first GR save. */
4214 if (merge_sp_adjust_with_store)
4215 {
4216 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4217 merge_sp_adjust_with_store = 0;
4218 }
4219 else
4220 store_reg (i, offset, STACK_POINTER_REGNUM);
4221 offset += UNITS_PER_WORD;
4222 gr_saved++;
4223 }
4224
4225 /* If we wanted to merge the SP adjustment with a GR save, but we never
4226 did any GR saves, then just emit the adjustment here. */
4227 if (merge_sp_adjust_with_store)
4228 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4229 actual_fsize, 1);
4230 }
4231
4232 /* The hppa calling conventions say that %r19, the pic offset
4233 register, is saved at sp - 32 (in this function's frame)
4234 when generating PIC code. FIXME: What is the correct thing
4235 to do for functions which make no calls and allocate no
4236 frame? Do we need to allocate a frame, or can we just omit
4237 the save? For now we'll just omit the save.
4238
4239 We don't want a note on this insn as the frame marker can
4240 move if there is a dynamic stack allocation. */
4241 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4242 {
4243 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4244
4245 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4246
4247 }
4248
4249 /* Align pointer properly (doubleword boundary). */
4250 offset = (offset + 7) & ~7;
4251
4252 /* Floating point register store. */
4253 if (save_fregs)
4254 {
4255 rtx base;
4256
4257 /* First get the frame or stack pointer to the start of the FP register
4258 save area. */
4259 if (frame_pointer_needed)
4260 {
4261 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4262 base = hard_frame_pointer_rtx;
4263 }
4264 else
4265 {
4266 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4267 base = stack_pointer_rtx;
4268 }
4269
4270 /* Now actually save the FP registers. */
4271 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4272 {
4273 if (df_regs_ever_live_p (i)
4274 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4275 {
4276 rtx addr, reg;
4277 rtx_insn *insn;
4278 addr = gen_rtx_MEM (DFmode,
4279 gen_rtx_POST_INC (word_mode, tmpreg));
4280 reg = gen_rtx_REG (DFmode, i);
4281 insn = emit_move_insn (addr, reg);
4282 if (DO_FRAME_NOTES)
4283 {
4284 RTX_FRAME_RELATED_P (insn) = 1;
4285 if (TARGET_64BIT)
4286 {
4287 rtx mem = gen_rtx_MEM (DFmode,
4288 plus_constant (Pmode, base,
4289 offset));
4290 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4291 gen_rtx_SET (mem, reg));
4292 }
4293 else
4294 {
4295 rtx meml = gen_rtx_MEM (SFmode,
4296 plus_constant (Pmode, base,
4297 offset));
4298 rtx memr = gen_rtx_MEM (SFmode,
4299 plus_constant (Pmode, base,
4300 offset + 4));
4301 rtx regl = gen_rtx_REG (SFmode, i);
4302 rtx regr = gen_rtx_REG (SFmode, i + 1);
4303 rtx setl = gen_rtx_SET (meml, regl);
4304 rtx setr = gen_rtx_SET (memr, regr);
4305 rtvec vec;
4306
4307 RTX_FRAME_RELATED_P (setl) = 1;
4308 RTX_FRAME_RELATED_P (setr) = 1;
4309 vec = gen_rtvec (2, setl, setr);
4310 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4311 gen_rtx_SEQUENCE (VOIDmode, vec));
4312 }
4313 }
4314 offset += GET_MODE_SIZE (DFmode);
4315 fr_saved++;
4316 }
4317 }
4318 }
4319 }
4320
4321 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4322 Handle case where DISP > 8k by using the add_high_const patterns. */
4323
4324 static void
4325 load_reg (int reg, HOST_WIDE_INT disp, int base)
4326 {
4327 rtx dest = gen_rtx_REG (word_mode, reg);
4328 rtx basereg = gen_rtx_REG (Pmode, base);
4329 rtx src;
4330
4331 if (VAL_14_BITS_P (disp))
4332 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4333 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4334 {
4335 rtx delta = GEN_INT (disp);
4336 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4337
4338 emit_move_insn (tmpreg, delta);
4339 if (TARGET_DISABLE_INDEXING)
4340 {
4341 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4342 src = gen_rtx_MEM (word_mode, tmpreg);
4343 }
4344 else
4345 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4346 }
4347 else
4348 {
4349 rtx delta = GEN_INT (disp);
4350 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4351 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4352
4353 emit_move_insn (tmpreg, high);
4354 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4355 }
4356
4357 emit_move_insn (dest, src);
4358 }
4359
4360 /* Update the total code bytes output to the text section. */
4361
4362 static void
4363 update_total_code_bytes (unsigned int nbytes)
4364 {
4365 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4366 && !IN_NAMED_SECTION_P (cfun->decl))
4367 {
4368 unsigned int old_total = total_code_bytes;
4369
4370 total_code_bytes += nbytes;
4371
4372 /* Be prepared to handle overflows. */
4373 if (old_total > total_code_bytes)
4374 total_code_bytes = UINT_MAX;
4375 }
4376 }
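
/* E.g. (illustration): if total_code_bytes is UINT_MAX - 100 and
   NBYTES is 200, the unsigned addition wraps, old_total compares
   greater than the new total, and the count saturates at UINT_MAX.  */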
4377
4378 /* This function generates the assembly code for function exit.
4379 Args are as for output_function_prologue ().
4380
4381 The function epilogue should not depend on the current stack
4382 pointer! It should use the frame pointer only. This is mandatory
4383 because of alloca; we also take advantage of it to omit stack
4384 adjustments before returning. */
4385
4386 static void
4387 pa_output_function_epilogue (FILE *file)
4388 {
4389 rtx_insn *insn = get_last_insn ();
4390 bool extra_nop;
4391
4392 /* pa_expand_epilogue does the dirty work now. We just need
4393 to output the assembler directives which denote the end
4394 of a function.
4395
4396 To make debuggers happy, emit a nop if the epilogue was completely
4397 eliminated due to a volatile call as the last insn in the
4398 current function. That way the return address (in %r2) will
4399 always point to a valid instruction in the current function. */
4400
4401 /* Get the last real insn. */
4402 if (NOTE_P (insn))
4403 insn = prev_real_insn (insn);
4404
4405 /* If it is a sequence, then look inside. */
4406 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4407     insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4408
4409 /* If insn is a CALL_INSN, then it must be a call to a volatile
4410 function (otherwise there would be epilogue insns). */
4411 if (insn && CALL_P (insn))
4412 {
4413 fputs ("\tnop\n", file);
4414 extra_nop = true;
4415 }
4416 else
4417 extra_nop = false;
4418
4419 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4420
4421 if (TARGET_SOM && TARGET_GAS)
4422 {
4423 /* We are done with this subspace except possibly for some additional
4424 debug information. Forget that we are in this subspace to ensure
4425 that the next function is output in its own subspace. */
4426 in_section = NULL;
4427 cfun->machine->in_nsubspa = 2;
4428 }
4429
4430 /* Thunks do their own insn accounting. */
4431 if (cfun->is_thunk)
4432 return;
4433
4434 if (INSN_ADDRESSES_SET_P ())
4435 {
4436 last_address = extra_nop ? 4 : 0;
4437 insn = get_last_nonnote_insn ();
4438 if (insn)
4439 {
4440 last_address += INSN_ADDRESSES (INSN_UID (insn));
4441 if (INSN_P (insn))
4442 last_address += insn_default_length (insn);
4443 }
4444 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4445 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4446 }
4447 else
4448 last_address = UINT_MAX;
4449
4450 /* Finally, update the total number of code bytes output so far. */
4451 update_total_code_bytes (last_address);
4452 }
4453
4454 void
4455 pa_expand_epilogue (void)
4456 {
4457 rtx tmpreg;
4458 HOST_WIDE_INT offset;
4459 HOST_WIDE_INT ret_off = 0;
4460 int i;
4461 int merge_sp_adjust_with_load = 0;
4462
4463 /* We will use this often. */
4464 tmpreg = gen_rtx_REG (word_mode, 1);
4465
4466 /* Try to restore RP early to avoid load/use interlocks when
4467 RP gets used in the return (bv) instruction. This appears to still
4468 be necessary even when we schedule the prologue and epilogue. */
4469 if (rp_saved)
4470 {
4471 ret_off = TARGET_64BIT ? -16 : -20;
4472 if (frame_pointer_needed)
4473 {
4474 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4475 ret_off = 0;
4476 }
4477 else
4478 {
4479 /* No frame pointer, and stack is smaller than 8k. */
4480 if (VAL_14_BITS_P (ret_off - actual_fsize))
4481 {
4482 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4483 ret_off = 0;
4484 }
4485 }
4486 }
4487
4488 /* General register restores. */
4489 if (frame_pointer_needed)
4490 {
4491 offset = local_fsize;
4492
4493 /* If the current function calls __builtin_eh_return, then we need
4494 to restore the saved EH data registers. */
4495 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4496 {
4497 unsigned int i, regno;
4498
4499 for (i = 0; ; ++i)
4500 {
4501 regno = EH_RETURN_DATA_REGNO (i);
4502 if (regno == INVALID_REGNUM)
4503 break;
4504
4505 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4506 offset += UNITS_PER_WORD;
4507 }
4508 }
4509
4510 for (i = 18; i >= 4; i--)
4511 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4512 {
4513 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4514 offset += UNITS_PER_WORD;
4515 }
4516 }
4517 else
4518 {
4519 offset = local_fsize - actual_fsize;
4520
4521 /* If the current function calls __builtin_eh_return, then we need
4522 to restore the saved EH data registers. */
4523 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4524 {
4525 unsigned int i, regno;
4526
4527 for (i = 0; ; ++i)
4528 {
4529 regno = EH_RETURN_DATA_REGNO (i);
4530 if (regno == INVALID_REGNUM)
4531 break;
4532
4533 /* Only for the first load.
4534 merge_sp_adjust_with_load holds the register load
4535 with which we will merge the sp adjustment. */
4536 if (merge_sp_adjust_with_load == 0
4537 && local_fsize == 0
4538 && VAL_14_BITS_P (-actual_fsize))
4539 merge_sp_adjust_with_load = regno;
4540 else
4541 load_reg (regno, offset, STACK_POINTER_REGNUM);
4542 offset += UNITS_PER_WORD;
4543 }
4544 }
4545
4546 for (i = 18; i >= 3; i--)
4547 {
4548 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4549 {
4550 /* Only for the first load.
4551 merge_sp_adjust_with_load holds the register load
4552 with which we will merge the sp adjustment. */
4553 if (merge_sp_adjust_with_load == 0
4554 && local_fsize == 0
4555 && VAL_14_BITS_P (-actual_fsize))
4556 merge_sp_adjust_with_load = i;
4557 else
4558 load_reg (i, offset, STACK_POINTER_REGNUM);
4559 offset += UNITS_PER_WORD;
4560 }
4561 }
4562 }
4563
4564 /* Align pointer properly (doubleword boundary). */
4565 offset = (offset + 7) & ~7;
4566
4567 /* FP register restores. */
4568 if (save_fregs)
4569 {
4570 /* Adjust the register to index off of. */
4571 if (frame_pointer_needed)
4572 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4573 else
4574 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4575
4576 /* Actually do the restores now. */
4577 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4578 if (df_regs_ever_live_p (i)
4579 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4580 {
4581 rtx src = gen_rtx_MEM (DFmode,
4582 gen_rtx_POST_INC (word_mode, tmpreg));
4583 rtx dest = gen_rtx_REG (DFmode, i);
4584 emit_move_insn (dest, src);
4585 }
4586 }
4587
4588 /* Emit a blockage insn here to keep these insns from being moved to
4589 an earlier spot in the epilogue, or into the main instruction stream.
4590
4591 This is necessary as we must not cut the stack back before all the
4592 restores are finished. */
4593 emit_insn (gen_blockage ());
4594
4595 /* Reset stack pointer (and possibly frame pointer). The stack
4596 pointer is initially set to fp + 64 to avoid a race condition. */
4597 if (frame_pointer_needed)
4598 {
4599 rtx delta = GEN_INT (-64);
4600
4601 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4602 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4603 stack_pointer_rtx, delta));
4604 }
4605 /* If we were deferring a callee register restore, do it now. */
4606 else if (merge_sp_adjust_with_load)
4607 {
4608 rtx delta = GEN_INT (-actual_fsize);
4609 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4610
4611 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4612 }
4613 else if (actual_fsize != 0)
4614 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4615 - actual_fsize, 0);
4616
4617 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4618 frame greater than 8k), do so now. */
4619 if (ret_off != 0)
4620 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4621
4622 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4623 {
4624 rtx sa = EH_RETURN_STACKADJ_RTX;
4625
4626 emit_insn (gen_blockage ());
4627 emit_insn (TARGET_64BIT
4628 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4629 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4630 }
4631 }
4632
4633 bool
4634 pa_can_use_return_insn (void)
4635 {
4636 if (!reload_completed)
4637 return false;
4638
4639 if (frame_pointer_needed)
4640 return false;
4641
4642 if (df_regs_ever_live_p (2))
4643 return false;
4644
4645 if (crtl->profile)
4646 return false;
4647
4648 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4649 }
4650
4651 rtx
4652 hppa_pic_save_rtx (void)
4653 {
4654 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4655 }
4656
4657 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4658 #define NO_DEFERRED_PROFILE_COUNTERS 0
4659 #endif
4660
4661
4662 /* Vector of funcdef numbers. */
4663 static vec<int> funcdef_nos;
4664
4665 /* Output deferred profile counters. */
4666 static void
4667 output_deferred_profile_counters (void)
4668 {
4669 unsigned int i;
4670 int align, n;
4671
4672 if (funcdef_nos.is_empty ())
4673 return;
4674
4675 switch_to_section (data_section);
4676 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4677 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4678
4679 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4680 {
4681 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4682 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4683 }
4684
4685 funcdef_nos.release ();
4686 }
4687
4688 void
4689 hppa_profile_hook (int label_no)
4690 {
4691 rtx_code_label *label_rtx = gen_label_rtx ();
4692 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4693 rtx arg_bytes, begin_label_rtx, mcount, sym;
4694 rtx_insn *call_insn;
4695 char begin_label_name[16];
4696 bool use_mcount_pcrel_call;
4697
4698 /* Set up call destination. */
4699 sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4700 pa_encode_label (sym);
4701 mcount = gen_rtx_MEM (Pmode, sym);
4702
4703 /* If we can reach _mcount with a pc-relative call, we can optimize
4704 loading the address of the current function. This requires linker
4705 long branch stub support. */
4706 if (!TARGET_PORTABLE_RUNTIME
4707 && !TARGET_LONG_CALLS
4708 && (TARGET_SOM || flag_function_sections))
4709     use_mcount_pcrel_call = true;
4710   else
4711     use_mcount_pcrel_call = false;
4712
4713 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4714 label_no);
4715 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4716
4717 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4718
4719 if (!use_mcount_pcrel_call)
4720 {
4721 /* The address of the function is loaded into %r25 with an instruction-
4722 relative sequence that avoids the use of relocations. We use SImode
4723 for the address of the function in both 32 and 64-bit code to avoid
4724 having to provide DImode versions of the lcla2 pattern. */
4725 if (TARGET_PA_20)
4726 emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4727 else
4728 emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4729 }
4730
4731 if (!NO_DEFERRED_PROFILE_COUNTERS)
4732 {
4733 rtx count_label_rtx, addr, r24;
4734 char count_label_name[16];
4735
4736 funcdef_nos.safe_push (label_no);
4737 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4738 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4739 ggc_strdup (count_label_name));
4740
4741 addr = force_reg (Pmode, count_label_rtx);
4742 r24 = gen_rtx_REG (Pmode, 24);
4743 emit_move_insn (r24, addr);
4744
4745 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4746 if (use_mcount_pcrel_call)
4747 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4748 begin_label_rtx));
4749 else
4750 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4751
4752 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4753 }
4754 else
4755 {
4756 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4757 if (use_mcount_pcrel_call)
4758 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4759 begin_label_rtx));
4760 else
4761 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4762 }
4763
4764 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4765 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4766
4767 /* Indicate the _mcount call cannot throw, nor will it execute a
4768 non-local goto. */
4769 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4770
4771 /* Allocate space for fixed arguments. */
4772 if (reg_parm_stack_space > crtl->outgoing_args_size)
4773 crtl->outgoing_args_size = reg_parm_stack_space;
4774 }
4775
4776 /* Fetch the return address for the frame COUNT steps up from
4777 the current frame, after the prologue. FRAMEADDR is the
4778 frame pointer of the COUNT frame.
4779
4780 We want to ignore any export stub remnants here. To handle this,
4781 we examine the code at the return address, and if it is an export
4782 stub, we return a memory rtx for the stub return address stored
4783 at frame-24.
4784
4785 The value returned is used in two different ways:
4786
4787 1. To find a function's caller.
4788
4789 2. To change the return address for a function.
4790
4791 This function handles most instances of case 1; however, it will
4792 fail if there are two levels of stubs to execute on the return
4793 path. The only way I believe that can happen is if the return value
4794 needs a parameter relocation, which never happens for C code.
4795
4796 This function handles most instances of case 2; however, it will
4797 fail if we did not originally have stub code on the return path
4798 but will need stub code on the new return path. This can happen if
4799 the caller & callee are both in the main program, but the new
4800 return location is in a shared library. */
4801
4802 rtx
4803 pa_return_addr_rtx (int count, rtx frameaddr)
4804 {
4805 rtx label;
4806 rtx rp;
4807 rtx saved_rp;
4808 rtx ins;
4809
4810 /* The instruction stream at the return address of a PA1.X export stub is:
4811
4812 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4813 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4814 0x00011820 | stub+16: mtsp r1,sr0
4815 0xe0400002 | stub+20: be,n 0(sr0,rp)
4816
4817 0xe0400002 must be specified as -532676606 so that it won't be
4818 rejected as an invalid immediate operand on 64-bit hosts.
4819
4820 The instruction stream at the return address of a PA2.0 export stub is:
4821
4822 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4823 0xe840d002 | stub+12: bve,n (rp)
4824 */
4825
4826 HOST_WIDE_INT insns[4];
4827 int i, len;
4828
4829 if (count != 0)
4830 return NULL_RTX;
4831
4832 rp = get_hard_reg_initial_val (Pmode, 2);
4833
4834 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4835 return rp;
4836
4837 /* If there is no export stub then just use the value saved from
4838 the return pointer register. */
4839
4840 saved_rp = gen_reg_rtx (Pmode);
4841 emit_move_insn (saved_rp, rp);
4842
4843 /* Get pointer to the instruction stream. We have to mask out the
4844 privilege level from the two low order bits of the return address
4845 pointer here so that ins will point to the start of the first
4846 instruction that would have been executed if we returned. */
4847 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4848 label = gen_label_rtx ();
4849
4850 if (TARGET_PA_20)
4851 {
4852 insns[0] = 0x4bc23fd1;
4853 insns[1] = -398405630;
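	  /* -398405630 is 0xe840d002, the bve,n; like the be,n above,
	     it is written as a signed value so that it is a valid
	     immediate operand on 64-bit hosts.  */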
4854 len = 2;
4855 }
4856 else
4857 {
4858 insns[0] = 0x4bc23fd1;
4859 insns[1] = 0x004010a1;
4860 insns[2] = 0x00011820;
4861 insns[3] = -532676606;
4862 len = 4;
4863 }
4864
4865 /* Check the instruction stream at the normal return address for the
4866    export stub.  If it is an export stub, then our return address is
4867 really in -24[frameaddr]. */
4868
4869 for (i = 0; i < len; i++)
4870 {
4871 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4872 rtx op1 = GEN_INT (insns[i]);
4873 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4874 }
4875
4876 /* Here we know that our return address points to an export
4877 stub. We don't want to return the address of the export stub,
4878 but rather the return address of the export stub. That return
4879 address is stored at -24[frameaddr]. */
4880
4881 emit_move_insn (saved_rp,
4882 gen_rtx_MEM (Pmode,
4883 memory_address (Pmode,
4884 plus_constant (Pmode, frameaddr,
4885 -24))));
4886
4887 emit_label (label);
4888
4889 return saved_rp;
4890 }
4891
4892 void
4893 pa_emit_bcond_fp (rtx operands[])
4894 {
4895 enum rtx_code code = GET_CODE (operands[0]);
4896 rtx operand0 = operands[1];
4897 rtx operand1 = operands[2];
4898 rtx label = operands[3];
4899
4900 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4901 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4902
4903 emit_jump_insn (gen_rtx_SET (pc_rtx,
4904 gen_rtx_IF_THEN_ELSE (VOIDmode,
4905 gen_rtx_fmt_ee (NE,
4906 VOIDmode,
4907 gen_rtx_REG (CCFPmode, 0),
4908 const0_rtx),
4909 gen_rtx_LABEL_REF (VOIDmode, label),
4910 pc_rtx)));
4911
4912 }
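
/* The two insns emitted above have the shape (illustration only):

     (set (reg:CCFP 0) (lt:CCFP (reg x) (reg y)))
     (set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
			     (label_ref L) (pc)))

   i.e. an fcmp that sets the FP status flag, followed by a branch
   that tests it.  */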
4913
4914 /* Adjust the cost of a scheduling dependency. Return the new cost of
4915 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4916
4917 static int
4918 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4919 unsigned int)
4920 {
4921 enum attr_type attr_type;
4922
4923   /* Don't adjust costs for a pa8000 chip.  Also, don't adjust any
4924      true dependencies, as they are now described with bypasses.  */
4925 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4926 return cost;
4927
4928 if (! recog_memoized (insn))
4929 return 0;
4930
4931 attr_type = get_attr_type (insn);
4932
4933 switch (dep_type)
4934 {
4935 case REG_DEP_ANTI:
4936 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4937 cycles later. */
4938
4939 if (attr_type == TYPE_FPLOAD)
4940 {
4941 rtx pat = PATTERN (insn);
4942 rtx dep_pat = PATTERN (dep_insn);
4943 if (GET_CODE (pat) == PARALLEL)
4944 {
4945 /* This happens for the fldXs,mb patterns. */
4946 pat = XVECEXP (pat, 0, 0);
4947 }
4948 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4949 /* If this happens, we have to extend this to schedule
4950 optimally. Return 0 for now. */
4951 return 0;
4952
4953 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4954 {
4955 if (! recog_memoized (dep_insn))
4956 return 0;
4957 switch (get_attr_type (dep_insn))
4958 {
4959 case TYPE_FPALU:
4960 case TYPE_FPMULSGL:
4961 case TYPE_FPMULDBL:
4962 case TYPE_FPDIVSGL:
4963 case TYPE_FPDIVDBL:
4964 case TYPE_FPSQRTSGL:
4965 case TYPE_FPSQRTDBL:
4966 /* A fpload can't be issued until one cycle before a
4967 preceding arithmetic operation has finished if
4968 the target of the fpload is any of the sources
4969 (or destination) of the arithmetic operation. */
4970 return insn_default_latency (dep_insn) - 1;
4971
4972 default:
4973 return 0;
4974 }
4975 }
4976 }
4977 else if (attr_type == TYPE_FPALU)
4978 {
4979 rtx pat = PATTERN (insn);
4980 rtx dep_pat = PATTERN (dep_insn);
4981 if (GET_CODE (pat) == PARALLEL)
4982 {
4983 /* This happens for the fldXs,mb patterns. */
4984 pat = XVECEXP (pat, 0, 0);
4985 }
4986 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4987 /* If this happens, we have to extend this to schedule
4988 optimally. Return 0 for now. */
4989 return 0;
4990
4991 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4992 {
4993 if (! recog_memoized (dep_insn))
4994 return 0;
4995 switch (get_attr_type (dep_insn))
4996 {
4997 case TYPE_FPDIVSGL:
4998 case TYPE_FPDIVDBL:
4999 case TYPE_FPSQRTSGL:
5000 case TYPE_FPSQRTDBL:
5001 /* An ALU flop can't be issued until two cycles before a
5002 preceding divide or sqrt operation has finished if
5003 the target of the ALU flop is any of the sources
5004 (or destination) of the divide or sqrt operation. */
5005 return insn_default_latency (dep_insn) - 2;
5006
5007 default:
5008 return 0;
5009 }
5010 }
5011 }
5012
5013 /* For other anti dependencies, the cost is 0. */
5014 return 0;
5015
5016 case REG_DEP_OUTPUT:
5017 /* Output dependency; DEP_INSN writes a register that INSN writes some
5018 cycles later. */
5019 if (attr_type == TYPE_FPLOAD)
5020 {
5021 rtx pat = PATTERN (insn);
5022 rtx dep_pat = PATTERN (dep_insn);
5023 if (GET_CODE (pat) == PARALLEL)
5024 {
5025 /* This happens for the fldXs,mb patterns. */
5026 pat = XVECEXP (pat, 0, 0);
5027 }
5028 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5029 /* If this happens, we have to extend this to schedule
5030 optimally. Return 0 for now. */
5031 return 0;
5032
5033 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5034 {
5035 if (! recog_memoized (dep_insn))
5036 return 0;
5037 switch (get_attr_type (dep_insn))
5038 {
5039 case TYPE_FPALU:
5040 case TYPE_FPMULSGL:
5041 case TYPE_FPMULDBL:
5042 case TYPE_FPDIVSGL:
5043 case TYPE_FPDIVDBL:
5044 case TYPE_FPSQRTSGL:
5045 case TYPE_FPSQRTDBL:
5046 /* A fpload can't be issued until one cycle before a
5047 preceding arithmetic operation has finished if
5048 the target of the fpload is the destination of the
5049 arithmetic operation.
5050
5051 Exception: For PA7100LC, PA7200 and PA7300, the cost
5052 is 3 cycles, unless they bundle together. We also
5053 pay the penalty if the second insn is a fpload. */
5054 return insn_default_latency (dep_insn) - 1;
5055
5056 default:
5057 return 0;
5058 }
5059 }
5060 }
5061 else if (attr_type == TYPE_FPALU)
5062 {
5063 rtx pat = PATTERN (insn);
5064 rtx dep_pat = PATTERN (dep_insn);
5065 if (GET_CODE (pat) == PARALLEL)
5066 {
5067 /* This happens for the fldXs,mb patterns. */
5068 pat = XVECEXP (pat, 0, 0);
5069 }
5070 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5071 /* If this happens, we have to extend this to schedule
5072 optimally. Return 0 for now. */
5073 return 0;
5074
5075 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5076 {
5077 if (! recog_memoized (dep_insn))
5078 return 0;
5079 switch (get_attr_type (dep_insn))
5080 {
5081 case TYPE_FPDIVSGL:
5082 case TYPE_FPDIVDBL:
5083 case TYPE_FPSQRTSGL:
5084 case TYPE_FPSQRTDBL:
5085 /* An ALU flop can't be issued until two cycles before a
5086 preceding divide or sqrt operation has finished if
5087 the target of the ALU flop is also the target of
5088 the divide or sqrt operation. */
5089 return insn_default_latency (dep_insn) - 2;
5090
5091 default:
5092 return 0;
5093 }
5094 }
5095 }
5096
5097 /* For other output dependencies, the cost is 0. */
5098 return 0;
5099
5100 default:
5101 gcc_unreachable ();
5102 }
5103 }
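
/* Worked example (illustrative numbers): if DEP_INSN is an FP multiply
   with a default latency of 3 and INSN is an fpload whose target is
   one of the multiply's sources, the anti-dependence case above yields
   3 - 1 == 2, keeping the load from issuing until one cycle before the
   multiply finishes.  */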
5104
5105 /* The 700 can only issue a single insn at a time.
5106 The 7XXX processors can issue two insns at a time.
5107 The 8000 can issue 4 insns at a time. */
5108 static int
5109 pa_issue_rate (void)
5110 {
5111 switch (pa_cpu)
5112 {
5113 case PROCESSOR_700: return 1;
5114 case PROCESSOR_7100: return 2;
5115 case PROCESSOR_7100LC: return 2;
5116 case PROCESSOR_7200: return 2;
5117 case PROCESSOR_7300: return 2;
5118 case PROCESSOR_8000: return 4;
5119
5120 default:
5121 gcc_unreachable ();
5122 }
5123 }
5124
5125
5126
5127 /* Return the adjusted length of INSN, whose unadjusted length has
5128    already been computed as LENGTH.  Return LENGTH if no adjustment
5129    is necessary.
5130
5131 Also compute the length of an inline block move here as it is too
5132 complicated to express as a length attribute in pa.md. */
5133 int
5134 pa_adjust_insn_length (rtx_insn *insn, int length)
5135 {
5136 rtx pat = PATTERN (insn);
5137
5138 /* If length is negative or undefined, provide initial length. */
5139 if ((unsigned int) length >= INT_MAX)
5140 {
5141 if (GET_CODE (pat) == SEQUENCE)
5142 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5143
5144 switch (get_attr_type (insn))
5145 {
5146 case TYPE_MILLI:
5147 length = pa_attr_length_millicode_call (insn);
5148 break;
5149 case TYPE_CALL:
5150 length = pa_attr_length_call (insn, 0);
5151 break;
5152 case TYPE_SIBCALL:
5153 length = pa_attr_length_call (insn, 1);
5154 break;
5155 case TYPE_DYNCALL:
5156 length = pa_attr_length_indirect_call (insn);
5157 break;
5158 case TYPE_SH_FUNC_ADRS:
5159 length = pa_attr_length_millicode_call (insn) + 20;
5160 break;
5161 default:
5162 gcc_unreachable ();
5163 }
5164 }
5165
5166 /* Block move pattern. */
5167 if (NONJUMP_INSN_P (insn)
5168 && GET_CODE (pat) == PARALLEL
5169 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5170 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5171 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5172 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5173 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5174 length += compute_cpymem_length (insn) - 4;
5175 /* Block clear pattern. */
5176 else if (NONJUMP_INSN_P (insn)
5177 && GET_CODE (pat) == PARALLEL
5178 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5179 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5180 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5181 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5182 length += compute_clrmem_length (insn) - 4;
5183 /* Conditional branch with an unfilled delay slot. */
5184 else if (JUMP_P (insn) && ! simplejump_p (insn))
5185 {
5186 /* Adjust a short backwards conditional with an unfilled delay slot. */
5187 if (GET_CODE (pat) == SET
5188 && length == 4
5189 && JUMP_LABEL (insn) != NULL_RTX
5190 && ! forward_branch_p (insn))
5191 length += 4;
5192 else if (GET_CODE (pat) == PARALLEL
5193 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5194 && length == 4)
5195 length += 4;
5196 /* Adjust dbra insn with short backwards conditional branch with
5197 unfilled delay slot -- only for case where counter is in a
5198 	 general register.  */
5199 else if (GET_CODE (pat) == PARALLEL
5200 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5201 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5202 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5203 && length == 4
5204 && ! forward_branch_p (insn))
5205 length += 4;
5206 }
5207 return length;
5208 }
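
/* Example: a short backwards conditional branch with an unfilled delay
   slot comes in with length 4 and leaves with length 8, since a nop
   must occupy the delay slot (an illustrative reading of the first
   JUMP_P adjustment above).  */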
5209
5210 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5211
5212 static bool
5213 pa_print_operand_punct_valid_p (unsigned char code)
5214 {
5215 if (code == '@'
5216 || code == '#'
5217 || code == '*'
5218 || code == '^')
5219 return true;
5220
5221 return false;
5222 }
5223
5224 /* Print operand X (an rtx) in assembler syntax to file FILE.
5225 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5226 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5227
5228 void
5229 pa_print_operand (FILE *file, rtx x, int code)
5230 {
5231 switch (code)
5232 {
5233 case '#':
5234 /* Output a 'nop' if there's nothing for the delay slot. */
5235 if (dbr_sequence_length () == 0)
5236 fputs ("\n\tnop", file);
5237 return;
5238 case '*':
5239       /* Output a nullification completer if there's nothing for the
5240 	 delay slot or nullification is requested.  */
5241       if (dbr_sequence_length () == 0
5242 	  || (final_sequence
5243 	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5244 fputs (",n", file);
5245 return;
5246 case 'R':
5247 /* Print out the second register name of a register pair.
5248 I.e., R (6) => 7. */
5249 fputs (reg_names[REGNO (x) + 1], file);
5250 return;
5251 case 'r':
5252 /* A register or zero. */
5253 if (x == const0_rtx
5254 || (x == CONST0_RTX (DFmode))
5255 || (x == CONST0_RTX (SFmode)))
5256 {
5257 fputs ("%r0", file);
5258 return;
5259 }
5260 else
5261 break;
5262 case 'f':
5263 /* A register or zero (floating point). */
5264 if (x == const0_rtx
5265 || (x == CONST0_RTX (DFmode))
5266 || (x == CONST0_RTX (SFmode)))
5267 {
5268 fputs ("%fr0", file);
5269 return;
5270 }
5271 else
5272 break;
5273 case 'A':
5274 {
5275 rtx xoperands[2];
5276
5277 xoperands[0] = XEXP (XEXP (x, 0), 0);
5278 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5279 pa_output_global_address (file, xoperands[1], 0);
5280 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5281 return;
5282 }
5283
5284 case 'C': /* Plain (C)ondition */
5285 case 'X':
5286 switch (GET_CODE (x))
5287 {
5288 case EQ:
5289 fputs ("=", file); break;
5290 case NE:
5291 fputs ("<>", file); break;
5292 case GT:
5293 fputs (">", file); break;
5294 case GE:
5295 fputs (">=", file); break;
5296 case GEU:
5297 fputs (">>=", file); break;
5298 case GTU:
5299 fputs (">>", file); break;
5300 case LT:
5301 fputs ("<", file); break;
5302 case LE:
5303 fputs ("<=", file); break;
5304 case LEU:
5305 fputs ("<<=", file); break;
5306 case LTU:
5307 fputs ("<<", file); break;
5308 default:
5309 gcc_unreachable ();
5310 }
5311 return;
5312 case 'N': /* Condition, (N)egated */
5313 switch (GET_CODE (x))
5314 {
5315 case EQ:
5316 fputs ("<>", file); break;
5317 case NE:
5318 fputs ("=", file); break;
5319 case GT:
5320 fputs ("<=", file); break;
5321 case GE:
5322 fputs ("<", file); break;
5323 case GEU:
5324 fputs ("<<", file); break;
5325 case GTU:
5326 fputs ("<<=", file); break;
5327 case LT:
5328 fputs (">=", file); break;
5329 case LE:
5330 fputs (">", file); break;
5331 case LEU:
5332 fputs (">>", file); break;
5333 case LTU:
5334 fputs (">>=", file); break;
5335 default:
5336 gcc_unreachable ();
5337 }
5338 return;
5339 /* For floating point comparisons. Note that the output
5340 predicates are the complement of the desired mode. The
5341 conditions for GT, GE, LT, LE and LTGT cause an invalid
5342 operation exception if the result is unordered and this
5343 exception is enabled in the floating-point status register. */
5344 case 'Y':
5345 switch (GET_CODE (x))
5346 {
5347 case EQ:
5348 fputs ("!=", file); break;
5349 case NE:
5350 fputs ("=", file); break;
5351 case GT:
5352 fputs ("!>", file); break;
5353 case GE:
5354 fputs ("!>=", file); break;
5355 case LT:
5356 fputs ("!<", file); break;
5357 case LE:
5358 fputs ("!<=", file); break;
5359 case LTGT:
5360 fputs ("!<>", file); break;
5361 case UNLE:
5362 fputs ("!?<=", file); break;
5363 case UNLT:
5364 fputs ("!?<", file); break;
5365 case UNGE:
5366 fputs ("!?>=", file); break;
5367 case UNGT:
5368 fputs ("!?>", file); break;
5369 case UNEQ:
5370 fputs ("!?=", file); break;
5371 case UNORDERED:
5372 fputs ("!?", file); break;
5373 case ORDERED:
5374 fputs ("?", file); break;
5375 default:
5376 gcc_unreachable ();
5377 }
5378 return;
5379 case 'S': /* Condition, operands are (S)wapped. */
5380 switch (GET_CODE (x))
5381 {
5382 case EQ:
5383 fputs ("=", file); break;
5384 case NE:
5385 fputs ("<>", file); break;
5386 case GT:
5387 fputs ("<", file); break;
5388 case GE:
5389 fputs ("<=", file); break;
5390 case GEU:
5391 fputs ("<<=", file); break;
5392 case GTU:
5393 fputs ("<<", file); break;
5394 case LT:
5395 fputs (">", file); break;
5396 case LE:
5397 fputs (">=", file); break;
5398 case LEU:
5399 fputs (">>=", file); break;
5400 case LTU:
5401 fputs (">>", file); break;
5402 default:
5403 gcc_unreachable ();
5404 }
5405 return;
5406 case 'B': /* Condition, (B)oth swapped and negate. */
5407 switch (GET_CODE (x))
5408 {
5409 case EQ:
5410 fputs ("<>", file); break;
5411 case NE:
5412 fputs ("=", file); break;
5413 case GT:
5414 fputs (">=", file); break;
5415 case GE:
5416 fputs (">", file); break;
5417 case GEU:
5418 fputs (">>", file); break;
5419 case GTU:
5420 fputs (">>=", file); break;
5421 case LT:
5422 fputs ("<=", file); break;
5423 case LE:
5424 fputs ("<", file); break;
5425 case LEU:
5426 fputs ("<<", file); break;
5427 case LTU:
5428 fputs ("<<=", file); break;
5429 default:
5430 gcc_unreachable ();
5431 }
5432 return;
5433 case 'k':
5434 gcc_assert (GET_CODE (x) == CONST_INT);
5435 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5436 return;
5437 case 'Q':
5438 gcc_assert (GET_CODE (x) == CONST_INT);
5439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5440 return;
5441 case 'L':
5442 gcc_assert (GET_CODE (x) == CONST_INT);
5443 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5444 return;
5445 case 'o':
5446 gcc_assert (GET_CODE (x) == CONST_INT
5447 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5448 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5449 return;
5450 case 'O':
5451 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5452 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5453 return;
5454 case 'p':
5455 gcc_assert (GET_CODE (x) == CONST_INT);
5456 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5457 return;
5458 case 'P':
5459 gcc_assert (GET_CODE (x) == CONST_INT);
5460 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5461 return;
5462 case 'I':
5463 if (GET_CODE (x) == CONST_INT)
5464 fputs ("i", file);
5465 return;
5466 case 'M':
5467 case 'F':
5468 switch (GET_CODE (XEXP (x, 0)))
5469 {
5470 case PRE_DEC:
5471 case PRE_INC:
5472 if (ASSEMBLER_DIALECT == 0)
5473 fputs ("s,mb", file);
5474 else
5475 fputs (",mb", file);
5476 break;
5477 case POST_DEC:
5478 case POST_INC:
5479 if (ASSEMBLER_DIALECT == 0)
5480 fputs ("s,ma", file);
5481 else
5482 fputs (",ma", file);
5483 break;
5484 case PLUS:
5485 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5486 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5487 {
5488 if (ASSEMBLER_DIALECT == 0)
5489 fputs ("x", file);
5490 }
5491 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5492 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5493 {
5494 if (ASSEMBLER_DIALECT == 0)
5495 fputs ("x,s", file);
5496 else
5497 fputs (",s", file);
5498 }
5499 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5500 fputs ("s", file);
5501 break;
5502 default:
5503 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5504 fputs ("s", file);
5505 break;
5506 }
5507 return;
5508 case 'G':
5509 pa_output_global_address (file, x, 0);
5510 return;
5511 case 'H':
5512 pa_output_global_address (file, x, 1);
5513 return;
5514 case 0: /* Don't do anything special */
5515 break;
5516 case 'Z':
5517 {
5518 unsigned op[3];
5519 compute_zdepwi_operands (INTVAL (x), op);
5520 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5521 return;
5522 }
5523 case 'z':
5524 {
5525 unsigned op[3];
5526 compute_zdepdi_operands (INTVAL (x), op);
5527 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5528 return;
5529 }
5530 case 'c':
5531 /* We can get here from a .vtable_inherit due to our
5532 CONSTANT_ADDRESS_P rejecting perfectly good constant
5533 addresses. */
5534 break;
5535 default:
5536 gcc_unreachable ();
5537 }
5538 if (GET_CODE (x) == REG)
5539 {
5540 fputs (reg_names [REGNO (x)], file);
5541 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5542 {
5543 fputs ("R", file);
5544 return;
5545 }
5546 if (FP_REG_P (x)
5547 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5548 && (REGNO (x) & 1) == 0)
5549 fputs ("L", file);
5550 }
5551 else if (GET_CODE (x) == MEM)
5552 {
5553 int size = GET_MODE_SIZE (GET_MODE (x));
5554 rtx base = NULL_RTX;
5555 switch (GET_CODE (XEXP (x, 0)))
5556 {
5557 case PRE_DEC:
5558 case POST_DEC:
5559 base = XEXP (XEXP (x, 0), 0);
5560 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5561 break;
5562 case PRE_INC:
5563 case POST_INC:
5564 base = XEXP (XEXP (x, 0), 0);
5565 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5566 break;
5567 case PLUS:
5568 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5569 fprintf (file, "%s(%s)",
5570 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5571 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5572 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5573 fprintf (file, "%s(%s)",
5574 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5575 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5576 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5577 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5578 {
5579 /* Because the REG_POINTER flag can get lost during reload,
5580 pa_legitimate_address_p canonicalizes the order of the
5581 index and base registers in the combined move patterns. */
5582 rtx base = XEXP (XEXP (x, 0), 1);
5583 rtx index = XEXP (XEXP (x, 0), 0);
5584
5585 fprintf (file, "%s(%s)",
5586 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5587 }
5588 else
5589 output_address (GET_MODE (x), XEXP (x, 0));
5590 break;
5591 default:
5592 output_address (GET_MODE (x), XEXP (x, 0));
5593 break;
5594 }
5595 }
5596 else
5597 output_addr_const (file, x);
5598 }
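
/* Illustrative examples of the operand codes above (not from the
   sources; worked out from the cases in pa_print_operand):

	%R on (reg:DI 6)	prints "%r7" (second register of the pair)
	%k on (const_int 5)	prints "-6" (~5)
	%L on (const_int 3)	prints "29" (32 - (3 & 31))
	%O on (const_int 8)	prints "3" (exact_log2 (8))

   A 4-byte PRE_DEC memory operand based on %r30 prints as
   "-4(%r30)". */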
5599
5600 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5601
5602 void
5603 pa_output_global_address (FILE *file, rtx x, int round_constant)
5604 {
5605
5606 /* Imagine (high (const (plus ...))). */
5607 if (GET_CODE (x) == HIGH)
5608 x = XEXP (x, 0);
5609
5610 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5611 output_addr_const (file, x);
5612 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5613 {
5614 output_addr_const (file, x);
5615 fputs ("-$global$", file);
5616 }
5617 else if (GET_CODE (x) == CONST)
5618 {
5619 const char *sep = "";
5620 int offset = 0; /* assembler wants -$global$ at end */
5621 rtx base = NULL_RTX;
5622
5623 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5624 {
5625 case LABEL_REF:
5626 case SYMBOL_REF:
5627 base = XEXP (XEXP (x, 0), 0);
5628 output_addr_const (file, base);
5629 break;
5630 case CONST_INT:
5631 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5632 break;
5633 default:
5634 gcc_unreachable ();
5635 }
5636
5637 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5638 {
5639 case LABEL_REF:
5640 case SYMBOL_REF:
5641 base = XEXP (XEXP (x, 0), 1);
5642 output_addr_const (file, base);
5643 break;
5644 case CONST_INT:
5645 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5646 break;
5647 default:
5648 gcc_unreachable ();
5649 }
5650
5651 /* How bogus. The compiler is apparently responsible for
5652 rounding the constant if it uses an LR field selector.
5653
5654 The linker and/or assembler seem a better place since
5655 they have to do this kind of thing already.
5656
5657 If we fail to do this, HP's optimizing linker may eliminate
5658 an addil, but not update the ldw/stw/ldo instruction that
5659 uses the result of the addil. */
5660 if (round_constant)
5661 offset = ((offset + 0x1000) & ~0x1fff);
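
/* For instance (illustrative): with the rounding above, an offset of
   0x2fff becomes 0x2000 and 0x3000 becomes 0x4000; that is, the offset
   is rounded to the nearest multiple of 0x2000. */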
5662
5663 switch (GET_CODE (XEXP (x, 0)))
5664 {
5665 case PLUS:
5666 if (offset < 0)
5667 {
5668 offset = -offset;
5669 sep = "-";
5670 }
5671 else
5672 sep = "+";
5673 break;
5674
5675 case MINUS:
5676 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5677 sep = "-";
5678 break;
5679
5680 default:
5681 gcc_unreachable ();
5682 }
5683
5684 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5685 fputs ("-$global$", file);
5686 if (offset)
5687 fprintf (file, "%s%d", sep, offset);
5688 }
5689 else
5690 output_addr_const (file, x);
5691 }
5692
5693 /* Output boilerplate text to appear at the beginning of the file.
5694 There are several possible versions. */
5695 #define aputs(x) fputs(x, asm_out_file)
5696 static inline void
5697 pa_file_start_level (void)
5698 {
5699 if (TARGET_64BIT)
5700 aputs ("\t.LEVEL 2.0w\n");
5701 else if (TARGET_PA_20)
5702 aputs ("\t.LEVEL 2.0\n");
5703 else if (TARGET_PA_11)
5704 aputs ("\t.LEVEL 1.1\n");
5705 else
5706 aputs ("\t.LEVEL 1.0\n");
5707 }
5708
5709 static inline void
5710 pa_file_start_space (int sortspace)
5711 {
5712 aputs ("\t.SPACE $PRIVATE$");
5713 if (sortspace)
5714 aputs (",SORT=16");
5715 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5716 if (flag_tm)
5717 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5718 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5719 "\n\t.SPACE $TEXT$");
5720 if (sortspace)
5721 aputs (",SORT=8");
5722 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5723 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5724 }
5725
5726 static inline void
5727 pa_file_start_file (int want_version)
5728 {
5729 if (write_symbols != NO_DEBUG)
5730 {
5731 output_file_directive (asm_out_file, main_input_filename);
5732 if (want_version)
5733 aputs ("\t.version\t\"01.01\"\n");
5734 }
5735 }
5736
5737 static inline void
5738 pa_file_start_mcount (const char *aswhat)
5739 {
5740 if (profile_flag)
5741 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5742 }
5743
5744 static void
5745 pa_elf_file_start (void)
5746 {
5747 pa_file_start_level ();
5748 pa_file_start_mcount ("ENTRY");
5749 pa_file_start_file (0);
5750 }
5751
5752 static void
5753 pa_som_file_start (void)
5754 {
5755 pa_file_start_level ();
5756 pa_file_start_space (0);
5757 aputs ("\t.IMPORT $global$,DATA\n"
5758 "\t.IMPORT $$dyncall,MILLICODE\n");
5759 pa_file_start_mcount ("CODE");
5760 pa_file_start_file (0);
5761 }
5762
5763 static void
5764 pa_linux_file_start (void)
5765 {
5766 pa_file_start_file (0);
5767 pa_file_start_level ();
5768 pa_file_start_mcount ("CODE");
5769 }
5770
5771 static void
5772 pa_hpux64_gas_file_start (void)
5773 {
5774 pa_file_start_level ();
5775 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5776 if (profile_flag)
5777 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5778 #endif
5779 pa_file_start_file (1);
5780 }
5781
5782 static void
5783 pa_hpux64_hpas_file_start (void)
5784 {
5785 pa_file_start_level ();
5786 pa_file_start_space (1);
5787 pa_file_start_mcount ("CODE");
5788 pa_file_start_file (0);
5789 }
5790 #undef aputs
5791
5792 /* Search the deferred plabel list for SYMBOL and return its internal
5793 label. If an entry for SYMBOL is not found, a new entry is created. */
5794
5795 rtx
5796 pa_get_deferred_plabel (rtx symbol)
5797 {
5798 const char *fname = XSTR (symbol, 0);
5799 size_t i;
5800
5801 /* See if we have already put this function on the list of deferred
5802 plabels. This list is generally small, so a linear search is not
5803 too ugly. If it proves too slow, replace it with something faster. */
5804 for (i = 0; i < n_deferred_plabels; i++)
5805 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5806 break;
5807
5808 /* If the deferred plabel list is empty, or this entry was not found
5809 on the list, create a new entry on the list. */
5810 if (deferred_plabels == NULL || i == n_deferred_plabels)
5811 {
5812 tree id;
5813
5814 if (deferred_plabels == 0)
5815 deferred_plabels = ggc_alloc<deferred_plabel> ();
5816 else
5817 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5818 deferred_plabels,
5819 n_deferred_plabels + 1);
5820
5821 i = n_deferred_plabels++;
5822 deferred_plabels[i].internal_label = gen_label_rtx ();
5823 deferred_plabels[i].symbol = symbol;
5824
5825 /* Gross. We have just implicitly taken the address of this
5826 function. Mark it in the same manner as assemble_name. */
5827 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5828 if (id)
5829 mark_referenced (id);
5830 }
5831
5832 return deferred_plabels[i].internal_label;
5833 }
5834
5835 static void
5836 output_deferred_plabels (void)
5837 {
5838 size_t i;
5839
5840 /* If we have some deferred plabels, then we need to switch into the
5841 data or readonly data section, and align it to a 4 byte boundary
5842 before outputting the deferred plabels. */
5843 if (n_deferred_plabels)
5844 {
5845 switch_to_section (flag_pic ? data_section : readonly_data_section);
5846 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5847 }
5848
5849 /* Now output the deferred plabels. */
5850 for (i = 0; i < n_deferred_plabels; i++)
5851 {
5852 targetm.asm_out.internal_label (asm_out_file, "L",
5853 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5854 assemble_integer (deferred_plabels[i].symbol,
5855 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5856 }
5857 }
5858
5859 /* Initialize optabs to point to emulation routines. */
5860
5861 static void
5862 pa_init_libfuncs (void)
5863 {
5864 if (HPUX_LONG_DOUBLE_LIBRARY)
5865 {
5866 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5867 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5868 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5869 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5870 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5871 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5872 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5873 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5874 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5875
5876 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5877 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5878 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5879 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5880 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5881 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5882 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5883
5884 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5885 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5886 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5887 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5888
5889 set_conv_libfunc (sfix_optab, SImode, TFmode,
5890 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5891 : "_U_Qfcnvfxt_quad_to_sgl");
5892 set_conv_libfunc (sfix_optab, DImode, TFmode,
5893 "_U_Qfcnvfxt_quad_to_dbl");
5894 set_conv_libfunc (ufix_optab, SImode, TFmode,
5895 "_U_Qfcnvfxt_quad_to_usgl");
5896 set_conv_libfunc (ufix_optab, DImode, TFmode,
5897 "_U_Qfcnvfxt_quad_to_udbl");
5898
5899 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5900 "_U_Qfcnvxf_sgl_to_quad");
5901 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5902 "_U_Qfcnvxf_dbl_to_quad");
5903 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5904 "_U_Qfcnvxf_usgl_to_quad");
5905 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5906 "_U_Qfcnvxf_udbl_to_quad");
5907 }
5908
5909 if (TARGET_SYNC_LIBCALL)
5910 init_sync_libfuncs (8);
5911 }
5912
5913 /* HP's millicode routines mean something special to the assembler.
5914 Keep track of which ones we have used. */
5915
5916 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5917 static void import_milli (enum millicodes);
5918 static char imported[(int) end1000];
5919 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5920 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5921 #define MILLI_START 10
5922
5923 static void
5924 import_milli (enum millicodes code)
5925 {
5926 char str[sizeof (import_string)];
5927
5928 if (!imported[(int) code])
5929 {
5930 imported[(int) code] = 1;
5931 strcpy (str, import_string);
5932 memcpy (str + MILLI_START, milli_names[(int) code], 4);
5933 output_asm_insn (str, 0);
5934 }
5935 }
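
/* For example (illustrative), the first import_milli (mulI) in a
   translation unit emits

	.IMPORT $$mulI,MILLICODE

   and later calls for the same code are no-ops because of the
   `imported' array. */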
5936
5937 /* The register constraints have put the operands and return value in
5938 the proper registers. */
5939
5940 const char *
5941 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5942 {
5943 import_milli (mulI);
5944 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5945 }
5946
5947 /* Emit the rtl for doing a division by a constant. */
5948
5949 /* Do magic division millicodes exist for this value? */
5950 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5951
5952 /* We'll use an array to keep track of the magic millicodes and
5953 whether or not we've used them already. [n][0] is signed, [n][1] is
5954 unsigned. */
5955
5956 static int div_milli[16][2];
5957
5958 int
5959 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5960 {
5961 if (GET_CODE (operands[2]) == CONST_INT
5962 && INTVAL (operands[2]) > 0
5963 && INTVAL (operands[2]) < 16
5964 && pa_magic_milli[INTVAL (operands[2])])
5965 {
5966 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5967
5968 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5969 emit
5970 (gen_rtx_PARALLEL
5971 (VOIDmode,
5972 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5973 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5974 SImode,
5975 gen_rtx_REG (SImode, 26),
5976 operands[2])),
5977 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5978 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5979 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5980 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5981 gen_rtx_CLOBBER (VOIDmode, ret))));
5982 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5983 return 1;
5984 }
5985 return 0;
5986 }
5987
5988 const char *
5989 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5990 {
5991 HOST_WIDE_INT divisor;
5992
5993 /* If the divisor is a constant, try to use one of the special
5994 opcodes. */
5995 if (GET_CODE (operands[0]) == CONST_INT)
5996 {
5997 static char buf[100];
5998 divisor = INTVAL (operands[0]);
5999 if (!div_milli[divisor][unsignedp])
6000 {
6001 div_milli[divisor][unsignedp] = 1;
6002 if (unsignedp)
6003 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6004 else
6005 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6006 }
6007 if (unsignedp)
6008 {
6009 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6010 INTVAL (operands[0]));
6011 return pa_output_millicode_call (insn,
6012 gen_rtx_SYMBOL_REF (SImode, buf));
6013 }
6014 else
6015 {
6016 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6017 INTVAL (operands[0]));
6018 return pa_output_millicode_call (insn,
6019 gen_rtx_SYMBOL_REF (SImode, buf));
6020 }
6021 }
6022 /* Divisor isn't a special constant. */
6023 else
6024 {
6025 if (unsignedp)
6026 {
6027 import_milli (divU);
6028 return pa_output_millicode_call (insn,
6029 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6030 }
6031 else
6032 {
6033 import_milli (divI);
6034 return pa_output_millicode_call (insn,
6035 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
6036 }
6037 }
6038 }
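
/* As an illustrative example, a 32-bit signed division by 7 (one of
   the `magic' divisors accepted by pa_emit_hpdiv_const) emits

	.IMPORT $$divI_7,MILLICODE

   once per file and then a millicode call to $$divI_7, while division
   by a non-constant divisor falls back to the generic $$divI routine. */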
6039
6040 /* Output a $$rem millicode to do mod. */
6041
6042 const char *
6043 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6044 {
6045 if (unsignedp)
6046 {
6047 import_milli (remU);
6048 return pa_output_millicode_call (insn,
6049 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6050 }
6051 else
6052 {
6053 import_milli (remI);
6054 return pa_output_millicode_call (insn,
6055 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6056 }
6057 }
6058
6059 void
6060 pa_output_arg_descriptor (rtx_insn *call_insn)
6061 {
6062 const char *arg_regs[4];
6063 machine_mode arg_mode;
6064 rtx link;
6065 int i, output_flag = 0;
6066 int regno;
6067
6068 /* We neither need nor want argument location descriptors for the
6069 64-bit runtime environment or the ELF32 environment. */
6070 if (TARGET_64BIT || TARGET_ELF32)
6071 return;
6072
6073 for (i = 0; i < 4; i++)
6074 arg_regs[i] = 0;
6075
6076 /* Specify explicitly that no argument relocations should take place
6077 if using the portable runtime calling conventions. */
6078 if (TARGET_PORTABLE_RUNTIME)
6079 {
6080 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6081 asm_out_file);
6082 return;
6083 }
6084
6085 gcc_assert (CALL_P (call_insn));
6086 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6087 link; link = XEXP (link, 1))
6088 {
6089 rtx use = XEXP (link, 0);
6090
6091 if (! (GET_CODE (use) == USE
6092 && GET_CODE (XEXP (use, 0)) == REG
6093 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6094 continue;
6095
6096 arg_mode = GET_MODE (XEXP (use, 0));
6097 regno = REGNO (XEXP (use, 0));
6098 if (regno >= 23 && regno <= 26)
6099 {
6100 arg_regs[26 - regno] = "GR";
6101 if (arg_mode == DImode)
6102 arg_regs[25 - regno] = "GR";
6103 }
6104 else if (regno >= 32 && regno <= 39)
6105 {
6106 if (arg_mode == SFmode)
6107 arg_regs[(regno - 32) / 2] = "FR";
6108 else
6109 {
6110 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6111 arg_regs[(regno - 34) / 2] = "FR";
6112 arg_regs[(regno - 34) / 2 + 1] = "FU";
6113 #else
6114 arg_regs[(regno - 34) / 2] = "FU";
6115 arg_regs[(regno - 34) / 2 + 1] = "FR";
6116 #endif
6117 }
6118 }
6119 }
6120 fputs ("\t.CALL ", asm_out_file);
6121 for (i = 0; i < 4; i++)
6122 {
6123 if (arg_regs[i])
6124 {
6125 if (output_flag++)
6126 fputc (',', asm_out_file);
6127 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6128 }
6129 }
6130 fputc ('\n', asm_out_file);
6131 }
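
/* Illustrative example: a call passing two word-sized integer
   arguments in %r26 and %r25 gets the descriptor

	.CALL ARGW0=GR,ARGW1=GR

   while portable-runtime calls always get the fixed all-NO descriptor
   emitted earlier in this function. */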
6132
6133 /* Inform reload about cases where moving X with a mode MODE to or from
6134 a register in RCLASS requires an extra scratch or immediate register.
6135 Return the class needed for the immediate register. */
6136
6137 static reg_class_t
6138 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6139 machine_mode mode, secondary_reload_info *sri)
6140 {
6141 int regno;
6142 enum reg_class rclass = (enum reg_class) rclass_i;
6143
6144 /* Handle the easy stuff first. */
6145 if (rclass == R1_REGS)
6146 return NO_REGS;
6147
6148 if (REG_P (x))
6149 {
6150 regno = REGNO (x);
6151 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6152 return NO_REGS;
6153 }
6154 else
6155 regno = -1;
6156
6157 /* If we have something like (mem (mem (...)), we can safely assume the
6158 inner MEM will end up in a general register after reloading, so there's
6159 no need for a secondary reload. */
6160 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6161 return NO_REGS;
6162
6163 /* Trying to load a constant into a FP register during PIC code
6164 generation requires %r1 as a scratch register. For float modes,
6165 the only legitimate constant is CONST0_RTX. However, there are
6166 a few patterns that accept constant double operands. */
6167 if (flag_pic
6168 && FP_REG_CLASS_P (rclass)
6169 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6170 {
6171 switch (mode)
6172 {
6173 case E_SImode:
6174 sri->icode = CODE_FOR_reload_insi_r1;
6175 break;
6176
6177 case E_DImode:
6178 sri->icode = CODE_FOR_reload_indi_r1;
6179 break;
6180
6181 case E_SFmode:
6182 sri->icode = CODE_FOR_reload_insf_r1;
6183 break;
6184
6185 case E_DFmode:
6186 sri->icode = CODE_FOR_reload_indf_r1;
6187 break;
6188
6189 default:
6190 gcc_unreachable ();
6191 }
6192 return NO_REGS;
6193 }
6194
6195 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6196 register when we're generating PIC code or when the operand isn't
6197 readonly. */
6198 if (pa_symbolic_expression_p (x))
6199 {
6200 if (GET_CODE (x) == HIGH)
6201 x = XEXP (x, 0);
6202
6203 if (flag_pic || !read_only_operand (x, VOIDmode))
6204 {
6205 switch (mode)
6206 {
6207 case E_SImode:
6208 sri->icode = CODE_FOR_reload_insi_r1;
6209 break;
6210
6211 case E_DImode:
6212 sri->icode = CODE_FOR_reload_indi_r1;
6213 break;
6214
6215 default:
6216 gcc_unreachable ();
6217 }
6218 return NO_REGS;
6219 }
6220 }
6221
6222 /* Profiling showed the PA port spends about 1.3% of its compilation
6223 time in true_regnum from calls inside pa_secondary_reload_class. */
6224 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6225 regno = true_regnum (x);
6226
6227 /* Handle reloads for floating point loads and stores. */
6228 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6229 && FP_REG_CLASS_P (rclass))
6230 {
6231 if (MEM_P (x))
6232 {
6233 x = XEXP (x, 0);
6234
6235 /* We don't need a secondary reload for indexed memory addresses.
6236
6237 When INT14_OK_STRICT is true, it might appear that we could
6238 directly allow register indirect memory addresses. However,
6239 this doesn't work because we don't support SUBREGs in
6240 floating-point register copies and reload doesn't tell us
6241 when it's going to use a SUBREG. */
6242 if (IS_INDEX_ADDR_P (x))
6243 return NO_REGS;
6244 }
6245
6246 /* Request a secondary reload with a general scratch register
6247 for everything else. ??? Could symbolic operands be handled
6248 directly when generating non-pic PA 2.0 code? */
6249 sri->icode = (in_p
6250 ? direct_optab_handler (reload_in_optab, mode)
6251 : direct_optab_handler (reload_out_optab, mode));
6252 return NO_REGS;
6253 }
6254
6255 /* A SAR<->FP register copy requires an intermediate general register
6256 and secondary memory. We need a secondary reload with a general
6257 scratch register for spills. */
6258 if (rclass == SHIFT_REGS)
6259 {
6260 /* Handle spill. */
6261 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6262 {
6263 sri->icode = (in_p
6264 ? direct_optab_handler (reload_in_optab, mode)
6265 : direct_optab_handler (reload_out_optab, mode));
6266 return NO_REGS;
6267 }
6268
6269 /* Handle FP copy. */
6270 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6271 return GENERAL_REGS;
6272 }
6273
6274 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6275 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6276 && FP_REG_CLASS_P (rclass))
6277 return GENERAL_REGS;
6278
6279 return NO_REGS;
6280 }
6281
6282 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
6283
6284 static bool
6285 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6286 reg_class_t class1 ATTRIBUTE_UNUSED,
6287 reg_class_t class2 ATTRIBUTE_UNUSED)
6288 {
6289 #ifdef PA_SECONDARY_MEMORY_NEEDED
6290 return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6291 #else
6292 return false;
6293 #endif
6294 }
6295
6296 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6297 is only marked as live on entry by df-scan when it is a fixed
6298 register. It isn't a fixed register in the 64-bit runtime,
6299 so we need to mark it here. */
6300
6301 static void
6302 pa_extra_live_on_entry (bitmap regs)
6303 {
6304 if (TARGET_64BIT)
6305 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6306 }
6307
6308 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6309 to prevent it from being deleted. */
6310
6311 rtx
6312 pa_eh_return_handler_rtx (void)
6313 {
6314 rtx tmp;
6315
6316 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6317 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6318 tmp = gen_rtx_MEM (word_mode, tmp);
6319 tmp->volatil = 1;
6320 return tmp;
6321 }
6322
6323 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6324 by invisible reference. As a GCC extension, we also pass anything
6325 with a zero or variable size by reference.
6326
6327 The 64-bit runtime does not describe passing any types by invisible
6328 reference. The internals of GCC can't currently handle passing
6329 empty structures, and zero or variable length arrays when they are
6330 not passed entirely on the stack or by reference. Thus, as a GCC
6331 extension, we pass these types by reference. The HP compiler doesn't
6332 support these types, so hopefully there shouldn't be any compatibility
6333 issues. This may have to be revisited when HP releases a C99 compiler
6334 or updates the ABI. */
6335
6336 static bool
6337 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6338 {
6339 HOST_WIDE_INT size = arg.type_size_in_bytes ();
6340 if (TARGET_64BIT)
6341 return size <= 0;
6342 else
6343 return size <= 0 || size > 8;
6344 }
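
/* An illustrative consequence of the rule above: a 12-byte struct is
   passed by invisible reference in the 32-bit runtime (size > 8), but
   by value in the 64-bit runtime; zero-sized and variable-sized
   objects are passed by reference in both. */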
6345
6346 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6347
6348 static pad_direction
6349 pa_function_arg_padding (machine_mode mode, const_tree type)
6350 {
6351 if (mode == BLKmode
6352 || (TARGET_64BIT
6353 && type
6354 && (AGGREGATE_TYPE_P (type)
6355 || TREE_CODE (type) == COMPLEX_TYPE
6356 || TREE_CODE (type) == VECTOR_TYPE)))
6357 {
6358 /* Return PAD_NONE if justification is not required. */
6359 if (type
6360 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6361 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6362 return PAD_NONE;
6363
6364 /* The directions set here are ignored when a BLKmode argument larger
6365 than a word is placed in a register. Different code is used for
6366 the stack and registers. This makes it difficult to have a
6367 consistent data representation for both the stack and registers.
6368 For both runtimes, the justification and padding for arguments on
6369 the stack and in registers should be identical. */
6370 if (TARGET_64BIT)
6371 /* The 64-bit runtime specifies left justification for aggregates. */
6372 return PAD_UPWARD;
6373 else
6374 /* The 32-bit runtime architecture specifies right justification.
6375 When the argument is passed on the stack, the argument is padded
6376 with garbage on the left. The HP compiler pads with zeros. */
6377 return PAD_DOWNWARD;
6378 }
6379
6380 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6381 return PAD_DOWNWARD;
6382 else
6383 return PAD_NONE;
6384 }
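
/* Worked example (illustrative): a 3-byte BLKmode struct in the 32-bit
   runtime has a bit size (24) that isn't a multiple of PARM_BOUNDARY,
   so it is padded downward and ends up right-justified in its argument
   word; in the 64-bit runtime the same struct is padded upward. */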
6385
6386
6387 /* Do what is necessary for `va_start'. We look at the current function
6388 to determine if stdargs or varargs is used and fill in an initial
6389 va_list. A pointer to this constructor is returned. */
6390
6391 static rtx
6392 hppa_builtin_saveregs (void)
6393 {
6394 rtx offset, dest;
6395 tree fntype = TREE_TYPE (current_function_decl);
6396 int argadj = ((!stdarg_p (fntype))
6397 ? UNITS_PER_WORD : 0);
6398
6399 if (argadj)
6400 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6401 else
6402 offset = crtl->args.arg_offset_rtx;
6403
6404 if (TARGET_64BIT)
6405 {
6406 int i, off;
6407
6408 /* Adjust for varargs/stdarg differences. */
6409 if (argadj)
6410 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6411 else
6412 offset = crtl->args.arg_offset_rtx;
6413
6414 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6415 from the incoming arg pointer and growing to larger addresses. */
6416 for (i = 26, off = -64; i >= 19; i--, off += 8)
6417 emit_move_insn (gen_rtx_MEM (word_mode,
6418 plus_constant (Pmode,
6419 arg_pointer_rtx, off)),
6420 gen_rtx_REG (word_mode, i));
6421
6422 /* The incoming args pointer points just beyond the flushback area;
6423 normally this is not a serious concern. However, when we are doing
6424 varargs/stdargs we want to make the arg pointer point to the start
6425 of the incoming argument area. */
6426 emit_move_insn (virtual_incoming_args_rtx,
6427 plus_constant (Pmode, arg_pointer_rtx, -64));
6428
6429 /* Now return a pointer to the first anonymous argument. */
6430 return copy_to_reg (expand_binop (Pmode, add_optab,
6431 virtual_incoming_args_rtx,
6432 offset, 0, 0, OPTAB_LIB_WIDEN));
6433 }
6434
6435 /* Store general registers on the stack. */
6436 dest = gen_rtx_MEM (BLKmode,
6437 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6438 -16));
6439 set_mem_alias_set (dest, get_varargs_alias_set ());
6440 set_mem_align (dest, BITS_PER_WORD);
6441 move_block_from_reg (23, dest, 4);
6442
6443 /* move_block_from_reg will emit code to store the argument registers
6444 individually as scalar stores.
6445
6446 However, other insns may later load from the same addresses for
6447 a structure load (passing a struct to a varargs routine).
6448
6449 The alias code assumes that such aliasing can never happen, so we
6450 have to keep memory referencing insns from moving up beyond the
6451 last argument register store. So we emit a blockage insn here. */
6452 emit_insn (gen_blockage ());
6453
6454 return copy_to_reg (expand_binop (Pmode, add_optab,
6455 crtl->args.internal_arg_pointer,
6456 offset, 0, 0, OPTAB_LIB_WIDEN));
6457 }
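
/* In the 64-bit case above, the save loop lays out the incoming
   argument registers as (illustrative):

	%r26 at -64 from the arg pointer, %r25 at -56, ..., %r19 at -8

   so the saved registers and any stack arguments form one contiguous
   argument area. */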
6458
6459 static void
6460 hppa_va_start (tree valist, rtx nextarg)
6461 {
6462 nextarg = expand_builtin_saveregs ();
6463 std_expand_builtin_va_start (valist, nextarg);
6464 }
6465
6466 static tree
6467 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6468 gimple_seq *post_p)
6469 {
6470 if (TARGET_64BIT)
6471 {
6472 /* Args grow upward. We can use the generic routines. */
6473 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6474 }
6475 else /* !TARGET_64BIT */
6476 {
6477 tree ptr = build_pointer_type (type);
6478 tree valist_type;
6479 tree t, u;
6480 unsigned int size, ofs;
6481 bool indirect;
6482
6483 indirect = pass_va_arg_by_reference (type);
6484 if (indirect)
6485 {
6486 type = ptr;
6487 ptr = build_pointer_type (type);
6488 }
6489 size = int_size_in_bytes (type);
6490 valist_type = TREE_TYPE (valist);
6491
6492 /* Args grow down. Not handled by generic routines. */
6493
6494 u = fold_convert (sizetype, size_in_bytes (type));
6495 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6496 t = fold_build_pointer_plus (valist, u);
6497
6498 /* Align to 4 or 8 byte boundary depending on argument size. */
6499
6500 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6501 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6502 t = fold_convert (valist_type, t);
6503
6504 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6505
6506 ofs = (8 - size) % 4;
6507 if (ofs != 0)
6508 t = fold_build_pointer_plus_hwi (t, ofs);
6509
6510 t = fold_convert (ptr, t);
6511 t = build_va_arg_indirect_ref (t);
6512
6513 if (indirect)
6514 t = build_va_arg_indirect_ref (t);
6515
6516 return t;
6517 }
6518 }
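
/* A sketch of the 32-bit arithmetic above (illustrative): for a 2-byte
   argument, valist is first decremented by 2, then masked down to a
   4-byte boundary, and finally ofs = (8 - 2) % 4 = 2 is added back, so
   the value is fetched right-justified from its argument slot. */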
6519
6520 /* True if MODE is valid for the target. By "valid", we mean able to
6521 be manipulated in non-trivial ways. In particular, this means all
6522 the arithmetic is supported.
6523
6524 Currently, TImode is not valid as the HP 64-bit runtime documentation
6525 doesn't document the alignment and calling conventions for this type.
6526 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6527 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6528
6529 static bool
6530 pa_scalar_mode_supported_p (scalar_mode mode)
6531 {
6532 int precision = GET_MODE_PRECISION (mode);
6533
6534 switch (GET_MODE_CLASS (mode))
6535 {
6536 case MODE_PARTIAL_INT:
6537 case MODE_INT:
6538 if (precision == CHAR_TYPE_SIZE)
6539 return true;
6540 if (precision == SHORT_TYPE_SIZE)
6541 return true;
6542 if (precision == INT_TYPE_SIZE)
6543 return true;
6544 if (precision == LONG_TYPE_SIZE)
6545 return true;
6546 if (precision == LONG_LONG_TYPE_SIZE)
6547 return true;
6548 return false;
6549
6550 case MODE_FLOAT:
6551 if (precision == FLOAT_TYPE_SIZE)
6552 return true;
6553 if (precision == DOUBLE_TYPE_SIZE)
6554 return true;
6555 if (precision == LONG_DOUBLE_TYPE_SIZE)
6556 return true;
6557 return false;
6558
6559 case MODE_DECIMAL_FLOAT:
6560 return false;
6561
6562 default:
6563 gcc_unreachable ();
6564 }
6565 }
6566
6567 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6568 it branches into the delay slot. Otherwise, return FALSE. */
6569
6570 static bool
6571 branch_to_delay_slot_p (rtx_insn *insn)
6572 {
6573 rtx_insn *jump_insn;
6574
6575 if (dbr_sequence_length ())
6576 return FALSE;
6577
6578 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6579 while (insn)
6580 {
6581 insn = next_active_insn (insn);
6582 if (jump_insn == insn)
6583 return TRUE;
6584
6585 /* We can't rely on the length of asms. So, we return FALSE when
6586 the branch is followed by an asm. */
6587 if (!insn
6588 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6589 || asm_noperands (PATTERN (insn)) >= 0
6590 || get_attr_length (insn) > 0)
6591 break;
6592 }
6593
6594 return FALSE;
6595 }
6596
6597 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6598
6599 This occurs when INSN has an unfilled delay slot and is followed
6600 by an asm. Disaster can occur if the asm is empty and the jump
6601 branches into the delay slot. So, we add a nop in the delay slot
6602 when this occurs. */
6603
6604 static bool
6605 branch_needs_nop_p (rtx_insn *insn)
6606 {
6607 rtx_insn *jump_insn;
6608
6609 if (dbr_sequence_length ())
6610 return FALSE;
6611
6612 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6613 while (insn)
6614 {
6615 insn = next_active_insn (insn);
6616 if (!insn || jump_insn == insn)
6617 return TRUE;
6618
6619 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6620 || asm_noperands (PATTERN (insn)) >= 0)
6621 && get_attr_length (insn) > 0)
6622 break;
6623 }
6624
6625 return FALSE;
6626 }
6627
6628 /* Return TRUE if INSN, a forward jump insn, can use nullification
6629 to skip the following instruction. This avoids an extra cycle due
6630 to a mis-predicted branch when we fall through. */
6631
6632 static bool
6633 use_skip_p (rtx_insn *insn)
6634 {
6635 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6636
6637 while (insn)
6638 {
6639 insn = next_active_insn (insn);
6640
6641 /* We can't rely on the length of asms, so we can't skip asms. */
6642 if (!insn
6643 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6644 || asm_noperands (PATTERN (insn)) >= 0)
6645 break;
6646 if (get_attr_length (insn) == 4
6647 && jump_insn == next_active_insn (insn))
6648 return TRUE;
6649 if (get_attr_length (insn) > 0)
6650 break;
6651 }
6652
6653 return FALSE;
6654 }
6655
6656 /* This routine handles all the normal conditional branch sequences we
6657 might need to generate. It handles compare immediate vs compare
6658 register, nullification of delay slots, varying length branches,
6659 negated branches, and all combinations of the above. It returns the
6660 output appropriate to emit the branch corresponding to all given
6661 parameters. */
6662
6663 const char *
6664 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6665 {
6666 static char buf[100];
6667 bool useskip;
6668 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6669 int length = get_attr_length (insn);
6670 int xdelay;
6671
6672 /* A conditional branch to the following instruction (e.g. the delay slot)
6673 is asking for a disaster. This can happen when not optimizing and
6674 when jump optimization fails.
6675
6676 While it is usually safe to emit nothing, this can fail if the
6677 preceding instruction is a nullified branch with an empty delay
6678 slot and the same branch target as this branch. We could check
6679 for this but jump optimization should eliminate nop jumps. It
6680 is always safe to emit a nop. */
6681 if (branch_to_delay_slot_p (insn))
6682 return "nop";
6683
6684 /* The doubleword form of the cmpib instruction doesn't have the LEU
6685 and GTU conditions while the cmpb instruction does. Since we accept
6686 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6687 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6688 operands[2] = gen_rtx_REG (DImode, 0);
6689 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6690 operands[1] = gen_rtx_REG (DImode, 0);
6691
6692 /* If this is a long branch with its delay slot unfilled, set `nullify'
6693 as it can nullify the delay slot and save a nop. */
6694 if (length == 8 && dbr_sequence_length () == 0)
6695 nullify = 1;
6696
6697 /* If this is a short forward conditional branch which did not get
6698 its delay slot filled, the delay slot can still be nullified. */
6699 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6700 nullify = forward_branch_p (insn);
6701
6702 /* A forward branch over a single nullified insn can be done with a
6703 comclr instruction. This avoids a single cycle penalty due to
6704 mis-predicted branch if we fall through (branch not taken). */
6705 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6706
6707 switch (length)
6708 {
6709 /* All short conditional branches except backwards with an unfilled
6710 delay slot. */
6711 case 4:
6712 if (useskip)
6713 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6714 else
6715 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6716 if (GET_MODE (operands[1]) == DImode)
6717 strcat (buf, "*");
6718 if (negated)
6719 strcat (buf, "%B3");
6720 else
6721 strcat (buf, "%S3");
6722 if (useskip)
6723 strcat (buf, " %2,%r1,%%r0");
6724 else if (nullify)
6725 {
6726 if (branch_needs_nop_p (insn))
6727 strcat (buf, ",n %2,%r1,%0%#");
6728 else
6729 strcat (buf, ",n %2,%r1,%0");
6730 }
6731 else
6732 strcat (buf, " %2,%r1,%0");
6733 break;
6734
6735 /* All long conditionals. Note a short backward branch with an
6736 unfilled delay slot is treated just like a long backward branch
6737 with an unfilled delay slot. */
6738 case 8:
6739 /* Handle weird backwards branch with a filled delay slot
6740 which is nullified. */
6741 if (dbr_sequence_length () != 0
6742 && ! forward_branch_p (insn)
6743 && nullify)
6744 {
6745 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6746 if (GET_MODE (operands[1]) == DImode)
6747 strcat (buf, "*");
6748 if (negated)
6749 strcat (buf, "%S3");
6750 else
6751 strcat (buf, "%B3");
6752 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6753 }
6754 /* Handle short backwards branch with an unfilled delay slot.
6755 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6756 taken and untaken branches. */
6757 else if (dbr_sequence_length () == 0
6758 && ! forward_branch_p (insn)
6759 && INSN_ADDRESSES_SET_P ()
6760 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6761 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6762 {
6763 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6764 if (GET_MODE (operands[1]) == DImode)
6765 strcat (buf, "*");
6766 if (negated)
6767 strcat (buf, "%B3 %2,%r1,%0%#");
6768 else
6769 strcat (buf, "%S3 %2,%r1,%0%#");
6770 }
6771 else
6772 {
6773 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6774 if (GET_MODE (operands[1]) == DImode)
6775 strcat (buf, "*");
6776 if (negated)
6777 strcat (buf, "%S3");
6778 else
6779 strcat (buf, "%B3");
6780 if (nullify)
6781 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6782 else
6783 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6784 }
6785 break;
6786
6787 default:
6788 /* The reversed conditional branch must branch over one additional
6789 instruction if the delay slot is filled and needs to be extracted
6790 by pa_output_lbranch. If the delay slot is empty or this is a
6791 nullified forward branch, the instruction after the reversed
6792 condition branch must be nullified. */
6793 if (dbr_sequence_length () == 0
6794 || (nullify && forward_branch_p (insn)))
6795 {
6796 nullify = 1;
6797 xdelay = 0;
6798 operands[4] = GEN_INT (length);
6799 }
6800 else
6801 {
6802 xdelay = 1;
6803 operands[4] = GEN_INT (length + 4);
6804 }
6805
6806 /* Create a reversed conditional branch which branches around
6807 the following insns. */
6808 if (GET_MODE (operands[1]) != DImode)
6809 {
6810 if (nullify)
6811 {
6812 if (negated)
6813 strcpy (buf,
6814 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6815 else
6816 strcpy (buf,
6817 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6818 }
6819 else
6820 {
6821 if (negated)
6822 strcpy (buf,
6823 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6824 else
6825 strcpy (buf,
6826 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6827 }
6828 }
6829 else
6830 {
6831 if (nullify)
6832 {
6833 if (negated)
6834 strcpy (buf,
6835 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6836 else
6837 strcpy (buf,
6838 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6839 }
6840 else
6841 {
6842 if (negated)
6843 strcpy (buf,
6844 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6845 else
6846 strcpy (buf,
6847 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6848 }
6849 }
6850
6851 output_asm_insn (buf, operands);
6852 return pa_output_lbranch (operands[0], insn, xdelay);
6853 }
6854 return buf;
6855 }
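
/* As an illustrative example, a short "branch if equal" on word
   operands comes out of the length 4 case above as something like

	cmpb,= %r5,%r4,L$0012

   where the register numbers and label are hypothetical; DImode
   comparisons add a "*" to the completer. */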
6856
6857 /* Output a PIC pc-relative instruction sequence to load the address of
6858 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6859 or a code label. OPERANDS[1] specifies the register to use to load
6860 the program counter. OPERANDS[3] may be used for label generation.
6861 The sequence is always three instructions in length. The program
6862 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6863 Register %r1 is clobbered. */
6864
6865 static void
6866 pa_output_pic_pcrel_sequence (rtx *operands)
6867 {
6868 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6869 if (TARGET_PA_20)
6870 {
6871 /* We can use mfia to determine the current program counter. */
6872 if (TARGET_SOM || !TARGET_GAS)
6873 {
6874 operands[3] = gen_label_rtx ();
6875 targetm.asm_out.internal_label (asm_out_file, "L",
6876 CODE_LABEL_NUMBER (operands[3]));
6877 output_asm_insn ("mfia %1", operands);
6878 output_asm_insn ("addil L'%0-%l3,%1", operands);
6879 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6880 }
6881 else
6882 {
6883 output_asm_insn ("mfia %1", operands);
6884 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6885 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6886 }
6887 }
6888 else
6889 {
6890 /* We need to use a branch to determine the current program counter. */
6891 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6892 if (TARGET_SOM || !TARGET_GAS)
6893 {
6894 operands[3] = gen_label_rtx ();
6895 output_asm_insn ("addil L'%0-%l3,%1", operands);
6896 targetm.asm_out.internal_label (asm_out_file, "L",
6897 CODE_LABEL_NUMBER (operands[3]));
6898 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6899 }
6900 else
6901 {
6902 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6903 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6904 }
6905 }
6906 }
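
/* On PA 2.0 with GAS, the sequence above comes out as (illustrative,
   with a symbol "sym" and %r1 serving as both scratch and destination):

	mfia %r1
	addil L'sym-$PIC_pcrel$0+12,%r1
	ldo R'sym-$PIC_pcrel$0+16(%r1),%r1  */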
6907
6908 /* This routine handles output of long unconditional branches that
6909 exceed the maximum range of a simple branch instruction. Since
6910 we don't have a register available for the branch, we save register
6911 %r1 in the frame marker, load the branch destination DEST into %r1,
6912 execute the branch, and restore %r1 in the delay slot of the branch.
6913
6914 Since long branches may have an insn in the delay slot and the
6915 delay slot is used to restore %r1, we in general need to extract
6916 this insn and execute it before the branch. However, to facilitate
6917 use of this function by conditional branches, we also provide an
6918 option to not extract the delay insn so that it will be emitted
6919 after the long branch. So, if there is an insn in the delay slot,
6920 it is extracted if XDELAY is nonzero.
6921
6922 The lengths of the various long-branch sequences are 20, 16 and 24
6923 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6924
6925 const char *
6926 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6927 {
6928 rtx xoperands[4];
6929
6930 xoperands[0] = dest;
6931
6932 /* First, free up the delay slot. */
6933 if (xdelay && dbr_sequence_length () != 0)
6934 {
6935 /* We can't handle a jump in the delay slot. */
6936 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6937
6938 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6939 optimize, 0, NULL);
6940
6941 /* Now delete the delay insn. */
6942 SET_INSN_DELETED (NEXT_INSN (insn));
6943 }
6944
6945 /* Output an insn to save %r1. The runtime documentation doesn't
6946 specify whether the "Clean Up" slot in the caller's frame can
6947 be clobbered by the callee. It isn't copied by HP's builtin
6948 alloca, so this suggests that it can be clobbered if necessary.
6949 The "Static Link" location is copied by HP builtin alloca, so
6950 we avoid using it. Using the cleanup slot might be a problem
6951 if we have to interoperate with languages that pass cleanup
6952 information. However, it should be possible to handle these
6953 situations with GCC's asm feature.
6954
6955 The "Current RP" slot is reserved for the called procedure, so
6956 we try to use it when we don't have a frame of our own. It's
6957 rather unlikely that we won't have a frame when we need to emit
6958 a very long branch.
6959
6960 Really the way to go long term is a register scavenger; goto
6961 the target of the jump and find a register which we can use
6962 as a scratch to hold the value in %r1. Then, we wouldn't have
6963 to free up the delay slot or clobber a slot that may be needed
6964 for other purposes. */
6965 if (TARGET_64BIT)
6966 {
6967 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6968 /* Use the return pointer slot in the frame marker. */
6969 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6970 else
6971 /* Use the slot at -40 in the frame marker since HP builtin
6972 alloca doesn't copy it. */
6973 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6974 }
6975 else
6976 {
6977 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6978 /* Use the return pointer slot in the frame marker. */
6979 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6980 else
6981 /* Use the "Clean Up" slot in the frame marker. In GCC,
6982 the only other use of this location is for copying a
6983 floating point double argument from a floating-point
6984 register to two general registers. The copy is done
6985 as an "atomic" operation when outputting a call, so it
6986 won't interfere with our using the location here. */
6987 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6988 }
6989
6990 if (TARGET_PORTABLE_RUNTIME)
6991 {
6992 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6993 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6994 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6995 }
6996 else if (flag_pic)
6997 {
6998 xoperands[1] = gen_rtx_REG (Pmode, 1);
6999 xoperands[2] = xoperands[1];
7000 pa_output_pic_pcrel_sequence (xoperands);
7001 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7002 }
7003 else
7004 /* Now output a very long branch to the original target. */
7005 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
7006
7007 /* Now restore the value of %r1 in the delay slot. */
7008 if (TARGET_64BIT)
7009 {
7010 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7011 return "ldd -16(%%r30),%%r1";
7012 else
7013 return "ldd -40(%%r30),%%r1";
7014 }
7015 else
7016 {
7017 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7018 return "ldw -20(%%r30),%%r1";
7019 else
7020 return "ldw -12(%%r30),%%r1";
7021 }
7022 }
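
/* Illustrative expansion for the 32-bit non-PIC case when the function
   has no frame of its own (16 bytes, matching the lengths quoted
   above):

	stw %r1,-20(%r30)
	ldil L'dest,%r1
	be R'dest(%sr4,%r1)
	ldw -20(%r30),%r1

   where the final ldw is the restore in the delay slot; functions with
   a frame use the -12 "Clean Up" slot instead. */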
7023
7024 /* This routine handles all the branch-on-bit conditional branch sequences we
7025 might need to generate. It handles nullification of delay slots,
7026 varying length branches, negated branches and all combinations of the
7027 above. It returns the appropriate output template to emit the branch. */
7028
7029 const char *
7030 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
7031 {
7032 static char buf[100];
7033 bool useskip;
7034 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7035 int length = get_attr_length (insn);
7036 int xdelay;
7037
7038 /* A conditional branch to the following instruction (e.g. the delay slot) is
7039 asking for a disaster. I do not think this can happen as this pattern
7040 is only used when optimizing; jump optimization should eliminate the
7041 jump. But be prepared just in case. */
7042
7043 if (branch_to_delay_slot_p (insn))
7044 return "nop";
7045
7046 /* If this is a long branch with its delay slot unfilled, set `nullify'
7047 as it can nullify the delay slot and save a nop. */
7048 if (length == 8 && dbr_sequence_length () == 0)
7049 nullify = 1;
7050
7051 /* If this is a short forward conditional branch which did not get
7052 its delay slot filled, the delay slot can still be nullified. */
7053 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7054 nullify = forward_branch_p (insn);
7055
7056 /* A forward branch over a single nullified insn can be done with an
7057 extrs instruction. This avoids a single cycle penalty due to
7058 mis-predicted branch if we fall through (branch not taken). */
7059 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
    case 4:
      if (useskip)
	strcpy (buf, "{extrs,|extrw,s,}");
      else
	strcpy (buf, "bb,");
      if (useskip && GET_MODE (operands[0]) == DImode)
	strcpy (buf, "extrd,s,*");
      else if (GET_MODE (operands[0]) == DImode)
	strcpy (buf, "bb,*");
      if ((which == 0 && negated)
	  || (which == 1 && ! negated))
	strcat (buf, ">=");
      else
	strcat (buf, "<");
      if (useskip)
	strcat (buf, " %0,%1,1,%%r0");
      else if (nullify && negated)
	{
	  if (branch_needs_nop_p (insn))
	    strcat (buf, ",n %0,%1,%3%#");
	  else
	    strcat (buf, ",n %0,%1,%3");
	}
      else if (nullify && ! negated)
	{
	  if (branch_needs_nop_p (insn))
	    strcat (buf, ",n %0,%1,%2%#");
	  else
	    strcat (buf, ",n %0,%1,%2");
	}
      else if (! nullify && negated)
	strcat (buf, " %0,%1,%3");
      else if (! nullify && ! negated)
	strcat (buf, " %0,%1,%2");
      break;

      /* All long conditionals.  Note a short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
    case 8:
      /* Handle weird backwards branch with a filled delay slot
	 which is nullified.  */
      if (dbr_sequence_length () != 0
	  && ! forward_branch_p (insn)
	  && nullify)
	{
	  strcpy (buf, "bb,");
	  if (GET_MODE (operands[0]) == DImode)
	    strcat (buf, "*");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, "<");
	  else
	    strcat (buf, ">=");
	  if (negated)
	    strcat (buf, ",n %0,%1,.+12\n\tb %3");
	  else
	    strcat (buf, ",n %0,%1,.+12\n\tb %2");
	}
      /* Handle short backwards branch with an unfilled delay slot.
	 Using a bb;nop rather than extrs;bl saves 1 cycle for both
	 taken and untaken branches.  */
      else if (dbr_sequence_length () == 0
	       && ! forward_branch_p (insn)
	       && INSN_ADDRESSES_SET_P ()
	       && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	{
	  strcpy (buf, "bb,");
	  if (GET_MODE (operands[0]) == DImode)
	    strcat (buf, "*");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, ">=");
	  else
	    strcat (buf, "<");
	  if (negated)
	    strcat (buf, " %0,%1,%3%#");
	  else
	    strcat (buf, " %0,%1,%2%#");
	}
      else
	{
	  if (GET_MODE (operands[0]) == DImode)
	    strcpy (buf, "extrd,s,*");
	  else
	    strcpy (buf, "{extrs,|extrw,s,}");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, "<");
	  else
	    strcat (buf, ">=");
	  if (nullify && negated)
	    strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
	  else if (nullify && ! negated)
	    strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
	  else if (negated)
	    strcat (buf, " %0,%1,1,%%r0\n\tb %3");
	  else
	    strcat (buf, " %0,%1,1,%%r0\n\tb %2");
	}
      break;

    default:
      /* The reversed conditional branch must branch over one additional
	 instruction if the delay slot is filled and needs to be extracted
	 by pa_output_lbranch.  If the delay slot is empty or this is a
	 nullified forward branch, the instruction after the reversed
	 condition branch must be nullified.  */
      if (dbr_sequence_length () == 0
	  || (nullify && forward_branch_p (insn)))
	{
	  nullify = 1;
	  xdelay = 0;
	  operands[4] = GEN_INT (length);
	}
      else
	{
	  xdelay = 1;
	  operands[4] = GEN_INT (length + 4);
	}

      if (GET_MODE (operands[0]) == DImode)
	strcpy (buf, "bb,*");
      else
	strcpy (buf, "bb,");
      if ((which == 0 && negated)
	  || (which == 1 && !negated))
	strcat (buf, "<");
      else
	strcat (buf, ">=");
      if (nullify)
	strcat (buf, ",n %0,%1,.+%4");
      else
	strcat (buf, " %0,%1,.+%4");
      output_asm_insn (buf, operands);
      return pa_output_lbranch (negated ? operands[3] : operands[2],
				insn, xdelay);
    }
  return buf;
}

/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.  */
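/* Unlike pa_output_bb, the bit to test here is selected at run time by
   the shift amount register.  The {bvb,|bb,} style templates below pick
   either the PA 1.x bvb form or the PA 2.0 bb form with an explicit
   %sar operand, depending on the assembler dialect in use.  */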

const char *
pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
	       int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (i.e., the delay slot)
     is asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to a
     mispredicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
    case 4:
      if (useskip)
	strcpy (buf, "{vextrs,|extrw,s,}");
      else
	strcpy (buf, "{bvb,|bb,}");
      if (useskip && GET_MODE (operands[0]) == DImode)
	strcpy (buf, "extrd,s,*");
      else if (GET_MODE (operands[0]) == DImode)
	strcpy (buf, "bb,*");
      if ((which == 0 && negated)
	  || (which == 1 && ! negated))
	strcat (buf, ">=");
      else
	strcat (buf, "<");
      if (useskip)
	strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
      else if (nullify && negated)
	{
	  if (branch_needs_nop_p (insn))
	    strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
	  else
	    strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
	}
      else if (nullify && ! negated)
	{
	  if (branch_needs_nop_p (insn))
	    strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
	  else
	    strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
	}
      else if (! nullify && negated)
	strcat (buf, "{ %0,%3| %0,%%sar,%3}");
      else if (! nullify && ! negated)
	strcat (buf, "{ %0,%2| %0,%%sar,%2}");
      break;

      /* All long conditionals.  Note a short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
    case 8:
      /* Handle weird backwards branch with a filled delay slot
	 which is nullified.  */
      if (dbr_sequence_length () != 0
	  && ! forward_branch_p (insn)
	  && nullify)
	{
	  strcpy (buf, "{bvb,|bb,}");
	  if (GET_MODE (operands[0]) == DImode)
	    strcat (buf, "*");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, "<");
	  else
	    strcat (buf, ">=");
	  if (negated)
	    strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
	  else
	    strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
	}
      /* Handle short backwards branch with an unfilled delay slot.
	 Using a bb;nop rather than extrs;bl saves 1 cycle for both
	 taken and untaken branches.  */
      else if (dbr_sequence_length () == 0
	       && ! forward_branch_p (insn)
	       && INSN_ADDRESSES_SET_P ()
	       && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	{
	  strcpy (buf, "{bvb,|bb,}");
	  if (GET_MODE (operands[0]) == DImode)
	    strcat (buf, "*");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, ">=");
	  else
	    strcat (buf, "<");
	  if (negated)
	    strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
	  else
	    strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
	}
      else
	{
	  strcpy (buf, "{vextrs,|extrw,s,}");
	  if (GET_MODE (operands[0]) == DImode)
	    strcpy (buf, "extrd,s,*");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, "<");
	  else
	    strcat (buf, ">=");
	  if (nullify && negated)
	    strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
	  else if (nullify && ! negated)
	    strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
	  else if (negated)
	    strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
	  else
	    strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
	}
      break;

    default:
      /* The reversed conditional branch must branch over one additional
	 instruction if the delay slot is filled and needs to be extracted
	 by pa_output_lbranch.  If the delay slot is empty or this is a
	 nullified forward branch, the instruction after the reversed
	 condition branch must be nullified.  */
      if (dbr_sequence_length () == 0
	  || (nullify && forward_branch_p (insn)))
	{
	  nullify = 1;
	  xdelay = 0;
	  operands[4] = GEN_INT (length);
	}
      else
	{
	  xdelay = 1;
	  operands[4] = GEN_INT (length + 4);
	}

      if (GET_MODE (operands[0]) == DImode)
	strcpy (buf, "bb,*");
      else
	strcpy (buf, "{bvb,|bb,}");
      if ((which == 0 && negated)
	  || (which == 1 && !negated))
	strcat (buf, "<");
      else
	strcat (buf, ">=");
      if (nullify)
	strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
      else
	strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
      output_asm_insn (buf, operands);
      return pa_output_lbranch (negated ? operands[3] : operands[2],
				insn, xdelay);
    }
  return buf;
}

/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
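/* A dbra-type insn adjusts a counter and branches on the result in a
   single instruction; e.g. the length-4 template below could come out as

	addib,> -1,%r3,L$0002

   (condition, operands and label invented for illustration).  */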
const char *
pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (i.e., the delay slot)
     is asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
	  output_asm_insn ("ldw -16(%%r30),%4", operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else
	{
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "addib,%C2,n %1,%0,%3%#";
	      else
		return "addib,%C2,n %1,%0,%3";
	    }
	  else
	    return "addib,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using an addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				     - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
	  else
	    return "addi,%N2 %1,%0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}

    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
		       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[5] = GEN_INT (length - 16);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
	{
	  operands[5] = GEN_INT (length - 4);
	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}

/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
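/* A movb copies a register and branches on the moved value in a single
   insn; e.g. the length-4 template below could come out as

	movb,= %r26,%r28,L$0003

   (condition, operands and label invented for illustration).  */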
const char *
pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
		int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (i.e., the delay slot)
     is asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("stw %1,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "movb,%C2,n %1,%0,%3%#";
	      else
		return "movb,%C2,n %1,%0,%3";
	    }
	  else
	    return "movb,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tb %3";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				     - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
	 finally load the FP register from MEM from within the branch's
	 delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[4] = GEN_INT (length - 4);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}

/* Copy any FP arguments in INSN into integer registers.  */
static void
copy_fp_args (rtx_insn *insn)
{
  rtx link;
  rtx xoperands[2];

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
	{
	  /* Copy the FP register into an integer register via memory.  */
	  if (arg_mode == SFmode)
	    {
	      xoperands[0] = XEXP (use, 0);
	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
	    }
	  else
	    {
	      xoperands[0] = XEXP (use, 0);
	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
	    }
	}
    }
}

/* Compute length of the FP argument copy sequence for INSN.  */
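/* Each SFmode argument copy emitted by copy_fp_args above is two 4-byte
   insns (fstw + ldw, 8 bytes); each DFmode copy is three (fstd + two
   ldw's, 12 bytes), which is where the constants below come from.  */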
static int
length_fp_args (rtx_insn *insn)
{
  int length = 0;
  rtx link;

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
	{
	  if (arg_mode == SFmode)
	    length += 8;
	  else
	    length += 12;
	}
    }

  return length;
}

/* Return the attribute length for the millicode call instruction INSN.
   The length must match the code generated by pa_output_millicode_call.
   We include the delay slot in the returned length as it is better to
   overestimate the length than to underestimate it.  */

int
pa_attr_length_millicode_call (rtx_insn *insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }
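  /* The address arithmetic above is unsigned; wrapping around
     (distance < total) means the distance is unknown, and -1, the
     largest unsigned long, then forces the long-call paths below.  */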

  if (TARGET_64BIT)
    {
      if (!TARGET_LONG_CALLS && distance < 7600000)
	return 8;

      return 20;
    }
  else if (TARGET_PORTABLE_RUNTIME)
    return 24;
  else
    {
      if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
	return 8;

      if (!flag_pic)
	return 12;

      return 24;
    }
}

/* INSN is a function call.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within-reach
     form of the $$sh_func_adrs call has a length of 28.  Because it
     has an attribute type of sh_func_adrs, it never has a nonzero
     sequence length (i.e., the delay slot is never filled).  */
  if (!TARGET_LONG_CALLS
      && (attr_length == 8
	  || (attr_length == 28
	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
    {
      xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
	{
	  /* It might seem that one insn could be saved by accessing
	     the millicode function using the linkage table.  However,
	     this doesn't work in shared libraries and other dynamically
	     loaded objects.  Using a pc-relative sequence also avoids
	     problems related to the implicit use of the gp register.  */
	  xoperands[1] = gen_rtx_REG (Pmode, 1);
	  xoperands[2] = xoperands[1];
	  pa_output_pic_pcrel_sequence (xoperands);
	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Pure portable runtime doesn't allow be/ble; we also don't
	     have PIC support in the assembler/linker, so this sequence
	     is needed.  */

	  /* Get the address of our target into %r1.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
      else if (!flag_pic)
	{
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  if (TARGET_PA_20)
	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
	  else
	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
	}
      else
	{
	  xoperands[1] = gen_rtx_REG (Pmode, 31);
	  xoperands[2] = gen_rtx_REG (Pmode, 1);
	  pa_output_pic_pcrel_sequence (xoperands);

	  /* Adjust return address.  */
	  output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}

/* Return the attribute length of the call instruction INSN.  The SIBCALL
   flag indicates whether INSN is a regular call or a sibling call.  The
   length returned must be longer than the code actually generated by
   pa_output_call.  Since branch shortening is done before delay branch
   sequencing, there is no way to determine whether or not the delay
   slot will be filled during branch shortening.  Even when the delay
   slot is filled, we may have to add a nop if the delay slot contains
   a branch that can't reach its target.  Thus, we always have to include
   the delay slot in the length estimate.  This used to be done in
   pa_adjust_insn_length but we do it here now as some sequences always
   fill the delay slot and we can save four bytes in the estimate for
   these sequences.  */

int
pa_attr_length_call (rtx_insn *insn, int sibcall)
{
  int local_call;
  rtx call, call_dest;
  tree call_decl;
  int length = 0;
  rtx pat = PATTERN (insn);
  unsigned long distance = -1;

  gcc_assert (CALL_P (insn));

  if (INSN_ADDRESSES_SET_P ())
    {
      unsigned long total;

      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  gcc_assert (GET_CODE (pat) == PARALLEL);

  /* Get the call rtx.  */
  call = XVECEXP (pat, 0, 0);
  if (GET_CODE (call) == SET)
    call = SET_SRC (call);

  gcc_assert (GET_CODE (call) == CALL);

  /* Determine if this is a local call.  */
  call_dest = XEXP (XEXP (call, 0), 0);
  call_decl = SYMBOL_REF_DECL (call_dest);
  local_call = call_decl && targetm.binds_local_p (call_decl);

  /* pc-relative branch.  */
  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
	  || distance < MAX_PCREL17F_OFFSET))
    length += 8;

  /* 64-bit plabel sequence.  */
  else if (TARGET_64BIT && !local_call)
    length += 24;

  /* non-pic long absolute branch sequence.  */
  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
    length += 12;

  /* long pc-relative branch sequence.  */
  else if (TARGET_LONG_PIC_SDIFF_CALL
	   || (TARGET_GAS && !TARGET_SOM && local_call))
    {
      length += 20;

      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
	length += 8;
    }

  /* 32-bit plabel sequence.  */
  else
    {
      length += 32;

      if (TARGET_SOM)
	length += length_fp_args (insn);

      if (flag_pic)
	length += 4;

      if (!TARGET_PA_20)
	{
	  if (!sibcall)
	    length += 8;

	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
	    length += 8;
	}
    }

  return length;
}

/* INSN is a function call.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
{
  int seq_length = dbr_sequence_length ();
  tree call_decl = SYMBOL_REF_DECL (call_dest);
  int local_call = call_decl && targetm.binds_local_p (call_decl);
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we're sure that the branch will reach
     the beginning of the "$CODE$" subspace.  This is the beginning of
     the current function if we are in a named section.  */
  if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
    {
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT && !local_call)
	{
	  /* ??? As far as I can tell, the HP linker doesn't support the
	     long pc-relative sequence described in the 64-bit runtime
	     architecture.  So, we use a slightly longer indirect call.  */
	  xoperands[0] = pa_get_deferred_plabel (call_dest);
	  xoperands[1] = gen_label_rtx ();

	  /* Put the load of %r27 into the delay slot.  We don't need to
	     do anything when generating fast indirect calls.  */
	  if (seq_length != 0)
	    {
	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
			       optimize, 0, NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	    }

	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
	  output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
	  output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	  seq_length = 1;
	}
      else
	{
	  int indirect_call = 0;

	  /* Emit a long call.  There are several different sequences
	     of increasing length and complexity.  In most cases,
	     they don't allow an instruction in the delay slot.  */
	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	      && !TARGET_LONG_PIC_SDIFF_CALL
	      && !(TARGET_GAS && !TARGET_SOM && local_call)
	      && !TARGET_64BIT)
	    indirect_call = 1;
	  if (seq_length != 0
	      && !sibcall
	      && (!TARGET_PA_20
		  || indirect_call
		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
	    {
	      /* A non-jump insn in the delay slot.  By definition we can
		 emit this insn before the call (and in fact before
		 argument relocation).  */
	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
			       NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	      seq_length = 0;
	    }

	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	    {
	      /* This is the best sequence for making long calls in
		 non-pic code.  Unfortunately, GNU ld doesn't provide
		 the stub needed for external calls, and GAS's support
		 for this with the SOM linker is buggy.  It is safe
		 to use this for local calls.  */
	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
	      if (sibcall)
		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
	      else
		{
		  if (TARGET_PA_20)
		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
				     xoperands);
		  else
		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

		  output_asm_insn ("copy %%r31,%%r2", xoperands);
		  seq_length = 1;
		}
	    }
	  else
	    {
	      /* The HP assembler and linker can handle relocations for
		 the difference of two symbols.  The HP assembler
		 recognizes the sequence as a pc-relative call and
		 the linker provides stubs when needed.  */

	      /* GAS currently can't generate the relocations that
		 are needed for the SOM linker under HP-UX using this
		 sequence.  The GNU linker doesn't generate the stubs
		 that are needed for external calls on TARGET_ELF32
		 with this sequence.  For now, we have to use a longer
		 plabel sequence when using GAS for non-local calls.  */
	      if (TARGET_LONG_PIC_SDIFF_CALL
		  || (TARGET_GAS && !TARGET_SOM && local_call))
		{
		  xoperands[1] = gen_rtx_REG (Pmode, 1);
		  xoperands[2] = xoperands[1];
		  pa_output_pic_pcrel_sequence (xoperands);
		}
	      else
		{
		  /* Emit a long plabel-based call sequence.  This is
		     essentially an inline implementation of $$dyncall.
		     We don't actually try to call $$dyncall as this is
		     as difficult as calling the function itself.  */
		  xoperands[0] = pa_get_deferred_plabel (call_dest);
		  xoperands[1] = gen_label_rtx ();

		  /* Since the call is indirect, FP arguments in registers
		     need to be copied to the general registers.  Then, the
		     argument relocation stub will copy them back.  */
		  if (TARGET_SOM)
		    copy_fp_args (insn);

		  if (flag_pic)
		    {
		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
		      output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
		    }
		  else
		    {
		      output_asm_insn ("addil LR'%0-$global$,%%r27",
				       xoperands);
		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
				       xoperands);
		    }

		  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
		  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
		  /* Should this be an ordered load to ensure the target
		     address is loaded before the global pointer?  */
		  output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
		  output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);

		  if (!sibcall && !TARGET_PA_20)
		    {
		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
		      else
			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
		    }
		}

	      if (TARGET_PA_20)
		{
		  if (sibcall)
		    output_asm_insn ("bve (%%r1)", xoperands);
		  else
		    {
		      if (indirect_call)
			{
			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
			  seq_length = 1;
			}
		      else
			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
		    }
		}
	      else
		{
		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
				     xoperands);

		  if (sibcall)
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
		    }
		  else
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

		      if (indirect_call)
			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
		      else
			output_asm_insn ("copy %%r31,%%r2", xoperands);
		      seq_length = 1;
		    }
		}
	    }
	}
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}

/* Return the attribute length of the indirect call instruction INSN.
   The length must match the code generated by pa_output_indirect_call.
   The returned length includes the delay slot.  Currently, the delay
   slot of an indirect call sequence is not exposed and it is used by
   the sequence itself.  */

int
pa_attr_length_indirect_call (rtx_insn *insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  if (TARGET_64BIT)
    return 12;

  if (TARGET_FAST_INDIRECT_CALLS)
    return 8;

  if (TARGET_PORTABLE_RUNTIME)
    return 16;

  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
	  || distance < MAX_PCREL17F_OFFSET))
    return 8;

  /* Out of reach, can use ble.  */
  if (!flag_pic)
    return 12;

  /* Inline versions of $$dyncall.  */
  if (!optimize_size)
    {
      if (TARGET_NO_SPACE_REGS)
	return 28;

      if (TARGET_PA_20)
	return 32;
    }

  /* Long PIC pc-relative call.  */
  return 20;
}

const char *
pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
{
  rtx xoperands[4];
  int length;

  if (TARGET_64BIT)
    {
      xoperands[0] = call_dest;
      output_asm_insn ("ldd 16(%0),%%r2\n\t"
		       "bve,l (%%r2),%%r2\n\t"
		       "ldd 24(%0),%%r27", xoperands);
      return "";
    }

  /* First the special case for kernels, level 0 systems, etc.  */
  if (TARGET_FAST_INDIRECT_CALLS)
    {
      pa_output_arg_descriptor (insn);
      if (TARGET_PA_20)
	return "bve,l,n (%%r22),%%r2\n\tnop";
      return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
		       "ldo R'$$dyncall(%%r31),%%r31", xoperands);
      pa_output_arg_descriptor (insn);
      return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
    }

  /* Now the normal case -- we can reach $$dyncall directly or
     we're sure that we can get there via a long-branch stub.

     No need to check target flags as the length uniquely identifies
     the remaining cases.  */
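  /* The length encodes the case: 8 means $$dyncall is directly
     reachable, 12 the non-PIC ble sequence, 28 and 32 the inline
     $$dyncall expansions below, and 20 the closing long PIC pc-relative
     call (the fast-indirect and portable-runtime cases returned above;
     see pa_attr_length_indirect_call).  */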
  length = pa_attr_length_indirect_call (insn);
  if (length == 8)
    {
      pa_output_arg_descriptor (insn);

      /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
	 variant of the B,L instruction can't be used on the SOM target.  */
      if (TARGET_PA_20 && !TARGET_SOM)
	return "b,l,n $$dyncall,%%r2\n\tnop";
      else
	return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
    }

  /* Long millicode call, but we are not generating PIC or portable runtime
     code.  */
  if (length == 12)
    {
      output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
      pa_output_arg_descriptor (insn);
      return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
    }

  /* The long PIC pc-relative call sequence is five instructions.  So,
     let's use an inline version of $$dyncall when the calling sequence
     has a roughly similar number of instructions and we are not optimizing
     for size.  We need two instructions to load the return pointer plus
     the $$dyncall implementation.  */
  if (!optimize_size)
    {
      if (TARGET_NO_SPACE_REGS)
	{
	  pa_output_arg_descriptor (insn);
	  output_asm_insn ("bl .+8,%%r2\n\t"
			   "ldo 20(%%r2),%%r2\n\t"
			   "extru,<> %%r22,30,1,%%r0\n\t"
			   "bv,n %%r0(%%r22)\n\t"
			   "ldw -2(%%r22),%%r21\n\t"
			   "bv %%r0(%%r21)\n\t"
			   "ldw 2(%%r22),%%r19", xoperands);
	  return "";
	}
      if (TARGET_PA_20)
	{
	  pa_output_arg_descriptor (insn);
	  output_asm_insn ("bl .+8,%%r2\n\t"
			   "ldo 24(%%r2),%%r2\n\t"
			   "stw %%r2,-24(%%sp)\n\t"
			   "extru,<> %%r22,30,1,%%r0\n\t"
			   "bve,n (%%r22)\n\t"
			   "ldw -2(%%r22),%%r21\n\t"
			   "bve (%%r21)\n\t"
			   "ldw 2(%%r22),%%r19", xoperands);
	  return "";
	}
    }

  /* We need a long PIC call to $$dyncall.  */
  xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
  xoperands[1] = gen_rtx_REG (Pmode, 2);
  xoperands[2] = gen_rtx_REG (Pmode, 1);
  pa_output_pic_pcrel_sequence (xoperands);
  pa_output_arg_descriptor (insn);
  return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
}

/* In HPUX 8.0's shared library scheme, special relocations are needed
   for function labels if they might be passed to a function
   in a shared library (because shared libraries don't live in code
   space), and special magic is needed to construct their address.  */
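/* Concretely, pa_encode_label below simply prepends '@' to the assembler
   name, so "foo" becomes "@foo"; pa_strip_name_encoding strips it off
   again.  */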

void
pa_encode_label (rtx sym)
{
  const char *str = XSTR (sym, 0);
  int len = strlen (str) + 1;
  char *newstr, *p;

  p = newstr = XALLOCAVEC (char, len + 1);
  *p++ = '@';
  strcpy (p, str);

  XSTR (sym, 0) = ggc_alloc_string (newstr, len);
}

static void
pa_encode_section_info (tree decl, rtx rtl, int first)
{
  int old_referenced = 0;

  if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
    old_referenced
      = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;

  default_encode_section_info (decl, rtl, first);

  if (first && TEXT_SPACE_P (decl))
    {
      SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
      if (TREE_CODE (decl) == FUNCTION_DECL)
	pa_encode_label (XEXP (rtl, 0));
    }
  else if (old_referenced)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
}

/* This is sort of inverse to pa_encode_section_info.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  str += (*str == '@');
  str += (*str == '*');
  return str;
}

/* Returns 1 if OP is a function label involved in a simple addition
   with a constant.  Used to keep certain patterns from matching
   during instruction combination.  */
int
pa_is_function_label_plus_const (rtx op)
{
  /* Strip off any CONST.  */
  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  return (GET_CODE (op) == PLUS
	  && function_label_operand (XEXP (op, 0), VOIDmode)
	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */
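/* In the simplest situation (no VCALL_OFFSET, DELTA fits in 14 bits and
   the target is within reach), the emitted thunk reduces to a sketch like

	b	function
	ldo	delta(%r26),%r26

   adjusting THIS (in argument register %r26) in the branch's delay slot;
   the cases below elaborate on this with long-branch, PIC and indirect
   sequences.  */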

static void
pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
			HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  static unsigned int current_thunk_number;
  int val_14 = VAL_14_BITS_P (delta);
  unsigned int old_last_address = last_address, nbytes = 0;
  char label[17];
  rtx xoperands[4];

  xoperands[0] = XEXP (DECL_RTL (function), 0);
  xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
  xoperands[2] = GEN_INT (delta);

  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (emit_barrier (), file, 1);

  if (!vcall_offset)
    {
      /* Output the thunk.  We know that the function is in the same
	 translation unit (i.e., the same space) as the thunk, and that
	 thunks are output after their method.  Thus, we don't need an
	 external branch to reach the function.  With SOM and GAS,
	 functions and thunks are effectively in different sections.
	 Thus, we can always use an IA-relative branch and the linker
	 will add a long branch stub if necessary.

	 However, we have to be careful when generating PIC code on the
	 SOM port to ensure that the sequence does not transfer to an
	 import stub for the target function as this could clobber the
	 return value saved at SP-24.  This would also apply to the
	 32-bit linux port if the multi-space model is implemented.  */
      if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	   && !(flag_pic && TREE_PUBLIC (function))
	   && (TARGET_GAS || last_address < 262132))
	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	      && ((targetm_common.have_named_sections
		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
		   /* The GNU 64-bit linker has rather poor stub management.
		      So, we use a long branch from thunks that aren't in
		      the same section as the target function.  */
		   && ((!TARGET_64BIT
			&& (DECL_SECTION_NAME (thunk_fndecl)
			    != DECL_SECTION_NAME (function)))
		       || ((DECL_SECTION_NAME (thunk_fndecl)
			    == DECL_SECTION_NAME (function))
			   && last_address < 262132)))
		  /* In this case, we need to be able to reach the start of
		     the stub table even though the function is likely closer
		     and can be jumped to directly.  */
		  || (targetm_common.have_named_sections
		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
		      && DECL_SECTION_NAME (function) == NULL
		      && total_code_bytes < MAX_PCREL17F_OFFSET)
		  /* Likewise.  */
		  || (!targetm_common.have_named_sections
		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
	{
	  if (!val_14)
	    output_asm_insn ("addil L'%2,%%r26", xoperands);

	  output_asm_insn ("b %0", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 8;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	      nbytes += 12;
	    }
	}
      else if (TARGET_64BIT)
	{
	  rtx xop[4];

	  /* We only have one call-clobbered scratch register, so we can't
	     make use of the delay slot if delta doesn't fit in 14 bits.  */
	  if (!val_14)
	    {
	      output_asm_insn ("addil L'%2,%%r26", xoperands);
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	    }

	  /* Load function address into %r1.  */
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = xop[1];
	  pa_output_pic_pcrel_sequence (xop);

	  if (val_14)
	    {
	      output_asm_insn ("bv %%r0(%%r1)", xoperands);
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 20;
	    }
	  else
	    {
	      output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
	      nbytes += 24;
	    }
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);

	  if (!val_14)
	    output_asm_insn ("ldil L'%2,%%r26", xoperands);

	  output_asm_insn ("bv %%r0(%%r22)", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 16;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
	      nbytes += 20;
	    }
	}
      else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
	{
	  /* The function is accessible from outside this module.  The only
	     way to avoid an import stub between the thunk and function is to
	     call the function directly with an indirect sequence similar to
	     that used by $$dyncall.  This is possible because $$dyncall acts
	     as the import stub in an indirect call.  */
	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

	  if (!val_14)
	    {
	      output_asm_insn ("addil L'%2,%%r26", xoperands);
	      nbytes += 4;
	    }

	  if (TARGET_PA_20)
	    {
	      output_asm_insn ("bve (%%r22)", xoperands);
	      nbytes += 36;
	    }
	  else if (TARGET_NO_SPACE_REGS)
	    {
	      output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
	      nbytes += 36;
	    }
	  else
	    {
	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
	      output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
	      nbytes += 44;
	    }

	  if (val_14)
	    output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  else
	    output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	}
      else if (flag_pic)
	{
	  rtx xop[4];

	  /* Load function address into %r22.  */
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = gen_rtx_REG (Pmode, 22);
	  pa_output_pic_pcrel_sequence (xop);

	  if (!val_14)
	    output_asm_insn ("addil L'%2,%%r26", xoperands);

	  output_asm_insn ("bv %%r0(%%r22)", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 20;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	      nbytes += 24;
	    }
	}
      else
	{
	  if (!val_14)
	    output_asm_insn ("addil L'%2,%%r26", xoperands);

	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 12;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	      nbytes += 16;
	    }
	}
    }
  else
    {
      rtx xop[4];

      /* Add DELTA to THIS.  */
      if (val_14)
	{
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 4;
	}
      else
	{
	  output_asm_insn ("addil L'%2,%%r26", xoperands);
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	  nbytes += 8;
	}

      if (TARGET_64BIT)
	{
	  /* Load *(THIS + DELTA) to %r1.  */
	  output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);

	  val_14 = VAL_14_BITS_P (vcall_offset);
	  xoperands[2] = GEN_INT (vcall_offset);

	  /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
	  if (val_14)
	    {
	      output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
	      nbytes += 8;
	    }
	  else
	    {
	      output_asm_insn ("addil L'%2,%%r1", xoperands);
	      output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
	      nbytes += 12;
	    }
	}
      else
	{
	  /* Load *(THIS + DELTA) to %r1.  */
	  output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);

	  val_14 = VAL_14_BITS_P (vcall_offset);
	  xoperands[2] = GEN_INT (vcall_offset);

	  /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
	  if (val_14)
	    {
	      output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
	      nbytes += 8;
	    }
	  else
	    {
	      output_asm_insn ("addil L'%2,%%r1", xoperands);
	      output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
	      nbytes += 12;
	    }
	}

      /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible.  */
      if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	   && !(flag_pic && TREE_PUBLIC (function))
	   && (TARGET_GAS || last_address < 262132))
	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	      && ((targetm_common.have_named_sections
		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
		   /* The GNU 64-bit linker has rather poor stub management.
		      So, we use a long branch from thunks that aren't in
		      the same section as the target function.  */
		   && ((!TARGET_64BIT
			&& (DECL_SECTION_NAME (thunk_fndecl)
			    != DECL_SECTION_NAME (function)))
		       || ((DECL_SECTION_NAME (thunk_fndecl)
			    == DECL_SECTION_NAME (function))
			   && last_address < 262132)))
		  /* In this case, we need to be able to reach the start of
		     the stub table even though the function is likely closer
		     and can be jumped to directly.  */
		  || (targetm_common.have_named_sections
		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
		      && DECL_SECTION_NAME (function) == NULL
		      && total_code_bytes < MAX_PCREL17F_OFFSET)
		  /* Likewise.  */
		  || (!targetm_common.have_named_sections
		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
	{
	  nbytes += 4;
	  output_asm_insn ("b %0", xoperands);

	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
	}
      else if (TARGET_64BIT)
	{
	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);

	  /* Load function address into %r1.  */
	  nbytes += 16;
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = xop[1];
	  pa_output_pic_pcrel_sequence (xop);

	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Load function address into %r22.  */
	  nbytes += 12;
	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
	  output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);

	  output_asm_insn ("bv %%r0(%%r22)", xoperands);

	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
	}
      else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
	{
	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);

	  /* The function is accessible from outside this module.  The only
	     way to avoid an import stub between the thunk and function is to
	     call the function directly with an indirect sequence similar to
	     that used by $$dyncall.  This is possible because $$dyncall acts
	     as the import stub in an indirect call.  */
	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

	  if (TARGET_PA_20)
	    {
	      output_asm_insn ("bve,n (%%r22)", xoperands);
	      nbytes += 32;
	    }
	  else if (TARGET_NO_SPACE_REGS)
	    {
	      output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
	      nbytes += 32;
	    }
	  else
	    {
	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
	      output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
	      nbytes += 40;
	    }
	}
      else if (flag_pic)
	{
	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);

	  /* Load function address into %r1.  */
	  nbytes += 16;
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = xop[1];
	  pa_output_pic_pcrel_sequence (xop);

	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
	}
      else
	{
	  /* Load function address into %r22.  */
	  nbytes += 8;
	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
	}
    }

  final_end_function ();

  if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      switch_to_section (data_section);
      output_asm_insn (".align 4", xoperands);
      ASM_OUTPUT_LABEL (file, label);
      output_asm_insn (".word P'%0", xoperands);
    }

  current_thunk_number++;
  nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
  last_address += nbytes;
  if (old_last_address > last_address)
    last_address = UINT_MAX;
  update_total_code_bytes (nbytes);
  assemble_end_function (thunk_fndecl, fnname);
}
8869
8870 /* Only direct calls to static functions are allowed to be sibling (tail)
8871 call optimized.
8872
8873 This restriction is necessary because some linker generated stubs will
8874 store return pointers into rp' in some cases which might clobber a
8875 live value already in rp'.
8876
8877 In a sibcall the current function and the target function share stack
8878 space. Thus if the path to the current function and the path to the
8879 target function save a value in rp', they save the value into the
8880 same stack slot, which has undesirable consequences.
8881
8882 Because of the deferred binding nature of shared libraries any function
8883 with external scope could be in a different load module and thus require
8884 rp' to be saved when calling that function. So sibcall optimizations
8885 can only be safe for static functions.
8886
8887 Note that GCC never needs return value relocations, so we don't have to
8888 worry about static calls with return value relocations (which require
8889 saving rp').
8890
8891 It is safe to perform a sibcall optimization when the target function
8892 will never return. */
8893 static bool
8894 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8895 {
8896 /* Sibcalls are not ok because the arg pointer register is not a fixed
8897 register. This prevents the sibcall optimization from occurring. In
8898 addition, there are problems with stub placement using GNU ld. This
8899 is because a normal sibcall branch uses a 17-bit relocation while
8900 a regular call branch uses a 22-bit relocation. As a result, more
8901 care needs to be taken in the placement of long-branch stubs. */
8902 if (TARGET_64BIT)
8903 return false;
8904
8905 if (TARGET_PORTABLE_RUNTIME)
8906 return false;
8907
8908 /* Sibcalls are only ok within a translation unit. */
8909 return decl && targetm.binds_local_p (decl);
8910 }
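/* Illustrative example (not part of the port): under the rules above,
   only calls that bind locally are sibcall candidates, e.g.

     static int add1 (int x) { return x + 1; }
     int wrapper (int x) { return add1 (x); }

   may tail call add1 on the 32-bit port, while a call to an extern
   function must remain a normal call so that a linker import stub
   cannot clobber a live value saved in rp'.  */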
8911
8912 /* ??? Addition is not commutative on the PA due to the weird implicit
8913 space register selection rules for memory addresses. Therefore, we
8914 don't consider a + b == b + a, as this might be inside a MEM. */
8915 static bool
8916 pa_commutative_p (const_rtx x, int outer_code)
8917 {
8918 return (COMMUTATIVE_P (x)
8919 && (TARGET_NO_SPACE_REGS
8920 || (outer_code != UNKNOWN && outer_code != MEM)
8921 || GET_CODE (x) != PLUS));
8922 }
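/* Sketch of the rationale (an inference from the PA architecture, not
   spelled out above): for short-displacement memory references the
   space register is selected from the upper bits of the base register,
   so rewriting a MEM address from (plus b x) to (plus x b) could change
   which space register is implicitly used and thus which address is
   actually referenced.  */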
8923
8924 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8925 use in fmpyadd instructions. */
8926 int
8927 pa_fmpyaddoperands (rtx *operands)
8928 {
8929 machine_mode mode = GET_MODE (operands[0]);
8930
8931 /* Must be a floating point mode. */
8932 if (mode != SFmode && mode != DFmode)
8933 return 0;
8934
8935 /* All modes must be the same. */
8936 if (! (mode == GET_MODE (operands[1])
8937 && mode == GET_MODE (operands[2])
8938 && mode == GET_MODE (operands[3])
8939 && mode == GET_MODE (operands[4])
8940 && mode == GET_MODE (operands[5])))
8941 return 0;
8942
8943 /* All operands must be registers. */
8944 if (! (GET_CODE (operands[1]) == REG
8945 && GET_CODE (operands[2]) == REG
8946 && GET_CODE (operands[3]) == REG
8947 && GET_CODE (operands[4]) == REG
8948 && GET_CODE (operands[5]) == REG))
8949 return 0;
8950
8951 /* Only 2 real operands to the addition. One of the input operands must
8952 be the same as the output operand. */
8953 if (! rtx_equal_p (operands[3], operands[4])
8954 && ! rtx_equal_p (operands[3], operands[5]))
8955 return 0;
8956
8957 /* Inout operand of add cannot conflict with any operands from multiply. */
8958 if (rtx_equal_p (operands[3], operands[0])
8959 || rtx_equal_p (operands[3], operands[1])
8960 || rtx_equal_p (operands[3], operands[2]))
8961 return 0;
8962
8963 /* multiply cannot feed into addition operands. */
8964 if (rtx_equal_p (operands[4], operands[0])
8965 || rtx_equal_p (operands[5], operands[0]))
8966 return 0;
8967
8968 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8969 if (mode == SFmode
8970 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8971 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8972 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8973 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8974 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8975 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8976 return 0;
8977
8978 /* Passed. Operands are suitable for fmpyadd. */
8979 return 1;
8980 }
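/* Worked example (illustrative operand assignments): for the multiply
   fr22 = fr4 * fr5 and the add fr24 = fr24 + fr6, the operand vector is

     0=fr22  1=fr4  2=fr5  3=fr24  4=fr24  5=fr6

   and every test above passes.  Changing the add to fr24 = fr24 + fr22
   fails the "multiply cannot feed into addition" test because the
   multiply result (fr22) is not available in the same cycle.  */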
8981
8982 #if !defined(USE_COLLECT2)
8983 static void
8984 pa_asm_out_constructor (rtx symbol, int priority)
8985 {
8986 if (!function_label_operand (symbol, VOIDmode))
8987 pa_encode_label (symbol);
8988
8989 #ifdef CTORS_SECTION_ASM_OP
8990 default_ctor_section_asm_out_constructor (symbol, priority);
8991 #else
8992 # ifdef TARGET_ASM_NAMED_SECTION
8993 default_named_section_asm_out_constructor (symbol, priority);
8994 # else
8995 default_stabs_asm_out_constructor (symbol, priority);
8996 # endif
8997 #endif
8998 }
8999
9000 static void
9001 pa_asm_out_destructor (rtx symbol, int priority)
9002 {
9003 if (!function_label_operand (symbol, VOIDmode))
9004 pa_encode_label (symbol);
9005
9006 #ifdef DTORS_SECTION_ASM_OP
9007 default_dtor_section_asm_out_destructor (symbol, priority);
9008 #else
9009 # ifdef TARGET_ASM_NAMED_SECTION
9010 default_named_section_asm_out_destructor (symbol, priority);
9011 # else
9012 default_stabs_asm_out_destructor (symbol, priority);
9013 # endif
9014 #endif
9015 }
9016 #endif
9017
9018 /* This function places uninitialized global data in the bss section.
9019 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9020 function on the SOM port to prevent uninitialized global data from
9021 being placed in the data section. */
9022
9023 void
9024 pa_asm_output_aligned_bss (FILE *stream,
9025 const char *name,
9026 unsigned HOST_WIDE_INT size,
9027 unsigned int align)
9028 {
9029 switch_to_section (bss_section);
9030
9031 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9032 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
9033 #endif
9034
9035 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9036 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
9037 #endif
9038
9039 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9040 ASM_OUTPUT_LABEL (stream, name);
9041 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9042 }
9043
9044 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9045 that doesn't allow the alignment of global common storage to be directly
9046 specified. The SOM linker aligns common storage based on the rounded
9047 value of the NUM_BYTES parameter in the .comm directive. It's not
9048 possible to use the .align directive as it doesn't affect the alignment
9049 of the label associated with a .comm directive. */
9050
9051 void
9052 pa_asm_output_aligned_common (FILE *stream,
9053 const char *name,
9054 unsigned HOST_WIDE_INT size,
9055 unsigned int align)
9056 {
9057 unsigned int max_common_align;
9058
9059 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9060 if (align > max_common_align)
9061 {
9062 warning (0, "alignment (%u) for %s exceeds maximum alignment "
9063 "for global common data. Using %u",
9064 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
9065 align = max_common_align;
9066 }
9067
9068 switch_to_section (bss_section);
9069
9070 assemble_name (stream, name);
9071 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9072 MAX (size, align / BITS_PER_UNIT));
9073 }
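/* Worked example (illustrative): for a 5-byte object with a requested
   alignment of 64 bits, MAX (5, 8) yields the directive

     foo	.comm 8

   and the SOM linker then derives the alignment from the rounded
   size.  */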
9074
9075 /* We can't use .comm for local common storage as the SOM linker effectively
9076 treats the symbol as universal and uses the same storage for local symbols
9077 with the same name in different object files. The .block directive
9078 reserves an uninitialized block of storage. However, it's not common
9079 storage. Fortunately, GCC never requests common storage with the same
9080 name in any given translation unit. */
9081
9082 void
9083 pa_asm_output_aligned_local (FILE *stream,
9084 const char *name,
9085 unsigned HOST_WIDE_INT size,
9086 unsigned int align)
9087 {
9088 switch_to_section (bss_section);
9089 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9090
9091 #ifdef LOCAL_ASM_OP
9092 fprintf (stream, "%s", LOCAL_ASM_OP);
9093 assemble_name (stream, name);
9094 fprintf (stream, "\n");
9095 #endif
9096
9097 ASM_OUTPUT_LABEL (stream, name);
9098 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9099 }
9100
9101 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9102 use in fmpysub instructions. */
9103 int
9104 pa_fmpysuboperands (rtx *operands)
9105 {
9106 machine_mode mode = GET_MODE (operands[0]);
9107
9108 /* Must be a floating point mode. */
9109 if (mode != SFmode && mode != DFmode)
9110 return 0;
9111
9112 /* All modes must be the same. */
9113 if (! (mode == GET_MODE (operands[1])
9114 && mode == GET_MODE (operands[2])
9115 && mode == GET_MODE (operands[3])
9116 && mode == GET_MODE (operands[4])
9117 && mode == GET_MODE (operands[5])))
9118 return 0;
9119
9120 /* All operands must be registers. */
9121 if (! (GET_CODE (operands[1]) == REG
9122 && GET_CODE (operands[2]) == REG
9123 && GET_CODE (operands[3]) == REG
9124 && GET_CODE (operands[4]) == REG
9125 && GET_CODE (operands[5]) == REG))
9126 return 0;
9127
9128 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
9129 operation, so operands[4] must be the same as operands[3]. */
9130 if (! rtx_equal_p (operands[3], operands[4]))
9131 return 0;
9132
9133 /* multiply cannot feed into subtraction. */
9134 if (rtx_equal_p (operands[5], operands[0]))
9135 return 0;
9136
9137 /* Inout operand of sub cannot conflict with any operands from multiply. */
9138 if (rtx_equal_p (operands[3], operands[0])
9139 || rtx_equal_p (operands[3], operands[1])
9140 || rtx_equal_p (operands[3], operands[2]))
9141 return 0;
9142
9143 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
9144 if (mode == SFmode
9145 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9146 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9147 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9148 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9149 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9150 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9151 return 0;
9152
9153 /* Passed. Operands are suitable for fmpysub. */
9154 return 1;
9155 }
9156
9157 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
9158 constants for a MULT embedded inside a memory address. */
9159 int
9160 pa_mem_shadd_constant_p (int val)
9161 {
9162 if (val == 2 || val == 4 || val == 8)
9163 return 1;
9164 else
9165 return 0;
9166 }
9167
9168 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
9169 constants for shadd instructions. */
9170 int
9171 pa_shadd_constant_p (int val)
9172 {
9173 if (val == 1 || val == 2 || val == 3)
9174 return 1;
9175 else
9176 return 0;
9177 }
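/* Illustrative connection between the two predicates above: a shadd
   constant of 2 corresponds to the sh2add instruction, e.g.

       sh2add %r26,%r25,%r28	; %r28 = (%r26 << 2) + %r25

   so the shift counts 1, 2 and 3 accepted here match the scale factors
   2, 4 and 8 accepted by pa_mem_shadd_constant_p for a MULT inside a
   memory address.  */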
9178
9179 /* Return TRUE if INSN branches forward. */
9180
9181 static bool
9182 forward_branch_p (rtx_insn *insn)
9183 {
9184 rtx lab = JUMP_LABEL (insn);
9185
9186 /* The INSN must have a jump label. */
9187 gcc_assert (lab != NULL_RTX);
9188
9189 if (INSN_ADDRESSES_SET_P ())
9190 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
9191
9192 while (insn)
9193 {
9194 if (insn == lab)
9195 return true;
9196 else
9197 insn = NEXT_INSN (insn);
9198 }
9199
9200 return false;
9201 }
9202
9203 /* Output an unconditional move and branch insn. */
9204
9205 const char *
9206 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9207 {
9208 int length = get_attr_length (insn);
9209
9210 /* These are the cases in which we win. */
9211 if (length == 4)
9212 return "mov%I1b,tr %1,%0,%2";
9213
9214 /* None of the following cases win, but they don't lose either. */
9215 if (length == 8)
9216 {
9217 if (dbr_sequence_length () == 0)
9218 {
9219 /* Nothing in the delay slot, fake it by putting the combined
9220 insn (the copy or add) in the delay slot of a bl. */
9221 if (GET_CODE (operands[1]) == CONST_INT)
9222 return "b %2\n\tldi %1,%0";
9223 else
9224 return "b %2\n\tcopy %1,%0";
9225 }
9226 else
9227 {
9228 /* Something in the delay slot, but we've got a long branch. */
9229 if (GET_CODE (operands[1]) == CONST_INT)
9230 return "ldi %1,%0\n\tb %2";
9231 else
9232 return "copy %1,%0\n\tb %2";
9233 }
9234 }
9235
9236 if (GET_CODE (operands[1]) == CONST_INT)
9237 output_asm_insn ("ldi %1,%0", operands);
9238 else
9239 output_asm_insn ("copy %1,%0", operands);
9240 return pa_output_lbranch (operands[2], insn, 1);
9241 }
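/* Illustrative output (assumed operands %r4, %r5 and label L$12): the
   length 4 case above emits the single instruction

       movb,tr %r4,%r5,L$12

   which copies %r4 to %r5 and branches unconditionally, while the
   length 8 cases fall back to separate copy/ldi and branch
   instructions.  */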
9242
9243 /* Output an unconditional add and branch insn. */
9244
9245 const char *
9246 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9247 {
9248 int length = get_attr_length (insn);
9249
9250 /* To make life easy we want operand0 to be the shared input/output
9251 operand and operand1 to be the readonly operand. */
9252 if (operands[0] == operands[1])
9253 operands[1] = operands[2];
9254
9255 /* These are the cases in which we win. */
9256 if (length == 4)
9257 return "add%I1b,tr %1,%0,%3";
9258
9259 /* None of the following cases win, but they don't lose either. */
9260 if (length == 8)
9261 {
9262 if (dbr_sequence_length () == 0)
9263 /* Nothing in the delay slot, fake it by putting the combined
9264 insn (the copy or add) in the delay slot of a bl. */
9265 return "b %3\n\tadd%I1 %1,%0,%0";
9266 else
9267 /* Something in the delay slot, but we've got a long branch. */
9268 return "add%I1 %1,%0,%0\n\tb %3";
9269 }
9270
9271 output_asm_insn ("add%I1 %1,%0,%0", operands);
9272 return pa_output_lbranch (operands[3], insn, 1);
9273 }
9274
9275 /* We use this hook to perform a PA specific optimization which is difficult
9276 to do in earlier passes. */
9277
9278 static void
9279 pa_reorg (void)
9280 {
9281 remove_useless_addtr_insns (1);
9282
9283 if (pa_cpu < PROCESSOR_8000)
9284 pa_combine_instructions ();
9285 }
9286
9287 /* The PA has a number of odd instructions which can perform multiple
9288 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9289 it may be profitable to combine two instructions into one instruction
9290 with two outputs. It's not profitable on PA2.0 machines because the
9291 two outputs would take two slots in the reorder buffers.
9292
9293 This routine finds instructions which can be combined and combines
9294 them. We only support some of the potential combinations, and we
9295 only try common ways to find suitable instructions.
9296
9297 * addb can add two registers or a register and a small integer
9298 and jump to a nearby (+-8k) location. Normally the jump to the
9299 nearby location is conditional on the result of the add, but by
9300 using the "true" condition we can make the jump unconditional.
9301 Thus addb can perform two independent operations in one insn.
9302
9303 * movb is similar to addb in that it can perform a reg->reg
9304 or small immediate->reg copy and jump to a nearby (+-8k) location.
9305
9306 * fmpyadd and fmpysub can perform a FP multiply and either an
9307 FP add or FP sub if the operands of the multiply and add/sub are
9308 independent (there are other minor restrictions). Note both
9309 the fmpy and fadd/fsub can in theory move to better spots according
9310 to data dependencies, but for now we require the fmpy stay at a
9311 fixed location.
9312
9313 * Many of the memory operations can perform pre & post updates
9314 of index registers. GCC's pre/post increment/decrement addressing
9315 is far too simple to take advantage of all the possibilities. This
9316 pass may not be suitable since those insns may not be independent.
9317
9318 * comclr can compare two ints or an int and a register, nullify
9319 the following instruction and zero some other register. This
9320 is more difficult to use as it's harder to find an insn which
9321 will generate a comclr than finding something like an unconditional
9322 branch. (conditional moves & long branches create comclr insns).
9323
9324 * Most arithmetic operations can conditionally skip the next
9325 instruction. They can be viewed as "perform this operation
9326 and conditionally jump to this nearby location" (where nearby
9327 is an insn away). These are difficult to use due to the
9328 branch length restrictions. */
9329
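/* Illustrative combination (assumed registers and label): on a PA1.1
   machine the instruction pair

       add %r25,%r26,%r26
       b,n L$0040

   can be replaced by the single instruction

       addb,tr %r25,%r26,L$0040

   where the always-true completer makes the branch unconditional.  */
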
9330 static void
9331 pa_combine_instructions (void)
9332 {
9333 rtx_insn *anchor;
9334
9335 /* This can get expensive since the basic algorithm is on the
9336 order of O(n^2) (or worse). Only do it for -O2 or higher
9337 levels of optimization. */
9338 if (optimize < 2)
9339 return;
9340
9341 /* Walk down the list of insns looking for "anchor" insns which
9342 may be combined with "floating" insns. As the name implies,
9343 "anchor" instructions don't move, while "floating" insns may
9344 move around. */
9345 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9346 rtx_insn *new_rtx = make_insn_raw (par);
9347
9348 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9349 {
9350 enum attr_pa_combine_type anchor_attr;
9351 enum attr_pa_combine_type floater_attr;
9352
9353 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9354 Also ignore any special USE insns. */
9355 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9356 || GET_CODE (PATTERN (anchor)) == USE
9357 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9358 continue;
9359
9360 anchor_attr = get_attr_pa_combine_type (anchor);
9361 /* See if anchor is an insn suitable for combination. */
9362 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9363 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9364 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9365 && ! forward_branch_p (anchor)))
9366 {
9367 rtx_insn *floater;
9368
9369 for (floater = PREV_INSN (anchor);
9370 floater;
9371 floater = PREV_INSN (floater))
9372 {
9373 if (NOTE_P (floater)
9374 || (NONJUMP_INSN_P (floater)
9375 && (GET_CODE (PATTERN (floater)) == USE
9376 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9377 continue;
9378
9379 /* Anything except a regular INSN will stop our search. */
9380 if (! NONJUMP_INSN_P (floater))
9381 {
9382 floater = NULL;
9383 break;
9384 }
9385
9386 /* See if FLOATER is suitable for combination with the
9387 anchor. */
9388 floater_attr = get_attr_pa_combine_type (floater);
9389 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9390 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9391 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9392 && floater_attr == PA_COMBINE_TYPE_FMPY))
9393 {
9394 /* If ANCHOR and FLOATER can be combined, then we're
9395 done with this pass. */
9396 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9397 SET_DEST (PATTERN (floater)),
9398 XEXP (SET_SRC (PATTERN (floater)), 0),
9399 XEXP (SET_SRC (PATTERN (floater)), 1)))
9400 break;
9401 }
9402
9403 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9404 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9405 {
9406 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9407 {
9408 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9409 SET_DEST (PATTERN (floater)),
9410 XEXP (SET_SRC (PATTERN (floater)), 0),
9411 XEXP (SET_SRC (PATTERN (floater)), 1)))
9412 break;
9413 }
9414 else
9415 {
9416 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9417 SET_DEST (PATTERN (floater)),
9418 SET_SRC (PATTERN (floater)),
9419 SET_SRC (PATTERN (floater))))
9420 break;
9421 }
9422 }
9423 }
9424
9425 /* If we didn't find anything on the backwards scan try forwards. */
9426 if (!floater
9427 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9428 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9429 {
9430 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9431 {
9432 if (NOTE_P (floater)
9433 || (NONJUMP_INSN_P (floater)
9434 && (GET_CODE (PATTERN (floater)) == USE
9435 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9437 continue;
9438
9439 /* Anything except a regular INSN will stop our search. */
9440 if (! NONJUMP_INSN_P (floater))
9441 {
9442 floater = NULL;
9443 break;
9444 }
9445
9446 /* See if FLOATER is suitable for combination with the
9447 anchor. */
9448 floater_attr = get_attr_pa_combine_type (floater);
9449 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9450 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9451 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9452 && floater_attr == PA_COMBINE_TYPE_FMPY))
9453 {
9454 /* If ANCHOR and FLOATER can be combined, then we're
9455 done with this pass. */
9456 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9457 SET_DEST (PATTERN (floater)),
9458 XEXP (SET_SRC (PATTERN (floater)),
9459 0),
9460 XEXP (SET_SRC (PATTERN (floater)),
9461 1)))
9462 break;
9463 }
9464 }
9465 }
9466
9467 /* FLOATER will be nonzero if we found a suitable floating
9468 insn for combination with ANCHOR. */
9469 if (floater
9470 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9471 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9472 {
9473 /* Emit the new instruction and delete the old anchor. */
9474 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9475 copy_rtx (PATTERN (floater)));
9476 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9477 emit_insn_before (temp, anchor);
9478
9479 SET_INSN_DELETED (anchor);
9480
9481 /* Emit a special USE insn for FLOATER, then delete
9482 the floating insn. */
9483 temp = copy_rtx (PATTERN (floater));
9484 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9485 delete_insn (floater);
9486
9487 continue;
9488 }
9489 else if (floater
9490 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9491 {
9492 /* Emit the new_jump instruction and delete the old anchor. */
9493 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9494 copy_rtx (PATTERN (floater)));
9495 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9496 temp = emit_jump_insn_before (temp, anchor);
9497
9498 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9499 SET_INSN_DELETED (anchor);
9500
9501 /* Emit a special USE insn for FLOATER, then delete
9502 the floating insn. */
9503 temp = copy_rtx (PATTERN (floater));
9504 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9505 delete_insn (floater);
9506 continue;
9507 }
9508 }
9509 }
9510 }
9511
9512 static int
9513 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9514 int reversed, rtx dest,
9515 rtx src1, rtx src2)
9516 {
9517 int insn_code_number;
9518 rtx_insn *start, *end;
9519
9520 /* Create a PARALLEL with the patterns of ANCHOR and
9521 FLOATER, try to recognize it, then test constraints
9522 for the resulting pattern.
9523
9524 If the pattern doesn't match or the constraints
9525 aren't met keep searching for a suitable floater
9526 insn. */
9527 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9528 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9529 INSN_CODE (new_rtx) = -1;
9530 insn_code_number = recog_memoized (new_rtx);
9531 basic_block bb = BLOCK_FOR_INSN (anchor);
9532 if (insn_code_number < 0
9533 || (extract_insn (new_rtx),
9534 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9535 return 0;
9536
9537 if (reversed)
9538 {
9539 start = anchor;
9540 end = floater;
9541 }
9542 else
9543 {
9544 start = floater;
9545 end = anchor;
9546 }
9547
9548 /* There are up to three operands to consider: one
9549 output and two inputs.
9550
9551 The output must not be used between FLOATER & ANCHOR
9552 exclusive. The inputs must not be set between
9553 FLOATER and ANCHOR exclusive. */
9554
9555 if (reg_used_between_p (dest, start, end))
9556 return 0;
9557
9558 if (reg_set_between_p (src1, start, end))
9559 return 0;
9560
9561 if (reg_set_between_p (src2, start, end))
9562 return 0;
9563
9564 /* If we get here, then everything is good. */
9565 return 1;
9566 }
9567
9568 /* Return nonzero if references for INSN are delayed.
9569
9570 Millicode insns are actually function calls with some special
9571 constraints on arguments and register usage.
9572
9573 Millicode calls always expect their arguments in the integer argument
9574 registers, and always return their result in %r29 (ret1). They
9575 are expected to clobber their arguments, %r1, %r29, and the return
9576 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9577
9578 This function tells reorg that the references to arguments and
9579 millicode calls do not appear to happen until after the millicode call.
9580 This allows reorg to put insns which set the argument registers into the
9581 delay slot of the millicode call -- thus they act more like traditional
9582 CALL_INSNs.
9583
9584 Note we cannot consider side effects of the insn to be delayed because
9585 the branch and link insn will clobber the return pointer. If we happened
9586 to use the return pointer in the delay slot of the call, then we lose.
9587
9588 get_attr_type will try to recognize the given insn, so make sure to
9589 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9590 in particular. */
9591 int
9592 pa_insn_refs_are_delayed (rtx_insn *insn)
9593 {
9594 return ((NONJUMP_INSN_P (insn)
9595 && GET_CODE (PATTERN (insn)) != SEQUENCE
9596 && GET_CODE (PATTERN (insn)) != USE
9597 && GET_CODE (PATTERN (insn)) != CLOBBER
9598 && get_attr_type (insn) == TYPE_MILLI));
9599 }
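/* Illustrative effect (assumed millicode routine and argument): this
   hook lets reorg turn

       ldi 7,%r26
       bl $$mulI,%r31

   into

       bl $$mulI,%r31
       ldi 7,%r26	; argument set up in the delay slot

   because the millicode call's references to its argument registers
   are treated as delayed.  */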
9600
9601 /* Promote the return value, but not the arguments. */
9602
9603 static machine_mode
9604 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9605 machine_mode mode,
9606 int *punsignedp ATTRIBUTE_UNUSED,
9607 const_tree fntype ATTRIBUTE_UNUSED,
9608 int for_return)
9609 {
9610 if (for_return == 0)
9611 return mode;
9612 return promote_mode (type, mode, punsignedp);
9613 }
9614
9615 /* On the HP-PA the value is found in register(s) 28(-29), unless
9616 the mode is SF or DF. Then the value is returned in fr4 (32).
9617
9618 This must perform the same promotions as PROMOTE_MODE, else promoting
9619 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9620
9621 Small structures must be returned in a PARALLEL on PA64 in order
9622 to match the HP Compiler ABI. */
9623
9624 static rtx
9625 pa_function_value (const_tree valtype,
9626 const_tree func ATTRIBUTE_UNUSED,
9627 bool outgoing ATTRIBUTE_UNUSED)
9628 {
9629 machine_mode valmode;
9630
9631 if (AGGREGATE_TYPE_P (valtype)
9632 || TREE_CODE (valtype) == COMPLEX_TYPE
9633 || TREE_CODE (valtype) == VECTOR_TYPE)
9634 {
9635 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9636
9637 /* Handle aggregates that fit exactly in a word or double word. */
9638 if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
9639 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9640
9641 if (TARGET_64BIT)
9642 {
9643 /* Aggregates with a size less than or equal to 128 bits are
9644 returned in GR 28(-29). They are left justified. The pad
9645 bits are undefined. Larger aggregates are returned in
9646 memory. */
9647 rtx loc[2];
9648 int i, offset = 0;
9649 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9650
9651 for (i = 0; i < ub; i++)
9652 {
9653 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9654 gen_rtx_REG (DImode, 28 + i),
9655 GEN_INT (offset));
9656 offset += 8;
9657 }
9658
9659 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9660 }
9661 else if (valsize > UNITS_PER_WORD)
9662 {
9663 /* Aggregates 5 to 8 bytes in size are returned in general
9664 registers r28-r29 in the same manner as other non
9665 floating-point objects. The data is right-justified and
9666 zero-extended to 64 bits. This is opposite to the normal
9667 justification used on big endian targets and requires
9668 special treatment. */
9669 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9670 gen_rtx_REG (DImode, 28), const0_rtx);
9671 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9672 }
9673 }
9674
9675 if ((INTEGRAL_TYPE_P (valtype)
9676 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9677 || POINTER_TYPE_P (valtype))
9678 valmode = word_mode;
9679 else
9680 valmode = TYPE_MODE (valtype);
9681
9682 if (TREE_CODE (valtype) == REAL_TYPE
9683 && !AGGREGATE_TYPE_P (valtype)
9684 && TYPE_MODE (valtype) != TFmode
9685 && !TARGET_SOFT_FLOAT)
9686 return gen_rtx_REG (valmode, 32);
9687
9688 return gen_rtx_REG (valmode, 28);
9689 }
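/* Illustrative results of the rules above: integer and pointer values
   come back in %r28 (widened to word_mode when narrower than a word);
   a 'double' comes back in fr4 (register 32) unless soft float; and an
   8-byte aggregate comes back in register 28, which spans %r28-%r29 on
   the 32-bit port.  */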
9690
9691 /* Implement the TARGET_LIBCALL_VALUE hook. */
9692
9693 static rtx
9694 pa_libcall_value (machine_mode mode,
9695 const_rtx fun ATTRIBUTE_UNUSED)
9696 {
9697 if (! TARGET_SOFT_FLOAT
9698 && (mode == SFmode || mode == DFmode))
9699 return gen_rtx_REG (mode, 32);
9700 else
9701 return gen_rtx_REG (mode, 28);
9702 }
9703
9704 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9705
9706 static bool
9707 pa_function_value_regno_p (const unsigned int regno)
9708 {
9709 if (regno == 28
9710 || (! TARGET_SOFT_FLOAT && regno == 32))
9711 return true;
9712
9713 return false;
9714 }
9715
9716 /* Update the data in CUM to advance over argument ARG. */
9717
9718 static void
9719 pa_function_arg_advance (cumulative_args_t cum_v,
9720 const function_arg_info &arg)
9721 {
9722 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9723 int arg_size = pa_function_arg_size (arg.mode, arg.type);
9724
9725 cum->nargs_prototype--;
9726 cum->words += (arg_size
9727 + ((cum->words & 01)
9728 && arg.type != NULL_TREE
9729 && arg_size > 1));
9730 }
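/* Worked example (illustrative): with cum->words == 1, advancing over
   a DFmode argument (arg_size == 2) adds one word of padding for the
   odd slot plus the two argument words, leaving cum->words == 4, so
   the next slot is again doubleword aligned.  */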
9731
9732 /* Return the location of a parameter that is passed in a register or NULL
9733 if the parameter has any component that is passed in memory.
9734
9735 This is new code and will be pushed into the net sources after
9736 further testing.
9737
9738 ??? We might want to restructure this so that it looks more like other
9739 ports. */
9740 static rtx
9741 pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
9742 {
9743 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9744 tree type = arg.type;
9745 machine_mode mode = arg.mode;
9746 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9747 int alignment = 0;
9748 int arg_size;
9749 int fpr_reg_base;
9750 int gpr_reg_base;
9751 rtx retval;
9752
9753 if (arg.end_marker_p ())
9754 return NULL_RTX;
9755
9756 arg_size = pa_function_arg_size (mode, type);
9757
9758 /* If this arg would be passed partially or totally on the stack, then
9759 this routine should return zero. pa_arg_partial_bytes will
9760 handle arguments which are split between regs and stack slots if
9761 the ABI mandates split arguments. */
9762 if (!TARGET_64BIT)
9763 {
9764 /* The 32-bit ABI does not split arguments. */
9765 if (cum->words + arg_size > max_arg_words)
9766 return NULL_RTX;
9767 }
9768 else
9769 {
9770 if (arg_size > 1)
9771 alignment = cum->words & 1;
9772 if (cum->words + alignment >= max_arg_words)
9773 return NULL_RTX;
9774 }
9775
9776 /* The 32bit ABIs and the 64bit ABIs are rather different,
9777 particularly in their handling of FP registers. We might
9778 be able to cleverly share code between them, but I'm not
9779 going to bother in the hope that splitting them up results
9780 in code that is more easily understood. */
9781
9782 if (TARGET_64BIT)
9783 {
9784 /* Advance the base registers to their current locations.
9785
9786 Remember, gprs grow towards smaller register numbers while
9787 fprs grow to higher register numbers. Also remember that
9788 although FP regs are 32-bit addressable, we pretend that
9789 the registers are 64 bits wide. */
9790 gpr_reg_base = 26 - cum->words;
9791 fpr_reg_base = 32 + cum->words;
9792
9793 /* Arguments wider than one word and small aggregates need special
9794 treatment. */
9795 if (arg_size > 1
9796 || mode == BLKmode
9797 || (type && (AGGREGATE_TYPE_P (type)
9798 || TREE_CODE (type) == COMPLEX_TYPE
9799 || TREE_CODE (type) == VECTOR_TYPE)))
9800 {
9801 /* Double-extended precision (80-bit), quad-precision (128-bit)
9802 and aggregates including complex numbers are aligned on
9803 128-bit boundaries. The first eight 64-bit argument slots
9804 are associated one-to-one, with general registers r26
9805 through r19, and also with floating-point registers fr4
9806 through fr11. Arguments larger than one word are always
9807 passed in general registers.
9808
9809 Using a PARALLEL with a word mode register results in left
9810 justified data on a big-endian target. */
9811
9812 rtx loc[8];
9813 int i, offset = 0, ub = arg_size;
9814
9815 /* Align the base register. */
9816 gpr_reg_base -= alignment;
9817
9818 ub = MIN (ub, max_arg_words - cum->words - alignment);
9819 for (i = 0; i < ub; i++)
9820 {
9821 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9822 gen_rtx_REG (DImode, gpr_reg_base),
9823 GEN_INT (offset));
9824 gpr_reg_base -= 1;
9825 offset += 8;
9826 }
9827
9828 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9829 }
9830 }
9831 else
9832 {
9833 /* If the argument is larger than a word, then we know precisely
9834 which registers we must use. */
9835 if (arg_size > 1)
9836 {
9837 if (cum->words)
9838 {
9839 gpr_reg_base = 23;
9840 fpr_reg_base = 38;
9841 }
9842 else
9843 {
9844 gpr_reg_base = 25;
9845 fpr_reg_base = 34;
9846 }
9847
9848 /* Structures 5 to 8 bytes in size are passed in the general
9849 registers in the same manner as other non floating-point
9850 objects. The data is right-justified and zero-extended
9851 to 64 bits. This is opposite to the normal justification
9852 used on big endian targets and requires special treatment.
9853 We now define BLOCK_REG_PADDING to pad these objects.
9854 Aggregates, complex and vector types are passed in the same
9855 manner as structures. */
9856 if (mode == BLKmode
9857 || (type && (AGGREGATE_TYPE_P (type)
9858 || TREE_CODE (type) == COMPLEX_TYPE
9859 || TREE_CODE (type) == VECTOR_TYPE)))
9860 {
9861 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9862 gen_rtx_REG (DImode, gpr_reg_base),
9863 const0_rtx);
9864 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9865 }
9866 }
9867 else
9868 {
9869 /* We have a single word (32 bits). A simple computation
9870 will get us the register #s we need. */
9871 gpr_reg_base = 26 - cum->words;
9872 fpr_reg_base = 32 + 2 * cum->words;
9873 }
9874 }
9875
9876 /* Determine if the argument needs to be passed in both general and
9877 floating point registers. */
9878 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9879 /* If we are doing soft-float with portable runtime, then there
9880 is no need to worry about FP regs. */
9881 && !TARGET_SOFT_FLOAT
9882 /* The parameter must be some kind of scalar float, else we just
9883 pass it in integer registers. */
9884 && GET_MODE_CLASS (mode) == MODE_FLOAT
9885 /* The target function must not have a prototype. */
9886 && cum->nargs_prototype <= 0
9887 /* libcalls do not need to pass items in both FP and general
9888 registers. */
9889 && type != NULL_TREE
9890 /* All this hair applies to "outgoing" args only. This includes
9891 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
9892 && !cum->incoming)
9893 /* Also pass outgoing floating arguments in both registers in indirect
9894 calls with the 32 bit ABI and the HP assembler since there is no
9895 way to specify argument locations in static functions. */
9896 || (!TARGET_64BIT
9897 && !TARGET_GAS
9898 && !cum->incoming
9899 && cum->indirect
9900 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9901 {
9902 retval
9903 = gen_rtx_PARALLEL
9904 (mode,
9905 gen_rtvec (2,
9906 gen_rtx_EXPR_LIST (VOIDmode,
9907 gen_rtx_REG (mode, fpr_reg_base),
9908 const0_rtx),
9909 gen_rtx_EXPR_LIST (VOIDmode,
9910 gen_rtx_REG (mode, gpr_reg_base),
9911 const0_rtx)));
9912 }
9913 else
9914 {
9915 /* See if we should pass this parameter in a general register. */
9916 if (TARGET_SOFT_FLOAT
9917 /* Indirect calls in the normal 32bit ABI require all arguments
9918 to be passed in general registers. */
9919 || (!TARGET_PORTABLE_RUNTIME
9920 && !TARGET_64BIT
9921 && !TARGET_ELF32
9922 && cum->indirect)
9923 /* If the parameter is not a scalar floating-point parameter,
9924 then it belongs in GPRs. */
9925 || GET_MODE_CLASS (mode) != MODE_FLOAT
9926 /* Structure with single SFmode field belongs in GPR. */
9927 || (type && AGGREGATE_TYPE_P (type)))
9928 retval = gen_rtx_REG (mode, gpr_reg_base);
9929 else
9930 retval = gen_rtx_REG (mode, fpr_reg_base);
9931 }
9932 return retval;
9933 }
9934
9935 /* Arguments larger than one word are double word aligned. */
9936
9937 static unsigned int
9938 pa_function_arg_boundary (machine_mode mode, const_tree type)
9939 {
9940 bool singleword = (type
9941 ? (integer_zerop (TYPE_SIZE (type))
9942 || !TREE_CONSTANT (TYPE_SIZE (type))
9943 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9944 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9945
9946 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9947 }
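/* For example, on the 32-bit port an 8-byte 'double' argument is
   aligned to MAX_PARM_BOUNDARY (a doubleword) while an 'int' needs
   only PARM_BOUNDARY; zero-sized and variable-sized types fall back
   to word alignment.  */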
9948
9949 /* If this arg would be passed totally in registers or totally on the stack,
9950 then this routine should return zero. */
9951
9952 static int
9953 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
9954 {
9955 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9956 unsigned int max_arg_words = 8;
9957 unsigned int offset = 0;
9958
9959 if (!TARGET_64BIT)
9960 return 0;
9961
9962 if (pa_function_arg_size (arg.mode, arg.type) > 1 && (cum->words & 1))
9963 offset = 1;
9964
9965 if (cum->words + offset + pa_function_arg_size (arg.mode, arg.type)
9966 <= max_arg_words)
9967 /* Arg fits fully into registers. */
9968 return 0;
9969 else if (cum->words + offset >= max_arg_words)
9970 /* Arg fully on the stack. */
9971 return 0;
9972 else
9973 /* Arg is split. */
9974 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9975 }
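/* Worked example (illustrative, 64-bit only): with cum->words == 6 and
   a four-word aggregate, offset is 0; since 6 + 4 > 8 but 6 < 8, the
   argument is split and (8 - 6) * 8 = 16 bytes go in registers with
   the remainder on the stack.  */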
9976
9977
9978 /* A get_unnamed_section callback for switching to the text section.
9979
9980 This function is only used with SOM. Because we don't support
9981 named subspaces, we can only create a new subspace or switch back
9982 to the default text subspace. */
9983
9984 static void
9985 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9986 {
9987 gcc_assert (TARGET_SOM);
9988 if (TARGET_GAS)
9989 {
9990 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9991 {
9992 /* We only want to emit a .nsubspa directive once at the
9993 start of the function. */
9994 cfun->machine->in_nsubspa = 1;
9995
9996 /* Create a new subspace for the text. This provides
9997 better stub placement and one-only functions. */
9998 if (cfun->decl
9999 && DECL_ONE_ONLY (cfun->decl)
10000 && !DECL_WEAK (cfun->decl))
10001 {
10002 output_section_asm_op ("\t.SPACE $TEXT$\n"
10003 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
10004 "ACCESS=44,SORT=24,COMDAT");
10005 return;
10006 }
10007 }
10008 else
10009 {
10010 /* There isn't a current function or the body of the current
10011 function has been completed. So, we are changing to the
10012 text section to output debugging information. Thus, we
10013 need to forget that we are in the text section so that
10014 varasm.c will call us when text_section is selected again. */
10015 gcc_assert (!cfun || !cfun->machine
10016 || cfun->machine->in_nsubspa == 2);
10017 in_section = NULL;
10018 }
10019 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
10020 return;
10021 }
10022 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
10023 }
10024
10025 /* A get_unnamed_section callback for switching to comdat data
10026 sections. This function is only used with SOM. */
10027
10028 static void
10029 som_output_comdat_data_section_asm_op (const void *data)
10030 {
10031 in_section = NULL;
10032 output_section_asm_op (data);
10033 }
10034
10035 /* Implement TARGET_ASM_INIT_SECTIONS. */
10036
10037 static void
10038 pa_som_asm_init_sections (void)
10039 {
10040 text_section
10041 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
10042
10043 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
10044 is not being generated. */
10045 som_readonly_data_section
10046 = get_unnamed_section (0, output_section_asm_op,
10047 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
10048
10049 /* When secondary definitions are not supported, SOM makes readonly
10050 data one-only by creating a new $LIT$ subspace in $TEXT$ with
10051 the comdat flag. */
10052 som_one_only_readonly_data_section
10053 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
10054 "\t.SPACE $TEXT$\n"
10055 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
10056 "ACCESS=0x2c,SORT=16,COMDAT");
10057
10059 /* When secondary definitions are not supported, SOM makes data one-only
10060 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
10061 som_one_only_data_section
10062 = get_unnamed_section (SECTION_WRITE,
10063 som_output_comdat_data_section_asm_op,
10064 "\t.SPACE $PRIVATE$\n"
10065 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
10066 "ACCESS=31,SORT=24,COMDAT");
10067
10068 if (flag_tm)
10069 som_tm_clone_table_section
10070 = get_unnamed_section (0, output_section_asm_op,
10071 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
10072
10073 /* HPUX ld generates incorrect GOT entries for "T" fixups which
10074 reference data within the $TEXT$ space (for example constant
10075 strings in the $LIT$ subspace).
10076
10077 The assemblers (GAS and HP as) both have problems handling
10078 the difference of two symbols, which is the other correct way to
10079 reference constant data during PIC code generation.
10080
10081 Thus, we can't put constant data needing relocation in the $TEXT$
10082 space during PIC generation.
10083
10084 Previously, we placed all constant data into the $DATA$ subspace
10085 when generating PIC code. This reduces sharing, but it works
10086 correctly. Now we rely on pa_reloc_rw_mask() for section selection.
10087 This puts constant data not needing relocation into the $TEXT$ space. */
10088 readonly_data_section = som_readonly_data_section;
10089
10090 /* We must not have a reference to an external symbol defined in a
10091 shared library in a readonly section, else the SOM linker will
10092 complain.
10093
10094 So, we force exception information into the data section. */
10095 exception_section = data_section;
10096 }
10097
10098 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
10099
10100 static section *
10101 pa_som_tm_clone_table_section (void)
10102 {
10103 return som_tm_clone_table_section;
10104 }
10105
10106 /* On hpux10, the linker will give an error if we have a reference
10107 in the read-only data section to a symbol defined in a shared
10108 library. Therefore, expressions that might require a reloc
10109 cannot be placed in the read-only data section. */
10110
10111 static section *
10112 pa_select_section (tree exp, int reloc,
10113 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
10114 {
10115 if (TREE_CODE (exp) == VAR_DECL
10116 && TREE_READONLY (exp)
10117 && !TREE_THIS_VOLATILE (exp)
10118 && DECL_INITIAL (exp)
10119 && (DECL_INITIAL (exp) == error_mark_node
10120 || TREE_CONSTANT (DECL_INITIAL (exp)))
10121 && !(reloc & pa_reloc_rw_mask ()))
10122 {
10123 if (TARGET_SOM
10124 && DECL_ONE_ONLY (exp)
10125 && !DECL_WEAK (exp))
10126 return som_one_only_readonly_data_section;
10127 else
10128 return readonly_data_section;
10129 }
10130 else if (CONSTANT_CLASS_P (exp)
10131 && !(reloc & pa_reloc_rw_mask ()))
10132 return readonly_data_section;
10133 else if (TARGET_SOM
10134 && TREE_CODE (exp) == VAR_DECL
10135 && DECL_ONE_ONLY (exp)
10136 && !DECL_WEAK (exp))
10137 return som_one_only_data_section;
10138 else
10139 return data_section;
10140 }
10141
10142 /* Implement pa_elf_select_rtx_section. If X is a function label operand
10143 and the function is in a COMDAT group, place the plabel reference in the
10144 .data.rel.ro.local section. The linker ignores references to symbols in
10145 discarded sections from this section. */
10146
10147 static section *
10148 pa_elf_select_rtx_section (machine_mode mode, rtx x,
10149 unsigned HOST_WIDE_INT align)
10150 {
10151 if (function_label_operand (x, VOIDmode))
10152 {
10153 tree decl = SYMBOL_REF_DECL (x);
10154
10155 if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
10156 return get_named_section (NULL, ".data.rel.ro.local", 1);
10157 }
10158
10159 return default_elf_select_rtx_section (mode, x, align);
10160 }
10161
10162 /* Implement pa_reloc_rw_mask. */
10163
10164 static int
10165 pa_reloc_rw_mask (void)
10166 {
10167 if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
10168 return 3;
10169
10170 /* HP linker does not support global relocs in readonly memory. */
10171 return TARGET_SOM ? 2 : 0;
10172 }
10173
10174 static void
10175 pa_globalize_label (FILE *stream, const char *name)
10176 {
10177 /* We only handle DATA objects here; functions are globalized in
10178 ASM_DECLARE_FUNCTION_NAME. */
10179 if (! FUNCTION_NAME_P (name))
10180 {
10181 fputs ("\t.EXPORT ", stream);
10182 assemble_name (stream, name);
10183 fputs (",DATA\n", stream);
10184 }
10185 }
10186
10187 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10188
10189 static rtx
10190 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
10191 int incoming ATTRIBUTE_UNUSED)
10192 {
10193 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
10194 }
10195
10196 /* Worker function for TARGET_RETURN_IN_MEMORY. */
10197
10198 bool
10199 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10200 {
10201 /* SOM ABI says that objects larger than 64 bits are returned in memory.
10202 PA64 ABI says that objects larger than 128 bits are returned in memory.
10203 Note, int_size_in_bytes can return -1 if the size of the object is
10204 variable or larger than the maximum value that can be expressed as
10205 a HOST_WIDE_INT. It can also return zero for an empty type. The
10206 simplest way to handle variable and empty types is to pass them in
10207 memory. This avoids problems in defining the boundaries of argument
10208 slots, allocating registers, etc. */
10209 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
10210 || int_size_in_bytes (type) <= 0);
10211 }
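/* For example, a 12-byte struct is returned in memory on the 32-bit
   port (12 > 8) but in registers on the 64-bit port (12 <= 16), and a
   variable-sized or empty type always goes in memory because
   int_size_in_bytes returns -1 or 0 for it.  */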
10212
10213 /* Structure to hold declaration and name of external symbols that are
10214 emitted by GCC. We generate a vector of these symbols and output them
10215 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
10216 This avoids putting out names that are never really used. */
10217
10218 typedef struct GTY(()) extern_symbol
10219 {
10220 tree decl;
10221 const char *name;
10222 } extern_symbol;
10223
10224 /* Define gc'd vector type for extern_symbol. */
10225
10226 /* Vector of extern_symbol pointers. */
10227 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
10228
10229 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10230 /* Mark DECL (name NAME) as an external reference (assembler output
10231 file FILE). This saves the names to output at the end of the file
10232 if actually referenced. */
10233
10234 void
10235 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
10236 {
10237 gcc_assert (file == asm_out_file);
10238 extern_symbol p = {decl, name};
10239 vec_safe_push (extern_symbols, p);
10240 }
10241 #endif
10242
10243 /* Output text required at the end of an assembler file.
10244 This includes deferred plabels and .import directives for
10245 all external symbols that were actually referenced. */
10246
10247 static void
10248 pa_file_end (void)
10249 {
10250 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10251 unsigned int i;
10252 extern_symbol *p;
10253
10254 if (!NO_DEFERRED_PROFILE_COUNTERS)
10255 output_deferred_profile_counters ();
10256 #endif
10257
10258 output_deferred_plabels ();
10259
10260 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10261 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
10262 {
10263 tree decl = p->decl;
10264
10265 if (!TREE_ASM_WRITTEN (decl)
10266 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
10267 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
10268 }
10269
10270 vec_free (extern_symbols);
10271 #endif
10272
10273 if (NEED_INDICATE_EXEC_STACK)
10274 file_end_indicate_exec_stack ();
10275 }
10276
10277 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10278
10279 static bool
10280 pa_can_change_mode_class (machine_mode from, machine_mode to,
10281 reg_class_t rclass)
10282 {
10283 if (from == to)
10284 return true;
10285
10286 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10287 return true;
10288
10289 /* Reject changes to/from modes with zero size. */
10290 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10291 return false;
10292
10293 /* Reject changes to/from complex and vector modes. */
10294 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10295 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10296 return false;
10297
10298 /* There is no way to load QImode or HImode values directly from memory
10299 to a FP register. SImode loads to the FP registers are not zero
10300 extended. On the 64-bit target, this conflicts with the definition
10301 of LOAD_EXTEND_OP. Thus, we reject all mode changes in the FP registers
10302 except for DImode to SImode on the 64-bit target. It is handled by
10303 register renaming in pa_print_operand. */
10304 if (MAYBE_FP_REG_CLASS_P (rclass))
10305 return TARGET_64BIT && from == DImode && to == SImode;
10306
10307 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10308 in specific sets of registers. Thus, we cannot allow changing
10309 to a larger mode when it's larger than a word. */
10310 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10311 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10312 return false;
10313
10314 return true;
10315 }
10316
10317 /* Implement TARGET_MODES_TIEABLE_P.
10318
10319 We should return FALSE for QImode and HImode because these modes
10320 are not ok in the floating-point registers. However, this prevents
10321 tying these modes to SImode and DImode in the general registers.
10322 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
10323 TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10324 in the floating-point registers. */
10325
10326 static bool
10327 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10328 {
10329 /* Don't tie modes in different classes. */
10330 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10331 return false;
10332
10333 return true;
10334 }
10335
10336
10337 /* Length in units of the trampoline instruction code. */
10338
10339 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10340
10341
10342 /* Output assembler code for a block containing the constant parts
10343 of a trampoline, leaving space for the variable parts.
10344
10345 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10346 and then branches to the specified routine.
10347
10348 This code template is copied from the text segment to a stack location
10349 and then patched by pa_trampoline_init to contain valid values,
10350 and then entered as a subroutine.
10351
10352 It is best to keep this as small as possible to avoid having to
10353 flush multiple lines in the cache. */
10354
10355 static void
10356 pa_asm_trampoline_template (FILE *f)
10357 {
10358 if (!TARGET_64BIT)
10359 {
10360 if (TARGET_PA_20)
10361 {
10362 fputs ("\tmfia %r20\n", f);
10363 fputs ("\tldw 48(%r20),%r22\n", f);
10364 fputs ("\tcopy %r22,%r21\n", f);
10365 fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10366 fputs ("\tdepwi 0,31,2,%r22\n", f);
10367 fputs ("\tldw 0(%r22),%r21\n", f);
10368 fputs ("\tldw 4(%r22),%r19\n", f);
10369 fputs ("\tbve (%r21)\n", f);
10370 fputs ("\tldw 52(%r1),%r29\n", f);
10371 fputs ("\t.word 0\n", f);
10372 fputs ("\t.word 0\n", f);
10373 fputs ("\t.word 0\n", f);
10374 }
10375 else
10376 {
10377 if (ASSEMBLER_DIALECT == 0)
10378 {
10379 fputs ("\tbl .+8,%r20\n", f);
10380 fputs ("\tdepi 0,31,2,%r20\n", f);
10381 }
10382 else
10383 {
10384 fputs ("\tb,l .+8,%r20\n", f);
10385 fputs ("\tdepwi 0,31,2,%r20\n", f);
10386 }
10387 fputs ("\tldw 40(%r20),%r22\n", f);
10388 fputs ("\tcopy %r22,%r21\n", f);
10389 fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10390 if (ASSEMBLER_DIALECT == 0)
10391 fputs ("\tdepi 0,31,2,%r22\n", f);
10392 else
10393 fputs ("\tdepwi 0,31,2,%r22\n", f);
10394 fputs ("\tldw 0(%r22),%r21\n", f);
10395 fputs ("\tldw 4(%r22),%r19\n", f);
10396 fputs ("\tldsid (%r21),%r1\n", f);
10397 fputs ("\tmtsp %r1,%sr0\n", f);
10398 fputs ("\tbe 0(%sr0,%r21)\n", f);
10399 fputs ("\tldw 44(%r20),%r29\n", f);
10400 }
10401 fputs ("\t.word 0\n", f);
10402 fputs ("\t.word 0\n", f);
10403 fputs ("\t.word 0\n", f);
10404 fputs ("\t.word 0\n", f);
10405 }
10406 else
10407 {
10408 fputs ("\t.dword 0\n", f);
10409 fputs ("\t.dword 0\n", f);
10410 fputs ("\t.dword 0\n", f);
10411 fputs ("\t.dword 0\n", f);
10412 fputs ("\tmfia %r31\n", f);
10413 fputs ("\tldd 24(%r31),%r27\n", f);
10414 fputs ("\tldd 32(%r31),%r31\n", f);
10415 fputs ("\tldd 16(%r27),%r1\n", f);
10416 fputs ("\tbve (%r1)\n", f);
10417 fputs ("\tldd 24(%r27),%r27\n", f);
10418 fputs ("\t.dword 0 ; fptr\n", f);
10419 fputs ("\t.dword 0 ; static link\n", f);
10420 }
10421 }
10422
10423 /* Emit RTL insns to initialize the variable parts of a trampoline.
10424 FNADDR is an RTX for the address of the function's pure code.
10425 CXT is an RTX for the static chain value for the function.
10426
10427 Move the function address to the trampoline template at offset 48.
10428 Move the static chain value to trampoline template at offset 52.
10429 Move the trampoline address to trampoline template at offset 56.
10430 Move r19 to trampoline template at offset 60. The latter two
10431 words create a plabel for the indirect call to the trampoline.
10432
10433 A similar sequence is used for the 64-bit port but the plabel is
10434 at the beginning of the trampoline.
10435
10436 Finally, the cache entries for the trampoline code are flushed.
10437 This is necessary to ensure that the trampoline instruction sequence
10438 is written to memory prior to any attempts at prefetching the code
10439 sequence. */
10440
10441 static void
10442 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10443 {
10444 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10445 rtx start_addr = gen_reg_rtx (Pmode);
10446 rtx end_addr = gen_reg_rtx (Pmode);
10447 rtx line_length = gen_reg_rtx (Pmode);
10448 rtx r_tramp, tmp;
10449
10450 emit_block_move (m_tramp, assemble_trampoline_template (),
10451 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10452 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10453
10454 if (!TARGET_64BIT)
10455 {
10456 tmp = adjust_address (m_tramp, Pmode, 48);
10457 emit_move_insn (tmp, fnaddr);
10458 tmp = adjust_address (m_tramp, Pmode, 52);
10459 emit_move_insn (tmp, chain_value);
10460
10461 /* Create a fat pointer for the trampoline. */
10462 tmp = adjust_address (m_tramp, Pmode, 56);
10463 emit_move_insn (tmp, r_tramp);
10464 tmp = adjust_address (m_tramp, Pmode, 60);
10465 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10466
10467 /* fdc and fic only use registers for the address to flush;
10468 they do not accept integer displacements. We align the
10469 start and end addresses to the beginning of their respective
10470 cache lines to minimize the number of lines flushed. */
10471 emit_insn (gen_andsi3 (start_addr, r_tramp,
10472 GEN_INT (-MIN_CACHELINE_SIZE)));
10473 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10474 TRAMPOLINE_CODE_SIZE-1));
10475 emit_insn (gen_andsi3 (end_addr, tmp,
10476 GEN_INT (-MIN_CACHELINE_SIZE)));
10477 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10478 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10479 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10480 gen_reg_rtx (Pmode),
10481 gen_reg_rtx (Pmode)));
10482 }
10483 else
10484 {
10485 tmp = adjust_address (m_tramp, Pmode, 56);
10486 emit_move_insn (tmp, fnaddr);
10487 tmp = adjust_address (m_tramp, Pmode, 64);
10488 emit_move_insn (tmp, chain_value);
10489
10490 /* Create a fat pointer for the trampoline. */
10491 tmp = adjust_address (m_tramp, Pmode, 16);
10492 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10493 r_tramp, 32)));
10494 tmp = adjust_address (m_tramp, Pmode, 24);
10495 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10496
10497 /* fdc and fic only use registers for the address to flush,
10498 they do not accept integer displacements. We align the
10499 start and end addresses to the beginning of their respective
10500 cache lines to minimize the number of lines flushed. */
10501 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10502 emit_insn (gen_anddi3 (start_addr, tmp,
10503 GEN_INT (-MIN_CACHELINE_SIZE)));
10504 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10505 TRAMPOLINE_CODE_SIZE - 1));
10506 emit_insn (gen_anddi3 (end_addr, tmp,
10507 GEN_INT (-MIN_CACHELINE_SIZE)));
10508 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10509 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10510 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10511 gen_reg_rtx (Pmode),
10512 gen_reg_rtx (Pmode)));
10513 }
10514
10515 #ifdef HAVE_ENABLE_EXECUTE_STACK
10516 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10517 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10518 #endif
10519 }
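
/* For reference, a sketch of the 32-bit trampoline layout implied by the
   code above (offsets in bytes from the start of the trampoline; the words
   below offset 48 hold the code copied from the trampoline template):

	 0-47	trampoline code (template)
	48	function address (FNADDR)
	52	static chain value
	56	trampoline address  \  plabel used for the indirect
	60	global pointer %r19 /  call to the trampoline  */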

/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 56.
   Offset 58 is the plabel offset 56 plus 2; the 2 sets the bit that the
   trampoline code tests to recognize a plabel pointer.  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
  return addr;
}

static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}

static rtx
pa_internal_arg_pointer (void)
{
  /* The argument pointer and the hard frame pointer are the same in
     the 32-bit runtime, so we don't need a copy.  */
  if (TARGET_64BIT)
    return copy_to_reg (virtual_incoming_args_rtx);
  else
    return virtual_incoming_args_rtx;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.  */

static bool
pa_can_eliminate (const int from, const int to)
{
  /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
  if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
    return false;

  return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  ? ! frame_pointer_needed
	  : true);
}

/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */

HOST_WIDE_INT
pa_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
      && to == STACK_POINTER_REGNUM)
    offset = -pa_compute_frame_size (get_frame_size (), 0);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}

static void
pa_conditional_register_usage (void)
{
  int i;

  if (!TARGET_64BIT && !TARGET_PA_11)
    {
      for (i = 56; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
    {
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}

/* Target hook for c_mode_for_suffix.  */

static machine_mode
pa_c_mode_for_suffix (char suffix)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      if (suffix == 'q')
	return TFmode;
    }

  return VOIDmode;
}
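
/* As an illustration of the hook above: when the HP-UX long double
   library is in use, a floating constant written with the 'q' suffix,
   e.g. 1.0q, is given TFmode; any other suffix is left to the default
   handling.  */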

/* Target hook for function_section.  */

static section *
pa_function_section (tree decl, enum node_frequency freq,
		     bool startup, bool exit)
{
  /* Put functions in text section if target doesn't have named sections.  */
  if (!targetm_common.have_named_sections)
    return text_section;

  /* Force nested functions into the same section as the containing
     function.  */
  if (decl
      && DECL_SECTION_NAME (decl) == NULL
      && DECL_CONTEXT (decl) != NULL_TREE
      && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
      && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
    return function_section (DECL_CONTEXT (decl));

  /* Otherwise, use the default function section.  */
  return default_function_section (decl, freq, startup, exit);
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
   that need more than three instructions to load prior to reload.  This
   limit is somewhat arbitrary.  It takes three instructions to load a
   CONST_INT from memory, but two of them are memory accesses.  It may be
   better to increase the allowed range for CONST_INTS.  We may also be
   able to handle CONST_DOUBLES.  */

static bool
pa_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (tls_referenced_p (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (UINTVAL (x)))
    return false;

  if (function_label_operand (x, mode))
    return false;

  return true;
}

/* Implement TARGET_SECTION_TYPE_FLAGS.  */

static unsigned int
pa_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags;

  flags = default_section_type_flags (decl, name, reloc);

  /* Function labels are placed in the constant pool.  This can
     cause a section conflict if decls are put in ".data.rel.ro"
     or ".data.rel.ro.local" using the __attribute__ construct.  */
  if (strcmp (name, ".data.rel.ro") == 0
      || strcmp (name, ".data.rel.ro.local") == 0)
    flags |= SECTION_WRITE | SECTION_RELRO;

  return flags;
}

/* pa_legitimate_address_p recognizes an RTL expression that is a
   valid memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
   REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
   available with floating point loads and stores, and integer loads.
   We get better code by allowing indexed addresses in the initial
   RTL generation.

   The acceptance of indexed addresses as legitimate implies that we
   must provide patterns for doing indexed integer stores, or the move
   expanders must force the address of an indexed store to a register.
   We have adopted the latter approach.

   Another function of pa_legitimate_address_p is to ensure that
   the base register is a valid pointer for indexed instructions.
   On targets that have non-equivalent space registers, we have to
   know at the time of assembler output which register in a REG+REG
   pair is the base register.  The REG_POINTER flag is sometimes lost
   in reload and the following passes, so it can't be relied on during
   code generation.  Thus, we either have to canonicalize the order
   of the registers in REG+REG indexed addresses, or treat REG+REG
   addresses separately and provide patterns for both permutations.

   The latter approach requires several hundred additional lines of
   code in pa.md.  The downside to canonicalizing is that a PLUS
   in the wrong order can't combine to form a scaled indexed memory
   operand.  As we won't need to canonicalize the operands if the
   REG_POINTER lossage can be fixed, it seems better to canonicalize.

   We initially break out scaled indexed addresses in canonical order
   in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
   scaled indexed addresses during RTL generation.  However, fold_rtx
   has its own opinion on how the operands of a PLUS should be ordered.
   If one of the operands is equivalent to a constant, it will make
   that operand the second operand.  As the base register is likely to
   be equivalent to a SYMBOL_REF, we have made it the second operand.

   pa_legitimate_address_p accepts REG+REG as legitimate when the
   operands are in the order INDEX+BASE on targets with non-equivalent
   space registers, and in any order on targets with equivalent space
   registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.

   We treat a SYMBOL_REF as legitimate if it is part of the current
   function's constant pool, because such addresses can actually be
   output as REG+SMALLINT.  */

static bool
pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
		  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
	  && REG_P (XEXP (x, 0))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  if (GET_CODE (x) == PLUS)
    {
      rtx base, index;

      /* For REG+REG, the base register should be in XEXP (x, 1),
	 so check it first.  */
      if (REG_P (XEXP (x, 1))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
	base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
	base = XEXP (x, 0), index = XEXP (x, 1);
      else
	return false;

      if (GET_CODE (index) == CONST_INT)
	{
	  if (INT_5_BITS (index))
	    return true;

	  /* When INT14_OK_STRICT is false, a secondary reload is needed
	     to adjust the displacement of SImode and DImode floating point
	     instructions but this may fail when the register also needs
	     reloading.  So, we return false when STRICT is true.  We
	     also reject long displacements for float mode addresses since
	     the majority of accesses will use floating point instructions
	     that don't support 14-bit offsets.  */
	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  return base14_operand (index, mode);
	}

      if (!TARGET_DISABLE_INDEXING
	  /* Only accept the "canonical" INDEX+BASE operand order
	     on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      ? REG_P (index)
	      : (base == XEXP (x, 1) && REG_P (index)
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (base))
		     || REG_POINTER (base))
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (index))
		     || !REG_POINTER (index))))
	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
		     : REG_OK_FOR_INDEX_P (index))
	  && borx_reg_operand (base, Pmode)
	  && borx_reg_operand (index, Pmode))
	return true;

      if (!TARGET_DISABLE_INDEXING
	  && GET_CODE (index) == MULT
	  /* Only accept base operands with the REG_POINTER flag prior to
	     reload on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      || (base == XEXP (x, 1)
		  && (reload_completed
		      || (reload_in_progress && HARD_REGISTER_P (base))
		      || REG_POINTER (base))))
	  && REG_P (XEXP (index, 0))
	  && GET_MODE (XEXP (index, 0)) == Pmode
	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
	  && GET_CODE (XEXP (index, 1)) == CONST_INT
	  && INTVAL (XEXP (index, 1)) == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	  && borx_reg_operand (base, Pmode))
	return true;

      return false;
    }

  if (GET_CODE (x) == LO_SUM)
    {
      rtx y = XEXP (x, 0);

      if (GET_CODE (y) == SUBREG)
	y = SUBREG_REG (y);

      if (REG_P (y)
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
		     : REG_OK_FOR_BASE_P (y)))
	{
	  /* Needed for -fPIC.  */
	  if (mode == Pmode
	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
	    return true;

	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  if (CONSTANT_P (XEXP (x, 1)))
	    return true;
	}
      return false;
    }

  if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
    return true;

  return false;
}
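
/* Illustrative shapes of the addresses accepted above (a sketch; BASE
   and INDEX stand for suitable base and index registers):

     REG+SMALLINT      (plus (reg BASE) (const_int 16))
     REG+REG           (plus (reg INDEX) (reg BASE))
     REG+(REG*SCALE)   (plus (mult (reg INDEX) (const_int 4)) (reg BASE))
     LO_SUM            (lo_sum (reg BASE) (symbol_ref "x"))  */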

/* Look for machine dependent ways to make the invalid address AD a
   valid address.

   For the PA, transform:

	memory (X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This makes reload inheritance and reload_cse work better since Z
   can be reused.

   There may be more opportunities to improve code with this hook.  */

rtx
pa_legitimize_reload_address (rtx ad, machine_mode mode,
			      int opnum, int type,
			      int ind_levels ATTRIBUTE_UNUSED)
{
  long offset, newoffset, mask;
  rtx new_rtx, temp = NULL_RTX;

  mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);

  if (optimize && GET_CODE (ad) == PLUS)
    temp = simplify_binary_operation (PLUS, Pmode,
				      XEXP (ad, 0), XEXP (ad, 1));

  new_rtx = temp ? temp : ad;

  if (optimize
      && GET_CODE (new_rtx) == PLUS
      && GET_CODE (XEXP (new_rtx, 0)) == REG
      && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
    {
      offset = INTVAL (XEXP (new_rtx, 1));

      /* Choose rounding direction.  Round up if we are >= halfway.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~mask) + mask + 1;
      else
	newoffset = offset & ~mask;

      /* Ensure that long displacements are aligned.  */
      if (mask == 0x3fff
	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
	      || (TARGET_64BIT && mode == DImode)))
	newoffset &= ~(GET_MODE_SIZE (mode) - 1);

      if (newoffset != 0 && VAL_14_BITS_P (newoffset))
	{
	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
			       GEN_INT (newoffset));
	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  return ad;
	}
    }

  return NULL_RTX;
}
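
/* Worked example (a sketch): reloading memory (X + 0x12345) with the
   14-bit mask gives 0x12345 & 0x3fff = 0x2345 >= 0x2000, so we round up:
   Y = (0x12345 & ~0x3fff) + 0x3fff + 1 = 0x14000.  The address is then
   rewritten as (X + 0x14000) + -0x1cbb, where the residual -0x1cbb fits
   in a 14-bit signed displacement and X + 0x14000 can be reused.  */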

/* Output address vector.  */

void
pa_output_addr_vec (rtx lab, rtx body)
{
  int idx, vlen = XVECLEN (body, 0);

  if (!TARGET_SOM)
    fputs ("\t.align 4\n", asm_out_file);
  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}
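
/* With GAS on a non-SOM target, the output for a three-entry table looks
   roughly like the following (a sketch; the exact label spelling depends
   on the internal_label hook and ASM_OUTPUT_ADDR_VEC_ELT):

	.align 4
   L$0042:
	.begin_brtab
	.word L$0043
	.word L$0044
	.word L$0045
	.end_brtab  */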

/* Output address difference vector.  */

void
pa_output_addr_diff_vec (rtx lab, rtx body)
{
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
	(asm_out_file,
	 body,
	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
	 CODE_LABEL_NUMBER (base));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* This is a helper function for the other atomic operations.  This function
   emits a loop that contains SEQ that iterates until a compare-and-swap
   operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
   a set of instructions that takes a value from OLD_REG as an input and
   produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
   set to the current contents of MEM.  After SEQ, a compare-and-swap will
   attempt to update MEM with NEW_REG.  The function returns true when the
   loop was generated successfully.  */

static bool
pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
{
  machine_mode mode = GET_MODE (mem);
  rtx_code_label *label;
  rtx cmp_reg, success, oldval;

  /* The loop we want to generate looks like

	cmp_reg = mem;
      label:
	old_reg = cmp_reg;
	seq;
	(success, cmp_reg) = compare-and-swap (mem, old_reg, new_reg)
	if (!success)
	  goto label;

     Note that we only do the plain load from memory once.  Subsequent
     iterations use the value loaded by the compare-and-swap pattern.  */

  label = gen_label_rtx ();
  cmp_reg = gen_reg_rtx (mode);

  emit_move_insn (cmp_reg, mem);
  emit_label (label);
  emit_move_insn (old_reg, cmp_reg);
  if (seq)
    emit_insn (seq);

  success = NULL_RTX;
  oldval = cmp_reg;
  if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
				       new_reg, false, MEMMODEL_SYNC_SEQ_CST,
				       MEMMODEL_RELAXED))
    return false;

  if (oldval != cmp_reg)
    emit_move_insn (cmp_reg, oldval);

  /* Mark this jump predicted not taken.  */
  emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
			   GET_MODE (success), 1, label,
			   profile_probability::guessed_never ());
  return true;
}
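
/* For example, an atomic OR of VAL into MEM could be built on the helper
   above roughly as follows (a minimal sketch; mode checks and failure
   handling are omitted):

     rtx old_reg = gen_reg_rtx (mode);
     rtx new_reg = gen_reg_rtx (mode);
     start_sequence ();
     rtx tmp = expand_simple_binop (mode, IOR, old_reg, val, new_reg,
				    true, OPTAB_LIB_WIDEN);
     if (tmp != new_reg)
       emit_move_insn (new_reg, tmp);
     rtx seq = get_insns ();
     end_sequence ();
     pa_expand_compare_and_swap_loop (mem, old_reg, new_reg, seq);

   This recomputes NEW_REG = OLD_REG | VAL on every iteration until the
   compare-and-swap succeeds.  */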

/* This function tries to implement an atomic exchange operation using a
   compare_and_swap loop.  VAL is written to *MEM.  The previous contents of
   *MEM are returned, using TARGET if possible.  No memory model is required
   since a compare_and_swap loop is seq-cst.  */

rtx
pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
{
  machine_mode mode = GET_MODE (mem);

  if (can_compare_and_swap_p (mode, true))
    {
      if (!target || !register_operand (target, mode))
	target = gen_reg_rtx (mode);
      if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
	return target;
    }

  return NULL_RTX;
}

/* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
   arguments passed by hidden reference in the 32-bit HP runtime.  Users
   can override this behavior for better compatibility with OpenMP at the
   risk of library incompatibilities.  Arguments are always passed by value
   in the 64-bit HP runtime.  */

static bool
pa_callee_copies (cumulative_args_t, const function_arg_info &)
{
  return !TARGET_CALLER_COPIES;
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
{
  return PA_HARD_REGNO_NREGS (regno, mode);
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return PA_HARD_REGNO_MODE_OK (regno, mode);
}

/* Implement TARGET_STARTING_FRAME_OFFSET.

   On the 32-bit ports, we reserve one slot for the previous frame
   pointer and one fill slot.  The fill slot is for compatibility
   with HP compiled programs.  On the 64-bit ports, we reserve one
   slot for the previous frame pointer.  Either way, the reserved
   area is 8 bytes: two 4-byte slots or one 8-byte slot.  */

static HOST_WIDE_INT
pa_starting_frame_offset (void)
{
  return 8;
}

/* Figure out the size in words of the function argument.  The size
   returned by this function should always be greater than zero because
   we pass variable and zero sized objects by reference.  */

HOST_WIDE_INT
pa_function_arg_size (machine_mode mode, const_tree type)
{
  HOST_WIDE_INT size;

  size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
  return CEIL (size, UNITS_PER_WORD);
}
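
/* For instance, on the 32-bit ports (UNITS_PER_WORD == 4) a 5-byte
   BLKmode argument occupies CEIL (5, 4) = 2 words; zero-sized objects
   never reach this point because they are passed by reference.  */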

#include "gt-pa.h"