/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2020 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
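
/* Illustrative note (added comment, not in the original source): a
   plausible instance is a DFmode fadd,dbl result feeding an fstd; both
   modes are eight bytes, so the sizes match and this returns 1, letting
   the scheduler use the bypass declared in pa.md.  */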


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t,
				  const function_arg_info &);
static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
static void pa_function_arg_advance (cumulative_args_t,
				     const function_arg_info &);
static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
					reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
					      machine_mode, int *,
					      const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
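
  /* For example (illustrative option strings, added comment),
     -mfixed-range=fr22-fr31 fixes the single range fr22..fr31, and
     -mfixed-range=fr20-fr23,fr28-fr31 fixes both ranges.  */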

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now, and we always generate PIC
     code when in 64-bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
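
/* Usage sketch (added comment; illustrative, HP-UX long-double library
   configurations only):

     __float128 y = __builtin_fabsq (x);   // resolves to _U_Qfabs
     __float128 i = __builtin_infq ();     // loads a TFmode +Inf constant  */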

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
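
/* Worked example (added comment): ival = 0x12345000 has its low 11 bits
   clear and bits 31 and above all zero, so a single ldil loads it;
   ival = 0x12345678 fails because 0x678 occupies the low 11 bits.  */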

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
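
/* Worked example (added comment): for x = 0x1f0 (0b111110000),
   lsb_mask = 0x10, (x >> 4) + lsb_mask = 0x2f, and masking with
   ~(lsb_mask - 1) leaves t = 0x20, a power of two, so zdepi can
   generate it.  For x = 0x101, t = 0x11 is not a power of two,
   so it cannot.  */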

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
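
/* Worked example (added comment): for the SImode mask 0xfffffff0,
   sign-extended to 0xfffffffffffffff0 as a HOST_WIDE_INT, ~mask = 0xf
   and adding its least significant bit gives 0x10, a power of two, so
   depi/extru can clear the low four bits in one insn.  */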

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
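
/* Illustrative example (added comment): (ashift r1 (const_int 2)) and
   (mult r1 (const_int 4)) both denote r1 scaled by 4 and satisfy this
   predicate; as noted below, addresses taken from a MEM arrive in the
   MULT form, while shift-add arithmetic uses the ASHIFT form.  */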

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
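
/* Worked instance of the transformation above (added comment,
   illustrative values): for a MODE_INT reference memory (X + 0x12345),
   mask = 0x3fff and 0x12345 & 0x3fff = 0x2345 >= 0x2000, so we round
   up: Y = 0x10000 + 0x4000 = 0x14000, Z = X + 0x14000, and the residual
   displacement 0x12345 - 0x14000 = -0x1cbb fits in 14 bits.  */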

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      HOST_WIDE_INT newoffset;
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  HOST_WIDE_INT val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

1321      special handling to avoid creating an indexed memory address
1322      with x-100000 as the base.
1323 
1324      If the constant part is small enough, then it's still safe because
1325      there is a guard page at the beginning and end of the data segment.
1326 
1327      Scaled references are common enough that we want to try and rearrange the
1328      terms so that we can use indexing for these addresses too.  Only
1329      do the optimization for floatint point modes.  */
1330 

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     Alternatively, if const_int is big but can be divided
	     evenly by shadd_const, it can be added to (reg) instead;
	     this allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG and REG+CONST addresses are the cheapest (cost 1)
   and addresses involving symbolic constants (HIGH) are cost 2.

   Everything else, including PIC addresses, is very expensive (cost 4).

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Return true if X represents a (possibly non-canonical) shNadd pattern.
   The machine mode of X is known to be SImode or DImode.  */

static bool
hppa_rtx_costs_shadd_p (rtx x)
{
  if (GET_CODE (x) != PLUS
      || !REG_P (XEXP (x, 1)))
    return false;
  rtx op0 = XEXP (x, 0);
  if (GET_CODE (op0) == ASHIFT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 1 || x == 2 || x == 3;
    }
  if (GET_CODE (op0) == MULT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 2 || x == 4 || x == 8;
    }
  return false;
}
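
/* Illustrative mapping (added comment): (plus (ashift rX (const_int 2))
   rY) is the shift-add form behind sh2add rX,rY,rT; shift counts 1-3
   and MULT factors 2, 4 and 8 correspond to sh1add, sh2add and
   sh3add.  */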
1521 
1522 /* Compute a (partial) cost for rtx X.  Return true if the complete
1523    cost has been computed, and false if subexpressions should be
1524    scanned.  In either case, *TOTAL contains the cost result.  */
1525 
1526 static bool
hppa_rtx_costs(rtx x,machine_mode mode,int outer_code,int opno ATTRIBUTE_UNUSED,int * total,bool speed)1527 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1528 		int opno ATTRIBUTE_UNUSED,
1529 		int *total, bool speed)
1530 {
1531   int code = GET_CODE (x);
1532 
1533   switch (code)
1534     {
1535     case CONST_INT:
1536       if (outer_code == SET)
1537 	*total = COSTS_N_INSNS (1);
1538       else if (INTVAL (x) == 0)
1539 	*total = 0;
1540       else if (INT_14_BITS (x))
1541 	*total = 1;
1542       else
1543 	*total = 2;
1544       return true;
1545 
1546     case HIGH:
1547       *total = 2;
1548       return true;
1549 
1550     case CONST:
1551     case LABEL_REF:
1552     case SYMBOL_REF:
1553       *total = 4;
1554       return true;
1555 
1556     case CONST_DOUBLE:
1557       if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1558 	  && outer_code != SET)
1559 	*total = 0;
1560       else
1561 	*total = 8;
1562       return true;
1563 
1564     case MULT:
1565       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1566 	{
1567 	  *total = COSTS_N_INSNS (3);
1568 	}
1569       else if (mode == DImode)
1570 	{
1571 	  if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1572 	    *total = COSTS_N_INSNS (32);
1573 	  else
1574 	    *total = COSTS_N_INSNS (80);
1575 	}
1576       else
1577 	{
1578 	  if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1579 	    *total = COSTS_N_INSNS (8);
1580 	  else
1581 	    *total = COSTS_N_INSNS (20);
1582 	}
1583       return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1584 
1585     case DIV:
1586       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1587 	{
1588 	  *total = COSTS_N_INSNS (14);
1589 	  return false;
1590 	}
1591       /* FALLTHRU */
1592 
1593     case UDIV:
1594     case MOD:
1595     case UMOD:
1596       /* A mode size N times larger than SImode needs O(N*N) more insns.  */
1597       if (mode == DImode)
1598 	*total = COSTS_N_INSNS (240);
1599       else
1600 	*total = COSTS_N_INSNS (60);
1601       return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1602 
1603     case PLUS: /* This includes shNadd insns.  */
1604     case MINUS:
1605       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1606 	*total = COSTS_N_INSNS (3);
1607       else if (mode == DImode)
1608 	{
1609 	  if (TARGET_64BIT)
1610 	    {
1611 	      *total = COSTS_N_INSNS (1);
1612 	      /* Handle shladd,l instructions.  */
1613 	      if (hppa_rtx_costs_shadd_p (x))
1614 		return true;
1615 	    }
1616 	  else
1617 	    *total = COSTS_N_INSNS (2);
1618 	}
1619       else
1620 	{
1621 	  *total = COSTS_N_INSNS (1);
1622 	  /* Handle shNadd instructions.  */
1623 	  if (hppa_rtx_costs_shadd_p (x))
1624 	    return true;
1625 	}
1626       return REG_P (XEXP (x, 0))
1627 	     && (REG_P (XEXP (x, 1))
1628 		 || CONST_INT_P (XEXP (x, 1)));
1629 
1630     case ASHIFT:
1631       if (mode == DImode)
1632 	{
1633 	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1634 	    {
1635 	      if (TARGET_64BIT)
1636 		*total = COSTS_N_INSNS (1);
1637 	      else
1638 		*total = COSTS_N_INSNS (2);
1639 	      return true;
1640 	    }
1641 	  else if (TARGET_64BIT)
1642 	    *total = COSTS_N_INSNS (3);
1643 	  else if (speed)
1644 	    *total = COSTS_N_INSNS (13);
1645 	  else
1646 	    *total = COSTS_N_INSNS (18);
1647 	}
1648       else if (TARGET_64BIT)
1649 	*total = COSTS_N_INSNS (4);
1650       else
1651 	*total = COSTS_N_INSNS (2);
1652       return REG_P (XEXP (x, 0))
1653 	     && (REG_P (XEXP (x, 1))
1654 		 || CONST_INT_P (XEXP (x, 1)));
1655 
1656     case ASHIFTRT:
1657       if (mode == DImode)
1658 	{
1659 	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1660 	    {
1661 	      if (TARGET_64BIT)
1662 		*total = COSTS_N_INSNS (1);
1663 	      else
1664 		*total = COSTS_N_INSNS (2);
1665 	      return true;
1666 	    }
1667 	  else if (TARGET_64BIT)
1668 	    *total = COSTS_N_INSNS (3);
1669 	  else if (speed)
1670 	    *total = COSTS_N_INSNS (14);
1671 	  else
1672 	    *total = COSTS_N_INSNS (19);
1673 	}
1674       else if (TARGET_64BIT)
1675 	*total = COSTS_N_INSNS (4);
1676       else
1677 	*total = COSTS_N_INSNS (2);
1678       return REG_P (XEXP (x, 0))
1679 	     && (REG_P (XEXP (x, 1))
1680 		 || CONST_INT_P (XEXP (x, 1)));
1681 
1682     case LSHIFTRT:
1683       if (mode == DImode)
1684 	{
1685 	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1686 	    {
1687 	      if (TARGET_64BIT)
1688 		*total = COSTS_N_INSNS (1);
1689 	      else
1690 		*total = COSTS_N_INSNS (2);
1691 	      return true;
1692 	    }
1693 	  else if (TARGET_64BIT)
1694 	    *total = COSTS_N_INSNS (2);
1695 	  else if (speed)
1696 	    *total = COSTS_N_INSNS (12);
1697 	  else
1698 	    *total = COSTS_N_INSNS (15);
1699 	}
1700       else if (TARGET_64BIT)
1701 	*total = COSTS_N_INSNS (3);
1702       else
1703 	*total = COSTS_N_INSNS (2);
1704       return REG_P (XEXP (x, 0))
1705 	     && (REG_P (XEXP (x, 1))
1706 		 || CONST_INT_P (XEXP (x, 1)));
1707 
1708     default:
1709       return false;
1710     }
1711 }
1712 
1713 /* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1714    new rtx with the correct mode.  */
1715 static inline rtx
1716 force_mode (machine_mode mode, rtx orig)
1717 {
1718   if (mode == GET_MODE (orig))
1719     return orig;
1720 
1721   gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1722 
1723   return gen_rtx_REG (mode, REGNO (orig));
1724 }
1725 
1726 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
1727 
1728 static bool
1729 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1730 {
1731   return tls_referenced_p (x);
1732 }
1733 
1734 /* Emit insns to move operands[1] into operands[0].
1735 
1736    Return 1 if we have written out everything that needs to be done to
1737    do the move.  Otherwise, return 0 and the caller will emit the move
1738    normally.
1739 
1740    Note SCRATCH_REG may not be in the proper mode depending on how it
1741    will be used.  This routine is responsible for creating a new copy
1742    of SCRATCH_REG in the proper mode.  */
1743 
1744 int
1745 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1746 {
1747   register rtx operand0 = operands[0];
1748   register rtx operand1 = operands[1];
1749   register rtx tem;
1750 
1751   /* We can only handle indexed addresses in the destination operand
1752      of floating point stores.  Thus, we need to break out indexed
1753      addresses from the destination operand.  */
1754   if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1755     {
1756       gcc_assert (can_create_pseudo_p ());
1757 
1758       tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1759       operand0 = replace_equiv_address (operand0, tem);
1760     }
1761 
1762   /* On targets with non-equivalent space registers, break out unscaled
1763      indexed addresses from the source operand before the final CSE.
1764      We have to do this because the REG_POINTER flag is not correctly
1765      carried through various optimization passes and CSE may substitute
1766      a pseudo without the pointer set for one with the pointer set.  As
1767      a result, we lose various opportunities to create insns with
1768      unscaled indexed addresses.  */
1769   if (!TARGET_NO_SPACE_REGS
1770       && !cse_not_expected
1771       && GET_CODE (operand1) == MEM
1772       && GET_CODE (XEXP (operand1, 0)) == PLUS
1773       && REG_P (XEXP (XEXP (operand1, 0), 0))
1774       && REG_P (XEXP (XEXP (operand1, 0), 1)))
1775     operand1
1776       = replace_equiv_address (operand1,
1777 			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1778 
1779   if (scratch_reg
1780       && reload_in_progress && GET_CODE (operand0) == REG
1781       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1782     operand0 = reg_equiv_mem (REGNO (operand0));
1783   else if (scratch_reg
1784 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1785 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1786 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1787     {
1788      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1789 	the code which tracks sets/uses for delete_output_reload.  */
1790       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1791 				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1792 				 SUBREG_BYTE (operand0));
1793       operand0 = alter_subreg (&temp, true);
1794     }
1795 
1796   if (scratch_reg
1797       && reload_in_progress && GET_CODE (operand1) == REG
1798       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1799     operand1 = reg_equiv_mem (REGNO (operand1));
1800   else if (scratch_reg
1801 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1802 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1803 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1804     {
1805      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1806 	the code which tracks sets/uses for delete_output_reload.  */
1807       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1808 				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1809 				 SUBREG_BYTE (operand1));
1810       operand1 = alter_subreg (&temp, true);
1811     }
1812 
1813   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1814       && ((tem = find_replacement (&XEXP (operand0, 0)))
1815 	  != XEXP (operand0, 0)))
1816     operand0 = replace_equiv_address (operand0, tem);
1817 
1818   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1819       && ((tem = find_replacement (&XEXP (operand1, 0)))
1820 	  != XEXP (operand1, 0)))
1821     operand1 = replace_equiv_address (operand1, tem);
1822 
1823   /* Handle secondary reloads for loads/stores of FP registers from
1824      REG+D addresses where D does not fit in 5 or 14 bits, including
1825      (subreg (mem (addr))) cases, and reloads for other unsupported
1826      memory operands.  */
1827   if (scratch_reg
1828       && FP_REG_P (operand0)
1829       && (MEM_P (operand1)
1830 	  || (GET_CODE (operand1) == SUBREG
1831 	      && MEM_P (XEXP (operand1, 0)))))
1832     {
1833       rtx op1 = operand1;
1834 
1835       if (GET_CODE (op1) == SUBREG)
1836 	op1 = XEXP (op1, 0);
1837 
1838       if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1839 	{
1840 	  if (!(TARGET_PA_20
1841 		&& !TARGET_ELF32
1842 		&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1843 	      && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1844 	    {
1845 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1846 		 We want it in WORD_MODE regardless of what mode it was
1847 		 originally given to us.  */
1848 	      scratch_reg = force_mode (word_mode, scratch_reg);
1849 
1850 	      /* D might not fit in 14 bits either; for such cases load D
1851 		 into scratch reg.  */
1852 	      if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1853 		{
1854 		  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1855 		  emit_move_insn (scratch_reg,
1856 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1857 						  Pmode,
1858 						  XEXP (XEXP (op1, 0), 0),
1859 						  scratch_reg));
1860 		}
1861 	      else
1862 		emit_move_insn (scratch_reg, XEXP (op1, 0));
1863 	      op1 = replace_equiv_address (op1, scratch_reg);
1864 	    }
1865 	}
1866       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1867 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1868 	       || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1869 	{
1870 	  /* Load memory address into SCRATCH_REG.  */
1871 	  scratch_reg = force_mode (word_mode, scratch_reg);
1872 	  emit_move_insn (scratch_reg, XEXP (op1, 0));
1873 	  op1 = replace_equiv_address (op1, scratch_reg);
1874 	}
1875       emit_insn (gen_rtx_SET (operand0, op1));
1876       return 1;
1877     }
1878   else if (scratch_reg
1879 	   && FP_REG_P (operand1)
1880 	   && (MEM_P (operand0)
1881 	       || (GET_CODE (operand0) == SUBREG
1882 		   && MEM_P (XEXP (operand0, 0)))))
1883     {
1884       rtx op0 = operand0;
1885 
1886       if (GET_CODE (op0) == SUBREG)
1887 	op0 = XEXP (op0, 0);
1888 
1889       if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1890 	{
1891 	  if (!(TARGET_PA_20
1892 		&& !TARGET_ELF32
1893 		&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1894 	      && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1895 	    {
1896 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1897 		 We want it in WORD_MODE regardless of what mode it was
1898 		 originally given to us.  */
1899 	      scratch_reg = force_mode (word_mode, scratch_reg);
1900 
1901 	      /* D might not fit in 14 bits either; for such cases load D
1902 		 into scratch reg.  */
1903 	      if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1904 		{
1905 		  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1906 		  emit_move_insn (scratch_reg,
1907 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1908 						  Pmode,
1909 						  XEXP (XEXP (op0, 0), 0),
1910 						  scratch_reg));
1911 		}
1912 	      else
1913 		emit_move_insn (scratch_reg, XEXP (op0, 0));
1914 	      op0 = replace_equiv_address (op0, scratch_reg);
1915 	    }
1916 	}
1917       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1918 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1919 	       || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1920 	{
1921 	  /* Load memory address into SCRATCH_REG.  */
1922 	  scratch_reg = force_mode (word_mode, scratch_reg);
1923 	  emit_move_insn (scratch_reg, XEXP (op0, 0));
1924 	  op0 = replace_equiv_address (op0, scratch_reg);
1925 	}
1926       emit_insn (gen_rtx_SET (op0, operand1));
1927       return 1;
1928     }
1929   /* Handle secondary reloads for loads of FP registers from constant
1930      expressions by forcing the constant into memory.  For the most part,
1931      this is only necessary for SImode and DImode.
1932 
1933      Use scratch_reg to hold the address of the memory location.  */
1934   else if (scratch_reg
1935 	   && CONSTANT_P (operand1)
1936 	   && FP_REG_P (operand0))
1937     {
1938       rtx const_mem, xoperands[2];
1939 
1940       if (operand1 == CONST0_RTX (mode))
1941 	{
1942 	  emit_insn (gen_rtx_SET (operand0, operand1));
1943 	  return 1;
1944 	}
1945 
1946       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1947 	 it in WORD_MODE regardless of what mode it was originally given
1948 	 to us.  */
1949       scratch_reg = force_mode (word_mode, scratch_reg);
1950 
1951       /* Force the constant into memory and put the address of the
1952 	 memory location into scratch_reg.  */
1953       const_mem = force_const_mem (mode, operand1);
1954       xoperands[0] = scratch_reg;
1955       xoperands[1] = XEXP (const_mem, 0);
1956       pa_emit_move_sequence (xoperands, Pmode, 0);
1957 
1958       /* Now load the destination register.  */
1959       emit_insn (gen_rtx_SET (operand0,
1960 			      replace_equiv_address (const_mem, scratch_reg)));
1961       return 1;
1962     }
1963   /* Handle secondary reloads for SAR.  These occur when trying to load
1964      the SAR from memory or a constant.  */
1965   else if (scratch_reg
1966 	   && GET_CODE (operand0) == REG
1967 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1968 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1969 	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1970     {
1971       /* D might not fit in 14 bits either; for such cases load D into
1972 	 scratch reg.  */
1973       if (GET_CODE (operand1) == MEM
1974 	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1975 	{
1976 	  /* We are reloading the address into the scratch register, so we
1977 	     want to make sure the scratch register is a full register.  */
1978 	  scratch_reg = force_mode (word_mode, scratch_reg);
1979 
1980 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1981 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1982 								        0)),
1983 						       Pmode,
1984 						       XEXP (XEXP (operand1, 0),
1985 						       0),
1986 						       scratch_reg));
1987 
1988 	  /* Now we are going to load the scratch register from memory,
1989 	     we want to load it in the same width as the original MEM,
1990 	     which must be the same as the width of the ultimate destination,
1991 	     OPERAND0.  */
1992 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1993 
1994 	  emit_move_insn (scratch_reg,
1995 			  replace_equiv_address (operand1, scratch_reg));
1996 	}
1997       else
1998 	{
1999 	  /* We want to load the scratch register using the same mode as
2000 	     the ultimate destination.  */
2001 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2002 
2003 	  emit_move_insn (scratch_reg, operand1);
2004 	}
2005 
2006       /* And emit the insn to set the ultimate destination.  We know that
2007 	 the scratch register has the same mode as the destination at this
2008 	 point.  */
2009       emit_move_insn (operand0, scratch_reg);
2010       return 1;
2011     }
2012 
2013   /* Handle the most common case: storing into a register.  */
2014   if (register_operand (operand0, mode))
2015     {
2016       /* Legitimize TLS symbol references.  This happens for references
2017 	 that aren't a legitimate constant.  */
2018       if (PA_SYMBOL_REF_TLS_P (operand1))
2019 	operand1 = legitimize_tls_address (operand1);
2020 
2021       if (register_operand (operand1, mode)
2022 	  || (GET_CODE (operand1) == CONST_INT
2023 	      && pa_cint_ok_for_move (UINTVAL (operand1)))
2024 	  || (operand1 == CONST0_RTX (mode))
2025 	  || (GET_CODE (operand1) == HIGH
2026 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2027 	  /* Only `general_operands' can come here, so MEM is ok.  */
2028 	  || GET_CODE (operand1) == MEM)
2029 	{
2030 	  /* Various sets are created during RTL generation which don't
2031 	     have the REG_POINTER flag correctly set.  After the CSE pass,
2032 	     instruction recognition can fail if we don't consistently
2033 	     set this flag when performing register copies.  This should
2034 	     also improve the opportunities for creating insns that use
2035 	     unscaled indexing.  */
2036 	  if (REG_P (operand0) && REG_P (operand1))
2037 	    {
2038 	      if (REG_POINTER (operand1)
2039 		  && !REG_POINTER (operand0)
2040 		  && !HARD_REGISTER_P (operand0))
2041 		copy_reg_pointer (operand0, operand1);
2042 	    }
2043 
2044 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
2045 	     get set.  In some cases, we can set the REG_POINTER flag
2046 	     from the declaration for the MEM.  */
2047 	  if (REG_P (operand0)
2048 	      && GET_CODE (operand1) == MEM
2049 	      && !REG_POINTER (operand0))
2050 	    {
2051 	      tree decl = MEM_EXPR (operand1);
2052 
2053 	      /* Set the register pointer flag and register alignment
2054 		 if the declaration for this memory reference is a
2055 		 pointer type.  */
2056 	      if (decl)
2057 		{
2058 		  tree type;
2059 
2060 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
2061 		     tree operand 1.  */
2062 		  if (TREE_CODE (decl) == COMPONENT_REF)
2063 		    decl = TREE_OPERAND (decl, 1);
2064 
2065 		  type = TREE_TYPE (decl);
2066 		  type = strip_array_types (type);
2067 
2068 		  if (POINTER_TYPE_P (type))
2069 		    mark_reg_pointer (operand0, BITS_PER_UNIT);
2070 		}
2071 	    }
2072 
2073 	  emit_insn (gen_rtx_SET (operand0, operand1));
2074 	  return 1;
2075 	}
2076     }
2077   else if (GET_CODE (operand0) == MEM)
2078     {
2079       if (mode == DFmode && operand1 == CONST0_RTX (mode)
2080 	  && !(reload_in_progress || reload_completed))
2081 	{
2082 	  rtx temp = gen_reg_rtx (DFmode);
2083 
2084 	  emit_insn (gen_rtx_SET (temp, operand1));
2085 	  emit_insn (gen_rtx_SET (operand0, temp));
2086 	  return 1;
2087 	}
2088       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2089 	{
2090 	  /* Run this case quickly.  */
2091 	  emit_insn (gen_rtx_SET (operand0, operand1));
2092 	  return 1;
2093 	}
2094       if (! (reload_in_progress || reload_completed))
2095 	{
2096 	  operands[0] = validize_mem (operand0);
2097 	  operands[1] = operand1 = force_reg (mode, operand1);
2098 	}
2099     }
2100 
2101   /* Simplify the source if we need to.
2102      Note we do have to handle function labels here, even though we do
2103      not consider them legitimate constants.  Loop optimizations can
2104      call the emit_move_xxx routines with one as a source.  */
2105   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2106       || (GET_CODE (operand1) == HIGH
2107 	  && symbolic_operand (XEXP (operand1, 0), mode))
2108       || function_label_operand (operand1, VOIDmode)
2109       || tls_referenced_p (operand1))
2110     {
2111       int ishighonly = 0;
2112 
2113       if (GET_CODE (operand1) == HIGH)
2114 	{
2115 	  ishighonly = 1;
2116 	  operand1 = XEXP (operand1, 0);
2117 	}
2118       if (symbolic_operand (operand1, mode))
2119 	{
2120 	  /* Argh.  The assembler and linker can't handle arithmetic
2121 	     involving plabels.
2122 
2123 	     So we force the plabel into memory, load operand0 from
2124 	     the memory location, then add in the constant part.  */
2125 	  if ((GET_CODE (operand1) == CONST
2126 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
2127 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2128 					  VOIDmode))
2129 	      || function_label_operand (operand1, VOIDmode))
2130 	    {
2131 	      rtx temp, const_part;
2132 
2133 	      /* Figure out what (if any) scratch register to use.  */
2134 	      if (reload_in_progress || reload_completed)
2135 		{
2136 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
2137 		  /* SCRATCH_REG will hold an address and maybe the actual
2138 		     data.  We want it in WORD_MODE regardless of what mode it
2139 		     was originally given to us.  */
2140 		  scratch_reg = force_mode (word_mode, scratch_reg);
2141 		}
2142 	      else if (flag_pic)
2143 		scratch_reg = gen_reg_rtx (Pmode);
2144 
2145 	      if (GET_CODE (operand1) == CONST)
2146 		{
2147 		  /* Save away the constant part of the expression.  */
2148 		  const_part = XEXP (XEXP (operand1, 0), 1);
2149 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
2150 
2151 		  /* Force the function label into memory.  */
2152 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2153 		}
2154 	      else
2155 		{
2156 		  /* No constant part.  */
2157 		  const_part = NULL_RTX;
2158 
2159 		  /* Force the function label into memory.  */
2160 		  temp = force_const_mem (mode, operand1);
2161 		}
2162 
2163 
2164 	      /* Get the address of the memory location.  PIC-ify it if
2165 		 necessary.  */
2166 	      temp = XEXP (temp, 0);
2167 	      if (flag_pic)
2168 		temp = legitimize_pic_address (temp, mode, scratch_reg);
2169 
2170 	      /* Put the address of the memory location into our destination
2171 		 register.  */
2172 	      operands[1] = temp;
2173 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2174 
2175 	      /* Now load from the memory location into our destination
2176 		 register.  */
2177 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2178 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2179 
2180 	      /* And add back in the constant part.  */
2181 	      if (const_part != NULL_RTX)
2182 		expand_inc (operand0, const_part);
2183 
2184 	      return 1;
2185 	    }
2186 
2187 	  if (flag_pic)
2188 	    {
2189 	      rtx_insn *insn;
2190 	      rtx temp;
2191 
2192 	      if (reload_in_progress || reload_completed)
2193 		{
2194 		  temp = scratch_reg ? scratch_reg : operand0;
2195 		  /* TEMP will hold an address and maybe the actual
2196 		     data.  We want it in WORD_MODE regardless of what mode it
2197 		     was originally given to us.  */
2198 		  temp = force_mode (word_mode, temp);
2199 		}
2200 	      else
2201 		temp = gen_reg_rtx (Pmode);
2202 
2203 	      /* Force (const (plus (symbol) (const_int))) to memory
2204 	         if the const_int will not fit in 14 bits.  Although
2205 		 this requires a relocation, the instruction sequence
2206 		 needed to load the value is shorter.  */
2207 	      if (GET_CODE (operand1) == CONST
2208 		       && GET_CODE (XEXP (operand1, 0)) == PLUS
2209 		       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2210 		       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2211 		{
2212 		  rtx x, m = force_const_mem (mode, operand1);
2213 
2214 		  x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2215 		  x = replace_equiv_address (m, x);
2216 		  insn = emit_move_insn (operand0, x);
2217 		}
2218 	      else
2219 		{
2220 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
2221 		  if (REG_P (operand0) && REG_P (operands[1]))
2222 		    copy_reg_pointer (operand0, operands[1]);
2223 		  insn = emit_move_insn (operand0, operands[1]);
2224 		}
2225 
2226 	      /* Put a REG_EQUAL note on this insn.  */
2227 	      set_unique_reg_note (insn, REG_EQUAL, operand1);
2228 	    }
2229 	  /* On the HPPA, references to data space are supposed to use dp,
2230 	     register 27, but showing it in the RTL inhibits various cse
2231 	     and loop optimizations.  */
2232 	  else
2233 	    {
2234 	      rtx temp, set;
2235 
2236 	      if (reload_in_progress || reload_completed)
2237 		{
2238 		  temp = scratch_reg ? scratch_reg : operand0;
2239 		  /* TEMP will hold an address and maybe the actual
2240 		     data.  We want it in WORD_MODE regardless of what mode it
2241 		     was originally given to us.  */
2242 		  temp = force_mode (word_mode, temp);
2243 		}
2244 	      else
2245 		temp = gen_reg_rtx (mode);
2246 
2247 	      /* Loading a SYMBOL_REF into a register makes that register
2248 		 safe to be used as the base in an indexed address.
2249 
2250 		 Don't mark hard registers though.  That loses.  */
2251 	      if (GET_CODE (operand0) == REG
2252 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2253 		mark_reg_pointer (operand0, BITS_PER_UNIT);
2254 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2255 		mark_reg_pointer (temp, BITS_PER_UNIT);
2256 
2257 	      if (ishighonly)
2258 		set = gen_rtx_SET (operand0, temp);
2259 	      else
2260 		set = gen_rtx_SET (operand0,
2261 				   gen_rtx_LO_SUM (mode, temp, operand1));
2262 
2263 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2264 	      emit_insn (set);
2265 
2266 	    }
2267 	  return 1;
2268 	}
2269       else if (tls_referenced_p (operand1))
2270 	{
2271 	  rtx tmp = operand1;
2272 	  rtx addend = NULL;
2273 
2274 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2275 	    {
2276 	      addend = XEXP (XEXP (tmp, 0), 1);
2277 	      tmp = XEXP (XEXP (tmp, 0), 0);
2278 	    }
2279 
2280 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2281 	  tmp = legitimize_tls_address (tmp);
2282 	  if (addend)
2283 	    {
2284 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
2285 	      tmp = force_operand (tmp, operands[0]);
2286 	    }
2287 	  operands[1] = tmp;
2288 	}
2289       else if (GET_CODE (operand1) != CONST_INT
2290 	       || !pa_cint_ok_for_move (UINTVAL (operand1)))
2291 	{
2292 	  rtx temp;
2293 	  rtx_insn *insn;
2294 	  rtx op1 = operand1;
2295 	  HOST_WIDE_INT value = 0;
2296 	  HOST_WIDE_INT insv = 0;
2297 	  int insert = 0;
2298 
2299 	  if (GET_CODE (operand1) == CONST_INT)
2300 	    value = INTVAL (operand1);
2301 
2302 	  if (TARGET_64BIT
2303 	      && GET_CODE (operand1) == CONST_INT
2304 	      && HOST_BITS_PER_WIDE_INT > 32
2305 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2306 	    {
2307 	      HOST_WIDE_INT nval;
2308 
2309 	      /* Extract the low order 32 bits of the value and sign extend.
2310 		 If the new value is the same as the original value, we
2311 		 can use the original value as-is.  If the new value is
2312 		 different, we use it and insert the most-significant 32-bits
2313 		 of the original value into the final result.  */
2314 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2315 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2316 	      if (value != nval)
2317 		{
2318 #if HOST_BITS_PER_WIDE_INT > 32
2319 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2320 #endif
2321 		  insert = 1;
2322 		  value = nval;
2323 		  operand1 = GEN_INT (nval);
2324 		}
2325 	    }
2326 
2327 	  if (reload_in_progress || reload_completed)
2328 	    temp = scratch_reg ? scratch_reg : operand0;
2329 	  else
2330 	    temp = gen_reg_rtx (mode);
2331 
2332 	  /* We don't directly split DImode constants on 32-bit targets
2333 	     because PLUS uses an 11-bit immediate and the insn sequence
2334 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2335 	  if (GET_CODE (operand1) == CONST_INT
2336 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2337 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2338 	      && !insert)
2339 	    {
2340 	      /* Directly break constant into high and low parts.  This
2341 		 provides better optimization opportunities because various
2342 		 passes recognize constants split with PLUS but not LO_SUM.
2343 		 We use a 14-bit signed low part except when the addition
2344 		 of 0x4000 to the high part might change the sign of the
2345 		 high part.  */
2346 	      HOST_WIDE_INT low = value & 0x3fff;
2347 	      HOST_WIDE_INT high = value & ~ 0x3fff;
2348 
2349 	      if (low >= 0x2000)
2350 		{
2351 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2352 		    high += 0x2000;
2353 		  else
2354 		    high += 0x4000;
2355 		}
2356 
2357 	      low = value - high;
2358 
2359 	      emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2360 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2361 	    }
2362 	  else
2363 	    {
2364 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2365 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2366 	    }
2367 
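	  /* Worked example (editor's sketch, not compiled): splitting
	     value = 0x12345 gives low = value & 0x3fff = 0x2345, which
	     is >= 0x2000, so the high part rounds up to 0x14000 and
	     low becomes -7355, inside the signed 14-bit ldo range.  */
#if 0
#include <assert.h>

int
main (void)
{
  long value = 0x12345;
  long low = value & 0x3fff;
  long high = value & ~0x3fffL;

  /* Mirror of the rounding above; the 0x7fffc000/HImode special
     case is omitted for brevity.  */
  if (low >= 0x2000)
    high += 0x4000;
  low = value - high;

  assert (high == 0x14000 && low == -7355);
  assert (low >= -0x2000 && low < 0x2000);
  assert (high + low == value);
  return 0;
}
#endif
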
2368 	  insn = emit_move_insn (operands[0], operands[1]);
2369 
2370 	  /* Now insert the most significant 32 bits of the value
2371 	     into the register.  When we don't have a second register
2372 	     available, it could take up to nine instructions to load
2373 	     a 64-bit integer constant.  Prior to reload, we force
2374 	     constants that would take more than three instructions
2375 	     to load to the constant pool.  During and after reload,
2376 	     we have to handle all possible values.  */
2377 	  if (insert)
2378 	    {
2379 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2380 		 register and the value to be inserted is outside the
2381 		 range that can be loaded with three depdi instructions.  */
2382 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2383 		{
2384 		  operand1 = GEN_INT (insv);
2385 
2386 		  emit_insn (gen_rtx_SET (temp,
2387 					  gen_rtx_HIGH (mode, operand1)));
2388 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2389 		  if (mode == DImode)
2390 		    insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2391 						  const0_rtx, temp));
2392 		  else
2393 		    insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2394 						  const0_rtx, temp));
2395 		}
2396 	      else
2397 		{
2398 		  int len = 5, pos = 27;
2399 
2400 		  /* Insert the bits using the depdi instruction.  */
2401 		  while (pos >= 0)
2402 		    {
2403 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2404 		      HOST_WIDE_INT sign = v5 < 0;
2405 
2406 		      /* Left extend the insertion.  */
2407 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2408 		      while (pos > 0 && (insv & 1) == sign)
2409 			{
2410 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2411 			  len += 1;
2412 			  pos -= 1;
2413 			}
2414 
2415 		      if (mode == DImode)
2416 			insn = emit_insn (gen_insvdi (operand0,
2417 						      GEN_INT (len),
2418 						      GEN_INT (pos),
2419 						      GEN_INT (v5)));
2420 		      else
2421 			insn = emit_insn (gen_insvsi (operand0,
2422 						      GEN_INT (len),
2423 						      GEN_INT (pos),
2424 						      GEN_INT (v5)));
2425 
2426 		      len = pos > 0 && pos < 5 ? pos : 5;
2427 		      pos -= len;
2428 		    }
2429 		}
2430 	    }
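
	  /* Editor's sketch (not compiled): the expression
	     ((insv & 31) ^ 16) - 16 above sign-extends the low five
	     bits of INSV, exactly the immediate range a single depdi
	     can deposit.  A quick host-side check:  */
#if 0
#include <assert.h>

int
main (void)
{
  long a = 0x12345;	/* low five bits are 0b00101 -> +5 */
  long b = 0x1f;	/* low five bits are 0b11111 -> -1 */

  assert ((((a & 31) ^ 16) - 16) == 5);
  assert ((((b & 31) ^ 16) - 16) == -1);
  return 0;
}
#endif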
2431 
2432 	  set_unique_reg_note (insn, REG_EQUAL, op1);
2433 
2434 	  return 1;
2435 	}
2436     }
2437   /* Now have insn-emit do whatever it normally does.  */
2438   return 0;
2439 }
2440 
2441 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2442    it will need a link/runtime reloc).  */
2443 
2444 int
2445 pa_reloc_needed (tree exp)
2446 {
2447   int reloc = 0;
2448 
2449   switch (TREE_CODE (exp))
2450     {
2451     case ADDR_EXPR:
2452       return 1;
2453 
2454     case POINTER_PLUS_EXPR:
2455     case PLUS_EXPR:
2456     case MINUS_EXPR:
2457       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2458       reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2459       break;
2460 
2461     CASE_CONVERT:
2462     case NON_LVALUE_EXPR:
2463       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2464       break;
2465 
2466     case CONSTRUCTOR:
2467       {
2468 	tree value;
2469 	unsigned HOST_WIDE_INT ix;
2470 
2471 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2472 	  if (value)
2473 	    reloc |= pa_reloc_needed (value);
2474       }
2475       break;
2476 
2477     case ERROR_MARK:
2478       break;
2479 
2480     default:
2481       break;
2482     }
2483   return reloc;
2484 }
2485 
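/* Editor's toy sketch (hypothetical, not part of the build): the walk
   above reduces to "an initializer needs a reloc iff an ADDR_EXPR
   occurs somewhere inside it".  The struct and helper below are
   illustrative stand-ins, not GCC's tree API.  */
#if 0
#include <assert.h>

struct toy_node
{
  int is_addr;			/* stands in for ADDR_EXPR */
  struct toy_node *op0, *op1;
};

static int
toy_reloc_needed (struct toy_node *n)
{
  if (!n)
    return 0;
  return n->is_addr | toy_reloc_needed (n->op0) | toy_reloc_needed (n->op1);
}

int
main (void)
{
  struct toy_node addr = { 1, 0, 0 };		/* &x */
  struct toy_node cst = { 0, 0, 0 };		/* 4 */
  struct toy_node sum = { 0, &addr, &cst };	/* &x + 4, PLUS_EXPR */

  assert (toy_reloc_needed (&sum) == 1);
  assert (toy_reloc_needed (&cst) == 0);
  return 0;
}
#endif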
2486 
2487 /* Return the best assembler insn template
2488    for moving operands[1] into operands[0] as a fullword.  */
2489 const char *
2490 pa_singlemove_string (rtx *operands)
2491 {
2492   HOST_WIDE_INT intval;
2493 
2494   if (GET_CODE (operands[0]) == MEM)
2495     return "stw %r1,%0";
2496   if (GET_CODE (operands[1]) == MEM)
2497     return "ldw %1,%0";
2498   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2499     {
2500       long i;
2501 
2502       gcc_assert (GET_MODE (operands[1]) == SFmode);
2503 
2504       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2505 	 bit pattern.  */
2506       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2507 
2508       operands[1] = GEN_INT (i);
2509       /* Fall through to CONST_INT case.  */
2510     }
2511   if (GET_CODE (operands[1]) == CONST_INT)
2512     {
2513       intval = INTVAL (operands[1]);
2514 
2515       if (VAL_14_BITS_P (intval))
2516 	return "ldi %1,%0";
2517       else if ((intval & 0x7ff) == 0)
2518 	return "ldil L'%1,%0";
2519       else if (pa_zdepi_cint_p (intval))
2520 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2521       else
2522 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2523     }
2524   return "copy %1,%0";
2525 }
2526 
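/* Editor's sketch (not compiled): the templates the routine above
   picks for a few sample word constants.  VAL_14_BITS_P is paraphrased
   as a signed 14-bit range check, and the zdepi test is simplified to
   the common contiguous-run case; the real pa_zdepi_cint_p accepts
   slightly more patterns.  */
#if 0
#include <assert.h>
#include <string.h>

static const char *
pick_template (long v)
{
  if (v >= -8192 && v < 8192)
    return "ldi";		/* fits a signed 14-bit immediate */
  if ((v & 0x7ff) == 0)
    return "ldil";		/* low 11 bits clear */
  long run = v & -v;		/* isolate the lowest set bit */
  long ones = v / run;		/* shift the bitstring down */
  if (v > 0 && (ones & (ones + 1)) == 0)
    return "zdepi";		/* contiguous run of ones */
  return "ldil+ldo";		/* general two-insn sequence */
}

int
main (void)
{
  assert (!strcmp (pick_template (8191), "ldi"));
  assert (!strcmp (pick_template (0x55000), "ldil"));
  assert (!strcmp (pick_template (0x7f80), "zdepi"));
  assert (!strcmp (pick_template (0x12345), "ldil+ldo"));
  return 0;
}
#endif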
2527 
2528 /* Compute position (in OP[1]) and width (in OP[2])
2529    useful for copying IMM to a register using the zdepi
2530    instructions.  Store the immediate value to insert in OP[0].  */
2531 static void
2532 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2533 {
2534   int lsb, len;
2535 
2536   /* Find the least significant set bit in IMM.  */
2537   for (lsb = 0; lsb < 32; lsb++)
2538     {
2539       if ((imm & 1) != 0)
2540         break;
2541       imm >>= 1;
2542     }
2543 
2544   /* Choose variants based on *sign* of the 5-bit field.  */
2545   if ((imm & 0x10) == 0)
2546     len = (lsb <= 28) ? 4 : 32 - lsb;
2547   else
2548     {
2549       /* Find the width of the bitstring in IMM.  */
2550       for (len = 5; len < 32 - lsb; len++)
2551 	{
2552 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2553 	    break;
2554 	}
2555 
2556       /* Sign extend IMM as a 5-bit value.  */
2557       imm = (imm & 0xf) - 0x10;
2558     }
2559 
2560   op[0] = imm;
2561   op[1] = 31 - lsb;
2562   op[2] = len;
2563 }
2564 
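/* Editor's worked example (not compiled): for IMM = 0x3c0 the loop
   above finds lsb = 6 and a positive 5-bit field, so OP becomes
   {15, 31 - 6 = 25, 4}: depositing the 4-bit value 15 with its LSB
   at bit 6 reconstructs 0x3c0.  A standalone replay:  */
#if 0
#include <assert.h>

int
main (void)
{
  unsigned long imm = 0x3c0;
  int lsb, len;

  for (lsb = 0; lsb < 32; lsb++)
    {
      if (imm & 1)
	break;
      imm >>= 1;
    }
  assert (lsb == 6 && imm == 0xf);

  /* Bit 4 of the field is clear, so a fixed 4-bit width suffices.  */
  len = (lsb <= 28) ? 4 : 32 - lsb;
  assert (len == 4);

  assert (((imm & ((1UL << len) - 1)) << lsb) == 0x3c0);
  return 0;
}
#endif
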
2565 /* Compute position (in OP[1]) and width (in OP[2])
2566    useful for copying IMM to a register using the depdi,z
2567    instructions.  Store the immediate value to insert in OP[0].  */
2568 
2569 static void
2570 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2571 {
2572   int lsb, len, maxlen;
2573 
2574   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2575 
2576   /* Find the least significant set bit in IMM.  */
2577   for (lsb = 0; lsb < maxlen; lsb++)
2578     {
2579       if ((imm & 1) != 0)
2580         break;
2581       imm >>= 1;
2582     }
2583 
2584   /* Choose variants based on *sign* of the 5-bit field.  */
2585   if ((imm & 0x10) == 0)
2586     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2587   else
2588     {
2589       /* Find the width of the bitstring in IMM.  */
2590       for (len = 5; len < maxlen - lsb; len++)
2591 	{
2592 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2593 	    break;
2594 	}
2595 
2596       /* Extend length if host is narrow and IMM is negative.  */
2597       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2598 	len += 32;
2599 
2600       /* Sign extend IMM as a 5-bit value.  */
2601       imm = (imm & 0xf) - 0x10;
2602     }
2603 
2604   op[0] = imm;
2605   op[1] = 63 - lsb;
2606   op[2] = len;
2607 }
2608 
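/* Editor's sketch (not compiled): the 64-bit variant differs from the
   32-bit one only in the field-width limit and in reporting the
   position as 63 - lsb.  For IMM = 0xf000000000, lsb = 36 and OP
   becomes {15, 27, 4}:  */
#if 0
#include <assert.h>

int
main (void)
{
  unsigned long long imm = 0xf000000000ULL;
  int lsb;

  for (lsb = 0; lsb < 64; lsb++)
    {
      if (imm & 1)
	break;
      imm >>= 1;
    }
  assert (lsb == 36 && imm == 0xf);
  assert (63 - lsb == 27);	/* op[1] for depdi,z */
  return 0;
}
#endif
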
2609 /* Output assembler code to perform a doubleword move insn
2610    with operands OPERANDS.  */
2611 
2612 const char *
2613 pa_output_move_double (rtx *operands)
2614 {
2615   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2616   rtx latehalf[2];
2617   rtx addreg0 = 0, addreg1 = 0;
2618   int highonly = 0;
2619 
2620   /* First classify both operands.  */
2621 
2622   if (REG_P (operands[0]))
2623     optype0 = REGOP;
2624   else if (offsettable_memref_p (operands[0]))
2625     optype0 = OFFSOP;
2626   else if (GET_CODE (operands[0]) == MEM)
2627     optype0 = MEMOP;
2628   else
2629     optype0 = RNDOP;
2630 
2631   if (REG_P (operands[1]))
2632     optype1 = REGOP;
2633   else if (CONSTANT_P (operands[1]))
2634     optype1 = CNSTOP;
2635   else if (offsettable_memref_p (operands[1]))
2636     optype1 = OFFSOP;
2637   else if (GET_CODE (operands[1]) == MEM)
2638     optype1 = MEMOP;
2639   else
2640     optype1 = RNDOP;
2641 
2642   /* Check for the cases that the operand constraints are not
2643      supposed to allow to happen.  */
2644   gcc_assert (optype0 == REGOP || optype1 == REGOP);
2645 
2646   /* Handle copies between general and floating registers.  */
2647 
2648   if (optype0 == REGOP && optype1 == REGOP
2649       && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2650     {
2651       if (FP_REG_P (operands[0]))
2652 	{
2653 	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2654 	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2655 	  return "{fldds|fldd} -16(%%sp),%0";
2656 	}
2657       else
2658 	{
2659 	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2660 	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2661 	  return "{ldws|ldw} -12(%%sp),%R0";
2662 	}
2663     }
2664 
2665   /* Handle auto-decrementing and auto-incrementing loads and stores
2666      specifically, since the structure of the function doesn't work
2667      for them without major modification.  Do it better when we teach
2668      this port about the general inc/dec addressing of PA.
2669      (This was written by tege.  Chide him if it doesn't work.)  */
2670 
2671   if (optype0 == MEMOP)
2672     {
2673       /* We have to output the address syntax ourselves, since print_operand
2674 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2675 
2676       rtx addr = XEXP (operands[0], 0);
2677       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2678 	{
2679 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2680 
2681 	  operands[0] = XEXP (addr, 0);
2682 	  gcc_assert (GET_CODE (operands[1]) == REG
2683 		      && GET_CODE (operands[0]) == REG);
2684 
2685 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2686 
2687 	  /* No overlap between high target register and address
2688 	     register.  (We do this in a non-obvious way to
2689 	     save a register file writeback)  */
2690 	  if (GET_CODE (addr) == POST_INC)
2691 	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2692 	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2693 	}
2694       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2695 	{
2696 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2697 
2698 	  operands[0] = XEXP (addr, 0);
2699 	  gcc_assert (GET_CODE (operands[1]) == REG
2700 		      && GET_CODE (operands[0]) == REG);
2701 
2702 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2703 	  /* No overlap between high target register and address
2704 	     register.  (We do this in a non-obvious way to save a
2705 	     register file writeback)  */
2706 	  if (GET_CODE (addr) == PRE_INC)
2707 	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2708 	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2709 	}
2710     }
2711   if (optype1 == MEMOP)
2712     {
2713       /* We have to output the address syntax ourselves, since print_operand
2714 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2715 
2716       rtx addr = XEXP (operands[1], 0);
2717       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2718 	{
2719 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2720 
2721 	  operands[1] = XEXP (addr, 0);
2722 	  gcc_assert (GET_CODE (operands[0]) == REG
2723 		      && GET_CODE (operands[1]) == REG);
2724 
2725 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2726 	    {
2727 	      /* No overlap between high target register and address
2728 		 register.  (We do this in a non-obvious way to
2729 		 save a register file writeback)  */
2730 	      if (GET_CODE (addr) == POST_INC)
2731 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2732 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2733 	    }
2734 	  else
2735 	    {
2736 	      /* This is an undefined situation.  We should load into the
2737 		 address register *and* update that register.  Probably
2738 		 we don't need to handle this at all.  */
2739 	      if (GET_CODE (addr) == POST_INC)
2740 		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2741 	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2742 	    }
2743 	}
2744       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2745 	{
2746 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2747 
2748 	  operands[1] = XEXP (addr, 0);
2749 	  gcc_assert (GET_CODE (operands[0]) == REG
2750 		      && GET_CODE (operands[1]) == REG);
2751 
2752 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2753 	    {
2754 	      /* No overlap between high target register and address
2755 		 register.  (We do this in a non-obvious way to
2756 		 save a register file writeback)  */
2757 	      if (GET_CODE (addr) == PRE_INC)
2758 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2759 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2760 	    }
2761 	  else
2762 	    {
2763 	      /* This is an undefined situation.  We should load into the
2764 		 address register *and* update that register.  Probably
2765 		 we don't need to handle this at all.  */
2766 	      if (GET_CODE (addr) == PRE_INC)
2767 		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2768 	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2769 	    }
2770 	}
2771       else if (GET_CODE (addr) == PLUS
2772 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2773 	{
2774 	  rtx xoperands[4];
2775 
2776 	  /* Load address into left half of destination register.  */
2777 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2778 	  xoperands[1] = XEXP (addr, 1);
2779 	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
2780 	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
2781 	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2782 			   xoperands);
2783 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2784 	}
2785       else if (GET_CODE (addr) == PLUS
2786 	       && REG_P (XEXP (addr, 0))
2787 	       && REG_P (XEXP (addr, 1)))
2788 	{
2789 	  rtx xoperands[3];
2790 
2791 	  /* Load address into left half of destination register.  */
2792 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2793 	  xoperands[1] = XEXP (addr, 0);
2794 	  xoperands[2] = XEXP (addr, 1);
2795 	  output_asm_insn ("{addl|add,l} %1,%2,%0",
2796 			   xoperands);
2797 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2798 	}
2799     }
2800 
2801   /* If an operand is an unoffsettable memory ref, find a register
2802      we can increment temporarily to make it refer to the second word.  */
2803 
2804   if (optype0 == MEMOP)
2805     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2806 
2807   if (optype1 == MEMOP)
2808     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2809 
2810   /* Ok, we can do one word at a time.
2811      Normally we do the low-numbered word first.
2812 
2813      In either case, set up in LATEHALF the operands to use
2814      for the high-numbered word and in some cases alter the
2815      operands in OPERANDS to be suitable for the low-numbered word.  */
2816 
2817   if (optype0 == REGOP)
2818     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2819   else if (optype0 == OFFSOP)
2820     latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2821   else
2822     latehalf[0] = operands[0];
2823 
2824   if (optype1 == REGOP)
2825     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2826   else if (optype1 == OFFSOP)
2827     latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2828   else if (optype1 == CNSTOP)
2829     {
2830       if (GET_CODE (operands[1]) == HIGH)
2831 	{
2832 	  operands[1] = XEXP (operands[1], 0);
2833 	  highonly = 1;
2834 	}
2835       split_double (operands[1], &operands[1], &latehalf[1]);
2836     }
2837   else
2838     latehalf[1] = operands[1];
2839 
2840   /* If the first move would clobber the source of the second one,
2841      do them in the other order.
2842 
2843      This can happen in two cases:
2844 
2845 	mem -> register where the first half of the destination register
2846 	is the same register used in the memory's address.  Reload
2847 	can create such insns.
2848 
2849 	mem in this case will be either register indirect or register
2850 	indirect plus a valid offset.
2851 
2852 	register -> register move where REGNO(dst) == REGNO(src) + 1.
2853 	Someone (Tim/Tege?) claimed this can happen for parameter loads.
2854 
2855      Handle mem -> register case first.  */
2856   if (optype0 == REGOP
2857       && (optype1 == MEMOP || optype1 == OFFSOP)
2858       && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2859     {
2860       /* Do the late half first.  */
2861       if (addreg1)
2862 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2863       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2864 
2865       /* Then clobber.  */
2866       if (addreg1)
2867 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2868       return pa_singlemove_string (operands);
2869     }
2870 
2871   /* Now handle register -> register case.  */
2872   if (optype0 == REGOP && optype1 == REGOP
2873       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2874     {
2875       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2876       return pa_singlemove_string (operands);
2877     }
2878 
2879   /* Normal case: do the two words, low-numbered first.  */
2880 
2881   output_asm_insn (pa_singlemove_string (operands), operands);
2882 
2883   /* Make any unoffsettable addresses point at high-numbered word.  */
2884   if (addreg0)
2885     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2886   if (addreg1)
2887     output_asm_insn ("ldo 4(%0),%0", &addreg1);
2888 
2889   /* Do high-numbered word.  */
2890   if (highonly)
2891     output_asm_insn ("ldil L'%1,%0", latehalf);
2892   else
2893     output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2894 
2895   /* Undo the adds we just did.  */
2896   if (addreg0)
2897     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2898   if (addreg1)
2899     output_asm_insn ("ldo -4(%0),%0", &addreg1);
2900 
2901   return "";
2902 }
2903 
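/* Editor's sketch (hypothetical, not compiled): why the overlapping
   register->register case above emits the late half first.  Registers
   are modeled as an array; copying the pair r4:r5 into r5:r6, i.e.
   REGNO(dst) == REGNO(src) + 1, clobbers r5 if done low-word first.  */
#if 0
#include <assert.h>

int
main (void)
{
  int r[8] = { 0, 1, 2, 3, 40, 50, 6, 7 };

  r[6] = r[5];		/* late half first: reads the original r5 */
  r[5] = r[4];		/* then the low word */
  assert (r[5] == 40 && r[6] == 50);
  return 0;
}
#endif
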
2904 const char *
2905 pa_output_fp_move_double (rtx *operands)
2906 {
2907   if (FP_REG_P (operands[0]))
2908     {
2909       if (FP_REG_P (operands[1])
2910 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2911 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2912       else
2913 	output_asm_insn ("fldd%F1 %1,%0", operands);
2914     }
2915   else if (FP_REG_P (operands[1]))
2916     {
2917       output_asm_insn ("fstd%F0 %1,%0", operands);
2918     }
2919   else
2920     {
2921       rtx xoperands[2];
2922 
2923       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2924 
2925       /* This is a pain.  You have to be prepared to deal with an
2926 	 arbitrary address here including pre/post increment/decrement.
2927 
2928 	 So avoid this in the MD.  */
2929       gcc_assert (GET_CODE (operands[0]) == REG);
2930 
2931       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2932       xoperands[0] = operands[0];
2933       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2934     }
2935   return "";
2936 }
2937 
2938 /* Return a REG that occurs in ADDR with coefficient 1.
2939    ADDR can be effectively incremented by incrementing REG.  */
2940 
2941 static rtx
2942 find_addr_reg (rtx addr)
2943 {
2944   while (GET_CODE (addr) == PLUS)
2945     {
2946       if (GET_CODE (XEXP (addr, 0)) == REG)
2947 	addr = XEXP (addr, 0);
2948       else if (GET_CODE (XEXP (addr, 1)) == REG)
2949 	addr = XEXP (addr, 1);
2950       else if (CONSTANT_P (XEXP (addr, 0)))
2951 	addr = XEXP (addr, 1);
2952       else if (CONSTANT_P (XEXP (addr, 1)))
2953 	addr = XEXP (addr, 0);
2954       else
2955 	gcc_unreachable ();
2956     }
2957   gcc_assert (GET_CODE (addr) == REG);
2958   return addr;
2959 }
2960 
2961 /* Emit code to perform a block move.
2962 
2963    OPERANDS[0] is the destination pointer as a REG, clobbered.
2964    OPERANDS[1] is the source pointer as a REG, clobbered.
2965    OPERANDS[2] is a register for temporary storage.
2966    OPERANDS[3] is a register for temporary storage.
2967    OPERANDS[4] is the size as a CONST_INT.
2968    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2969    OPERANDS[6] is another temporary register.  */
2970 
2971 const char *
2972 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2973 {
2974   HOST_WIDE_INT align = INTVAL (operands[5]);
2975   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]);
2976 
2977   /* We can't move more than a word at a time because the PA
2978      has no wider integer move insns.  (Could use fp mem ops?)  */
2979   if (align > (TARGET_64BIT ? 8 : 4))
2980     align = (TARGET_64BIT ? 8 : 4);
2981 
2982   /* Note that we know each loop below will execute at least twice
2983      (else we would have open-coded the copy).  */
2984   switch (align)
2985     {
2986       case 8:
2987 	/* Pre-adjust the loop counter.  */
2988 	operands[4] = GEN_INT (n_bytes - 16);
2989 	output_asm_insn ("ldi %4,%2", operands);
2990 
2991 	/* Copying loop.  */
2992 	output_asm_insn ("ldd,ma 8(%1),%3", operands);
2993 	output_asm_insn ("ldd,ma 8(%1),%6", operands);
2994 	output_asm_insn ("std,ma %3,8(%0)", operands);
2995 	output_asm_insn ("addib,>= -16,%2,.-12", operands);
2996 	output_asm_insn ("std,ma %6,8(%0)", operands);
2997 
2998 	/* Handle the residual.  There could be up to 15 bytes of
2999 	   residual to copy!  */
3000 	if (n_bytes % 16 != 0)
3001 	  {
3002 	    operands[4] = GEN_INT (n_bytes % 8);
3003 	    if (n_bytes % 16 >= 8)
3004 	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
3005 	    if (n_bytes % 8 != 0)
3006 	      output_asm_insn ("ldd 0(%1),%6", operands);
3007 	    if (n_bytes % 16 >= 8)
3008 	      output_asm_insn ("std,ma %3,8(%0)", operands);
3009 	    if (n_bytes % 8 != 0)
3010 	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
3011 	  }
3012 	return "";
3013 
3014       case 4:
3015 	/* Pre-adjust the loop counter.  */
3016 	operands[4] = GEN_INT (n_bytes - 8);
3017 	output_asm_insn ("ldi %4,%2", operands);
3018 
3019 	/* Copying loop.  */
3020 	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3021 	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
3022 	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3023 	output_asm_insn ("addib,>= -8,%2,.-12", operands);
3024 	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
3025 
3026 	/* Handle the residual.  There could be up to 7 bytes of
3027 	   residual to copy!  */
3028 	if (n_bytes % 8 != 0)
3029 	  {
3030 	    operands[4] = GEN_INT (n_bytes % 4);
3031 	    if (n_bytes % 8 >= 4)
3032 	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3033 	    if (n_bytes % 4 != 0)
3034 	      output_asm_insn ("ldw 0(%1),%6", operands);
3035 	    if (n_bytes % 8 >= 4)
3036 	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3037 	    if (n_bytes % 4 != 0)
3038 	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
3039 	  }
3040 	return "";
3041 
3042       case 2:
3043 	/* Pre-adjust the loop counter.  */
3044 	operands[4] = GEN_INT (n_bytes - 4);
3045 	output_asm_insn ("ldi %4,%2", operands);
3046 
3047 	/* Copying loop.  */
3048 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3049 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
3050 	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3051 	output_asm_insn ("addib,>= -4,%2,.-12", operands);
3052 	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
3053 
3054 	/* Handle the residual.  */
3055 	if (n_bytes % 4 != 0)
3056 	  {
3057 	    if (n_bytes % 4 >= 2)
3058 	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3059 	    if (n_bytes % 2 != 0)
3060 	      output_asm_insn ("ldb 0(%1),%6", operands);
3061 	    if (n_bytes % 4 >= 2)
3062 	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3063 	    if (n_bytes % 2 != 0)
3064 	      output_asm_insn ("stb %6,0(%0)", operands);
3065 	  }
3066 	return "";
3067 
3068       case 1:
3069 	/* Pre-adjust the loop counter.  */
3070 	operands[4] = GEN_INT (n_bytes - 2);
3071 	output_asm_insn ("ldi %4,%2", operands);
3072 
3073 	/* Copying loop.  */
3074 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
3075 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
3076 	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
3077 	output_asm_insn ("addib,>= -2,%2,.-12", operands);
3078 	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
3079 
3080 	/* Handle the residual.  */
3081 	if (n_bytes % 2 != 0)
3082 	  {
3083 	    output_asm_insn ("ldb 0(%1),%3", operands);
3084 	    output_asm_insn ("stb %3,0(%0)", operands);
3085 	  }
3086 	return "";
3087 
3088       default:
3089 	gcc_unreachable ();
3090     }
3091 }
3092 
3093 /* Count the number of insns necessary to handle this block move.
3094 
3095    Basic structure is the same as pa_output_block_move, except that
3096    we count insns rather than emit them.  */
3097 
3098 static int
3099 compute_cpymem_length (rtx_insn *insn)
3100 {
3101   rtx pat = PATTERN (insn);
3102   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3103   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3104   unsigned int n_insns = 0;
3105 
3106   /* We can't move more than a word at a time because the PA
3107      has no wider integer move insns.  (Could use fp mem ops?)  */
3108   if (align > (TARGET_64BIT ? 8 : 4))
3109     align = (TARGET_64BIT ? 8 : 4);
3110 
3111   /* The basic copying loop.  */
3112   n_insns = 6;
3113 
3114   /* Residuals.  */
3115   if (n_bytes % (2 * align) != 0)
3116     {
3117       if ((n_bytes % (2 * align)) >= align)
3118 	n_insns += 2;
3119 
3120       if ((n_bytes % align) != 0)
3121 	n_insns += 2;
3122     }
3123 
3124   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3125   return n_insns * 4;
3126 }
3127 
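/* Editor's worked example (not compiled): a 19-byte copy at 4-byte
   alignment costs the 6-insn loop; 19 % 8 = 3 leaves only a sub-word
   residual, adding 2 insns, so the length is 8 * 4 = 32 bytes:  */
#if 0
#include <assert.h>

int
main (void)
{
  unsigned n_bytes = 19, align = 4, n_insns = 6;

  if (n_bytes % (2 * align) != 0)
    {
      if (n_bytes % (2 * align) >= align)
	n_insns += 2;		/* word-sized residual */
      if (n_bytes % align != 0)
	n_insns += 2;		/* sub-word residual */
    }
  assert (n_insns == 8 && n_insns * 4 == 32);
  return 0;
}
#endif
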
3128 /* Emit code to perform a block clear.
3129 
3130    OPERANDS[0] is the destination pointer as a REG, clobbered.
3131    OPERANDS[1] is a register for temporary storage.
3132    OPERANDS[2] is the size as a CONST_INT.
3133    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
3134 
3135 const char *
3136 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3137 {
3138   HOST_WIDE_INT align = INTVAL (operands[3]);
3139   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]);
3140 
3141   /* We can't clear more than a word at a time because the PA
3142      has no wider integer move insns.  */
3143   if (align > (TARGET_64BIT ? 8 : 4))
3144     align = (TARGET_64BIT ? 8 : 4);
3145 
3146   /* Note that we know each loop below will execute at least twice
3147      (else we would have open-coded the clear).  */
3148   switch (align)
3149     {
3150       case 8:
3151 	/* Pre-adjust the loop counter.  */
3152 	operands[2] = GEN_INT (n_bytes - 16);
3153 	output_asm_insn ("ldi %2,%1", operands);
3154 
3155 	/* Loop.  */
3156 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3157 	output_asm_insn ("addib,>= -16,%1,.-4", operands);
3158 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3159 
3160 	/* Handle the residual.  There could be up to 15 bytes of
3161 	   residual to clear!  */
3162 	if (n_bytes % 16 != 0)
3163 	  {
3164 	    operands[2] = GEN_INT (n_bytes % 8);
3165 	    if (n_bytes % 16 >= 8)
3166 	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
3167 	    if (n_bytes % 8 != 0)
3168 	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3169 	  }
3170 	return "";
3171 
3172       case 4:
3173 	/* Pre-adjust the loop counter.  */
3174 	operands[2] = GEN_INT (n_bytes - 8);
3175 	output_asm_insn ("ldi %2,%1", operands);
3176 
3177 	/* Loop.  */
3178 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3179 	output_asm_insn ("addib,>= -8,%1,.-4", operands);
3180 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3181 
3182 	/* Handle the residual.  There could be up to 7 bytes of
3183 	   residual to clear!  */
3184 	if (n_bytes % 8 != 0)
3185 	  {
3186 	    operands[2] = GEN_INT (n_bytes % 4);
3187 	    if (n_bytes % 8 >= 4)
3188 	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3189 	    if (n_bytes % 4 != 0)
3190 	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3191 	  }
3192 	return "";
3193 
3194       case 2:
3195 	/* Pre-adjust the loop counter.  */
3196 	operands[2] = GEN_INT (n_bytes - 4);
3197 	output_asm_insn ("ldi %2,%1", operands);
3198 
3199 	/* Loop.  */
3200 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3201 	output_asm_insn ("addib,>= -4,%1,.-4", operands);
3202 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3203 
3204 	/* Handle the residual.  */
3205 	if (n_bytes % 4 != 0)
3206 	  {
3207 	    if (n_bytes % 4 >= 2)
3208 	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3209 	    if (n_bytes % 2 != 0)
3210 	      output_asm_insn ("stb %%r0,0(%0)", operands);
3211 	  }
3212 	return "";
3213 
3214       case 1:
3215 	/* Pre-adjust the loop counter.  */
3216 	operands[2] = GEN_INT (n_bytes - 2);
3217 	output_asm_insn ("ldi %2,%1", operands);
3218 
3219 	/* Loop.  */
3220 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3221 	output_asm_insn ("addib,>= -2,%1,.-4", operands);
3222 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3223 
3224 	/* Handle the residual.  */
3225 	if (n_bytes % 2 != 0)
3226 	  output_asm_insn ("stb %%r0,0(%0)", operands);
3227 
3228 	return "";
3229 
3230       default:
3231 	gcc_unreachable ();
3232     }
3233 }
3234 
3235 /* Count the number of insns necessary to handle this block clear.
3236 
3237    Basic structure is the same as pa_output_block_clear, except that we
3238    count insns rather than emit them.  */
3239 
3240 static int
3241 compute_clrmem_length (rtx_insn *insn)
3242 {
3243   rtx pat = PATTERN (insn);
3244   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3245   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3246   unsigned int n_insns = 0;
3247 
3248   /* We can't clear more than a word at a time because the PA
3249      has no integer move insns longer than a word.  */
3250   if (align > (TARGET_64BIT ? 8 : 4))
3251     align = (TARGET_64BIT ? 8 : 4);
3252 
3253   /* The basic loop.  */
3254   n_insns = 4;
3255 
3256   /* Residuals.  */
3257   if (n_bytes % (2 * align) != 0)
3258     {
3259       if ((n_bytes % (2 * align)) >= align)
3260 	n_insns++;
3261 
3262       if ((n_bytes % align) != 0)
3263 	n_insns++;
3264     }
3265 
3266   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3267   return n_insns * 4;
3268 }
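
/* For illustration: with n_bytes = 11 and align = 4, the basic loop
   costs 4 insns; 11 % 8 = 3 skips the word store but needs the
   sub-word store, adding 1 insn for (4 + 1) * 4 = 20 bytes of
   code.  */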
3269 
3270 
3271 const char *
3272 pa_output_and (rtx *operands)
3273 {
3274   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3275     {
3276       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3277       int ls0, ls1, ms0, p, len;
3278 
3279       for (ls0 = 0; ls0 < 32; ls0++)
3280 	if ((mask & (1 << ls0)) == 0)
3281 	  break;
3282 
3283       for (ls1 = ls0; ls1 < 32; ls1++)
3284 	if ((mask & (1 << ls1)) != 0)
3285 	  break;
3286 
3287       for (ms0 = ls1; ms0 < 32; ms0++)
3288 	if ((mask & (1 << ms0)) == 0)
3289 	  break;
3290 
3291       gcc_assert (ms0 == 32);
3292 
3293       if (ls1 == 32)
3294 	{
3295 	  len = ls0;
3296 
3297 	  gcc_assert (len);
3298 
3299 	  operands[2] = GEN_INT (len);
3300 	  return "{extru|extrw,u} %1,31,%2,%0";
3301 	}
3302       else
3303 	{
3304 	  /* We could use this `depi' for the case above as well, but `depi'
3305 	     requires one more register file access than an `extru'.  */
3306 
3307 	  p = 31 - ls0;
3308 	  len = ls1 - ls0;
3309 
3310 	  operands[2] = GEN_INT (p);
3311 	  operands[3] = GEN_INT (len);
3312 	  return "{depi|depwi} 0,%2,%3,%0";
3313 	}
3314     }
3315   else
3316     return "and %1,%2,%0";
3317 }
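
/* For illustration: a mask of 0x7f yields ls0 = 7 and ls1 = 32, so
   the AND is output as "extrw,u %1,31,7,%0" (extract the low seven
   bits); a mask of 0xffff00ff yields ls0 = 8 and ls1 = 16, so it
   becomes "depwi 0,23,8,%0" (deposit eight zero bits at positions
   8..15).  The gcc_assert above documents that masks with more than
   one hole never reach this code.  */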
3318 
3319 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3320    storing the result in operands[0].  */
3321 const char *
3322 pa_output_64bit_and (rtx *operands)
3323 {
3324   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3325     {
3326       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3327       int ls0, ls1, ms0, p, len;
3328 
3329       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3330 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3331 	  break;
3332 
3333       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3334 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3335 	  break;
3336 
3337       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3338 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3339 	  break;
3340 
3341       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3342 
3343       if (ls1 == HOST_BITS_PER_WIDE_INT)
3344 	{
3345 	  len = ls0;
3346 
3347 	  gcc_assert (len);
3348 
3349 	  operands[2] = GEN_INT (len);
3350 	  return "extrd,u %1,63,%2,%0";
3351 	}
3352       else
3353 	{
3354 	  /* We could use this `depdi' for the case above as well, but `depdi'
3355 	     requires one more register file access than an `extrd,u'.  */
3356 
3357 	  p = 63 - ls0;
3358 	  len = ls1 - ls0;
3359 
3360 	  operands[2] = GEN_INT (p);
3361 	  operands[3] = GEN_INT (len);
3362 	  return "depdi 0,%2,%3,%0";
3363 	}
3364     }
3365   else
3366     return "and %1,%2,%0";
3367 }
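
/* For illustration: in this 64-bit variant, a mask of 0xff yields
   ls0 = 8 and ls1 = 64, so the AND is output as
   "extrd,u %1,63,8,%0".  */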
3368 
3369 const char *
3370 pa_output_ior (rtx *operands)
3371 {
3372   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3373   int bs0, bs1, p, len;
3374 
3375   if (INTVAL (operands[2]) == 0)
3376     return "copy %1,%0";
3377 
3378   for (bs0 = 0; bs0 < 32; bs0++)
3379     if ((mask & (1 << bs0)) != 0)
3380       break;
3381 
3382   for (bs1 = bs0; bs1 < 32; bs1++)
3383     if ((mask & (1 << bs1)) == 0)
3384       break;
3385 
3386   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3387 
3388   p = 31 - bs0;
3389   len = bs1 - bs0;
3390 
3391   operands[2] = GEN_INT (p);
3392   operands[3] = GEN_INT (len);
3393   return "{depi|depwi} -1,%2,%3,%0";
3394 }
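
/* For illustration: a mask of 0xf00 yields bs0 = 8 and bs1 = 12, so
   the IOR is output as "depwi -1,23,4,%0", depositing four one bits
   at positions 8..11.  The gcc_assert above documents that only masks
   consisting of a single contiguous run of ones reach this code.  */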
3395 
3396 /* Return a string to perform a bitwise inclusive-or of operands[1] with
3397    operands[2], storing the result in operands[0].  */
3398 const char *
3399 pa_output_64bit_ior (rtx *operands)
3400 {
3401   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3402   int bs0, bs1, p, len;
3403 
3404   if (INTVAL (operands[2]) == 0)
3405     return "copy %1,%0";
3406 
3407   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3408     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3409       break;
3410 
3411   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3412     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3413       break;
3414 
3415   gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3416 	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3417 
3418   p = 63 - bs0;
3419   len = bs1 - bs0;
3420 
3421   operands[2] = GEN_INT (p);
3422   operands[3] = GEN_INT (len);
3423   return "depdi -1,%2,%3,%0";
3424 }
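
/* For illustration: in this 64-bit variant, a mask of 0xff00000000
   yields bs0 = 32 and bs1 = 40, so the IOR is output as
   "depdi -1,31,8,%0".  */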
3425 
3426 /* Target hook for assembling integer objects.  This code handles
3427    aligned SI and DI integers specially since function references
3428    must be preceded by P%.  */
3429 
3430 static bool
3431 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3432 {
3433   bool result;
3434   tree decl = NULL;
3435 
3436 	  /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to
3437 	     call assemble_external and set the SYMBOL_REF_DECL to NULL before
3438 	     calling output_addr_const.  Otherwise, it may call assemble_external
3439 	     in the midst of outputting the assembler code for the SYMBOL_REF.
3440 	     We restore the SYMBOL_REF_DECL after the output is done.  */
3441   if (GET_CODE (x) == SYMBOL_REF)
3442     {
3443       decl = SYMBOL_REF_DECL (x);
3444       if (decl)
3445 	{
3446 	  assemble_external (decl);
3447 	  SET_SYMBOL_REF_DECL (x, NULL);
3448 	}
3449     }
3450 
3451   if (size == UNITS_PER_WORD
3452       && aligned_p
3453       && function_label_operand (x, VOIDmode))
3454     {
3455 	      fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3456 
3457       /* We don't want an OPD when generating fast indirect calls.  */
3458       if (!TARGET_FAST_INDIRECT_CALLS)
3459 	fputs ("P%", asm_out_file);
3460 
3461       output_addr_const (asm_out_file, x);
3462       fputc ('\n', asm_out_file);
3463       result = true;
3464     }
3465   else
3466     result = default_assemble_integer (x, size, aligned_p);
3467 
3468   if (decl)
3469     SET_SYMBOL_REF_DECL (x, decl);
3470 
3471   return result;
3472 }
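
/* For illustration: an aligned, word-sized reference to a function
   foo is emitted as "\t.word\tP%foo" (or "\t.dword\tP%foo" in 64-bit
   code); the P% prefix asks the linker to resolve the reference to a
   function descriptor (plabel) rather than to the raw code
   address.  */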
3473 
3474 /* Output an ascii string.  */
3475 void
3476 pa_output_ascii (FILE *file, const char *p, int size)
3477 {
3478   int i;
3479   int chars_output;
3480   unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
3481 
3482   /* The HP assembler can only take strings of 256 characters at one
3483      time.  This is a limitation on input line length, *not* the
3484      length of the string.  Sigh.  Even worse, it seems that the
3485      restriction is in number of input characters (see \xnn &
3486      \whatever).  So we have to do this very carefully.  */
3487 
3488   fputs ("\t.STRING \"", file);
3489 
3490   chars_output = 0;
3491   for (i = 0; i < size; i += 4)
3492     {
3493       int co = 0;
3494       int io = 0;
3495       for (io = 0, co = 0; io < MIN (4, size - i); io++)
3496 	{
3497 	  register unsigned int c = (unsigned char) p[i + io];
3498 
3499 	  if (c == '\"' || c == '\\')
3500 	    partial_output[co++] = '\\';
3501 	  if (c >= ' ' && c < 0177)
3502 	    partial_output[co++] = c;
3503 	  else
3504 	    {
3505 	      unsigned int hexd;
3506 	      partial_output[co++] = '\\';
3507 	      partial_output[co++] = 'x';
3508 	      hexd = c / 16 + '0';
3509 	      if (hexd > '9')
3510 		hexd -= '9' - 'a' + 1;
3511 	      partial_output[co++] = hexd;
3512 	      hexd = c % 16 + '0';
3513 	      if (hexd > '9')
3514 		hexd -= '9' - 'a' + 1;
3515 	      partial_output[co++] = hexd;
3516 	    }
3517 	}
3518       if (chars_output + co > 243)
3519 	{
3520 	  fputs ("\"\n\t.STRING \"", file);
3521 	  chars_output = 0;
3522 	}
3523       fwrite (partial_output, 1, (size_t) co, file);
3524       chars_output += co;
3525       co = 0;
3526     }
3527   fputs ("\"\n", file);
3528 }
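
/* For illustration: a newline (c = 10) is escaped as "\x0a" above:
   c / 16 = 0 maps to '0', while c % 16 = 10 maps to '0' + 10 = ':';
   that exceeds '9', so subtracting '9' - 'a' + 1 (a negative value)
   moves it to 'a'.  */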
3529 
3530 /* Try to rewrite floating point comparisons & branches to avoid
3531    useless add,tr insns.
3532 
3533    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3534    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3535    first attempt to remove useless add,tr insns.  It is zero
3536    for the second pass as reorg sometimes leaves bogus REG_DEAD
3537    notes lying around.
3538 
3539    When CHECK_NOTES is zero we can only eliminate add,tr insns
3540    when there's a 1:1 correspondence between fcmp and ftest/fbranch
3541    instructions.  */
3542 static void
3543 remove_useless_addtr_insns (int check_notes)
3544 {
3545   rtx_insn *insn;
3546   static int pass = 0;
3547 
3548   /* This is fairly cheap, so always run it when optimizing.  */
3549   if (optimize > 0)
3550     {
3551       int fcmp_count = 0;
3552       int fbranch_count = 0;
3553 
3554       /* Walk all the insns in this function looking for fcmp & fbranch
3555 	 instructions.  Keep track of how many of each we find.  */
3556       for (insn = get_insns (); insn; insn = next_insn (insn))
3557 	{
3558 	  rtx tmp;
3559 
3560 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3561 	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3562 	    continue;
3563 
3564 	  tmp = PATTERN (insn);
3565 
3566 	  /* It must be a set.  */
3567 	  if (GET_CODE (tmp) != SET)
3568 	    continue;
3569 
3570 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3571 	  tmp = SET_DEST (tmp);
3572 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3573 	    {
3574 	      fcmp_count++;
3575 	      continue;
3576 	    }
3577 
3578 	  tmp = PATTERN (insn);
3579 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3580 	  if (GET_CODE (tmp) == SET
3581 	      && SET_DEST (tmp) == pc_rtx
3582 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3583 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3584 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3585 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3586 	    {
3587 	      fbranch_count++;
3588 	      continue;
3589 	    }
3590 	}
3591 
3592 
3593       /* Find all floating point compare + branch insns.  If possible,
3594 	 reverse the comparison & the branch to avoid add,tr insns.  */
3595       for (insn = get_insns (); insn; insn = next_insn (insn))
3596 	{
3597 	  rtx tmp;
3598 	  rtx_insn *next;
3599 
3600 	  /* Ignore anything that isn't an INSN.  */
3601 	  if (! NONJUMP_INSN_P (insn))
3602 	    continue;
3603 
3604 	  tmp = PATTERN (insn);
3605 
3606 	  /* It must be a set.  */
3607 	  if (GET_CODE (tmp) != SET)
3608 	    continue;
3609 
3610 	  /* The destination must be CCFP, which is register zero.  */
3611 	  tmp = SET_DEST (tmp);
3612 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3613 	    continue;
3614 
3615 	  /* INSN should be a set of CCFP.
3616 
3617 	     See if the result of this insn is used in a reversed FP
3618 	     conditional branch.  If so, reverse our condition and
3619 	     the branch.  Doing so avoids useless add,tr insns.  */
3620 	  next = next_insn (insn);
3621 	  while (next)
3622 	    {
3623 	      /* Jumps, calls and labels stop our search.  */
3624 	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3625 		break;
3626 
3627 	      /* As does another fcmp insn.  */
3628 	      if (NONJUMP_INSN_P (next)
3629 		  && GET_CODE (PATTERN (next)) == SET
3630 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3631 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3632 		break;
3633 
3634 	      next = next_insn (next);
3635 	    }
3636 
3637 	  /* Is NEXT_INSN a branch?  */
3638 	  if (next && JUMP_P (next))
3639 	    {
3640 	      rtx pattern = PATTERN (next);
3641 
3642 	      /* If it is a reversed fp conditional branch (e.g. one using add,tr)
3643 		 and CCFP dies, then reverse our conditional and the branch
3644 		 to avoid the add,tr.  */
3645 	      if (GET_CODE (pattern) == SET
3646 		  && SET_DEST (pattern) == pc_rtx
3647 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3648 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3649 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3650 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3651 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3652 		  && (fcmp_count == fbranch_count
3653 		      || (check_notes
3654 			  && find_regno_note (next, REG_DEAD, 0))))
3655 		{
3656 		  /* Reverse the branch.  */
3657 		  tmp = XEXP (SET_SRC (pattern), 1);
3658 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3659 		  XEXP (SET_SRC (pattern), 2) = tmp;
3660 		  INSN_CODE (next) = -1;
3661 
3662 		  /* Reverse our condition.  */
3663 		  tmp = PATTERN (insn);
3664 		  PUT_CODE (XEXP (tmp, 1),
3665 			    (reverse_condition_maybe_unordered
3666 			     (GET_CODE (XEXP (tmp, 1)))));
3667 		}
3668 	    }
3669 	}
3670     }
3671 
3672   pass = !pass;
3673 
3674 }
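
/* For illustration: given an fcmp that sets CCFP with code LT,
   followed by a branch of the form
   (set pc (if_then_else (ne CCFP 0) (pc) (label))), the second loop
   above swaps the branch arms and rewrites the compare to UNGE via
   reverse_condition_maybe_unordered, so the branch no longer needs
   the add,tr fall-through.  */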
3675 
3676 /* You may have trouble believing this, but this is the 32 bit HP-PA
3677    stack layout.  Wow.
3678 
3679    Offset		Contents
3680 
3681    Variable arguments	(optional; any number may be allocated)
3682 
3683    SP-(4*(N+9))		arg word N
3684    	:		    :
3685       SP-56		arg word 5
3686       SP-52		arg word 4
3687 
3688    Fixed arguments	(must be allocated; may remain unused)
3689 
3690       SP-48		arg word 3
3691       SP-44		arg word 2
3692       SP-40		arg word 1
3693       SP-36		arg word 0
3694 
3695    Frame Marker
3696 
3697       SP-32		External Data Pointer (DP)
3698       SP-28		External sr4
3699       SP-24		External/stub RP (RP')
3700       SP-20		Current RP
3701       SP-16		Static Link
3702       SP-12		Clean up
3703       SP-8		Calling Stub RP (RP'')
3704       SP-4		Previous SP
3705 
3706    Top of Frame
3707 
3708       SP-0		Stack Pointer (points to next available address)
3709 
3710 */
3711 
3712 /* This function saves registers as follows.  Registers marked with ' are
3713    this function's registers (as opposed to the previous function's).
3714    If a frame_pointer isn't needed, r4 is saved as a general register;
3715    the space for the frame pointer is still allocated, though, to keep
3716    things simple.
3717 
3718 
3719    Top of Frame
3720 
3721        SP (FP')		Previous FP
3722        SP + 4		Alignment filler (sigh)
3723        SP + 8		Space for locals reserved here.
3724        .
3725        .
3726        .
3727        SP + n		All call-saved registers used.
3728        .
3729        .
3730        .
3731        SP + o		All call-saved fp registers used.
3732        .
3733        .
3734        .
3735        SP + p (SP')	points to next available address.
3736 
3737 */
3738 
3739 /* Global variables set by output_function_prologue().  */
3740 /* Size of frame.  Need to know this to emit return insns from
3741    leaf procedures.  */
3742 static HOST_WIDE_INT actual_fsize, local_fsize;
3743 static int save_fregs;
3744 
3745 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3746    Handle case where DISP > 8k by using the add_high_const patterns.
3747 
3748    Note that in the DISP > 8k case, we will leave the high part of the address
3749    in %r1.  There is code in pa_expand_{prologue,epilogue} that knows this.  */
3750 
3751 static void
3752 store_reg (int reg, HOST_WIDE_INT disp, int base)
3753 {
3754   rtx dest, src, basereg;
3755   rtx_insn *insn;
3756 
3757   src = gen_rtx_REG (word_mode, reg);
3758   basereg = gen_rtx_REG (Pmode, base);
3759   if (VAL_14_BITS_P (disp))
3760     {
3761       dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3762       insn = emit_move_insn (dest, src);
3763     }
3764   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3765     {
3766       rtx delta = GEN_INT (disp);
3767       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3768 
3769       emit_move_insn (tmpreg, delta);
3770       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3771       if (DO_FRAME_NOTES)
3772 	{
3773 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3774 			gen_rtx_SET (tmpreg,
3775 				     gen_rtx_PLUS (Pmode, basereg, delta)));
3776 	  RTX_FRAME_RELATED_P (insn) = 1;
3777 	}
3778       dest = gen_rtx_MEM (word_mode, tmpreg);
3779       insn = emit_move_insn (dest, src);
3780     }
3781   else
3782     {
3783       rtx delta = GEN_INT (disp);
3784       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3785       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3786 
3787       emit_move_insn (tmpreg, high);
3788       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3789       insn = emit_move_insn (dest, src);
3790       if (DO_FRAME_NOTES)
3791 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3792 		      gen_rtx_SET (gen_rtx_MEM (word_mode,
3793 						gen_rtx_PLUS (word_mode,
3794 							      basereg,
3795 							      delta)),
3796 				   src));
3797     }
3798 
3799   if (DO_FRAME_NOTES)
3800     RTX_FRAME_RELATED_P (insn) = 1;
3801 }
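
/* For illustration: store_reg (3, 12288, STACK_POINTER_REGNUM) on a
   32-bit target takes the final arm above, since 12288 does not fit
   in 14 bits, and emits the equivalent of "addil L'12288,%r30"
   followed by "stw %r3,R'12288(%r1)".  */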
3802 
3803 /* Emit RTL to store REG at the memory location specified by BASE and then
3804    add MOD to BASE.  MOD must be <= 8k.  */
3805 
3806 static void
3807 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3808 {
3809   rtx basereg, srcreg, delta;
3810   rtx_insn *insn;
3811 
3812   gcc_assert (VAL_14_BITS_P (mod));
3813 
3814   basereg = gen_rtx_REG (Pmode, base);
3815   srcreg = gen_rtx_REG (word_mode, reg);
3816   delta = GEN_INT (mod);
3817 
3818   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3819   if (DO_FRAME_NOTES)
3820     {
3821       RTX_FRAME_RELATED_P (insn) = 1;
3822 
3823       /* RTX_FRAME_RELATED_P must be set on each frame related set
3824 	 in a parallel with more than one element.  */
3825       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3826       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3827     }
3828 }
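
/* For illustration: store_reg_modify (STACK_POINTER_REGNUM, 1, 64)
   on a 32-bit target emits the equivalent of "stwm %r1,64(%r30)",
   storing %r1 at *sp and bumping the stack pointer by 64 in a single
   insn.  */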
3829 
3830 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3831    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3832    whether to add a frame note or not.
3833 
3834    In the DISP > 8k case, we leave the high part of the address in %r1.
3835    There is code in pa_expand_{prologue,epilogue} that knows about this.  */
3836 
3837 static void
3838 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3839 {
3840   rtx_insn *insn;
3841 
3842   if (VAL_14_BITS_P (disp))
3843     {
3844       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3845 			     plus_constant (Pmode,
3846 					    gen_rtx_REG (Pmode, base), disp));
3847     }
3848   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3849     {
3850       rtx basereg = gen_rtx_REG (Pmode, base);
3851       rtx delta = GEN_INT (disp);
3852       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3853 
3854       emit_move_insn (tmpreg, delta);
3855       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3856 			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3857       if (DO_FRAME_NOTES)
3858 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3859 		      gen_rtx_SET (tmpreg,
3860 				   gen_rtx_PLUS (Pmode, basereg, delta)));
3861     }
3862   else
3863     {
3864       rtx basereg = gen_rtx_REG (Pmode, base);
3865       rtx delta = GEN_INT (disp);
3866       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3867 
3868       emit_move_insn (tmpreg,
3869 		      gen_rtx_PLUS (Pmode, basereg,
3870 				    gen_rtx_HIGH (Pmode, delta)));
3871       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3872 			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3873     }
3874 
3875   if (DO_FRAME_NOTES && note)
3876     RTX_FRAME_RELATED_P (insn) = 1;
3877 }
3878 
3879 HOST_WIDE_INT
3880 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3881 {
3882   int freg_saved = 0;
3883   int i, j;
3884 
3885   /* The code in pa_expand_prologue and pa_expand_epilogue must
3886      be consistent with the rounding and size calculation done here.
3887      Change them at the same time.  */
3888 
3889   /* We do our own stack alignment.  First, round the size of the
3890      stack locals up to a word boundary.  */
3891   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3892 
3893   /* Space for previous frame pointer + filler.  If any frame is
3894      allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET.  We
3895      waste some space here for the sake of HP compatibility.  The
3896      first slot is only used when the frame pointer is needed.  */
3897   if (size || frame_pointer_needed)
3898     size += pa_starting_frame_offset ();
3899 
3900   /* If the current function calls __builtin_eh_return, then we need
3901      to allocate stack space for registers that will hold data for
3902      the exception handler.  */
3903   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3904     {
3905       unsigned int i;
3906 
3907       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3908 	continue;
3909       size += i * UNITS_PER_WORD;
3910     }
3911 
3912   /* Account for space used by the callee general register saves.  */
3913   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3914     if (df_regs_ever_live_p (i))
3915       size += UNITS_PER_WORD;
3916 
3917   /* Account for space used by the callee floating point register saves.  */
3918   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3919     if (df_regs_ever_live_p (i)
3920 	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3921       {
3922 	freg_saved = 1;
3923 
3924 	/* We always save both halves of the FP register, so always
3925 	   increment the frame size by 8 bytes.  */
3926 	size += 8;
3927       }
3928 
3929   /* If any of the floating registers are saved, account for the
3930      alignment needed for the floating point register save block.  */
3931   if (freg_saved)
3932     {
3933       size = (size + 7) & ~7;
3934       if (fregs_live)
3935 	*fregs_live = 1;
3936     }
3937 
3938   /* The various ABIs include space for the outgoing parameters in the
3939      size of the current function's stack frame.  We don't need to align
3940      for the outgoing arguments as their alignment is set by the final
3941      rounding for the frame as a whole.  */
3942   size += crtl->outgoing_args_size;
3943 
3944   /* Allocate space for the fixed frame marker.  This space must be
3945      allocated for any function that makes calls or allocates
3946      stack space.  */
3947   if (!crtl->is_leaf || size)
3948     size += TARGET_64BIT ? 48 : 32;
3949 
3950   /* Finally, round to the preferred stack boundary.  */
3951   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3952 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3953 }
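
/* For illustration (assuming the 32-bit values of 8 for
   pa_starting_frame_offset () and a 64-byte preferred stack
   boundary): a non-leaf function with 40 bytes of locals, two callee
   general register saves, no FP saves and 16 bytes of outgoing
   arguments needs 40 + 8 + 2*4 + 16 + 32 = 104 bytes, which rounds
   up to a 128-byte frame.  */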
3954 
3955 /* Output function label, and associated .PROC and .CALLINFO statements.  */
3956 
3957 void
3958 pa_output_function_label (FILE *file)
3959 {
3960   /* The function's label and associated .PROC must never be
3961      separated and must be output *after* any profiling declarations
3962      to avoid changing spaces/subspaces within a procedure.  */
3963   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3964   fputs ("\t.PROC\n", file);
3965 
3966   /* pa_expand_prologue does the dirty work now.  We just need
3967      to output the assembler directives which denote the start
3968      of a function.  */
3969   fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3970   if (crtl->is_leaf)
3971     fputs (",NO_CALLS", file);
3972   else
3973     fputs (",CALLS", file);
3974   if (rp_saved)
3975     fputs (",SAVE_RP", file);
3976 
3977   /* The SAVE_SP flag is used to indicate that register %r3 is stored
3978      at the beginning of the frame and that it is used as the frame
3979      pointer for the frame.  We do this because our current frame
3980      layout doesn't conform to that specified in the HP runtime
3981      documentation and we need a way to indicate to programs such as
3982      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3983      isn't used by HP compilers but is supported by the assembler.
3984      However, SAVE_SP is supposed to indicate that the previous stack
3985      pointer has been saved in the frame marker.  */
3986   if (frame_pointer_needed)
3987     fputs (",SAVE_SP", file);
3988 
3989   /* Pass on information about the number of callee register saves
3990      performed in the prologue.
3991 
3992      The compiler is supposed to pass the highest register number
3993      saved, the assembler then has to adjust that number before
3994      entering it into the unwind descriptor (to account for any
3995      caller saved registers with lower register numbers than the
3996      first callee saved register).  */
3997   if (gr_saved)
3998     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3999 
4000   if (fr_saved)
4001     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4002 
4003   fputs ("\n\t.ENTRY\n", file);
4004 }
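
/* For illustration: a non-leaf function with a 128-byte frame that
   saves RP, needs a frame pointer and saves callee registers %r3-%r5
   would produce directives along the lines of:

	.PROC
	.CALLINFO FRAME=128,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=5
	.ENTRY
 */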
4005 
4006 /* Output function prologue.  */
4007 
4008 static void
4009 pa_output_function_prologue (FILE *file)
4010 {
4011   pa_output_function_label (file);
4012   remove_useless_addtr_insns (0);
4013 }
4014 
4015 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux.  */
4016 
4017 static void
4018 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
4019 {
4020   remove_useless_addtr_insns (0);
4021 }
4022 
4023 void
4024 pa_expand_prologue (void)
4025 {
4026   int merge_sp_adjust_with_store = 0;
4027   HOST_WIDE_INT size = get_frame_size ();
4028   HOST_WIDE_INT offset;
4029   int i;
4030   rtx tmpreg;
4031   rtx_insn *insn;
4032 
4033   gr_saved = 0;
4034   fr_saved = 0;
4035   save_fregs = 0;
4036 
4037   /* Compute total size for frame pointer, filler, locals and rounding to
4038      the next word boundary.  Similar code appears in pa_compute_frame_size
4039      and must be changed in tandem with this code.  */
4040   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4041   if (local_fsize || frame_pointer_needed)
4042     local_fsize += pa_starting_frame_offset ();
4043 
4044   actual_fsize = pa_compute_frame_size (size, &save_fregs);
4045   if (flag_stack_usage_info)
4046     current_function_static_stack_size = actual_fsize;
4047 
4048   /* Compute a few things we will use often.  */
4049   tmpreg = gen_rtx_REG (word_mode, 1);
4050 
4051   /* Save RP first.  The calling conventions manual states RP will
4052      always be stored into the caller's frame at sp - 20 or sp - 16
4053      depending on which ABI is in use.  */
4054   if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
4055     {
4056       store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
4057       rp_saved = true;
4058     }
4059   else
4060     rp_saved = false;
4061 
4062   /* Allocate the local frame and set up the frame pointer if needed.  */
4063   if (actual_fsize != 0)
4064     {
4065       if (frame_pointer_needed)
4066 	{
4067 	  /* Copy the old frame pointer temporarily into %r1.  Set up the
4068 	     new stack pointer, then store away the saved old frame pointer
4069 	     into the stack at sp and at the same time update the stack
4070 	     pointer by actual_fsize bytes.  There are two versions: the
4071 	     first handles small (<8k) frames, the second handles large
4072 	     (>=8k) frames.  */
4073 	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
4074 	  if (DO_FRAME_NOTES)
4075 	    RTX_FRAME_RELATED_P (insn) = 1;
4076 
4077 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4078 	  if (DO_FRAME_NOTES)
4079 	    RTX_FRAME_RELATED_P (insn) = 1;
4080 
4081 	  if (VAL_14_BITS_P (actual_fsize))
4082 	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
4083 	  else
4084 	    {
4085 	      /* It is incorrect to store the saved frame pointer at *sp,
4086 		 then increment sp (writes beyond the current stack boundary).
4087 
4088 		 So instead use stwm to store at *sp and post-increment the
4089 		 stack pointer as an atomic operation.  Then increment sp to
4090 		 finish allocating the new frame.  */
4091 	      HOST_WIDE_INT adjust1 = 8192 - 64;
4092 	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
4093 
4094 	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
4095 	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4096 			      adjust2, 1);
4097 	    }
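
	  /* For illustration: with actual_fsize = 20000, adjust1 is
	     8192 - 64 = 8128, which fits the 14-bit displacement of
	     the stwm, and adjust2 = 11872 completes the
	     allocation.  */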
4098 
4099 	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
4100 	     we need to store the previous stack pointer (frame pointer)
4101 	     into the frame marker on targets that use the HP unwind
4102 	     library.  This allows the HP unwind library to be used to
4103 	     unwind GCC frames.  However, we are not fully compatible
4104 	     with the HP library because our frame layout differs from
4105 	     that specified in the HP runtime specification.
4106 
4107 	     We don't want a frame note on this instruction as the frame
4108 	     marker moves during dynamic stack allocation.
4109 
4110 	     This instruction also serves as a blockage to prevent
4111 	     register spills from being scheduled before the stack
4112 	     pointer is raised.  This is necessary as we store
4113 	     registers using the frame pointer as a base register,
4114 	     and the frame pointer is set before sp is raised.  */
4115 	  if (TARGET_HPUX_UNWIND_LIBRARY)
4116 	    {
4117 	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4118 				       GEN_INT (TARGET_64BIT ? -8 : -4));
4119 
4120 	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
4121 			      hard_frame_pointer_rtx);
4122 	    }
4123 	  else
4124 	    emit_insn (gen_blockage ());
4125 	}
4126       /* No frame pointer needed.  */
4127       else
4128 	{
4129 	  /* In some cases we can perform the first callee register save
4130 	     and allocate the stack frame at the same time.  If so, just
4131 	     make a note of it and defer allocating the frame until saving
4132 	     the callee registers.  */
4133 	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4134 	    merge_sp_adjust_with_store = 1;
4135 	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
4136 	     bytes.  */
4137 	  else
4138 	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4139 			    actual_fsize, 1);
4140 	}
4141     }
4142 
4143   /* Normal register save.
4144 
4145      Do not save the frame pointer in the frame_pointer_needed case.  It
4146      was done earlier.  */
4147   if (frame_pointer_needed)
4148     {
4149       offset = local_fsize;
4150 
4151       /* Saving the EH return data registers in the frame is the simplest
4152 	 way to get the frame unwind information emitted.  We put them
4153 	 just before the general registers.  */
4154       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4155 	{
4156 	  unsigned int i, regno;
4157 
4158 	  for (i = 0; ; ++i)
4159 	    {
4160 	      regno = EH_RETURN_DATA_REGNO (i);
4161 	      if (regno == INVALID_REGNUM)
4162 		break;
4163 
4164 	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4165 	      offset += UNITS_PER_WORD;
4166 	    }
4167 	}
4168 
4169       for (i = 18; i >= 4; i--)
4170 	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4171 	  {
4172 	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4173 	    offset += UNITS_PER_WORD;
4174 	    gr_saved++;
4175 	  }
4176       /* Account for %r3 which is saved in a special place.  */
4177       gr_saved++;
4178     }
4179   /* No frame pointer needed.  */
4180   else
4181     {
4182       offset = local_fsize - actual_fsize;
4183 
4184       /* Saving the EH return data registers in the frame is the simplest
4185          way to get the frame unwind information emitted.  */
4186       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4187 	{
4188 	  unsigned int i, regno;
4189 
4190 	  for (i = 0; ; ++i)
4191 	    {
4192 	      regno = EH_RETURN_DATA_REGNO (i);
4193 	      if (regno == INVALID_REGNUM)
4194 		break;
4195 
4196 	      /* If merge_sp_adjust_with_store is nonzero, then we can
4197 		 optimize the first save.  */
4198 	      if (merge_sp_adjust_with_store)
4199 		{
4200 		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4201 		  merge_sp_adjust_with_store = 0;
4202 		}
4203 	      else
4204 		store_reg (regno, offset, STACK_POINTER_REGNUM);
4205 	      offset += UNITS_PER_WORD;
4206 	    }
4207 	}
4208 
4209       for (i = 18; i >= 3; i--)
4210 	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4211 	  {
4212 	    /* If merge_sp_adjust_with_store is nonzero, then we can
4213 	       optimize the first GR save.  */
4214 	    if (merge_sp_adjust_with_store)
4215 	      {
4216 		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4217 		merge_sp_adjust_with_store = 0;
4218 	      }
4219 	    else
4220 	      store_reg (i, offset, STACK_POINTER_REGNUM);
4221 	    offset += UNITS_PER_WORD;
4222 	    gr_saved++;
4223 	  }
4224 
4225       /* If we wanted to merge the SP adjustment with a GR save, but we never
4226 	 did any GR saves, then just emit the adjustment here.  */
4227       if (merge_sp_adjust_with_store)
4228 	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4229 			actual_fsize, 1);
4230     }
4231 
4232   /* The hppa calling conventions say that %r19, the pic offset
4233      register, is saved at sp - 32 (in this function's frame)
4234      when generating PIC code.  FIXME:  What is the correct thing
4235      to do for functions which make no calls and allocate no
4236      frame?  Do we need to allocate a frame, or can we just omit
4237      the save?   For now we'll just omit the save.
4238 
4239      We don't want a note on this insn as the frame marker can
4240      move if there is a dynamic stack allocation.  */
4241   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4242     {
4243       rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4244 
4245       emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4246 
4247     }
4248 
4249   /* Align pointer properly (doubleword boundary).  */
4250   offset = (offset + 7) & ~7;
4251 
4252   /* Floating point register store.  */
4253   if (save_fregs)
4254     {
4255       rtx base;
4256 
4257       /* First get the frame or stack pointer to the start of the FP register
4258 	 save area.  */
4259       if (frame_pointer_needed)
4260 	{
4261 	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4262 	  base = hard_frame_pointer_rtx;
4263 	}
4264       else
4265 	{
4266 	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4267 	  base = stack_pointer_rtx;
4268 	}
4269 
4270       /* Now actually save the FP registers.  */
4271       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4272 	{
4273 	  if (df_regs_ever_live_p (i)
4274 	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4275 	    {
4276 	      rtx addr, reg;
4277 	      rtx_insn *insn;
4278 	      addr = gen_rtx_MEM (DFmode,
4279 				  gen_rtx_POST_INC (word_mode, tmpreg));
4280 	      reg = gen_rtx_REG (DFmode, i);
4281 	      insn = emit_move_insn (addr, reg);
4282 	      if (DO_FRAME_NOTES)
4283 		{
4284 		  RTX_FRAME_RELATED_P (insn) = 1;
4285 		  if (TARGET_64BIT)
4286 		    {
4287 		      rtx mem = gen_rtx_MEM (DFmode,
4288 					     plus_constant (Pmode, base,
4289 							    offset));
4290 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4291 				    gen_rtx_SET (mem, reg));
4292 		    }
4293 		  else
4294 		    {
4295 		      rtx meml = gen_rtx_MEM (SFmode,
4296 					      plus_constant (Pmode, base,
4297 							     offset));
4298 		      rtx memr = gen_rtx_MEM (SFmode,
4299 					      plus_constant (Pmode, base,
4300 							     offset + 4));
4301 		      rtx regl = gen_rtx_REG (SFmode, i);
4302 		      rtx regr = gen_rtx_REG (SFmode, i + 1);
4303 		      rtx setl = gen_rtx_SET (meml, regl);
4304 		      rtx setr = gen_rtx_SET (memr, regr);
4305 		      rtvec vec;
4306 
4307 		      RTX_FRAME_RELATED_P (setl) = 1;
4308 		      RTX_FRAME_RELATED_P (setr) = 1;
4309 		      vec = gen_rtvec (2, setl, setr);
4310 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4311 				    gen_rtx_SEQUENCE (VOIDmode, vec));
4312 		    }
4313 		}
4314 	      offset += GET_MODE_SIZE (DFmode);
4315 	      fr_saved++;
4316 	    }
4317 	}
4318     }
4319 }
4320 
4321 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4322    Handle case where DISP > 8k by using the add_high_const patterns.  */
4323 
4324 static void
4325 load_reg (int reg, HOST_WIDE_INT disp, int base)
4326 {
4327   rtx dest = gen_rtx_REG (word_mode, reg);
4328   rtx basereg = gen_rtx_REG (Pmode, base);
4329   rtx src;
4330 
4331   if (VAL_14_BITS_P (disp))
4332     src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4333   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4334     {
4335       rtx delta = GEN_INT (disp);
4336       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4337 
4338       emit_move_insn (tmpreg, delta);
4339       if (TARGET_DISABLE_INDEXING)
4340 	{
4341 	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4342 	  src = gen_rtx_MEM (word_mode, tmpreg);
4343 	}
4344       else
4345 	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4346     }
4347   else
4348     {
4349       rtx delta = GEN_INT (disp);
4350       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4351       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4352 
4353       emit_move_insn (tmpreg, high);
4354       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4355     }
4356 
4357   emit_move_insn (dest, src);
4358 }
4359 
4360 /* Update the total code bytes output to the text section.  */
4361 
4362 static void
4363 update_total_code_bytes (unsigned int nbytes)
4364 {
4365   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4366       && !IN_NAMED_SECTION_P (cfun->decl))
4367     {
4368       unsigned int old_total = total_code_bytes;
4369 
4370       total_code_bytes += nbytes;
4371 
4372       /* Be prepared to handle overflows.  */
4373       if (old_total > total_code_bytes)
4374         total_code_bytes = UINT_MAX;
4375     }
4376 }
4377 
4378 /* This function generates the assembly code for function exit.
4379    Args are as for output_function_prologue ().
4380 
4381    The function epilogue should not depend on the current stack
4382    pointer!  It should use the frame pointer only.  This is mandatory
4383    because of alloca; we also take advantage of it to omit stack
4384    adjustments before returning.  */
4385 
4386 static void
4387 pa_output_function_epilogue (FILE *file)
4388 {
4389   rtx_insn *insn = get_last_insn ();
4390   bool extra_nop;
4391 
4392   /* pa_expand_epilogue does the dirty work now.  We just need
4393      to output the assembler directives which denote the end
4394      of a function.
4395 
4396      To make debuggers happy, emit a nop if the epilogue was completely
4397      eliminated due to a volatile call as the last insn in the
4398      current function.  That way the return address (in %r2) will
4399      always point to a valid instruction in the current function.  */
4400 
4401   /* Get the last real insn.  */
4402   if (NOTE_P (insn))
4403     insn = prev_real_insn (insn);
4404 
4405   /* If it is a sequence, then look inside.  */
4406   if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4407     insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4408 
4409   /* If insn is a CALL_INSN, then it must be a call to a volatile
4410      function (otherwise there would be epilogue insns).  */
4411   if (insn && CALL_P (insn))
4412     {
4413       fputs ("\tnop\n", file);
4414       extra_nop = true;
4415     }
4416   else
4417     extra_nop = false;
4418 
4419   fputs ("\t.EXIT\n\t.PROCEND\n", file);
4420 
4421   if (TARGET_SOM && TARGET_GAS)
4422     {
4423       /* We are done with this subspace except possibly for some additional
4424 	 debug information.  Forget that we are in this subspace to ensure
4425 	 that the next function is output in its own subspace.  */
4426       in_section = NULL;
4427       cfun->machine->in_nsubspa = 2;
4428     }
4429 
4430   /* Thunks do their own insn accounting.  */
4431   if (cfun->is_thunk)
4432     return;
4433 
4434   if (INSN_ADDRESSES_SET_P ())
4435     {
4436       last_address = extra_nop ? 4 : 0;
4437       insn = get_last_nonnote_insn ();
4438       if (insn)
4439 	{
4440 	  last_address += INSN_ADDRESSES (INSN_UID (insn));
4441 	  if (INSN_P (insn))
4442 	    last_address += insn_default_length (insn);
4443 	}
4444       last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4445 		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4446     }
4447   else
4448     last_address = UINT_MAX;
4449 
4450   /* Finally, update the total number of code bytes output so far.  */
4451   update_total_code_bytes (last_address);
4452 }
4453 
4454 void
4455 pa_expand_epilogue (void)
4456 {
4457   rtx tmpreg;
4458   HOST_WIDE_INT offset;
4459   HOST_WIDE_INT ret_off = 0;
4460   int i;
4461   int merge_sp_adjust_with_load = 0;
4462 
4463   /* We will use this often.  */
4464   tmpreg = gen_rtx_REG (word_mode, 1);
4465 
4466   /* Try to restore RP early to avoid load/use interlocks when
4467      RP gets used in the return (bv) instruction.  This appears to still
4468      be necessary even when we schedule the prologue and epilogue.  */
4469   if (rp_saved)
4470     {
4471       ret_off = TARGET_64BIT ? -16 : -20;
4472       if (frame_pointer_needed)
4473 	{
4474 	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4475 	  ret_off = 0;
4476 	}
4477       else
4478 	{
4479 	  /* No frame pointer, and stack is smaller than 8k.  */
4480 	  if (VAL_14_BITS_P (ret_off - actual_fsize))
4481 	    {
4482 	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4483 	      ret_off = 0;
4484 	    }
4485 	}
4486     }
4487 
4488   /* General register restores.  */
4489   if (frame_pointer_needed)
4490     {
4491       offset = local_fsize;
4492 
4493       /* If the current function calls __builtin_eh_return, then we need
4494          to restore the saved EH data registers.  */
4495       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4496 	{
4497 	  unsigned int i, regno;
4498 
4499 	  for (i = 0; ; ++i)
4500 	    {
4501 	      regno = EH_RETURN_DATA_REGNO (i);
4502 	      if (regno == INVALID_REGNUM)
4503 		break;
4504 
4505 	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4506 	      offset += UNITS_PER_WORD;
4507 	    }
4508 	}
4509 
4510       for (i = 18; i >= 4; i--)
4511 	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4512 	  {
4513 	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4514 	    offset += UNITS_PER_WORD;
4515 	  }
4516     }
4517   else
4518     {
4519       offset = local_fsize - actual_fsize;
4520 
4521       /* If the current function calls __builtin_eh_return, then we need
4522          to restore the saved EH data registers.  */
4523       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4524 	{
4525 	  unsigned int i, regno;
4526 
4527 	  for (i = 0; ; ++i)
4528 	    {
4529 	      regno = EH_RETURN_DATA_REGNO (i);
4530 	      if (regno == INVALID_REGNUM)
4531 		break;
4532 
4533 	      /* Only for the first load.
4534 	         merge_sp_adjust_with_load holds the register load
4535 	         with which we will merge the sp adjustment.  */
4536 	      if (merge_sp_adjust_with_load == 0
4537 		  && local_fsize == 0
4538 		  && VAL_14_BITS_P (-actual_fsize))
4539 	        merge_sp_adjust_with_load = regno;
4540 	      else
4541 		load_reg (regno, offset, STACK_POINTER_REGNUM);
4542 	      offset += UNITS_PER_WORD;
4543 	    }
4544 	}
4545 
4546       for (i = 18; i >= 3; i--)
4547 	{
4548 	  if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4549 	    {
4550 	      /* Only for the first load.
4551 	         merge_sp_adjust_with_load holds the register load
4552 	         with which we will merge the sp adjustment.  */
4553 	      if (merge_sp_adjust_with_load == 0
4554 		  && local_fsize == 0
4555 		  && VAL_14_BITS_P (-actual_fsize))
4556 	        merge_sp_adjust_with_load = i;
4557 	      else
4558 		load_reg (i, offset, STACK_POINTER_REGNUM);
4559 	      offset += UNITS_PER_WORD;
4560 	    }
4561 	}
4562     }
4563 
4564   /* Align pointer properly (doubleword boundary).  */
4565   offset = (offset + 7) & ~7;
4566 
4567   /* FP register restores.  */
4568   if (save_fregs)
4569     {
4570       /* Adjust the register to index off of.  */
4571       if (frame_pointer_needed)
4572 	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4573       else
4574 	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4575 
4576       /* Actually do the restores now.  */
4577       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4578 	if (df_regs_ever_live_p (i)
4579 	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4580 	  {
4581 	    rtx src = gen_rtx_MEM (DFmode,
4582 				   gen_rtx_POST_INC (word_mode, tmpreg));
4583 	    rtx dest = gen_rtx_REG (DFmode, i);
4584 	    emit_move_insn (dest, src);
4585 	  }
4586     }
4587 
4588   /* Emit a blockage insn here to keep these insns from being moved to
4589      an earlier spot in the epilogue, or into the main instruction stream.
4590 
4591      This is necessary as we must not cut the stack back before all the
4592      restores are finished.  */
4593   emit_insn (gen_blockage ());
4594 
4595   /* Reset stack pointer (and possibly frame pointer).  The stack
4596      pointer is initially set to fp + 64 to avoid a race condition.  */
4597   if (frame_pointer_needed)
4598     {
4599       rtx delta = GEN_INT (-64);
4600 
4601       set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4602       emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4603 			       stack_pointer_rtx, delta));
4604     }
4605   /* If we were deferring a callee register restore, do it now.  */
4606   else if (merge_sp_adjust_with_load)
4607     {
4608       rtx delta = GEN_INT (-actual_fsize);
4609       rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4610 
4611       emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4612     }
4613   else if (actual_fsize != 0)
4614     set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4615 		    - actual_fsize, 0);
4616 
4617   /* If we haven't restored %r2 yet (no frame pointer, and a stack
4618      frame greater than 8k), do so now.  */
4619   if (ret_off != 0)
4620     load_reg (2, ret_off, STACK_POINTER_REGNUM);
4621 
4622   if (DO_FRAME_NOTES && crtl->calls_eh_return)
4623     {
4624       rtx sa = EH_RETURN_STACKADJ_RTX;
4625 
4626       emit_insn (gen_blockage ());
4627       emit_insn (TARGET_64BIT
4628 		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4629 		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4630     }
4631 }
4632 
4633 bool
4634 pa_can_use_return_insn (void)
4635 {
4636   if (!reload_completed)
4637     return false;
4638 
4639   if (frame_pointer_needed)
4640     return false;
4641 
4642   if (df_regs_ever_live_p (2))
4643     return false;
4644 
4645   if (crtl->profile)
4646     return false;
4647 
4648   return pa_compute_frame_size (get_frame_size (), 0) == 0;
4649 }
4650 
4651 rtx
4652 hppa_pic_save_rtx (void)
4653 {
4654   return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4655 }
4656 
4657 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4658 #define NO_DEFERRED_PROFILE_COUNTERS 0
4659 #endif
4660 
4661 
4662 /* Vector of funcdef numbers.  */
4663 static vec<int> funcdef_nos;
4664 
4665 /* Output deferred profile counters.  */
4666 static void
4667 output_deferred_profile_counters (void)
4668 {
4669   unsigned int i;
4670   int align, n;
4671 
4672   if (funcdef_nos.is_empty ())
4673    return;
4674 
4675   switch_to_section (data_section);
4676   align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4677   ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4678 
4679   for (i = 0; funcdef_nos.iterate (i, &n); i++)
4680     {
4681       targetm.asm_out.internal_label (asm_out_file, "LP", n);
4682       assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4683     }
4684 
4685   funcdef_nos.release ();
4686 }
4687 
4688 void
4689 hppa_profile_hook (int label_no)
4690 {
4691   rtx_code_label *label_rtx = gen_label_rtx ();
4692   int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4693   rtx arg_bytes, begin_label_rtx, mcount, sym;
4694   rtx_insn *call_insn;
4695   char begin_label_name[16];
4696   bool use_mcount_pcrel_call;
4697 
4698   /* Set up call destination.  */
4699   sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4700   pa_encode_label (sym);
4701   mcount = gen_rtx_MEM (Pmode, sym);
4702 
4703   /* If we can reach _mcount with a pc-relative call, we can optimize
4704      loading the address of the current function.  This requires linker
4705      long branch stub support.  */
4706   if (!TARGET_PORTABLE_RUNTIME
4707       && !TARGET_LONG_CALLS
4708       && (TARGET_SOM || flag_function_sections))
4709     use_mcount_pcrel_call = TRUE;
4710   else
4711     use_mcount_pcrel_call = FALSE;
4712 
4713   ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4714 			       label_no);
4715   begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4716 
4717   emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4718 
4719   if (!use_mcount_pcrel_call)
4720     {
4721       /* The address of the function is loaded into %r25 with an instruction-
4722 	 relative sequence that avoids the use of relocations.  We use SImode
4723 	 for the address of the function in both 32 and 64-bit code to avoid
4724 	 having to provide DImode versions of the lcla2 pattern.  */
4725       if (TARGET_PA_20)
4726 	emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4727       else
4728 	emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4729     }
4730 
4731   if (!NO_DEFERRED_PROFILE_COUNTERS)
4732     {
4733       rtx count_label_rtx, addr, r24;
4734       char count_label_name[16];
4735 
4736       funcdef_nos.safe_push (label_no);
4737       ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4738       count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4739 					    ggc_strdup (count_label_name));
4740 
4741       addr = force_reg (Pmode, count_label_rtx);
4742       r24 = gen_rtx_REG (Pmode, 24);
4743       emit_move_insn (r24, addr);
4744 
4745       arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4746       if (use_mcount_pcrel_call)
4747 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4748 						     begin_label_rtx));
4749       else
4750 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4751 
4752       use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4753     }
4754   else
4755     {
4756       arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4757       if (use_mcount_pcrel_call)
4758 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4759 						     begin_label_rtx));
4760       else
4761 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4762     }
4763 
4764   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4765   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4766 
4767   /* Indicate the _mcount call cannot throw, nor will it execute a
4768      non-local goto.  */
4769   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4770 
4771   /* Allocate space for fixed arguments.  */
4772   if (reg_parm_stack_space > crtl->outgoing_args_size)
4773     crtl->outgoing_args_size = reg_parm_stack_space;
4774 }
4775 
4776 /* Fetch the return address for the frame COUNT steps up from
4777    the current frame, after the prologue.  FRAMEADDR is the
4778    frame pointer of the COUNT frame.
4779 
4780    We want to ignore any export stub remnants here.  To handle this,
4781    we examine the code at the return address, and if it is an export
4782    stub, we return a memory rtx for the stub return address stored
4783    at frame-24.
4784 
4785    The value returned is used in two different ways:
4786 
4787 	1. To find a function's caller.
4788 
4789 	2. To change the return address for a function.
4790 
4791    This function handles most instances of case 1; however, it will
4792    fail if there are two levels of stubs to execute on the return
4793    path.  The only way I believe that can happen is if the return value
4794    needs a parameter relocation, which never happens for C code.
4795 
4796    This function handles most instances of case 2; however, it will
4797    fail if we did not originally have stub code on the return path
4798    but will need stub code on the new return path.  This can happen if
4799    the caller & callee are both in the main program, but the new
4800    return location is in a shared library.  */
4801 
4802 rtx
4803 pa_return_addr_rtx (int count, rtx frameaddr)
4804 {
4805   rtx label;
4806   rtx rp;
4807   rtx saved_rp;
4808   rtx ins;
4809 
4810   /* The instruction stream at the return address of a PA1.X export stub is:
4811 
4812 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4813 	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4814 	0x00011820 | stub+16:  mtsp r1,sr0
4815 	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4816 
4817      0xe0400002 must be specified as -532676606 so that it won't be
4818      rejected as an invalid immediate operand on 64-bit hosts.
4819 
4820      The instruction stream at the return address of a PA2.0 export stub is:
4821 
4822 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4823 	0xe840d002 | stub+12:  bve,n (rp)
4824   */
4825 
4826   HOST_WIDE_INT insns[4];
4827   int i, len;
4828 
4829   if (count != 0)
4830     return NULL_RTX;
4831 
4832   rp = get_hard_reg_initial_val (Pmode, 2);
4833 
4834   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4835     return rp;
4836 
4837   /* If there is no export stub then just use the value saved from
4838      the return pointer register.  */
4839 
4840   saved_rp = gen_reg_rtx (Pmode);
4841   emit_move_insn (saved_rp, rp);
4842 
4843   /* Get pointer to the instruction stream.  We have to mask out the
4844      privilege level from the two low order bits of the return address
4845      pointer here so that ins will point to the start of the first
4846      instruction that would have been executed if we returned.  */
4847   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4848   label = gen_label_rtx ();
4849 
4850   if (TARGET_PA_20)
4851     {
4852       insns[0] = 0x4bc23fd1;
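      /* -398405630 is 0xe840d002, the "bve,n (rp)" word of the PA2.0
	 stub shown above, written in decimal so 64-bit hosts don't
	 reject it as an invalid immediate operand.  */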
4853       insns[1] = -398405630;
4854       len = 2;
4855     }
4856   else
4857     {
4858       insns[0] = 0x4bc23fd1;
4859       insns[1] = 0x004010a1;
4860       insns[2] = 0x00011820;
4861       insns[3] = -532676606;
4862       len = 4;
4863     }
4864 
4865   /* Check the instruction stream at the normal return address for the
4866      export stub.  If it is an export stub, then our return address is
4867      really in -24[frameaddr].  */
4868 
4869   for (i = 0; i < len; i++)
4870     {
4871       rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4872       rtx op1 = GEN_INT (insns[i]);
4873       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4874     }
4875 
4876   /* Here we know that our return address points to an export
4877      stub.  We don't want to return the address of the export stub,
4878      but rather the return address of the export stub.  That return
4879      address is stored at -24[frameaddr].  */
4880 
4881   emit_move_insn (saved_rp,
4882 		  gen_rtx_MEM (Pmode,
4883 			       memory_address (Pmode,
4884 					       plus_constant (Pmode, frameaddr,
4885 							      -24))));
4886 
4887   emit_label (label);
4888 
4889   return saved_rp;
4890 }
4891 
4892 void
4893 pa_emit_bcond_fp (rtx operands[])
4894 {
4895   enum rtx_code code = GET_CODE (operands[0]);
4896   rtx operand0 = operands[1];
4897   rtx operand1 = operands[2];
4898   rtx label = operands[3];
4899 
4900   emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4901 		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4902 
4903   emit_jump_insn (gen_rtx_SET (pc_rtx,
4904 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4905 						     gen_rtx_fmt_ee (NE,
4906 							      VOIDmode,
4907 							      gen_rtx_REG (CCFPmode, 0),
4908 							      const0_rtx),
4909 						     gen_rtx_LABEL_REF (VOIDmode, label),
4910 						     pc_rtx)));
4911 
4912 }
4913 
4914 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4915    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4916 
4917 static int
4918 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4919 		unsigned int)
4920 {
4921   enum attr_type attr_type;
4922 
4923   /* Don't adjust costs for a pa8000 chip; also, don't adjust any
4924      true dependencies, as they are now described with bypasses.  */
4925   if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4926     return cost;
4927 
4928   if (! recog_memoized (insn))
4929     return 0;
4930 
4931   attr_type = get_attr_type (insn);
4932 
4933   switch (dep_type)
4934     {
4935     case REG_DEP_ANTI:
4936       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4937 	 cycles later.  */
4938 
4939       if (attr_type == TYPE_FPLOAD)
4940 	{
4941 	  rtx pat = PATTERN (insn);
4942 	  rtx dep_pat = PATTERN (dep_insn);
4943 	  if (GET_CODE (pat) == PARALLEL)
4944 	    {
4945 	      /* This happens for the fldXs,mb patterns.  */
4946 	      pat = XVECEXP (pat, 0, 0);
4947 	    }
4948 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4949 	    /* If this happens, we have to extend this to schedule
4950 	       optimally.  Return 0 for now.  */
4951 	    return 0;
4952 
4953 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4954 	    {
4955 	      if (! recog_memoized (dep_insn))
4956 		return 0;
4957 	      switch (get_attr_type (dep_insn))
4958 		{
4959 		case TYPE_FPALU:
4960 		case TYPE_FPMULSGL:
4961 		case TYPE_FPMULDBL:
4962 		case TYPE_FPDIVSGL:
4963 		case TYPE_FPDIVDBL:
4964 		case TYPE_FPSQRTSGL:
4965 		case TYPE_FPSQRTDBL:
4966 		  /* A fpload can't be issued until one cycle before a
4967 		     preceding arithmetic operation has finished if
4968 		     the target of the fpload is any of the sources
4969 		     (or destination) of the arithmetic operation.  */
4970 		  return insn_default_latency (dep_insn) - 1;
4971 
4972 		default:
4973 		  return 0;
4974 		}
4975 	    }
4976 	}
4977       else if (attr_type == TYPE_FPALU)
4978 	{
4979 	  rtx pat = PATTERN (insn);
4980 	  rtx dep_pat = PATTERN (dep_insn);
4981 	  if (GET_CODE (pat) == PARALLEL)
4982 	    {
4983 	      /* This happens for the fldXs,mb patterns.  */
4984 	      pat = XVECEXP (pat, 0, 0);
4985 	    }
4986 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4987 	    /* If this happens, we have to extend this to schedule
4988 	       optimally.  Return 0 for now.  */
4989 	    return 0;
4990 
4991 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4992 	    {
4993 	      if (! recog_memoized (dep_insn))
4994 		return 0;
4995 	      switch (get_attr_type (dep_insn))
4996 		{
4997 		case TYPE_FPDIVSGL:
4998 		case TYPE_FPDIVDBL:
4999 		case TYPE_FPSQRTSGL:
5000 		case TYPE_FPSQRTDBL:
5001 		  /* An ALU flop can't be issued until two cycles before a
5002 		     preceding divide or sqrt operation has finished if
5003 		     the target of the ALU flop is any of the sources
5004 		     (or destination) of the divide or sqrt operation.  */
5005 		  return insn_default_latency (dep_insn) - 2;
5006 
5007 		default:
5008 		  return 0;
5009 		}
5010 	    }
5011 	}
5012 
5013       /* For other anti dependencies, the cost is 0.  */
5014       return 0;
5015 
5016     case REG_DEP_OUTPUT:
5017       /* Output dependency; DEP_INSN writes a register that INSN writes some
5018 	 cycles later.  */
5019       if (attr_type == TYPE_FPLOAD)
5020 	{
5021 	  rtx pat = PATTERN (insn);
5022 	  rtx dep_pat = PATTERN (dep_insn);
5023 	  if (GET_CODE (pat) == PARALLEL)
5024 	    {
5025 	      /* This happens for the fldXs,mb patterns.  */
5026 	      pat = XVECEXP (pat, 0, 0);
5027 	    }
5028 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5029 	    /* If this happens, we have to extend this to schedule
5030 	       optimally.  Return 0 for now.  */
5031 	    return 0;
5032 
5033 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5034 	    {
5035 	      if (! recog_memoized (dep_insn))
5036 		return 0;
5037 	      switch (get_attr_type (dep_insn))
5038 		{
5039 		case TYPE_FPALU:
5040 		case TYPE_FPMULSGL:
5041 		case TYPE_FPMULDBL:
5042 		case TYPE_FPDIVSGL:
5043 		case TYPE_FPDIVDBL:
5044 		case TYPE_FPSQRTSGL:
5045 		case TYPE_FPSQRTDBL:
5046 		  /* A fpload can't be issued until one cycle before a
5047 		     preceding arithmetic operation has finished if
5048 		     the target of the fpload is the destination of the
5049 		     arithmetic operation.
5050 
5051 		     Exception: For PA7100LC, PA7200 and PA7300, the cost
5052 		     is 3 cycles, unless they bundle together.   We also
5053 		     pay the penalty if the second insn is a fpload.  */
5054 		  return insn_default_latency (dep_insn) - 1;
5055 
5056 		default:
5057 		  return 0;
5058 		}
5059 	    }
5060 	}
5061       else if (attr_type == TYPE_FPALU)
5062 	{
5063 	  rtx pat = PATTERN (insn);
5064 	  rtx dep_pat = PATTERN (dep_insn);
5065 	  if (GET_CODE (pat) == PARALLEL)
5066 	    {
5067 	      /* This happens for the fldXs,mb patterns.  */
5068 	      pat = XVECEXP (pat, 0, 0);
5069 	    }
5070 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5071 	    /* If this happens, we have to extend this to schedule
5072 	       optimally.  Return 0 for now.  */
5073 	    return 0;
5074 
5075 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5076 	    {
5077 	      if (! recog_memoized (dep_insn))
5078 		return 0;
5079 	      switch (get_attr_type (dep_insn))
5080 		{
5081 		case TYPE_FPDIVSGL:
5082 		case TYPE_FPDIVDBL:
5083 		case TYPE_FPSQRTSGL:
5084 		case TYPE_FPSQRTDBL:
5085 		  /* An ALU flop can't be issued until two cycles before a
5086 		     preceding divide or sqrt operation has finished if
5087 		     the target of the ALU flop is also the target of
5088 		     the divide or sqrt operation.  */
5089 		  return insn_default_latency (dep_insn) - 2;
5090 
5091 		default:
5092 		  return 0;
5093 		}
5094 	    }
5095 	}
5096 
5097       /* For other output dependencies, the cost is 0.  */
5098       return 0;
5099 
5100     default:
5101       gcc_unreachable ();
5102     }
5103 }
5104 
5105 /* The 700 can only issue a single insn at a time.
5106    The 7XXX processors can issue two insns at a time.
5107    The 8000 can issue 4 insns at a time.  */
5108 static int
5109 pa_issue_rate (void)
5110 {
5111   switch (pa_cpu)
5112     {
5113     case PROCESSOR_700:		return 1;
5114     case PROCESSOR_7100:	return 2;
5115     case PROCESSOR_7100LC:	return 2;
5116     case PROCESSOR_7200:	return 2;
5117     case PROCESSOR_7300:	return 2;
5118     case PROCESSOR_8000:	return 4;
5119 
5120     default:
5121       gcc_unreachable ();
5122     }
5123 }
5124 
5125 
5126 
5127 /* Return any length plus adjustment needed by INSN which already has
5128    its length computed as LENGTH.   Return LENGTH if no adjustment is
5129    necessary.
5130 
5131    Also compute the length of an inline block move here as it is too
5132    complicated to express as a length attribute in pa.md.  */
5133 int
5134 pa_adjust_insn_length (rtx_insn *insn, int length)
5135 {
5136   rtx pat = PATTERN (insn);
5137 
5138   /* If length is negative or undefined, provide initial length.  */
5139   if ((unsigned int) length >= INT_MAX)
5140     {
5141       if (GET_CODE (pat) == SEQUENCE)
5142 	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5143 
5144       switch (get_attr_type (insn))
5145 	{
5146 	case TYPE_MILLI:
5147 	  length = pa_attr_length_millicode_call (insn);
5148 	  break;
5149 	case TYPE_CALL:
5150 	  length = pa_attr_length_call (insn, 0);
5151 	  break;
5152 	case TYPE_SIBCALL:
5153 	  length = pa_attr_length_call (insn, 1);
5154 	  break;
5155 	case TYPE_DYNCALL:
5156 	  length = pa_attr_length_indirect_call (insn);
5157 	  break;
5158 	case TYPE_SH_FUNC_ADRS:
5159 	  length = pa_attr_length_millicode_call (insn) + 20;
5160 	  break;
5161 	default:
5162 	  gcc_unreachable ();
5163 	}
5164     }
5165 
5166   /* Block move pattern.  */
5167   if (NONJUMP_INSN_P (insn)
5168       && GET_CODE (pat) == PARALLEL
5169       && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5170       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5171       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5172       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5173       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5174     length += compute_cpymem_length (insn) - 4;
5175   /* Block clear pattern.  */
5176   else if (NONJUMP_INSN_P (insn)
5177 	   && GET_CODE (pat) == PARALLEL
5178 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5179 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5180 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5181 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5182     length += compute_clrmem_length (insn) - 4;
5183   /* Conditional branch with an unfilled delay slot.  */
5184   else if (JUMP_P (insn) && ! simplejump_p (insn))
5185     {
5186       /* Adjust a short backwards conditional with an unfilled delay slot.  */
5187       if (GET_CODE (pat) == SET
5188 	  && length == 4
5189 	  && JUMP_LABEL (insn) != NULL_RTX
5190 	  && ! forward_branch_p (insn))
5191 	length += 4;
5192       else if (GET_CODE (pat) == PARALLEL
5193 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5194 	       && length == 4)
5195 	length += 4;
5196       /* Adjust dbra insn with short backwards conditional branch with
5197 	 unfilled delay slot -- only for the case where the counter is in a
5198 	 general register.  */
5199       else if (GET_CODE (pat) == PARALLEL
5200 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5201 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5202  	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5203 	       && length == 4
5204 	       && ! forward_branch_p (insn))
5205 	length += 4;
5206     }
5207   return length;
5208 }
5209 
5210 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */
5211 
5212 static bool
5213 pa_print_operand_punct_valid_p (unsigned char code)
5214 {
5215   if (code == '@'
5216       || code == '#'
5217       || code == '*'
5218       || code == '^')
5219     return true;
5220 
5221   return false;
5222 }
5223 
5224 /* Print operand X (an rtx) in assembler syntax to file FILE.
5225    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5226    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5227 
5228 void
5229 pa_print_operand (FILE *file, rtx x, int code)
5230 {
5231   switch (code)
5232     {
5233     case '#':
5234       /* Output a 'nop' if there's nothing for the delay slot.  */
5235       if (dbr_sequence_length () == 0)
5236 	fputs ("\n\tnop", file);
5237       return;
5238     case '*':
5239       /* Output a nullification completer if there's nothing for the
5240 	 delay slot or nullification is requested.  */
5241       if (dbr_sequence_length () == 0 ||
5242 	  (final_sequence &&
5243 	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5244         fputs (",n", file);
5245       return;
5246     case 'R':
5247       /* Print out the second register name of a register pair.
5248 	 I.e., R (6) => 7.  */
5249       fputs (reg_names[REGNO (x) + 1], file);
5250       return;
5251     case 'r':
5252       /* A register or zero.  */
5253       if (x == const0_rtx
5254 	  || (x == CONST0_RTX (DFmode))
5255 	  || (x == CONST0_RTX (SFmode)))
5256 	{
5257 	  fputs ("%r0", file);
5258 	  return;
5259 	}
5260       else
5261 	break;
5262     case 'f':
5263       /* A register or zero (floating point).  */
5264       if (x == const0_rtx
5265 	  || (x == CONST0_RTX (DFmode))
5266 	  || (x == CONST0_RTX (SFmode)))
5267 	{
5268 	  fputs ("%fr0", file);
5269 	  return;
5270 	}
5271       else
5272 	break;
5273     case 'A':
5274       {
5275 	rtx xoperands[2];
5276 
5277 	xoperands[0] = XEXP (XEXP (x, 0), 0);
5278 	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5279 	pa_output_global_address (file, xoperands[1], 0);
5280         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5281 	return;
5282       }
5283 
5284     case 'C':			/* Plain (C)ondition */
5285     case 'X':
5286       switch (GET_CODE (x))
5287 	{
5288 	case EQ:
5289 	  fputs ("=", file);  break;
5290 	case NE:
5291 	  fputs ("<>", file);  break;
5292 	case GT:
5293 	  fputs (">", file);  break;
5294 	case GE:
5295 	  fputs (">=", file);  break;
5296 	case GEU:
5297 	  fputs (">>=", file);  break;
5298 	case GTU:
5299 	  fputs (">>", file);  break;
5300 	case LT:
5301 	  fputs ("<", file);  break;
5302 	case LE:
5303 	  fputs ("<=", file);  break;
5304 	case LEU:
5305 	  fputs ("<<=", file);  break;
5306 	case LTU:
5307 	  fputs ("<<", file);  break;
5308 	default:
5309 	  gcc_unreachable ();
5310 	}
5311       return;
5312     case 'N':			/* Condition, (N)egated */
5313       switch (GET_CODE (x))
5314 	{
5315 	case EQ:
5316 	  fputs ("<>", file);  break;
5317 	case NE:
5318 	  fputs ("=", file);  break;
5319 	case GT:
5320 	  fputs ("<=", file);  break;
5321 	case GE:
5322 	  fputs ("<", file);  break;
5323 	case GEU:
5324 	  fputs ("<<", file);  break;
5325 	case GTU:
5326 	  fputs ("<<=", file);  break;
5327 	case LT:
5328 	  fputs (">=", file);  break;
5329 	case LE:
5330 	  fputs (">", file);  break;
5331 	case LEU:
5332 	  fputs (">>", file);  break;
5333 	case LTU:
5334 	  fputs (">>=", file);  break;
5335 	default:
5336 	  gcc_unreachable ();
5337 	}
5338       return;
5339     /* For floating point comparisons.  Note that the output
5340        predicates are the complement of the desired mode.  The
5341        conditions for GT, GE, LT, LE and LTGT cause an invalid
5342        operation exception if the result is unordered and this
5343        exception is enabled in the floating-point status register.  */
5344     case 'Y':
5345       switch (GET_CODE (x))
5346 	{
5347 	case EQ:
5348 	  fputs ("!=", file);  break;
5349 	case NE:
5350 	  fputs ("=", file);  break;
5351 	case GT:
5352 	  fputs ("!>", file);  break;
5353 	case GE:
5354 	  fputs ("!>=", file);  break;
5355 	case LT:
5356 	  fputs ("!<", file);  break;
5357 	case LE:
5358 	  fputs ("!<=", file);  break;
5359 	case LTGT:
5360 	  fputs ("!<>", file);  break;
5361 	case UNLE:
5362 	  fputs ("!?<=", file);  break;
5363 	case UNLT:
5364 	  fputs ("!?<", file);  break;
5365 	case UNGE:
5366 	  fputs ("!?>=", file);  break;
5367 	case UNGT:
5368 	  fputs ("!?>", file);  break;
5369 	case UNEQ:
5370 	  fputs ("!?=", file);  break;
5371 	case UNORDERED:
5372 	  fputs ("!?", file);  break;
5373 	case ORDERED:
5374 	  fputs ("?", file);  break;
5375 	default:
5376 	  gcc_unreachable ();
5377 	}
5378       return;
5379     case 'S':			/* Condition, operands are (S)wapped.  */
5380       switch (GET_CODE (x))
5381 	{
5382 	case EQ:
5383 	  fputs ("=", file);  break;
5384 	case NE:
5385 	  fputs ("<>", file);  break;
5386 	case GT:
5387 	  fputs ("<", file);  break;
5388 	case GE:
5389 	  fputs ("<=", file);  break;
5390 	case GEU:
5391 	  fputs ("<<=", file);  break;
5392 	case GTU:
5393 	  fputs ("<<", file);  break;
5394 	case LT:
5395 	  fputs (">", file);  break;
5396 	case LE:
5397 	  fputs (">=", file);  break;
5398 	case LEU:
5399 	  fputs (">>=", file);  break;
5400 	case LTU:
5401 	  fputs (">>", file);  break;
5402 	default:
5403 	  gcc_unreachable ();
5404 	}
5405       return;
5406     case 'B':			/* Condition, (B)oth swapped and negate.  */
5407       switch (GET_CODE (x))
5408 	{
5409 	case EQ:
5410 	  fputs ("<>", file);  break;
5411 	case NE:
5412 	  fputs ("=", file);  break;
5413 	case GT:
5414 	  fputs (">=", file);  break;
5415 	case GE:
5416 	  fputs (">", file);  break;
5417 	case GEU:
5418 	  fputs (">>", file);  break;
5419 	case GTU:
5420 	  fputs (">>=", file);  break;
5421 	case LT:
5422 	  fputs ("<=", file);  break;
5423 	case LE:
5424 	  fputs ("<", file);  break;
5425 	case LEU:
5426 	  fputs ("<<", file);  break;
5427 	case LTU:
5428 	  fputs ("<<=", file);  break;
5429 	default:
5430 	  gcc_unreachable ();
5431 	}
5432       return;
5433     case 'k':
5434       gcc_assert (GET_CODE (x) == CONST_INT);
5435       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5436       return;
5437     case 'Q':
5438       gcc_assert (GET_CODE (x) == CONST_INT);
5439       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5440       return;
5441     case 'L':
5442       gcc_assert (GET_CODE (x) == CONST_INT);
5443       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5444       return;
5445     case 'o':
5446       gcc_assert (GET_CODE (x) == CONST_INT
5447 		  && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5448       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5449       return;
5450     case 'O':
5451       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5452       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5453       return;
5454     case 'p':
5455       gcc_assert (GET_CODE (x) == CONST_INT);
5456       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5457       return;
5458     case 'P':
5459       gcc_assert (GET_CODE (x) == CONST_INT);
5460       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5461       return;
5462     case 'I':
5463       if (GET_CODE (x) == CONST_INT)
5464 	fputs ("i", file);
5465       return;
5466     case 'M':
5467     case 'F':
5468       switch (GET_CODE (XEXP (x, 0)))
5469 	{
5470 	case PRE_DEC:
5471 	case PRE_INC:
5472 	  if (ASSEMBLER_DIALECT == 0)
5473 	    fputs ("s,mb", file);
5474 	  else
5475 	    fputs (",mb", file);
5476 	  break;
5477 	case POST_DEC:
5478 	case POST_INC:
5479 	  if (ASSEMBLER_DIALECT == 0)
5480 	    fputs ("s,ma", file);
5481 	  else
5482 	    fputs (",ma", file);
5483 	  break;
5484 	case PLUS:
5485 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5486 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5487 	    {
5488 	      if (ASSEMBLER_DIALECT == 0)
5489 		fputs ("x", file);
5490 	    }
5491 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5492 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5493 	    {
5494 	      if (ASSEMBLER_DIALECT == 0)
5495 		fputs ("x,s", file);
5496 	      else
5497 		fputs (",s", file);
5498 	    }
5499 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5500 	    fputs ("s", file);
5501 	  break;
5502 	default:
5503 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5504 	    fputs ("s", file);
5505 	  break;
5506 	}
5507       return;
5508     case 'G':
5509       pa_output_global_address (file, x, 0);
5510       return;
5511     case 'H':
5512       pa_output_global_address (file, x, 1);
5513       return;
5514     case 0:			/* Don't do anything special */
5515       break;
5516     case 'Z':
5517       {
5518 	unsigned op[3];
5519 	compute_zdepwi_operands (INTVAL (x), op);
5520 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5521 	return;
5522       }
5523     case 'z':
5524       {
5525 	unsigned op[3];
5526 	compute_zdepdi_operands (INTVAL (x), op);
5527 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5528 	return;
5529       }
5530     case 'c':
5531       /* We can get here from a .vtable_inherit due to our
5532 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5533 	 addresses.  */
5534       break;
5535     default:
5536       gcc_unreachable ();
5537     }
5538   if (GET_CODE (x) == REG)
5539     {
5540       fputs (reg_names [REGNO (x)], file);
5541       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5542 	{
5543 	  fputs ("R", file);
5544 	  return;
5545 	}
5546       if (FP_REG_P (x)
5547 	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5548 	  && (REGNO (x) & 1) == 0)
5549 	fputs ("L", file);
5550     }
5551   else if (GET_CODE (x) == MEM)
5552     {
5553       int size = GET_MODE_SIZE (GET_MODE (x));
5554       rtx base = NULL_RTX;
5555       switch (GET_CODE (XEXP (x, 0)))
5556 	{
5557 	case PRE_DEC:
5558 	case POST_DEC:
5559           base = XEXP (XEXP (x, 0), 0);
5560 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5561 	  break;
5562 	case PRE_INC:
5563 	case POST_INC:
5564           base = XEXP (XEXP (x, 0), 0);
5565 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5566 	  break;
5567 	case PLUS:
5568 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5569 	    fprintf (file, "%s(%s)",
5570 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5571 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5572 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5573 	    fprintf (file, "%s(%s)",
5574 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5575 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5576 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5577 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5578 	    {
5579 	      /* Because the REG_POINTER flag can get lost during reload,
5580 		 pa_legitimate_address_p canonicalizes the order of the
5581 		 index and base registers in the combined move patterns.  */
5582 	      rtx base = XEXP (XEXP (x, 0), 1);
5583 	      rtx index = XEXP (XEXP (x, 0), 0);
5584 
5585 	      fprintf (file, "%s(%s)",
5586 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5587 	    }
5588 	  else
5589 	    output_address (GET_MODE (x), XEXP (x, 0));
5590 	  break;
5591 	default:
5592 	  output_address (GET_MODE (x), XEXP (x, 0));
5593 	  break;
5594 	}
5595     }
5596   else
5597     output_addr_const (file, x);
5598 }
5599 
5600 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5601 
5602 void
5603 pa_output_global_address (FILE *file, rtx x, int round_constant)
5604 {
5605 
5606   /* Imagine  (high (const (plus ...))).  */
5607   if (GET_CODE (x) == HIGH)
5608     x = XEXP (x, 0);
5609 
5610   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5611     output_addr_const (file, x);
5612   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5613     {
5614       output_addr_const (file, x);
5615       fputs ("-$global$", file);
5616     }
5617   else if (GET_CODE (x) == CONST)
5618     {
5619       const char *sep = "";
5620       int offset = 0;		/* assembler wants -$global$ at end */
5621       rtx base = NULL_RTX;
5622 
5623       switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5624 	{
5625 	case LABEL_REF:
5626 	case SYMBOL_REF:
5627 	  base = XEXP (XEXP (x, 0), 0);
5628 	  output_addr_const (file, base);
5629 	  break;
5630 	case CONST_INT:
5631 	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5632 	  break;
5633 	default:
5634 	  gcc_unreachable ();
5635 	}
5636 
5637       switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5638 	{
5639 	case LABEL_REF:
5640 	case SYMBOL_REF:
5641 	  base = XEXP (XEXP (x, 0), 1);
5642 	  output_addr_const (file, base);
5643 	  break;
5644 	case CONST_INT:
5645 	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5646 	  break;
5647 	default:
5648 	  gcc_unreachable ();
5649 	}
5650 
5651       /* How bogus.  The compiler is apparently responsible for
5652 	 rounding the constant if it uses an LR field selector.
5653 
5654 	 The linker and/or assembler seem a better place since
5655 	 they have to do this kind of thing already.
5656 
5657 	 If we fail to do this, HP's optimizing linker may eliminate
5658 	 an addil, but not update the ldw/stw/ldo instruction that
5659 	 uses the result of the addil.  */
5660       if (round_constant)
5661 	offset = ((offset + 0x1000) & ~0x1fff);
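      /* For example, an offset of 0x1234 or 0x2345 rounds to 0x2000:
	 add 0x1000, then clear the low 13 bits.  */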
5662 
5663       switch (GET_CODE (XEXP (x, 0)))
5664 	{
5665 	case PLUS:
5666 	  if (offset < 0)
5667 	    {
5668 	      offset = -offset;
5669 	      sep = "-";
5670 	    }
5671 	  else
5672 	    sep = "+";
5673 	  break;
5674 
5675 	case MINUS:
5676 	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5677 	  sep = "-";
5678 	  break;
5679 
5680 	default:
5681 	  gcc_unreachable ();
5682 	}
5683 
5684       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5685 	fputs ("-$global$", file);
5686       if (offset)
5687 	fprintf (file, "%s%d", sep, offset);
5688     }
5689   else
5690     output_addr_const (file, x);
5691 }
5692 
5693 /* Output boilerplate text to appear at the beginning of the file.
5694    There are several possible versions.  */
5695 #define aputs(x) fputs(x, asm_out_file)
5696 static inline void
5697 pa_file_start_level (void)
5698 {
5699   if (TARGET_64BIT)
5700     aputs ("\t.LEVEL 2.0w\n");
5701   else if (TARGET_PA_20)
5702     aputs ("\t.LEVEL 2.0\n");
5703   else if (TARGET_PA_11)
5704     aputs ("\t.LEVEL 1.1\n");
5705   else
5706     aputs ("\t.LEVEL 1.0\n");
5707 }
5708 
5709 static inline void
5710 pa_file_start_space (int sortspace)
5711 {
5712   aputs ("\t.SPACE $PRIVATE$");
5713   if (sortspace)
5714     aputs (",SORT=16");
5715   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5716   if (flag_tm)
5717     aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5718   aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5719 	 "\n\t.SPACE $TEXT$");
5720   if (sortspace)
5721     aputs (",SORT=8");
5722   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5723 	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5724 }
5725 
5726 static inline void
5727 pa_file_start_file (int want_version)
5728 {
5729   if (write_symbols != NO_DEBUG)
5730     {
5731       output_file_directive (asm_out_file, main_input_filename);
5732       if (want_version)
5733 	aputs ("\t.version\t\"01.01\"\n");
5734     }
5735 }
5736 
5737 static inline void
5738 pa_file_start_mcount (const char *aswhat)
5739 {
5740   if (profile_flag)
5741     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5742 }
5743 
5744 static void
5745 pa_elf_file_start (void)
5746 {
5747   pa_file_start_level ();
5748   pa_file_start_mcount ("ENTRY");
5749   pa_file_start_file (0);
5750 }
5751 
5752 static void
5753 pa_som_file_start (void)
5754 {
5755   pa_file_start_level ();
5756   pa_file_start_space (0);
5757   aputs ("\t.IMPORT $global$,DATA\n"
5758          "\t.IMPORT $$dyncall,MILLICODE\n");
5759   pa_file_start_mcount ("CODE");
5760   pa_file_start_file (0);
5761 }
5762 
5763 static void
5764 pa_linux_file_start (void)
5765 {
5766   pa_file_start_file (0);
5767   pa_file_start_level ();
5768   pa_file_start_mcount ("CODE");
5769 }
5770 
5771 static void
5772 pa_hpux64_gas_file_start (void)
5773 {
5774   pa_file_start_level ();
5775 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5776   if (profile_flag)
5777     ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5778 #endif
5779   pa_file_start_file (1);
5780 }
5781 
5782 static void
5783 pa_hpux64_hpas_file_start (void)
5784 {
5785   pa_file_start_level ();
5786   pa_file_start_space (1);
5787   pa_file_start_mcount ("CODE");
5788   pa_file_start_file (0);
5789 }
5790 #undef aputs
5791 
5792 /* Search the deferred plabel list for SYMBOL and return its internal
5793    label.  If an entry for SYMBOL is not found, a new entry is created.  */
5794 
5795 rtx
5796 pa_get_deferred_plabel (rtx symbol)
5797 {
5798   const char *fname = XSTR (symbol, 0);
5799   size_t i;
5800 
5801   /* See if we have already put this function on the list of deferred
5802      plabels.  This list is generally small, so a linear search is not
5803      too ugly.  If it proves too slow, replace it with something faster.  */
5804   for (i = 0; i < n_deferred_plabels; i++)
5805     if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5806       break;
5807 
5808   /* If the deferred plabel list is empty, or this entry was not found
5809      on the list, create a new entry on the list.  */
5810   if (deferred_plabels == NULL || i == n_deferred_plabels)
5811     {
5812       tree id;
5813 
5814       if (deferred_plabels == 0)
5815 	deferred_plabels = ggc_alloc<deferred_plabel> ();
5816       else
5817         deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5818                                           deferred_plabels,
5819                                           n_deferred_plabels + 1);
5820 
5821       i = n_deferred_plabels++;
5822       deferred_plabels[i].internal_label = gen_label_rtx ();
5823       deferred_plabels[i].symbol = symbol;
5824 
5825       /* Gross.  We have just implicitly taken the address of this
5826 	 function.  Mark it in the same manner as assemble_name.  */
5827       id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5828       if (id)
5829 	mark_referenced (id);
5830     }
5831 
5832   return deferred_plabels[i].internal_label;
5833 }
5834 
5835 static void
5836 output_deferred_plabels (void)
5837 {
5838   size_t i;
5839 
5840   /* If we have some deferred plabels, then we need to switch into the
5841      data or readonly data section, and align it to a 4 byte boundary
5842      before outputting the deferred plabels.  */
5843   if (n_deferred_plabels)
5844     {
5845       switch_to_section (flag_pic ? data_section : readonly_data_section);
5846       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5847     }
5848 
5849   /* Now output the deferred plabels.  */
5850   for (i = 0; i < n_deferred_plabels; i++)
5851     {
5852       targetm.asm_out.internal_label (asm_out_file, "L",
5853 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5854       assemble_integer (deferred_plabels[i].symbol,
5855 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5856     }
5857 }
5858 
5859 /* Initialize optabs to point to emulation routines.  */
5860 
5861 static void
5862 pa_init_libfuncs (void)
5863 {
5864   if (HPUX_LONG_DOUBLE_LIBRARY)
5865     {
5866       set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5867       set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5868       set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5869       set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5870       set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5871       set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5872       set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5873       set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5874       set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5875 
5876       set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5877       set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5878       set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5879       set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5880       set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5881       set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5882       set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5883 
5884       set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5885       set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5886       set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5887       set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5888 
5889       set_conv_libfunc (sfix_optab, SImode, TFmode,
5890 			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5891 				     : "_U_Qfcnvfxt_quad_to_sgl");
5892       set_conv_libfunc (sfix_optab, DImode, TFmode,
5893 			"_U_Qfcnvfxt_quad_to_dbl");
5894       set_conv_libfunc (ufix_optab, SImode, TFmode,
5895 			"_U_Qfcnvfxt_quad_to_usgl");
5896       set_conv_libfunc (ufix_optab, DImode, TFmode,
5897 			"_U_Qfcnvfxt_quad_to_udbl");
5898 
5899       set_conv_libfunc (sfloat_optab, TFmode, SImode,
5900 			"_U_Qfcnvxf_sgl_to_quad");
5901       set_conv_libfunc (sfloat_optab, TFmode, DImode,
5902 			"_U_Qfcnvxf_dbl_to_quad");
5903       set_conv_libfunc (ufloat_optab, TFmode, SImode,
5904 			"_U_Qfcnvxf_usgl_to_quad");
5905       set_conv_libfunc (ufloat_optab, TFmode, DImode,
5906 			"_U_Qfcnvxf_udbl_to_quad");
5907     }
5908 
5909   if (TARGET_SYNC_LIBCALL)
5910     init_sync_libfuncs (8);
5911 }
5912 
5913 /* HP's millicode routines mean something special to the assembler.
5914    Keep track of which ones we have used.  */
5915 
5916 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5917 static void import_milli (enum millicodes);
5918 static char imported[(int) end1000];
5919 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5920 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5921 #define MILLI_START 10
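/* MILLI_START is the offset of the "...." placeholder within
   import_string above (".IMPORT $$" is 10 characters); import_milli
   overwrites it with the four-character millicode name.  */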
5922 
5923 static void
5924 import_milli (enum millicodes code)
5925 {
5926   char str[sizeof (import_string)];
5927 
5928   if (!imported[(int) code])
5929     {
5930       imported[(int) code] = 1;
5931       strcpy (str, import_string);
5932       memcpy (str + MILLI_START, milli_names[(int) code], 4);
5933       output_asm_insn (str, 0);
5934     }
5935 }
5936 
5937 /* The register constraints have put the operands and return value in
5938    the proper registers.  */
5939 
5940 const char *
5941 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5942 {
5943   import_milli (mulI);
5944   return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5945 }
5946 
5947 /* Emit the rtl for doing a division by a constant.  */
5948 
5949 /* Do magic division millicodes exist for this value? */
5950 const int pa_magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
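/* The nonzero entries are at indices 3, 5, 6, 7, 9, 10, 12, 14 and
   15 -- the divisors for which $$divI_<n>/$$divU_<n> millicode
   routines exist (see pa_output_div_insn below).  */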
5951 
5952 /* We'll use an array to keep track of the magic millicodes and
5953    whether or not we've used them already. [n][0] is signed, [n][1] is
5954    unsigned.  */
5955 
5956 static int div_milli[16][2];
5957 
5958 int
5959 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5960 {
5961   if (GET_CODE (operands[2]) == CONST_INT
5962       && INTVAL (operands[2]) > 0
5963       && INTVAL (operands[2]) < 16
5964       && pa_magic_milli[INTVAL (operands[2])])
5965     {
5966       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5967 
5968       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5969       emit
5970 	(gen_rtx_PARALLEL
5971 	 (VOIDmode,
5972 	  gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5973 				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5974 						     SImode,
5975 						     gen_rtx_REG (SImode, 26),
5976 						     operands[2])),
5977 		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5978 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
5979 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5980 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5981 		     gen_rtx_CLOBBER (VOIDmode, ret))));
5982       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5983       return 1;
5984     }
5985   return 0;
5986 }
5987 
5988 const char *
5989 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5990 {
5991   HOST_WIDE_INT divisor;
5992 
5993   /* If the divisor is a constant, try to use one of the special
5994      opcodes.  */
5995   if (GET_CODE (operands[0]) == CONST_INT)
5996     {
5997       static char buf[100];
5998       divisor = INTVAL (operands[0]);
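      /* The divisor is assumed to lie in the 1..15 range already
	 validated by pa_emit_hpdiv_const, so it is a safe index into
	 div_milli below.  */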
5999       if (!div_milli[divisor][unsignedp])
6000 	{
6001 	  div_milli[divisor][unsignedp] = 1;
6002 	  if (unsignedp)
6003 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6004 	  else
6005 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6006 	}
6007       if (unsignedp)
6008 	{
6009 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6010 		   INTVAL (operands[0]));
6011 	  return pa_output_millicode_call (insn,
6012 					   gen_rtx_SYMBOL_REF (SImode, buf));
6013 	}
6014       else
6015 	{
6016 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6017 		   INTVAL (operands[0]));
6018 	  return pa_output_millicode_call (insn,
6019 					   gen_rtx_SYMBOL_REF (SImode, buf));
6020 	}
6021     }
6022   /* Divisor isn't a special constant.  */
6023   else
6024     {
6025       if (unsignedp)
6026 	{
6027 	  import_milli (divU);
6028 	  return pa_output_millicode_call (insn,
6029 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6030 	}
6031       else
6032 	{
6033 	  import_milli (divI);
6034 	  return pa_output_millicode_call (insn,
6035 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
6036 	}
6037     }
6038 }
6039 
6040 /* Output a $$rem millicode to do mod.  */
6041 
6042 const char *
6043 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6044 {
6045   if (unsignedp)
6046     {
6047       import_milli (remU);
6048       return pa_output_millicode_call (insn,
6049 				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6050     }
6051   else
6052     {
6053       import_milli (remI);
6054       return pa_output_millicode_call (insn,
6055 				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6056     }
6057 }
6058 
6059 void
6060 pa_output_arg_descriptor (rtx_insn *call_insn)
6061 {
6062   const char *arg_regs[4];
6063   machine_mode arg_mode;
6064   rtx link;
6065   int i, output_flag = 0;
6066   int regno;
6067 
6068   /* We neither need nor want argument location descriptors for the
6069      64-bit runtime environment or the ELF32 environment.  */
6070   if (TARGET_64BIT || TARGET_ELF32)
6071     return;
6072 
6073   for (i = 0; i < 4; i++)
6074     arg_regs[i] = 0;
6075 
6076   /* Specify explicitly that no argument relocations should take place
6077      if using the portable runtime calling conventions.  */
6078   if (TARGET_PORTABLE_RUNTIME)
6079     {
6080       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6081 	     asm_out_file);
6082       return;
6083     }
6084 
6085   gcc_assert (CALL_P (call_insn));
6086   for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6087        link; link = XEXP (link, 1))
6088     {
6089       rtx use = XEXP (link, 0);
6090 
6091       if (! (GET_CODE (use) == USE
6092 	     && GET_CODE (XEXP (use, 0)) == REG
6093 	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6094 	continue;
6095 
6096       arg_mode = GET_MODE (XEXP (use, 0));
6097       regno = REGNO (XEXP (use, 0));
6098       if (regno >= 23 && regno <= 26)
6099 	{
6100 	  arg_regs[26 - regno] = "GR";
6101 	  if (arg_mode == DImode)
6102 	    arg_regs[25 - regno] = "GR";
6103 	}
6104       else if (regno >= 32 && regno <= 39)
6105 	{
6106 	  if (arg_mode == SFmode)
6107 	    arg_regs[(regno - 32) / 2] = "FR";
6108 	  else
6109 	    {
6110 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6111 	      arg_regs[(regno - 34) / 2] = "FR";
6112 	      arg_regs[(regno - 34) / 2 + 1] = "FU";
6113 #else
6114 	      arg_regs[(regno - 34) / 2] = "FU";
6115 	      arg_regs[(regno - 34) / 2 + 1] = "FR";
6116 #endif
6117 	    }
6118 	}
6119     }
6120   fputs ("\t.CALL ", asm_out_file);
6121   for (i = 0; i < 4; i++)
6122     {
6123       if (arg_regs[i])
6124 	{
6125 	  if (output_flag++)
6126 	    fputc (',', asm_out_file);
6127 	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6128 	}
6129     }
6130   fputc ('\n', asm_out_file);
6131 }
6132 
6133 /* Inform reload about cases where moving X with a mode MODE to or from
6134    a register in RCLASS requires an extra scratch or immediate register.
6135    Return the class needed for the immediate register.  */
6136 
6137 static reg_class_t
6138 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6139 		     machine_mode mode, secondary_reload_info *sri)
6140 {
6141   int regno;
6142   enum reg_class rclass = (enum reg_class) rclass_i;
6143 
6144   /* Handle the easy stuff first.  */
6145   if (rclass == R1_REGS)
6146     return NO_REGS;
6147 
6148   if (REG_P (x))
6149     {
6150       regno = REGNO (x);
6151       if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6152 	return NO_REGS;
6153     }
6154   else
6155     regno = -1;
6156 
6157   /* If we have something like (mem (mem (...))), we can safely assume the
6158      inner MEM will end up in a general register after reloading, so there's
6159      no need for a secondary reload.  */
6160   if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6161     return NO_REGS;
6162 
6163   /* Trying to load a constant into a FP register during PIC code
6164      generation requires %r1 as a scratch register.  For float modes,
6165      the only legitimate constant is CONST0_RTX.  However, there are
6166      a few patterns that accept constant double operands.  */
6167   if (flag_pic
6168       && FP_REG_CLASS_P (rclass)
6169       && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6170     {
6171       switch (mode)
6172 	{
6173 	case E_SImode:
6174 	  sri->icode = CODE_FOR_reload_insi_r1;
6175 	  break;
6176 
6177 	case E_DImode:
6178 	  sri->icode = CODE_FOR_reload_indi_r1;
6179 	  break;
6180 
6181 	case E_SFmode:
6182 	  sri->icode = CODE_FOR_reload_insf_r1;
6183 	  break;
6184 
6185 	case E_DFmode:
6186 	  sri->icode = CODE_FOR_reload_indf_r1;
6187 	  break;
6188 
6189 	default:
6190 	  gcc_unreachable ();
6191 	}
6192       return NO_REGS;
6193     }
6194 
6195   /* Secondary reloads of symbolic expressions require %r1 as a scratch
6196      register when we're generating PIC code or when the operand isn't
6197      readonly.  */
6198   if (pa_symbolic_expression_p (x))
6199     {
6200       if (GET_CODE (x) == HIGH)
6201 	x = XEXP (x, 0);
6202 
6203       if (flag_pic || !read_only_operand (x, VOIDmode))
6204 	{
6205 	  switch (mode)
6206 	    {
6207 	    case E_SImode:
6208 	      sri->icode = CODE_FOR_reload_insi_r1;
6209 	      break;
6210 
6211 	    case E_DImode:
6212 	      sri->icode = CODE_FOR_reload_indi_r1;
6213 	      break;
6214 
6215 	    default:
6216 	      gcc_unreachable ();
6217 	    }
6218 	  return NO_REGS;
6219 	}
6220     }
6221 
6222   /* Profiling showed the PA port spends about 1.3% of its compilation
6223      time in true_regnum from calls inside pa_secondary_reload_class.  */
6224   if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6225     regno = true_regnum (x);
6226 
6227   /* Handle reloads for floating point loads and stores.  */
6228   if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6229       && FP_REG_CLASS_P (rclass))
6230     {
6231       if (MEM_P (x))
6232 	{
6233 	  x = XEXP (x, 0);
6234 
6235 	  /* We don't need a secondary reload for indexed memory addresses.
6236 
6237 	     When INT14_OK_STRICT is true, it might appear that we could
6238 	     directly allow register indirect memory addresses.  However,
6239 	     this doesn't work because we don't support SUBREGs in
6240 	     floating-point register copies and reload doesn't tell us
6241 	     when it's going to use a SUBREG.  */
6242 	  if (IS_INDEX_ADDR_P (x))
6243 	    return NO_REGS;
6244 	}
6245 
6246       /* Request a secondary reload with a general scratch register
6247 	 for everything else.  ??? Could symbolic operands be handled
6248 	 directly when generating non-pic PA 2.0 code?  */
6249       sri->icode = (in_p
6250 		    ? direct_optab_handler (reload_in_optab, mode)
6251 		    : direct_optab_handler (reload_out_optab, mode));
6252       return NO_REGS;
6253     }
6254 
6255   /* A SAR<->FP register copy requires an intermediate general register
6256      and secondary memory.  We need a secondary reload with a general
6257      scratch register for spills.  */
6258   if (rclass == SHIFT_REGS)
6259     {
6260       /* Handle spill.  */
6261       if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6262 	{
6263 	  sri->icode = (in_p
6264 			? direct_optab_handler (reload_in_optab, mode)
6265 			: direct_optab_handler (reload_out_optab, mode));
6266 	  return NO_REGS;
6267 	}
6268 
6269       /* Handle FP copy.  */
6270       if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6271 	return GENERAL_REGS;
6272     }
6273 
6274   if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6275       && REGNO_REG_CLASS (regno) == SHIFT_REGS
6276       && FP_REG_CLASS_P (rclass))
6277     return GENERAL_REGS;
6278 
6279   return NO_REGS;
6280 }
6281 
6282 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
6283 
6284 static bool
6285 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6286 			    reg_class_t class1 ATTRIBUTE_UNUSED,
6287 			    reg_class_t class2 ATTRIBUTE_UNUSED)
6288 {
6289 #ifdef PA_SECONDARY_MEMORY_NEEDED
6290   return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6291 #else
6292   return false;
6293 #endif
6294 }
6295 
6296 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6297    is only marked as live on entry by df-scan when it is a fixed
6298    register.  It isn't a fixed register in the 64-bit runtime,
6299    so we need to mark it here.  */
6300 
6301 static void
6302 pa_extra_live_on_entry (bitmap regs)
6303 {
6304   if (TARGET_64BIT)
6305     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6306 }
6307 
6308 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6309    to prevent it from being deleted.  */
6310 
6311 rtx
6312 pa_eh_return_handler_rtx (void)
6313 {
6314   rtx tmp;
6315 
6316   tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6317 		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6318   tmp = gen_rtx_MEM (word_mode, tmp);
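  /* Setting the volatil bit directly is equivalent to what the
     MEM_VOLATILE_P accessor reads; it keeps this MEM from being
     deleted as dead code.  */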
6319   tmp->volatil = 1;
6320   return tmp;
6321 }
6322 
6323 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6324    by invisible reference.  As a GCC extension, we also pass anything
6325    with a zero or variable size by reference.
6326 
6327    The 64-bit runtime does not describe passing any types by invisible
6328    reference.  The internals of GCC can't currently handle passing
6329    empty structures, and zero or variable length arrays when they are
6330    not passed entirely on the stack or by reference.  Thus, as a GCC
6331    extension, we pass these types by reference.  The HP compiler doesn't
6332    support these types, so hopefully there shouldn't be any compatibility
6333    issues.  This may have to be revisited when HP releases a C99 compiler
6334    or updates the ABI.  */
6335 
6336 static bool
6337 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6338 {
6339   HOST_WIDE_INT size = arg.type_size_in_bytes ();
6340   if (TARGET_64BIT)
6341     return size <= 0;
6342   else
6343     return size <= 0 || size > 8;
6344 }
6345 
6346 /* Implement TARGET_FUNCTION_ARG_PADDING.  */
6347 
6348 static pad_direction
6349 pa_function_arg_padding (machine_mode mode, const_tree type)
6350 {
6351   if (mode == BLKmode
6352       || (TARGET_64BIT
6353 	  && type
6354 	  && (AGGREGATE_TYPE_P (type)
6355 	      || TREE_CODE (type) == COMPLEX_TYPE
6356 	      || TREE_CODE (type) == VECTOR_TYPE)))
6357     {
6358       /* Return PAD_NONE if justification is not required.  */
6359       if (type
6360 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6361 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6362 	return PAD_NONE;
6363 
6364       /* The directions set here are ignored when a BLKmode argument larger
6365 	 than a word is placed in a register.  Different code is used for
6366 	 the stack and registers.  This makes it difficult to have a
6367 	 consistent data representation for both the stack and registers.
6368 	 For both runtimes, the justification and padding for arguments on
6369 	 the stack and in registers should be identical.  */
6370       if (TARGET_64BIT)
6371 	/* The 64-bit runtime specifies left justification for aggregates.  */
6372 	return PAD_UPWARD;
6373       else
6374 	/* The 32-bit runtime architecture specifies right justification.
6375 	   When the argument is passed on the stack, the argument is padded
6376 	   with garbage on the left.  The HP compiler pads with zeros.  */
6377 	return PAD_DOWNWARD;
6378     }
6379 
6380   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6381     return PAD_DOWNWARD;
6382   else
6383     return PAD_NONE;
6384 }
6385 
6386 
6387 /* Do what is necessary for `va_start'.  We look at the current function
6388    to determine if stdargs or varargs is used and fill in an initial
6389    va_list.  A pointer to this constructor is returned.  */
6390 
6391 static rtx
6392 hppa_builtin_saveregs (void)
6393 {
6394   rtx offset, dest;
6395   tree fntype = TREE_TYPE (current_function_decl);
6396   int argadj = ((!stdarg_p (fntype))
6397 		? UNITS_PER_WORD : 0);
6398 
6399   if (argadj)
6400     offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6401   else
6402     offset = crtl->args.arg_offset_rtx;
6403 
6404   if (TARGET_64BIT)
6405     {
6406       int i, off;
6407 
6408       /* Adjust for varargs/stdarg differences.  */
6409       if (argadj)
6410 	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6411       else
6412 	offset = crtl->args.arg_offset_rtx;
6413 
6414       /* We need to save %r26 .. %r19 inclusive starting at offset -64
6415 	 from the incoming arg pointer and growing to larger addresses.  */
6416       for (i = 26, off = -64; i >= 19; i--, off += 8)
6417 	emit_move_insn (gen_rtx_MEM (word_mode,
6418 				     plus_constant (Pmode,
6419 						    arg_pointer_rtx, off)),
6420 			gen_rtx_REG (word_mode, i));
6421 
6422       /* The incoming args pointer points just beyond the flushback area;
6423 	 normally this is not a serious concern.  However, when we are doing
6424 	 varargs/stdargs we want to make the arg pointer point to the start
6425 	 of the incoming argument area.  */
6426       emit_move_insn (virtual_incoming_args_rtx,
6427 		      plus_constant (Pmode, arg_pointer_rtx, -64));
6428 
6429       /* Now return a pointer to the first anonymous argument.  */
6430       return copy_to_reg (expand_binop (Pmode, add_optab,
6431 					virtual_incoming_args_rtx,
6432 					offset, 0, 0, OPTAB_LIB_WIDEN));
6433     }
6434 
6435   /* Store general registers on the stack.  */
6436   dest = gen_rtx_MEM (BLKmode,
6437 		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
6438 				     -16));
6439   set_mem_alias_set (dest, get_varargs_alias_set ());
6440   set_mem_align (dest, BITS_PER_WORD);
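  /* This appears to store the four argument registers %r23 through
     %r26 into the block at internal_arg_pointer - 16 (four 4-byte
     words), so the register arguments sit next to any stack ones.  */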
6441   move_block_from_reg (23, dest, 4);
6442 
6443   /* move_block_from_reg will emit code to store the argument registers
6444      individually as scalar stores.
6445 
6446      However, other insns may later load from the same addresses for
6447      a structure load (passing a struct to a varargs routine).
6448 
6449      The alias code assumes that such aliasing can never happen, so we
6450      have to keep memory referencing insns from moving up beyond the
6451      last argument register store.  So we emit a blockage insn here.  */
6452   emit_insn (gen_blockage ());
6453 
6454   return copy_to_reg (expand_binop (Pmode, add_optab,
6455 				    crtl->args.internal_arg_pointer,
6456 				    offset, 0, 0, OPTAB_LIB_WIDEN));
6457 }
6458 
6459 static void
6460 hppa_va_start (tree valist, rtx nextarg)
6461 {
6462   nextarg = expand_builtin_saveregs ();
6463   std_expand_builtin_va_start (valist, nextarg);
6464 }
6465 
6466 static tree
6467 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6468 			   gimple_seq *post_p)
6469 {
6470   if (TARGET_64BIT)
6471     {
6472       /* Args grow upward.  We can use the generic routines.  */
6473       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6474     }
6475   else /* !TARGET_64BIT */
6476     {
6477       tree ptr = build_pointer_type (type);
6478       tree valist_type;
6479       tree t, u;
6480       unsigned int size, ofs;
6481       bool indirect;
6482 
6483       indirect = pass_va_arg_by_reference (type);
6484       if (indirect)
6485 	{
6486 	  type = ptr;
6487 	  ptr = build_pointer_type (type);
6488 	}
6489       size = int_size_in_bytes (type);
6490       valist_type = TREE_TYPE (valist);
6491 
6492       /* Args grow down.  Not handled by generic routines.  */
6493 
6494       u = fold_convert (sizetype, size_in_bytes (type));
6495       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6496       t = fold_build_pointer_plus (valist, u);
6497 
6498       /* Align to 4 or 8 byte boundary depending on argument size.  */
6499 
6500       u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6501       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6502       t = fold_convert (valist_type, t);
6503 
6504       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6505 
6506       ofs = (8 - size) % 4;
6507       if (ofs != 0)
6508 	t = fold_build_pointer_plus_hwi (t, ofs);
6509 
6510       t = fold_convert (ptr, t);
6511       t = build_va_arg_indirect_ref (t);
6512 
6513       if (indirect)
6514 	t = build_va_arg_indirect_ref (t);
6515 
6516       return t;
6517     }
6518 }
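
/* A worked example of the 32-bit va_arg arithmetic above: for a
   4-byte int, valist is decremented by 4 and rounded down to a
   4-byte boundary, and ofs = (8 - 4) % 4 = 0, so the value is read
   at the adjusted valist.  For a 2-byte short, valist is decremented
   by 2 and rounded down to a 4-byte boundary, and ofs = (8 - 2) % 4
   = 2, so the value is read 2 bytes into the slot; because args grow
   down, sub-word arguments sit right-justified within their word.  */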
6519 
6520 /* True if MODE is valid for the target.  By "valid", we mean able to
6521    be manipulated in non-trivial ways.  In particular, this means all
6522    the arithmetic is supported.
6523 
6524    Currently, TImode is not valid as the HP 64-bit runtime documentation
6525    doesn't document the alignment and calling conventions for this type.
6526    Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6527    2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */
6528 
6529 static bool
6530 pa_scalar_mode_supported_p (scalar_mode mode)
6531 {
6532   int precision = GET_MODE_PRECISION (mode);
6533 
6534   switch (GET_MODE_CLASS (mode))
6535     {
6536     case MODE_PARTIAL_INT:
6537     case MODE_INT:
6538       if (precision == CHAR_TYPE_SIZE)
6539 	return true;
6540       if (precision == SHORT_TYPE_SIZE)
6541 	return true;
6542       if (precision == INT_TYPE_SIZE)
6543 	return true;
6544       if (precision == LONG_TYPE_SIZE)
6545 	return true;
6546       if (precision == LONG_LONG_TYPE_SIZE)
6547 	return true;
6548       return false;
6549 
6550     case MODE_FLOAT:
6551       if (precision == FLOAT_TYPE_SIZE)
6552 	return true;
6553       if (precision == DOUBLE_TYPE_SIZE)
6554 	return true;
6555       if (precision == LONG_DOUBLE_TYPE_SIZE)
6556 	return true;
6557       return false;
6558 
6559     case MODE_DECIMAL_FLOAT:
6560       return false;
6561 
6562     default:
6563       gcc_unreachable ();
6564     }
6565 }
6566 
6567 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6568    it branches into the delay slot.  Otherwise, return FALSE.  */
6569 
6570 static bool
6571 branch_to_delay_slot_p (rtx_insn *insn)
6572 {
6573   rtx_insn *jump_insn;
6574 
6575   if (dbr_sequence_length ())
6576     return FALSE;
6577 
6578   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6579   while (insn)
6580     {
6581       insn = next_active_insn (insn);
6582       if (jump_insn == insn)
6583 	return TRUE;
6584 
6585       /* We can't rely on the length of asms.  So, we return FALSE when
6586 	 the branch is followed by an asm.  */
6587       if (!insn
6588 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6589 	  || asm_noperands (PATTERN (insn)) >= 0
6590 	  || get_attr_length (insn) > 0)
6591 	break;
6592     }
6593 
6594   return FALSE;
6595 }
6596 
6597 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6598 
6599    This occurs when INSN has an unfilled delay slot and is followed
6600    by an asm.  Disaster can occur if the asm is empty and the jump
6601    branches into the delay slot.  So, we add a nop in the delay slot
6602    when this occurs.  */
6603 
6604 static bool
6605 branch_needs_nop_p (rtx_insn *insn)
6606 {
6607   rtx_insn *jump_insn;
6608 
6609   if (dbr_sequence_length ())
6610     return FALSE;
6611 
6612   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6613   while (insn)
6614     {
6615       insn = next_active_insn (insn);
6616       if (!insn || jump_insn == insn)
6617 	return TRUE;
6618 
6619       if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6620 	   || asm_noperands (PATTERN (insn)) >= 0)
6621 	  && get_attr_length (insn) > 0)
6622 	break;
6623     }
6624 
6625   return FALSE;
6626 }
6627 
6628 /* Return TRUE if INSN, a forward jump insn, can use nullification
6629    to skip the following instruction.  This avoids an extra cycle due
6630    to a mis-predicted branch when we fall through.  */
6631 
6632 static bool
6633 use_skip_p (rtx_insn *insn)
6634 {
6635   rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6636 
6637   while (insn)
6638     {
6639       insn = next_active_insn (insn);
6640 
6641       /* We can't rely on the length of asms, so we can't skip asms.  */
6642       if (!insn
6643 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6644 	  || asm_noperands (PATTERN (insn)) >= 0)
6645 	break;
6646       if (get_attr_length (insn) == 4
6647 	  && jump_insn == next_active_insn (insn))
6648 	return TRUE;
6649       if (get_attr_length (insn) > 0)
6650 	break;
6651     }
6652 
6653   return FALSE;
6654 }
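
/* To illustrate the payoff from use_skip_p: a forward branch over a
   single 4-byte insn, e.g. (registers and label purely illustrative)

	cmpb,<>,n %r4,%r5,L$1
	ldo 1(%r3),%r3
   L$1:

   can instead be emitted as a compare-and-clear that conditionally
   nullifies the following instruction (see the useskip handling in
   pa_output_cbranch below), avoiding the branch and any misprediction
   penalty when it would fall through.  */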
6655 
6656 /* This routine handles all the normal conditional branch sequences we
6657    might need to generate.  It handles compare immediate vs compare
6658    register, nullification of delay slots, varying length branches,
6659    negated branches, and all combinations of the above.  It returns the
6660    output appropriate to emit the branch corresponding to all given
6661    parameters.  */
6662 
6663 const char *
6664 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6665 {
6666   static char buf[100];
6667   bool useskip;
6668   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6669   int length = get_attr_length (insn);
6670   int xdelay;
6671 
6672   /* A conditional branch to the following instruction (e.g. the delay slot)
6673      is asking for a disaster.  This can happen when not optimizing and
6674      when jump optimization fails.
6675 
6676      While it is usually safe to emit nothing, this can fail if the
6677      preceding instruction is a nullified branch with an empty delay
6678      slot and the same branch target as this branch.  We could check
6679      for this but jump optimization should eliminate nop jumps.  It
6680      is always safe to emit a nop.  */
6681   if (branch_to_delay_slot_p (insn))
6682     return "nop";
6683 
6684   /* The doubleword form of the cmpib instruction doesn't have the LEU
6685      and GTU conditions while the cmpb instruction does.  Since we accept
6686      zero for cmpb, we must ensure that we use cmpb for the comparison.  */
6687   if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6688     operands[2] = gen_rtx_REG (DImode, 0);
6689   if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6690     operands[1] = gen_rtx_REG (DImode, 0);
6691 
6692   /* If this is a long branch with its delay slot unfilled, set `nullify'
6693      as it can nullify the delay slot and save a nop.  */
6694   if (length == 8 && dbr_sequence_length () == 0)
6695     nullify = 1;
6696 
6697   /* If this is a short forward conditional branch which did not get
6698      its delay slot filled, the delay slot can still be nullified.  */
6699   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6700     nullify = forward_branch_p (insn);
6701 
6702   /* A forward branch over a single nullified insn can be done with a
6703      comclr instruction.  This avoids a single cycle penalty due to
6704      mis-predicted branch if we fall through (branch not taken).  */
6705   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6706 
6707   switch (length)
6708     {
6709       /* All short conditional branches except backwards with an unfilled
6710 	 delay slot.  */
6711       case 4:
6712 	if (useskip)
6713 	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6714 	else
6715 	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
6716 	if (GET_MODE (operands[1]) == DImode)
6717 	  strcat (buf, "*");
6718 	if (negated)
6719 	  strcat (buf, "%B3");
6720 	else
6721 	  strcat (buf, "%S3");
6722 	if (useskip)
6723 	  strcat (buf, " %2,%r1,%%r0");
6724 	else if (nullify)
6725 	  {
6726 	    if (branch_needs_nop_p (insn))
6727 	      strcat (buf, ",n %2,%r1,%0%#");
6728 	    else
6729 	      strcat (buf, ",n %2,%r1,%0");
6730 	  }
6731 	else
6732 	  strcat (buf, " %2,%r1,%0");
6733 	break;
6734 
6735      /* All long conditionals.  Note a short backward branch with an
6736 	unfilled delay slot is treated just like a long backward branch
6737 	with an unfilled delay slot.  */
6738       case 8:
6739 	/* Handle weird backwards branch with a filled delay slot
6740 	   which is nullified.  */
6741 	if (dbr_sequence_length () != 0
6742 	    && ! forward_branch_p (insn)
6743 	    && nullify)
6744 	  {
6745 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6746 	    if (GET_MODE (operands[1]) == DImode)
6747 	      strcat (buf, "*");
6748 	    if (negated)
6749 	      strcat (buf, "%S3");
6750 	    else
6751 	      strcat (buf, "%B3");
6752 	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6753 	  }
6754 	/* Handle short backwards branch with an unfilled delay slot.
6755 	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6756 	   taken and untaken branches.  */
6757 	else if (dbr_sequence_length () == 0
6758 		 && ! forward_branch_p (insn)
6759 		 && INSN_ADDRESSES_SET_P ()
6760 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6761 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6762 	  {
6763 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6764 	    if (GET_MODE (operands[1]) == DImode)
6765 	      strcat (buf, "*");
6766 	    if (negated)
6767 	      strcat (buf, "%B3 %2,%r1,%0%#");
6768 	    else
6769 	      strcat (buf, "%S3 %2,%r1,%0%#");
6770 	  }
6771 	else
6772 	  {
6773 	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6774 	    if (GET_MODE (operands[1]) == DImode)
6775 	      strcat (buf, "*");
6776 	    if (negated)
6777 	      strcat (buf, "%S3");
6778 	    else
6779 	      strcat (buf, "%B3");
6780 	    if (nullify)
6781 	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6782 	    else
6783 	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
6784 	  }
6785 	break;
6786 
6787       default:
6788 	/* The reversed conditional branch must branch over one additional
6789 	   instruction if the delay slot is filled and needs to be extracted
6790 	   by pa_output_lbranch.  If the delay slot is empty or this is a
6791 	   nullified forward branch, the instruction after the reversed
6792 	   condition branch must be nullified.  */
6793 	if (dbr_sequence_length () == 0
6794 	    || (nullify && forward_branch_p (insn)))
6795 	  {
6796 	    nullify = 1;
6797 	    xdelay = 0;
6798 	    operands[4] = GEN_INT (length);
6799 	  }
6800 	else
6801 	  {
6802 	    xdelay = 1;
6803 	    operands[4] = GEN_INT (length + 4);
6804 	  }
6805 
6806 	/* Create a reversed conditional branch which branches around
6807 	   the following insns.  */
6808 	if (GET_MODE (operands[1]) != DImode)
6809 	  {
6810 	    if (nullify)
6811 	      {
6812 		if (negated)
6813 		  strcpy (buf,
6814 		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6815 		else
6816 		  strcpy (buf,
6817 		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6818 	      }
6819 	    else
6820 	      {
6821 		if (negated)
6822 		  strcpy (buf,
6823 		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6824 		else
6825 		  strcpy (buf,
6826 		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6827 	      }
6828 	  }
6829 	else
6830 	  {
6831 	    if (nullify)
6832 	      {
6833 		if (negated)
6834 		  strcpy (buf,
6835 		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6836 		else
6837 		  strcpy (buf,
6838 		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6839 	      }
6840 	    else
6841 	      {
6842 		if (negated)
6843 		  strcpy (buf,
6844 		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6845 		else
6846 		  strcpy (buf,
6847 		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6848 	      }
6849 	  }
6850 
6851 	output_asm_insn (buf, operands);
6852 	return pa_output_lbranch (operands[0], insn, xdelay);
6853     }
6854   return buf;
6855 }
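
/* Note on the templates above: the "{...|...}" constructs choose
   between PA 1.x and PA 2.0 assembler mnemonics via
   ASSEMBLER_DIALECT, so "{com%I2b,|cmp%I2b,}" becomes "comb,"/"comib,"
   under the 1.x syntax and "cmpb,"/"cmpib," under 2.0, with %I2
   selecting the immediate form when operand 2 is a constant.  */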
6856 
6857 /* Output a PIC pc-relative instruction sequence to load the address of
6858    OPERANDS[0] to register OPERANDS[2].  OPERANDS[0] is a symbol ref
6859    or a code label.  OPERANDS[1] specifies the register to use to load
6860    the program counter.  OPERANDS[3] may be used for label generation
6861    The sequence is always three instructions in length.  The program
6862    counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6863    Register %r1 is clobbered.  */
6864 
6865 static void
6866 pa_output_pic_pcrel_sequence (rtx *operands)
6867 {
6868   gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6869   if (TARGET_PA_20)
6870     {
6871       /* We can use mfia to determine the current program counter.  */
6872       if (TARGET_SOM || !TARGET_GAS)
6873 	{
6874 	  operands[3] = gen_label_rtx ();
6875 	  targetm.asm_out.internal_label (asm_out_file, "L",
6876 					  CODE_LABEL_NUMBER (operands[3]));
6877 	  output_asm_insn ("mfia %1", operands);
6878 	  output_asm_insn ("addil L'%0-%l3,%1", operands);
6879 	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6880 	}
6881       else
6882 	{
6883 	  output_asm_insn ("mfia %1", operands);
6884 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6885 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6886 	}
6887     }
6888   else
6889     {
6890       /* We need to use a branch to determine the current program counter.  */
6891       output_asm_insn ("{bl|b,l} .+8,%1", operands);
6892       if (TARGET_SOM || !TARGET_GAS)
6893 	{
6894 	  operands[3] = gen_label_rtx ();
6895 	  output_asm_insn ("addil L'%0-%l3,%1", operands);
6896 	  targetm.asm_out.internal_label (asm_out_file, "L",
6897 					  CODE_LABEL_NUMBER (operands[3]));
6898 	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6899 	}
6900       else
6901 	{
6902 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6903 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6904 	}
6905     }
6906 }
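
/* For illustration (with a symbol "sym" and the GAS, non-SOM cases),
   the PA 2.0 sequence above comes out as

	mfia %r1
	addil L'sym-$PIC_pcrel$0+12,%r1
	ldo R'sym-$PIC_pcrel$0+16(%r1),%r2

   while PA 1.x must capture the pc with a branch-and-link:

	bl .+8,%r1
	addil L'sym-$PIC_pcrel$0+4,%r1
	ldo R'sym-$PIC_pcrel$0+8(%r1),%r2

   The differing +12/+16 and +4/+8 adjustments reflect the eight-byte
   difference in the recorded program counter noted above.  */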
6907 
6908 /* This routine handles output of long unconditional branches that
6909    exceed the maximum range of a simple branch instruction.  Since
6910    we don't have a register available for the branch, we save register
6911    %r1 in the frame marker, load the branch destination DEST into %r1,
6912    execute the branch, and restore %r1 in the delay slot of the branch.
6913 
6914    Since long branches may have an insn in the delay slot and the
6915    delay slot is used to restore %r1, we in general need to extract
6916    this insn and execute it before the branch.  However, to facilitate
6917    use of this function by conditional branches, we also provide an
6918    option to not extract the delay insn so that it will be emitted
6919    after the long branch.  So, if there is an insn in the delay slot,
6920    it is extracted if XDELAY is nonzero.
6921 
6922    The lengths of the various long-branch sequences are 20, 16 and 24
6923    bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
6924 
6925 const char *
6926 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6927 {
6928   rtx xoperands[4];
6929 
6930   xoperands[0] = dest;
6931 
6932   /* First, free up the delay slot.  */
6933   if (xdelay && dbr_sequence_length () != 0)
6934     {
6935       /* We can't handle a jump in the delay slot.  */
6936       gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6937 
6938       final_scan_insn (NEXT_INSN (insn), asm_out_file,
6939 		       optimize, 0, NULL);
6940 
6941       /* Now delete the delay insn.  */
6942       SET_INSN_DELETED (NEXT_INSN (insn));
6943     }
6944 
6945   /* Output an insn to save %r1.  The runtime documentation doesn't
6946      specify whether the "Clean Up" slot in the caller's frame can
6947      be clobbered by the callee.  It isn't copied by HP's builtin
6948      alloca, so this suggests that it can be clobbered if necessary.
6949      The "Static Link" location is copied by HP builtin alloca, so
6950      we avoid using it.  Using the cleanup slot might be a problem
6951      if we have to interoperate with languages that pass cleanup
6952      information.  However, it should be possible to handle these
6953      situations with GCC's asm feature.
6954 
6955      The "Current RP" slot is reserved for the called procedure, so
6956      we try to use it when we don't have a frame of our own.  It's
6957      rather unlikely that we won't have a frame when we need to emit
6958      a very long branch.
6959 
6960      Really the way to go long term is a register scavenger; go to
6961      the target of the jump and find a register which we can use
6962      as a scratch to hold the value in %r1.  Then, we wouldn't have
6963      to free up the delay slot or clobber a slot that may be needed
6964      for other purposes.  */
6965   if (TARGET_64BIT)
6966     {
6967       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6968 	/* Use the return pointer slot in the frame marker.  */
6969 	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6970       else
6971 	/* Use the slot at -40 in the frame marker since HP builtin
6972 	   alloca doesn't copy it.  */
6973 	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6974     }
6975   else
6976     {
6977       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6978 	/* Use the return pointer slot in the frame marker.  */
6979 	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6980       else
6981 	/* Use the "Clean Up" slot in the frame marker.  In GCC,
6982 	   the only other use of this location is for copying a
6983 	   floating point double argument from a floating-point
6984 	   register to two general registers.  The copy is done
6985 	   as an "atomic" operation when outputting a call, so it
6986 	   won't interfere with our using the location here.  */
6987 	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6988     }
6989 
6990   if (TARGET_PORTABLE_RUNTIME)
6991     {
6992       output_asm_insn ("ldil L'%0,%%r1", xoperands);
6993       output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6994       output_asm_insn ("bv %%r0(%%r1)", xoperands);
6995     }
6996   else if (flag_pic)
6997     {
6998       xoperands[1] = gen_rtx_REG (Pmode, 1);
6999       xoperands[2] = xoperands[1];
7000       pa_output_pic_pcrel_sequence (xoperands);
7001       output_asm_insn ("bv %%r0(%%r1)", xoperands);
7002     }
7003   else
7004     /* Now output a very long branch to the original target.  */
7005     output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
7006 
7007   /* Now restore the value of %r1 in the delay slot.  */
7008   if (TARGET_64BIT)
7009     {
7010       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7011 	return "ldd -16(%%r30),%%r1";
7012       else
7013 	return "ldd -40(%%r30),%%r1";
7014     }
7015   else
7016     {
7017       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7018 	return "ldw -20(%%r30),%%r1";
7019       else
7020 	return "ldw -12(%%r30),%%r1";
7021     }
7022 }
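
/* As a concrete example, the 16-byte non-PIC case above (with a
   frame, so the "Clean Up" slot is used) emits

	stw %r1,-12(%r30)	; save %r1 in the frame marker
	ldil L'target,%r1
	be R'target(%sr4,%r1)	; very long branch via %r1
	ldw -12(%r30),%r1	; delay slot: restore %r1

   with "target" standing in for the original branch destination.  */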
7023 
7024 /* This routine handles all the branch-on-bit conditional branch sequences we
7025    might need to generate.  It handles nullification of delay slots,
7026    varying length branches, negated branches and all combinations of the
7027    above.  it returns the appropriate output template to emit the branch.  */
7028 
7029 const char *
7030 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
7031 {
7032   static char buf[100];
7033   bool useskip;
7034   int nullify = INSN_ANNULLED_BRANCH_P (insn);
7035   int length = get_attr_length (insn);
7036   int xdelay;
7037 
7038   /* A conditional branch to the following instruction (e.g. the delay slot) is
7039      asking for a disaster.  I do not think this can happen as this pattern
7040      is only used when optimizing; jump optimization should eliminate the
7041      jump.  But be prepared just in case.  */
7042 
7043   if (branch_to_delay_slot_p (insn))
7044     return "nop";
7045 
7046   /* If this is a long branch with its delay slot unfilled, set `nullify'
7047      as it can nullify the delay slot and save a nop.  */
7048   if (length == 8 && dbr_sequence_length () == 0)
7049     nullify = 1;
7050 
7051   /* If this is a short forward conditional branch which did not get
7052      its delay slot filled, the delay slot can still be nullified.  */
7053   if (! nullify && length == 4 && dbr_sequence_length () == 0)
7054     nullify = forward_branch_p (insn);
7055 
7056   /* A forward branch over a single nullified insn can be done with an
7057      extrs instruction.  This avoids a single cycle penalty due to
7058      mis-predicted branch if we fall through (branch not taken).  */
7059   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7060 
7061   switch (length)
7062     {
7063 
7064       /* All short conditional branches except backwards with an unfilled
7065 	 delay slot.  */
7066       case 4:
7067 	if (useskip)
7068 	  strcpy (buf, "{extrs,|extrw,s,}");
7069 	else
7070 	  strcpy (buf, "bb,");
7071 	if (useskip && GET_MODE (operands[0]) == DImode)
7072 	  strcpy (buf, "extrd,s,*");
7073 	else if (GET_MODE (operands[0]) == DImode)
7074 	  strcpy (buf, "bb,*");
7075 	if ((which == 0 && negated)
7076 	     || (which == 1 && ! negated))
7077 	  strcat (buf, ">=");
7078 	else
7079 	  strcat (buf, "<");
7080 	if (useskip)
7081 	  strcat (buf, " %0,%1,1,%%r0");
7082 	else if (nullify && negated)
7083 	  {
7084 	    if (branch_needs_nop_p (insn))
7085 	      strcat (buf, ",n %0,%1,%3%#");
7086 	    else
7087 	      strcat (buf, ",n %0,%1,%3");
7088 	  }
7089 	else if (nullify && ! negated)
7090 	  {
7091 	    if (branch_needs_nop_p (insn))
7092 	      strcat (buf, ",n %0,%1,%2%#");
7093 	    else
7094 	      strcat (buf, ",n %0,%1,%2");
7095 	  }
7096 	else if (! nullify && negated)
7097 	  strcat (buf, " %0,%1,%3");
7098 	else if (! nullify && ! negated)
7099 	  strcat (buf, " %0,%1,%2");
7100 	break;
7101 
7102      /* All long conditionals.  Note a short backward branch with an
7103 	unfilled delay slot is treated just like a long backward branch
7104 	with an unfilled delay slot.  */
7105       case 8:
7106 	/* Handle weird backwards branch with a filled delay slot
7107 	   which is nullified.  */
7108 	if (dbr_sequence_length () != 0
7109 	    && ! forward_branch_p (insn)
7110 	    && nullify)
7111 	  {
7112 	    strcpy (buf, "bb,");
7113 	    if (GET_MODE (operands[0]) == DImode)
7114 	      strcat (buf, "*");
7115 	    if ((which == 0 && negated)
7116 		|| (which == 1 && ! negated))
7117 	      strcat (buf, "<");
7118 	    else
7119 	      strcat (buf, ">=");
7120 	    if (negated)
7121 	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
7122 	    else
7123 	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
7124 	  }
7125 	/* Handle short backwards branch with an unfilled delay slot.
7126 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
7127 	   taken and untaken branches.  */
7128 	else if (dbr_sequence_length () == 0
7129 		 && ! forward_branch_p (insn)
7130 		 && INSN_ADDRESSES_SET_P ()
7131 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7132 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7133 	  {
7134 	    strcpy (buf, "bb,");
7135 	    if (GET_MODE (operands[0]) == DImode)
7136 	      strcat (buf, "*");
7137 	    if ((which == 0 && negated)
7138 		|| (which == 1 && ! negated))
7139 	      strcat (buf, ">=");
7140 	    else
7141 	      strcat (buf, "<");
7142 	    if (negated)
7143 	      strcat (buf, " %0,%1,%3%#");
7144 	    else
7145 	      strcat (buf, " %0,%1,%2%#");
7146 	  }
7147 	else
7148 	  {
7149 	    if (GET_MODE (operands[0]) == DImode)
7150 	      strcpy (buf, "extrd,s,*");
7151 	    else
7152 	      strcpy (buf, "{extrs,|extrw,s,}");
7153 	    if ((which == 0 && negated)
7154 		|| (which == 1 && ! negated))
7155 	      strcat (buf, "<");
7156 	    else
7157 	      strcat (buf, ">=");
7158 	    if (nullify && negated)
7159 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7160 	    else if (nullify && ! negated)
7161 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7162 	    else if (negated)
7163 	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7164 	    else
7165 	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7166 	  }
7167 	break;
7168 
7169       default:
7170 	/* The reversed conditional branch must branch over one additional
7171 	   instruction if the delay slot is filled and needs to be extracted
7172 	   by pa_output_lbranch.  If the delay slot is empty or this is a
7173 	   nullified forward branch, the instruction after the reversed
7174 	   condition branch must be nullified.  */
7175 	if (dbr_sequence_length () == 0
7176 	    || (nullify && forward_branch_p (insn)))
7177 	  {
7178 	    nullify = 1;
7179 	    xdelay = 0;
7180 	    operands[4] = GEN_INT (length);
7181 	  }
7182 	else
7183 	  {
7184 	    xdelay = 1;
7185 	    operands[4] = GEN_INT (length + 4);
7186 	  }
7187 
7188 	if (GET_MODE (operands[0]) == DImode)
7189 	  strcpy (buf, "bb,*");
7190 	else
7191 	  strcpy (buf, "bb,");
7192 	if ((which == 0 && negated)
7193 	    || (which == 1 && !negated))
7194 	  strcat (buf, "<");
7195 	else
7196 	  strcat (buf, ">=");
7197 	if (nullify)
7198 	  strcat (buf, ",n %0,%1,.+%4");
7199 	else
7200 	  strcat (buf, " %0,%1,.+%4");
7201 	output_asm_insn (buf, operands);
7202 	return pa_output_lbranch (negated ? operands[3] : operands[2],
7203 				  insn, xdelay);
7204     }
7205   return buf;
7206 }
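
/* For example, a short template built above might emit
   "bb,>= %r4,5,L$1", which tests the numbered bit of the register
   (bits are numbered from the most significant end on PA-RISC) and
   branches when it is clear; the "<" completer branches when the bit
   is set.  Register, bit position and label here are illustrative.  */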
7207 
7208 /* This routine handles all the branch-on-variable-bit conditional branch
7209    sequences we might need to generate.  It handles nullification of delay
7210    slots, varying length branches, negated branches and all combinations
7211    of the above.  it returns the appropriate output template to emit the
7212    branch.  */
7213 
7214 const char *
7215 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7216 	       int which)
7217 {
7218   static char buf[100];
7219   bool useskip;
7220   int nullify = INSN_ANNULLED_BRANCH_P (insn);
7221   int length = get_attr_length (insn);
7222   int xdelay;
7223 
7224   /* A conditional branch to the following instruction (e.g. the delay slot) is
7225      asking for a disaster.  I do not think this can happen as this pattern
7226      is only used when optimizing; jump optimization should eliminate the
7227      jump.  But be prepared just in case.  */
7228 
7229   if (branch_to_delay_slot_p (insn))
7230     return "nop";
7231 
7232   /* If this is a long branch with its delay slot unfilled, set `nullify'
7233      as it can nullify the delay slot and save a nop.  */
7234   if (length == 8 && dbr_sequence_length () == 0)
7235     nullify = 1;
7236 
7237   /* If this is a short forward conditional branch which did not get
7238      its delay slot filled, the delay slot can still be nullified.  */
7239   if (! nullify && length == 4 && dbr_sequence_length () == 0)
7240     nullify = forward_branch_p (insn);
7241 
7242   /* A forward branch over a single nullified insn can be done with an
7243      extrs instruction.  This avoids a single cycle penalty due to
7244      mis-predicted branch if we fall through (branch not taken).  */
7245   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7246 
7247   switch (length)
7248     {
7249 
7250       /* All short conditional branches except backwards with an unfilled
7251 	 delay slot.  */
7252       case 4:
7253 	if (useskip)
7254 	  strcpy (buf, "{vextrs,|extrw,s,}");
7255 	else
7256 	  strcpy (buf, "{bvb,|bb,}");
7257 	if (useskip && GET_MODE (operands[0]) == DImode)
7258 	  strcpy (buf, "extrd,s,*");
7259 	else if (GET_MODE (operands[0]) == DImode)
7260 	  strcpy (buf, "bb,*");
7261 	if ((which == 0 && negated)
7262 	     || (which == 1 && ! negated))
7263 	  strcat (buf, ">=");
7264 	else
7265 	  strcat (buf, "<");
7266 	if (useskip)
7267 	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7268 	else if (nullify && negated)
7269 	  {
7270 	    if (branch_needs_nop_p (insn))
7271 	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7272 	    else
7273 	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7274 	  }
7275 	else if (nullify && ! negated)
7276 	  {
7277 	    if (branch_needs_nop_p (insn))
7278 	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7279 	    else
7280 	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7281 	  }
7282 	else if (! nullify && negated)
7283 	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7284 	else if (! nullify && ! negated)
7285 	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7286 	break;
7287 
7288      /* All long conditionals.  Note a short backward branch with an
7289 	unfilled delay slot is treated just like a long backward branch
7290 	with an unfilled delay slot.  */
7291       case 8:
7292 	/* Handle weird backwards branch with a filled delay slot
7293 	   which is nullified.  */
7294 	if (dbr_sequence_length () != 0
7295 	    && ! forward_branch_p (insn)
7296 	    && nullify)
7297 	  {
7298 	    strcpy (buf, "{bvb,|bb,}");
7299 	    if (GET_MODE (operands[0]) == DImode)
7300 	      strcat (buf, "*");
7301 	    if ((which == 0 && negated)
7302 		|| (which == 1 && ! negated))
7303 	      strcat (buf, "<");
7304 	    else
7305 	      strcat (buf, ">=");
7306 	    if (negated)
7307 	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7308 	    else
7309 	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7310 	  }
7311 	/* Handle short backwards branch with an unfilled delay slot.
7312 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
7313 	   taken and untaken branches.  */
7314 	else if (dbr_sequence_length () == 0
7315 		 && ! forward_branch_p (insn)
7316 		 && INSN_ADDRESSES_SET_P ()
7317 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7318 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7319 	  {
7320 	    strcpy (buf, "{bvb,|bb,}");
7321 	    if (GET_MODE (operands[0]) == DImode)
7322 	      strcat (buf, "*");
7323 	    if ((which == 0 && negated)
7324 		|| (which == 1 && ! negated))
7325 	      strcat (buf, ">=");
7326 	    else
7327 	      strcat (buf, "<");
7328 	    if (negated)
7329 	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7330 	    else
7331 	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7332 	  }
7333 	else
7334 	  {
7335 	    strcpy (buf, "{vextrs,|extrw,s,}");
7336 	    if (GET_MODE (operands[0]) == DImode)
7337 	      strcpy (buf, "extrd,s,*");
7338 	    if ((which == 0 && negated)
7339 		|| (which == 1 && ! negated))
7340 	      strcat (buf, "<");
7341 	    else
7342 	      strcat (buf, ">=");
7343 	    if (nullify && negated)
7344 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7345 	    else if (nullify && ! negated)
7346 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7347 	    else if (negated)
7348 	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7349 	    else
7350 	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7351 	  }
7352 	break;
7353 
7354       default:
7355 	/* The reversed conditional branch must branch over one additional
7356 	   instruction if the delay slot is filled and needs to be extracted
7357 	   by pa_output_lbranch.  If the delay slot is empty or this is a
7358 	   nullified forward branch, the instruction after the reversed
7359 	   condition branch must be nullified.  */
7360 	if (dbr_sequence_length () == 0
7361 	    || (nullify && forward_branch_p (insn)))
7362 	  {
7363 	    nullify = 1;
7364 	    xdelay = 0;
7365 	    operands[4] = GEN_INT (length);
7366 	  }
7367 	else
7368 	  {
7369 	    xdelay = 1;
7370 	    operands[4] = GEN_INT (length + 4);
7371 	  }
7372 
7373 	if (GET_MODE (operands[0]) == DImode)
7374 	  strcpy (buf, "bb,*");
7375 	else
7376 	  strcpy (buf, "{bvb,|bb,}");
7377 	if ((which == 0 && negated)
7378 	    || (which == 1 && !negated))
7379 	  strcat (buf, "<");
7380 	else
7381 	  strcat (buf, ">=");
7382 	if (nullify)
7383 	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7384 	else
7385 	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7386 	output_asm_insn (buf, operands);
7387 	return pa_output_lbranch (negated ? operands[3] : operands[2],
7388 				  insn, xdelay);
7389     }
7390   return buf;
7391 }
7392 
7393 /* Return the output template for emitting a dbra type insn.
7394 
7395    Note it may perform some output operations on its own before
7396    returning the final output string.  */
7397 const char *
7398 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7399 {
7400   int length = get_attr_length (insn);
7401 
7402   /* A conditional branch to the following instruction (e.g. the delay slot) is
7403      asking for a disaster.  Be prepared!  */
7404 
7405   if (branch_to_delay_slot_p (insn))
7406     {
7407       if (which_alternative == 0)
7408 	return "ldo %1(%0),%0";
7409       else if (which_alternative == 1)
7410 	{
7411 	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7412 	  output_asm_insn ("ldw -16(%%r30),%4", operands);
7413 	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7414 	  return "{fldws|fldw} -16(%%r30),%0";
7415 	}
7416       else
7417 	{
7418 	  output_asm_insn ("ldw %0,%4", operands);
7419 	  return "ldo %1(%4),%4\n\tstw %4,%0";
7420 	}
7421     }
7422 
7423   if (which_alternative == 0)
7424     {
7425       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7426       int xdelay;
7427 
7428       /* If this is a long branch with its delay slot unfilled, set `nullify'
7429 	 as it can nullify the delay slot and save a nop.  */
7430       if (length == 8 && dbr_sequence_length () == 0)
7431 	nullify = 1;
7432 
7433       /* If this is a short forward conditional branch which did not get
7434 	 its delay slot filled, the delay slot can still be nullified.  */
7435       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7436 	nullify = forward_branch_p (insn);
7437 
7438       switch (length)
7439 	{
7440 	case 4:
7441 	  if (nullify)
7442 	    {
7443 	      if (branch_needs_nop_p (insn))
7444 		return "addib,%C2,n %1,%0,%3%#";
7445 	      else
7446 		return "addib,%C2,n %1,%0,%3";
7447 	    }
7448 	  else
7449 	    return "addib,%C2 %1,%0,%3";
7450 
7451 	case 8:
7452 	  /* Handle weird backwards branch with a filled delay slot
7453 	     which is nullified.  */
7454 	  if (dbr_sequence_length () != 0
7455 	      && ! forward_branch_p (insn)
7456 	      && nullify)
7457 	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
7458 	  /* Handle short backwards branch with an unfilled delay slot.
7459 	     Using an addb;nop rather than addi;bl saves 1 cycle for both
7460 	     taken and untaken branches.  */
7461 	  else if (dbr_sequence_length () == 0
7462 		   && ! forward_branch_p (insn)
7463 		   && INSN_ADDRESSES_SET_P ()
7464 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7465 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7466 	      return "addib,%C2 %1,%0,%3%#";
7467 
7468 	  /* Handle normal cases.  */
7469 	  if (nullify)
7470 	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
7471 	  else
7472 	    return "addi,%N2 %1,%0,%0\n\tb %3";
7473 
7474 	default:
7475 	  /* The reversed conditional branch must branch over one additional
7476 	     instruction if the delay slot is filled and needs to be extracted
7477 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7478 	     nullified forward branch, the instruction after the reversed
7479 	     condition branch must be nullified.  */
7480 	  if (dbr_sequence_length () == 0
7481 	      || (nullify && forward_branch_p (insn)))
7482 	    {
7483 	      nullify = 1;
7484 	      xdelay = 0;
7485 	      operands[4] = GEN_INT (length);
7486 	    }
7487 	  else
7488 	    {
7489 	      xdelay = 1;
7490 	      operands[4] = GEN_INT (length + 4);
7491 	    }
7492 
7493 	  if (nullify)
7494 	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7495 	  else
7496 	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7497 
7498 	  return pa_output_lbranch (operands[3], insn, xdelay);
7499 	}
7500 
7501     }
7502   /* Deal with gross reload from FP register case.  */
7503   else if (which_alternative == 1)
7504     {
7505       /* Move loop counter from FP register to MEM, then into a GR,
7506 	 increment the GR, store the GR into MEM, and finally reload
7507 	 the FP register from MEM from within the branch's delay slot.  */
7508       output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7509 		       operands);
7510       output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7511       if (length == 24)
7512 	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7513       else if (length == 28)
7514 	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7515       else
7516 	{
7517 	  operands[5] = GEN_INT (length - 16);
7518 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7519 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7520 	  return pa_output_lbranch (operands[3], insn, 0);
7521 	}
7522     }
7523   /* Deal with gross reload from memory case.  */
7524   else
7525     {
7526       /* Reload loop counter from memory, the store back to memory
7527 	 happens in the branch's delay slot.  */
7528       output_asm_insn ("ldw %0,%4", operands);
7529       if (length == 12)
7530 	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7531       else if (length == 16)
7532 	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7533       else
7534 	{
7535 	  operands[5] = GEN_INT (length - 4);
7536 	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7537 	  return pa_output_lbranch (operands[3], insn, 0);
7538 	}
7539     }
7540 }
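
/* The short form above, "addib,%C2 %1,%0,%3", is the classic
   decrement-and-branch idiom.  Purely as an illustration:

	ldi 10,%r3		; hypothetical loop counter
   L$loop:
	...
	addib,<> -1,%r3,L$loop	; %r3 -= 1; branch while nonzero

   folding the induction-variable update and the conditional branch
   into one instruction.  */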
7541 
7542 /* Return the output template for emitting a movb type insn.
7543 
7544    Note it may perform some output operations on its own before
7545    returning the final output string.  */
7546 const char *
7547 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7548 	     int reverse_comparison)
7549 {
7550   int length = get_attr_length (insn);
7551 
7552   /* A conditional branch to the following instruction (e.g. the delay slot) is
7553      asking for a disaster.  Be prepared!  */
7554 
7555   if (branch_to_delay_slot_p (insn))
7556     {
7557       if (which_alternative == 0)
7558 	return "copy %1,%0";
7559       else if (which_alternative == 1)
7560 	{
7561 	  output_asm_insn ("stw %1,-16(%%r30)", operands);
7562 	  return "{fldws|fldw} -16(%%r30),%0";
7563 	}
7564       else if (which_alternative == 2)
7565 	return "stw %1,%0";
7566       else
7567 	return "mtsar %r1";
7568     }
7569 
7570   /* Support the second variant.  */
7571   if (reverse_comparison)
7572     PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7573 
7574   if (which_alternative == 0)
7575     {
7576       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7577       int xdelay;
7578 
7579       /* If this is a long branch with its delay slot unfilled, set `nullify'
7580 	 as it can nullify the delay slot and save a nop.  */
7581       if (length == 8 && dbr_sequence_length () == 0)
7582 	nullify = 1;
7583 
7584       /* If this is a short forward conditional branch which did not get
7585 	 its delay slot filled, the delay slot can still be nullified.  */
7586       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7587 	nullify = forward_branch_p (insn);
7588 
7589       switch (length)
7590 	{
7591 	case 4:
7592 	  if (nullify)
7593 	    {
7594 	      if (branch_needs_nop_p (insn))
7595 		return "movb,%C2,n %1,%0,%3%#";
7596 	      else
7597 		return "movb,%C2,n %1,%0,%3";
7598 	    }
7599 	  else
7600 	    return "movb,%C2 %1,%0,%3";
7601 
7602 	case 8:
7603 	  /* Handle weird backwards branch with a filled delay slot
7604 	     which is nullified.  */
7605 	  if (dbr_sequence_length () != 0
7606 	      && ! forward_branch_p (insn)
7607 	      && nullify)
7608 	    return "movb,%N2,n %1,%0,.+12\n\tb %3";
7609 
7610 	  /* Handle short backwards branch with an unfilled delay slot.
7611 	     Using a movb;nop rather than or;bl saves 1 cycle for both
7612 	     taken and untaken branches.  */
7613 	  else if (dbr_sequence_length () == 0
7614 		   && ! forward_branch_p (insn)
7615 		   && INSN_ADDRESSES_SET_P ()
7616 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7617 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7618 	    return "movb,%C2 %1,%0,%3%#";
7619 	  /* Handle normal cases.  */
7620 	  if (nullify)
7621 	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7622 	  else
7623 	    return "or,%N2 %1,%%r0,%0\n\tb %3";
7624 
7625 	default:
7626 	  /* The reversed conditional branch must branch over one additional
7627 	     instruction if the delay slot is filled and needs to be extracted
7628 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7629 	     nullified forward branch, the instruction after the reversed
7630 	     condition branch must be nullified.  */
7631 	  if (dbr_sequence_length () == 0
7632 	      || (nullify && forward_branch_p (insn)))
7633 	    {
7634 	      nullify = 1;
7635 	      xdelay = 0;
7636 	      operands[4] = GEN_INT (length);
7637 	    }
7638 	  else
7639 	    {
7640 	      xdelay = 1;
7641 	      operands[4] = GEN_INT (length + 4);
7642 	    }
7643 
7644 	  if (nullify)
7645 	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7646 	  else
7647 	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7648 
7649 	  return pa_output_lbranch (operands[3], insn, xdelay);
7650 	}
7651     }
7652   /* Deal with gross reload for FP destination register case.  */
7653   else if (which_alternative == 1)
7654     {
7655       /* Move source register to MEM, perform the branch test, then
7656 	 finally load the FP register from MEM from within the branch's
7657 	 delay slot.  */
7658       output_asm_insn ("stw %1,-16(%%r30)", operands);
7659       if (length == 12)
7660 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7661       else if (length == 16)
7662 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7663       else
7664 	{
7665 	  operands[4] = GEN_INT (length - 4);
7666 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7667 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7668 	  return pa_output_lbranch (operands[3], insn, 0);
7669 	}
7670     }
7671   /* Deal with gross reload from memory case.  */
7672   else if (which_alternative == 2)
7673     {
7674       /* Reload loop counter from memory, the store back to memory
7675 	 happens in the branch's delay slot.  */
7676       if (length == 8)
7677 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7678       else if (length == 12)
7679 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7680       else
7681 	{
7682 	  operands[4] = GEN_INT (length);
7683 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7684 			   operands);
7685 	  return pa_output_lbranch (operands[3], insn, 0);
7686 	}
7687     }
7688   /* Handle SAR as a destination.  */
7689   else
7690     {
7691       if (length == 8)
7692 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7693       else if (length == 12)
7694 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7695       else
7696 	{
7697 	  operands[4] = GEN_INT (length);
7698 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7699 			   operands);
7700 	  return pa_output_lbranch (operands[3], insn, 0);
7701 	}
7702     }
7703 }
7704 
7705 /* Copy any FP arguments in INSN into integer registers.  */
7706 static void
7707 copy_fp_args (rtx_insn *insn)
7708 {
7709   rtx link;
7710   rtx xoperands[2];
7711 
7712   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7713     {
7714       int arg_mode, regno;
7715       rtx use = XEXP (link, 0);
7716 
7717       if (! (GET_CODE (use) == USE
7718 	  && GET_CODE (XEXP (use, 0)) == REG
7719 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7720 	continue;
7721 
7722       arg_mode = GET_MODE (XEXP (use, 0));
7723       regno = REGNO (XEXP (use, 0));
7724 
7725       /* Is it a floating point register?  */
7726       if (regno >= 32 && regno <= 39)
7727 	{
7728 	  /* Copy the FP register into an integer register via memory.  */
7729 	  if (arg_mode == SFmode)
7730 	    {
7731 	      xoperands[0] = XEXP (use, 0);
7732 	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7733 	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7734 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7735 	    }
7736 	  else
7737 	    {
7738 	      xoperands[0] = XEXP (use, 0);
7739 	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7740 	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7741 	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7742 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7743 	    }
7744 	}
7745     }
7746 }
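
/* For instance (register choice depends on the argument slot; this is
   only a sketch), a double in GCC register 34 would be copied as

	fstd %fr5,-16(%sr0,%r30)	; spill the FP value
	ldw -12(%sr0,%r30),%r26		; low word into the GR pair
	ldw -16(%sr0,%r30),%r25		; high word

   so the argument relocation stub can later move it back into an FP
   register if the callee expects it there.  */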
7747 
7748 /* Compute length of the FP argument copy sequence for INSN.  */
7749 static int
7750 length_fp_args (rtx_insn *insn)
7751 {
7752   int length = 0;
7753   rtx link;
7754 
7755   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7756     {
7757       int arg_mode, regno;
7758       rtx use = XEXP (link, 0);
7759 
7760       if (! (GET_CODE (use) == USE
7761 	  && GET_CODE (XEXP (use, 0)) == REG
7762 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7763 	continue;
7764 
7765       arg_mode = GET_MODE (XEXP (use, 0));
7766       regno = REGNO (XEXP (use, 0));
7767 
7768       /* Is it a floating point register?  */
7769       if (regno >= 32 && regno <= 39)
7770 	{
7771 	  if (arg_mode == SFmode)
7772 	    length += 8;
7773 	  else
7774 	    length += 12;
7775 	}
7776     }
7777 
7778   return length;
7779 }
7780 
7781 /* Return the attribute length for the millicode call instruction INSN.
7782    The length must match the code generated by pa_output_millicode_call.
7783    We include the delay slot in the returned length as it is better to
7784    overestimate the length than to underestimate it.  */
7785 
7786 int
7787 pa_attr_length_millicode_call (rtx_insn *insn)
7788 {
7789   unsigned long distance = -1;
7790   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7791 
7792   if (INSN_ADDRESSES_SET_P ())
7793     {
7794       distance = (total + insn_current_reference_address (insn));
7795       if (distance < total)
7796 	distance = -1;
7797     }
7798 
7799   if (TARGET_64BIT)
7800     {
7801       if (!TARGET_LONG_CALLS && distance < 7600000)
7802 	return 8;
7803 
7804       return 20;
7805     }
7806   else if (TARGET_PORTABLE_RUNTIME)
7807     return 24;
7808   else
7809     {
7810       if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7811 	return 8;
7812 
7813       if (!flag_pic)
7814 	return 12;
7815 
7816       return 24;
7817     }
7818 }
7819 
7820 /* INSN is a function call.
7821 
7822    CALL_DEST is the routine we are calling.  */
7823 
7824 const char *
7825 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7826 {
7827   int attr_length = get_attr_length (insn);
7828   int seq_length = dbr_sequence_length ();
7829   rtx xoperands[4];
7830 
7831   xoperands[0] = call_dest;
7832 
7833   /* Handle the common case where we are sure that the branch will
7834      reach the beginning of the $CODE$ subspace.  The within-reach
7835      form of the $$sh_func_adrs call has a length of 28.  Because it
7836      has an attribute type of sh_func_adrs, it never has a nonzero
7837      sequence length (i.e., the delay slot is never filled).  */
7838   if (!TARGET_LONG_CALLS
7839       && (attr_length == 8
7840 	  || (attr_length == 28
7841 	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7842     {
7843       xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7844       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7845     }
7846   else
7847     {
7848       if (TARGET_64BIT)
7849 	{
7850 	  /* It might seem that one insn could be saved by accessing
7851 	     the millicode function using the linkage table.  However,
7852 	     this doesn't work in shared libraries and other dynamically
7853 	     loaded objects.  Using a pc-relative sequence also avoids
7854 	     problems related to the implicit use of the gp register.  */
7855 	  xoperands[1] = gen_rtx_REG (Pmode, 1);
7856 	  xoperands[2] = xoperands[1];
7857 	  pa_output_pic_pcrel_sequence (xoperands);
7858 	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7859 	}
7860       else if (TARGET_PORTABLE_RUNTIME)
7861 	{
7862 	  /* Pure portable runtime doesn't allow be/ble; we also don't
7863 	     have PIC support in the assembler/linker, so this sequence
7864 	     is needed.  */
7865 
7866 	  /* Get the address of our target into %r1.  */
7867 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7868 	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7869 
7870 	  /* Get our return address into %r31.  */
7871 	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7872 	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7873 
7874 	  /* Jump to our target address in %r1.  */
7875 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7876 	}
7877       else if (!flag_pic)
7878 	{
7879 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7880 	  if (TARGET_PA_20)
7881 	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7882 	  else
7883 	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7884 	}
7885       else
7886 	{
7887 	  xoperands[1] = gen_rtx_REG (Pmode, 31);
7888 	  xoperands[2] = gen_rtx_REG (Pmode, 1);
7889 	  pa_output_pic_pcrel_sequence (xoperands);
7890 
7891 	  /* Adjust return address.  */
7892 	  output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7893 
7894 	  /* Jump to our target address in %r1.  */
7895 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7896 	}
7897     }
7898 
7899   if (seq_length == 0)
7900     output_asm_insn ("nop", xoperands);
7901 
7902   return "";
7903 }
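
/* As an example, the portable runtime case above expands to (using
   the millicode routine $$mulI as the callee)

	ldil L'$$mulI,%r1
	ldo R'$$mulI(%r1),%r1	; address of the routine
	bl .+8,%r31		; set %r31 to the pc of the bv
	addi 8,%r31,%r31	; advance past the bv and its delay slot
	bv %r0(%r1)		; enter the routine
	nop			; delay slot, if it was not filled

   six instructions, matching the 24-byte estimate in
   pa_attr_length_millicode_call.  */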
7904 
7905 /* Return the attribute length of the call instruction INSN.  The SIBCALL
7906    flag indicates whether INSN is a regular call or a sibling call.  The
7907    length returned must be longer than the code actually generated by
7908    pa_output_call.  Since branch shortening is done before delay branch
7909    sequencing, there is no way to determine whether or not the delay
7910    slot will be filled during branch shortening.  Even when the delay
7911    slot is filled, we may have to add a nop if the delay slot contains
7912    a branch that can't reach its target.  Thus, we always have to include
7913    the delay slot in the length estimate.  This used to be done in
7914    pa_adjust_insn_length but we do it here now as some sequences always
7915    fill the delay slot and we can save four bytes in the estimate for
7916    these sequences.  */
7917 
7918 int
7919 pa_attr_length_call (rtx_insn *insn, int sibcall)
7920 {
7921   int local_call;
7922   rtx call, call_dest;
7923   tree call_decl;
7924   int length = 0;
7925   rtx pat = PATTERN (insn);
7926   unsigned long distance = -1;
7927 
7928   gcc_assert (CALL_P (insn));
7929 
7930   if (INSN_ADDRESSES_SET_P ())
7931     {
7932       unsigned long total;
7933 
7934       total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7935       distance = (total + insn_current_reference_address (insn));
7936       if (distance < total)
7937 	distance = -1;
7938     }
7939 
7940   gcc_assert (GET_CODE (pat) == PARALLEL);
7941 
7942   /* Get the call rtx.  */
7943   call = XVECEXP (pat, 0, 0);
7944   if (GET_CODE (call) == SET)
7945     call = SET_SRC (call);
7946 
7947   gcc_assert (GET_CODE (call) == CALL);
7948 
7949   /* Determine if this is a local call.  */
7950   call_dest = XEXP (XEXP (call, 0), 0);
7951   call_decl = SYMBOL_REF_DECL (call_dest);
7952   local_call = call_decl && targetm.binds_local_p (call_decl);
7953 
7954   /* pc-relative branch.  */
7955   if (!TARGET_LONG_CALLS
7956       && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7957 	  || distance < MAX_PCREL17F_OFFSET))
7958     length += 8;
7959 
7960   /* 64-bit plabel sequence.  */
7961   else if (TARGET_64BIT && !local_call)
7962     length += 24;
7963 
7964   /* non-pic long absolute branch sequence.  */
7965   else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7966     length += 12;
7967 
7968   /* long pc-relative branch sequence.  */
7969   else if (TARGET_LONG_PIC_SDIFF_CALL
7970 	   || (TARGET_GAS && !TARGET_SOM && local_call))
7971     {
7972       length += 20;
7973 
7974       if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7975 	length += 8;
7976     }
7977 
7978   /* 32-bit plabel sequence.  */
7979   else
7980     {
7981       length += 32;
7982 
7983       if (TARGET_SOM)
7984 	length += length_fp_args (insn);
7985 
7986       if (flag_pic)
7987 	length += 4;
7988 
7989       if (!TARGET_PA_20)
7990 	{
7991 	  if (!sibcall)
7992 	    length += 8;
7993 
7994 	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7995 	    length += 8;
7996 	}
7997     }
7998 
7999   return length;
8000 }
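
/* Summarizing the estimates above, in bytes and with the delay slot
   included:

     pc-relative branch			 8
     64-bit plabel sequence		24
     non-PIC long absolute branch	12
     long pc-relative (sdiff) branch	20, plus 8 in some pre-PA 2.0
					space-register configurations
     32-bit plabel sequence		32, plus FP-argument, PIC and
					pre-PA 2.0 additions  */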
8001 
8002 /* INSN is a function call.
8003 
8004    CALL_DEST is the routine we are calling.  */
8005 
8006 const char *
8007 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
8008 {
8009   int seq_length = dbr_sequence_length ();
8010   tree call_decl = SYMBOL_REF_DECL (call_dest);
8011   int local_call = call_decl && targetm.binds_local_p (call_decl);
8012   rtx xoperands[4];
8013 
8014   xoperands[0] = call_dest;
8015 
8016   /* Handle the common case where we're sure that the branch will reach
8017      the beginning of the "$CODE$" subspace.  This is the beginning of
8018      the current function if we are in a named section.  */
8019   if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
8020     {
8021       xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
8022       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8023     }
8024   else
8025     {
8026       if (TARGET_64BIT && !local_call)
8027 	{
8028 	  /* ??? As far as I can tell, the HP linker doesn't support the
8029 	     long pc-relative sequence described in the 64-bit runtime
8030 	     architecture.  So, we use a slightly longer indirect call.  */
8031 	  xoperands[0] = pa_get_deferred_plabel (call_dest);
8032 	  xoperands[1] = gen_label_rtx ();
8033 
8034 	  /* Put the load of %r27 into the delay slot.  We don't need to
8035 	     do anything when generating fast indirect calls.  */
8036 	  if (seq_length != 0)
8037 	    {
8038 	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
8039 			       optimize, 0, NULL);
8040 
8041 	      /* Now delete the delay insn.  */
8042 	      SET_INSN_DELETED (NEXT_INSN (insn));
8043 	    }
8044 
8045 	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
8046 	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
8047 	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
8048 	  output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
8049 	  output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
8050 	  output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
8051 	  seq_length = 1;
8052 	}
8053       else
8054 	{
8055 	  int indirect_call = 0;
8056 
8057 	  /* Emit a long call.  There are several different sequences
8058 	     of increasing length and complexity.  In most cases,
8059              they don't allow an instruction in the delay slot.  */
8060 	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8061 	      && !TARGET_LONG_PIC_SDIFF_CALL
8062 	      && !(TARGET_GAS && !TARGET_SOM && local_call)
8063 	      && !TARGET_64BIT)
8064 	    indirect_call = 1;
8065 
8066 	  if (seq_length != 0
8067 	      && !sibcall
8068 	      && (!TARGET_PA_20
8069 		  || indirect_call
8070 		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
8071 	    {
8072 	      /* A non-jump insn in the delay slot.  By definition we can
8073 		 emit this insn before the call (and in fact before the
8074 		 argument relocation).  */
8075 	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
8076 			       NULL);
8077 
8078 	      /* Now delete the delay insn.  */
8079 	      SET_INSN_DELETED (NEXT_INSN (insn));
8080 	      seq_length = 0;
8081 	    }
8082 
8083 	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8084 	    {
8085 	      /* This is the best sequence for making long calls in
8086 		 non-pic code.  Unfortunately, GNU ld doesn't provide
8087 		 the stub needed for external calls, and GAS's support
8088 		 for this with the SOM linker is buggy.  It is safe
8089 		 to use this for local calls.  */
8090 	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
8091 	      if (sibcall)
8092 		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8093 	      else
8094 		{
8095 		  if (TARGET_PA_20)
8096 		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8097 				     xoperands);
8098 		  else
8099 		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8100 
8101 		  output_asm_insn ("copy %%r31,%%r2", xoperands);
8102 		  seq_length = 1;
8103 		}
8104 	    }
8105 	  else
8106 	    {
8107 	      /* The HP assembler and linker can handle relocations for
8108 		 the difference of two symbols.  The HP assembler
8109 		 recognizes the sequence as a pc-relative call and
8110 		 the linker provides stubs when needed.  */
8111 
8112 	      /* GAS currently can't generate the relocations that
8113 		 are needed for the SOM linker under HP-UX using this
8114 		 sequence.  The GNU linker doesn't generate the stubs
8115 		 that are needed for external calls on TARGET_ELF32
8116 		 with this sequence.  For now, we have to use a longer
8117 	         plabel sequence when using GAS for non-local calls.  */
8118 	      if (TARGET_LONG_PIC_SDIFF_CALL
8119 		  || (TARGET_GAS && !TARGET_SOM && local_call))
8120 		{
8121 		  xoperands[1] = gen_rtx_REG (Pmode, 1);
8122 		  xoperands[2] = xoperands[1];
8123 		  pa_output_pic_pcrel_sequence (xoperands);
8124 		}
8125 	      else
8126 		{
8127 		  /* Emit a long plabel-based call sequence.  This is
8128 		     essentially an inline implementation of $$dyncall.
8129 		     We don't actually try to call $$dyncall as this is
8130 		     as difficult as calling the function itself.  */
8131 		  xoperands[0] = pa_get_deferred_plabel (call_dest);
8132 		  xoperands[1] = gen_label_rtx ();
8133 
8134 		  /* Since the call is indirect, FP arguments in registers
8135 		     need to be copied to the general registers.  Then, the
8136 		     argument relocation stub will copy them back.  */
8137 		  if (TARGET_SOM)
8138 		    copy_fp_args (insn);
8139 
8140 		  if (flag_pic)
8141 		    {
8142 		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
8143 		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8144 		      output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8145 		    }
8146 		  else
8147 		    {
8148 		      output_asm_insn ("addil LR'%0-$global$,%%r27",
8149 				       xoperands);
8150 		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8151 				       xoperands);
8152 		    }
8153 
8154 		  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8155 		  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8156 		  /* Should this be an ordered load to ensure the target
8157 	             address is loaded before the global pointer?  */
8158 		  output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
8159 		  output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);
8160 
8161 		  if (!sibcall && !TARGET_PA_20)
8162 		    {
8163 		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8164 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8165 			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8166 		      else
8167 			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8168 		    }
8169 		}
8170 
8171 	      if (TARGET_PA_20)
8172 		{
8173 		  if (sibcall)
8174 		    output_asm_insn ("bve (%%r1)", xoperands);
8175 		  else
8176 		    {
8177 		      if (indirect_call)
8178 			{
8179 			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8180 			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8181 			  seq_length = 1;
8182 			}
8183 		      else
8184 			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8185 		    }
8186 		}
8187 	      else
8188 		{
8189 		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8190 		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8191 				     xoperands);
8192 
8193 		  if (sibcall)
8194 		    {
8195 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8196 			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8197 		      else
8198 			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8199 		    }
8200 		  else
8201 		    {
8202 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8203 			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8204 		      else
8205 			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8206 
8207 		      if (indirect_call)
8208 			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8209 		      else
8210 			output_asm_insn ("copy %%r31,%%r2", xoperands);
8211 		      seq_length = 1;
8212 		    }
8213 		}
8214 	    }
8215 	}
8216     }
8217 
8218   if (seq_length == 0)
8219     output_asm_insn ("nop", xoperands);
8220 
8221   return "";
8222 }
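
/* Illustrative sketch (added commentary): for a local non-PIC long
   call on pre-2.0 hardware, the TARGET_LONG_ABS_CALL case above
   emits

	ldil L'target,%r1
	ble  R'target(%sr4,%r1)
	copy %r31,%r2

   with the copy riding in the delay slot of the ble to move the
   return address from %r31 into the usual return pointer %r2.  */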
8223 
8224 /* Return the attribute length of the indirect call instruction INSN.
8225    The length must match the code generated by pa_output_indirect_call.
8226    The returned length includes the delay slot.  Currently, the delay
8227    slot of an indirect call sequence is not exposed and it is used by
8228    the sequence itself.  */
8229 
8230 int
8231 pa_attr_length_indirect_call (rtx_insn *insn)
8232 {
8233   unsigned long distance = -1;
8234   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8235 
8236   if (INSN_ADDRESSES_SET_P ())
8237     {
8238       distance = (total + insn_current_reference_address (insn));
8239       if (distance < total)
8240 	distance = -1;
8241     }
8242 
8243   if (TARGET_64BIT)
8244     return 12;
8245 
8246   if (TARGET_FAST_INDIRECT_CALLS)
8247     return 8;
8248 
8249   if (TARGET_PORTABLE_RUNTIME)
8250     return 16;
8251 
8252   if (!TARGET_LONG_CALLS
8253       && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8254 	  || distance < MAX_PCREL17F_OFFSET))
8255     return 8;
8256 
8257   /* Out of reach, can use ble.  */
8258   if (!flag_pic)
8259     return 12;
8260 
8261   /* Inline versions of $$dyncall.  */
8262   if (!optimize_size)
8263     {
8264       if (TARGET_NO_SPACE_REGS)
8265 	return 28;
8266 
8267       if (TARGET_PA_20)
8268 	return 32;
8269     }
8270 
8271   /* Long PIC pc-relative call.  */
8272   return 20;
8273 }
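
/* Worked example (added commentary): with flag_pic, space registers,
   pre-2.0 hardware and -Os, none of the early returns above apply to
   an out-of-range call, so the fall-through costs the indirect call
   at 20 bytes, the five instruction long PIC pc-relative call to
   $$dyncall that pa_output_indirect_call emits for this case.  */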
8274 
8275 const char *
8276 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8277 {
8278   rtx xoperands[4];
8279   int length;
8280 
8281   if (TARGET_64BIT)
8282     {
8283       xoperands[0] = call_dest;
8284       output_asm_insn ("ldd 16(%0),%%r2\n\t"
8285 		       "bve,l (%%r2),%%r2\n\t"
8286 		       "ldd 24(%0),%%r27", xoperands);
8287       return "";
8288     }
8289 
8290   /* First the special case for kernels, level 0 systems, etc.  */
8291   if (TARGET_FAST_INDIRECT_CALLS)
8292     {
8293       pa_output_arg_descriptor (insn);
8294       if (TARGET_PA_20)
8295 	return "bve,l,n (%%r22),%%r2\n\tnop";
8296       return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8297     }
8298 
8299   if (TARGET_PORTABLE_RUNTIME)
8300     {
8301       output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8302 		       "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8303       pa_output_arg_descriptor (insn);
8304       return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8305     }
8306 
8307   /* Now the normal case -- we can reach $$dyncall directly or
8308      we're sure that we can get there via a long-branch stub.
8309 
8310      No need to check target flags as the length uniquely identifies
8311      the remaining cases.  */
8312   length = pa_attr_length_indirect_call (insn);
8313   if (length == 8)
8314     {
8315       pa_output_arg_descriptor (insn);
8316 
8317       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8318 	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8319 	 variant of the B,L instruction can't be used on the SOM target.  */
8320       if (TARGET_PA_20 && !TARGET_SOM)
8321 	return "b,l,n $$dyncall,%%r2\n\tnop";
8322       else
8323 	return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8324     }
8325 
8326   /* Long millicode call, but we are not generating PIC or portable runtime
8327      code.  */
8328   if (length == 12)
8329     {
8330       output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8331       pa_output_arg_descriptor (insn);
8332       return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8333     }
8334 
8335   /* The long PIC pc-relative call sequence is five instructions.  So,
8336      let's use an inline version of $$dyncall when the calling sequence
8337      has a roughly similar number of instructions and we are not optimizing
8338      for size.  The inline version needs two instructions to set up the
8339      return pointer, plus the $$dyncall implementation itself.  */
8340   if (!optimize_size)
8341     {
8342       if (TARGET_NO_SPACE_REGS)
8343 	{
8344 	  pa_output_arg_descriptor (insn);
8345 	  output_asm_insn ("bl .+8,%%r2\n\t"
8346 			   "ldo 20(%%r2),%%r2\n\t"
8347 			   "extru,<> %%r22,30,1,%%r0\n\t"
8348 			   "bv,n %%r0(%%r22)\n\t"
8349 			   "ldw -2(%%r22),%%r21\n\t"
8350 			   "bv %%r0(%%r21)\n\t"
8351 			   "ldw 2(%%r22),%%r19", xoperands);
8352 	  return "";
8353 	}
8354       if (TARGET_PA_20)
8355 	{
8356 	  pa_output_arg_descriptor (insn);
8357 	  output_asm_insn ("bl .+8,%%r2\n\t"
8358 			   "ldo 24(%%r2),%%r2\n\t"
8359 			   "stw %%r2,-24(%%sp)\n\t"
8360 			   "extru,<> %%r22,30,1,%%r0\n\t"
8361 			   "bve,n (%%r22)\n\t"
8362 			   "ldw -2(%%r22),%%r21\n\t"
8363 			   "bve (%%r21)\n\t"
8364 			   "ldw 2(%%r22),%%r19", xoperands);
8365 	  return "";
8366 	}
8367     }
8368 
8369   /* We need a long PIC call to $$dyncall.  */
8370   xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8371   xoperands[1] = gen_rtx_REG (Pmode, 2);
8372   xoperands[2] = gen_rtx_REG (Pmode, 1);
8373   pa_output_pic_pcrel_sequence (xoperands);
8374   pa_output_arg_descriptor (insn);
8375   return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8376 }
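
/* Cross-check (added commentary): the TARGET_NO_SPACE_REGS inline
   $$dyncall above is seven instructions (28 bytes) and the
   TARGET_PA_20 variant eight (32 bytes), matching the 28 and 32 byte
   lengths returned by pa_attr_length_indirect_call.  */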
8377 
8378 /* In HPUX 8.0's shared library scheme, special relocations are needed
8379    for function labels if they might be passed to a function
8380    in a shared library (because shared libraries don't live in code
8381    space), and special magic is needed to construct their address.  */
8382 
8383 void
8384 pa_encode_label (rtx sym)
8385 {
8386   const char *str = XSTR (sym, 0);
8387   int len = strlen (str) + 1;
8388   char *newstr, *p;
8389 
8390   p = newstr = XALLOCAVEC (char, len + 1);
8391   *p++ = '@';
8392   strcpy (p, str);
8393 
8394   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8395 }
8396 
8397 static void
8398 pa_encode_section_info (tree decl, rtx rtl, int first)
8399 {
8400   int old_referenced = 0;
8401 
8402   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8403     old_referenced
8404       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8405 
8406   default_encode_section_info (decl, rtl, first);
8407 
8408   if (first && TEXT_SPACE_P (decl))
8409     {
8410       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8411       if (TREE_CODE (decl) == FUNCTION_DECL)
8412 	pa_encode_label (XEXP (rtl, 0));
8413     }
8414   else if (old_referenced)
8415     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8416 }
8417 
8418 /* This is sort of inverse to pa_encode_section_info.  */
8419 
8420 static const char *
8421 pa_strip_name_encoding (const char *str)
8422 {
8423   str += (*str == '@');
8424   str += (*str == '*');
8425   return str;
8426 }
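
/* Example (added commentary): pa_encode_label turns "foo" into
   "@foo"; pa_strip_name_encoding undoes this and also skips the
   generic '*' user-label marker, so "@foo", "*foo" and "@*foo" all
   strip back to "foo".  */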
8427 
8428 /* Returns 1 if OP is a function label involved in a simple addition
8429    with a constant.  Used to keep certain patterns from matching
8430    during instruction combination.  */
8431 int
8432 pa_is_function_label_plus_const (rtx op)
8433 {
8434   /* Strip off any CONST.  */
8435   if (GET_CODE (op) == CONST)
8436     op = XEXP (op, 0);
8437 
8438   return (GET_CODE (op) == PLUS
8439 	  && function_label_operand (XEXP (op, 0), VOIDmode)
8440 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8441 }
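
/* Example (added commentary, hypothetical symbol): assuming foo is a
   function in text space, the RTL

	(const (plus (symbol_ref "@foo") (const_int 4)))

   has its CONST stripped and the function returns 1; a PLUS with a
   non-constant second operand returns 0.  */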
8442 
8443 /* Output the assembler code for a thunk function.  THUNK_DECL is the
8444    declaration for the thunk function itself, FUNCTION is the decl for
8445    the target function.  DELTA is an immediate constant offset to be
8446    added to THIS.  If VCALL_OFFSET is nonzero, the word at
8447    *(*this + vcall_offset) should be added to THIS.  */
8448 
8449 static void
8450 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8451 			HOST_WIDE_INT vcall_offset, tree function)
8452 {
8453   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8454   static unsigned int current_thunk_number;
8455   int val_14 = VAL_14_BITS_P (delta);
8456   unsigned int old_last_address = last_address, nbytes = 0;
8457   char label[17];
8458   rtx xoperands[4];
8459 
8460   xoperands[0] = XEXP (DECL_RTL (function), 0);
8461   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8462   xoperands[2] = GEN_INT (delta);
8463 
8464   assemble_start_function (thunk_fndecl, fnname);
8465   final_start_function (emit_barrier (), file, 1);
8466 
8467   if (!vcall_offset)
8468     {
8469       /* Output the thunk.  We know that the function is in the same
8470 	 translation unit (i.e., the same space) as the thunk, and that
8471 	 thunks are output after their method.  Thus, we don't need an
8472 	 external branch to reach the function.  With SOM and GAS,
8473 	 functions and thunks are effectively in different sections.
8474 	 Thus, we can always use an IA-relative branch and the linker
8475 	 will add a long branch stub if necessary.
8476 
8477 	 However, we have to be careful when generating PIC code on the
8478 	 SOM port to ensure that the sequence does not transfer to an
8479 	 import stub for the target function as this could clobber the
8480 	 return value saved at SP-24.  This would also apply to the
8481 	32-bit linux port if the multi-space model is implemented.  */
8482       if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8483 	   && !(flag_pic && TREE_PUBLIC (function))
8484 	   && (TARGET_GAS || last_address < 262132))
8485 	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8486 	      && ((targetm_common.have_named_sections
8487 		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
8488 		   /* The GNU 64-bit linker has rather poor stub management.
8489 		      So, we use a long branch from thunks that aren't in
8490 		      the same section as the target function.  */
8491 		    && ((!TARGET_64BIT
8492 			 && (DECL_SECTION_NAME (thunk_fndecl)
8493 			     != DECL_SECTION_NAME (function)))
8494 			|| ((DECL_SECTION_NAME (thunk_fndecl)
8495 			     == DECL_SECTION_NAME (function))
8496 			    && last_address < 262132)))
8497 		  /* In this case, we need to be able to reach the start of
8498 		     the stub table even though the function is likely closer
8499 		     and can be jumped to directly.  */
8500 		  || (targetm_common.have_named_sections
8501 		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
8502 		      && DECL_SECTION_NAME (function) == NULL
8503 		      && total_code_bytes < MAX_PCREL17F_OFFSET)
8504 		  /* Likewise.  */
8505 		  || (!targetm_common.have_named_sections
8506 		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
8507 	{
8508 	  if (!val_14)
8509 	    output_asm_insn ("addil L'%2,%%r26", xoperands);
8510 
8511 	  output_asm_insn ("b %0", xoperands);
8512 
8513 	  if (val_14)
8514 	    {
8515 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8516 	      nbytes += 8;
8517 	    }
8518 	  else
8519 	    {
8520 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8521 	      nbytes += 12;
8522 	    }
8523 	}
8524       else if (TARGET_64BIT)
8525 	{
8526 	  rtx xop[4];
8527 
8528 	  /* We only have one call-clobbered scratch register, so we can't
8529 	     make use of the delay slot if delta doesn't fit in 14 bits.  */
8530 	  if (!val_14)
8531 	    {
8532 	      output_asm_insn ("addil L'%2,%%r26", xoperands);
8533 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8534 	    }
8535 
8536 	  /* Load function address into %r1.  */
8537 	  xop[0] = xoperands[0];
8538 	  xop[1] = gen_rtx_REG (Pmode, 1);
8539 	  xop[2] = xop[1];
8540 	  pa_output_pic_pcrel_sequence (xop);
8541 
8542 	  if (val_14)
8543 	    {
8544 	      output_asm_insn ("bv %%r0(%%r1)", xoperands);
8545 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8546 	      nbytes += 20;
8547 	    }
8548 	  else
8549 	    {
8550 	      output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8551 	      nbytes += 24;
8552 	    }
8553 	}
8554       else if (TARGET_PORTABLE_RUNTIME)
8555 	{
8556 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
8557 	  output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8558 
8559 	  if (!val_14)
8560 	    output_asm_insn ("ldil L'%2,%%r26", xoperands);
8561 
8562 	  output_asm_insn ("bv %%r0(%%r22)", xoperands);
8563 
8564 	  if (val_14)
8565 	    {
8566 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8567 	      nbytes += 16;
8568 	    }
8569 	  else
8570 	    {
8571 	      output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8572 	      nbytes += 20;
8573 	    }
8574 	}
8575       else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8576 	{
8577 	  /* The function is accessible from outside this module.  The only
8578 	     way to avoid an import stub between the thunk and function is to
8579 	     call the function directly with an indirect sequence similar to
8580 	     that used by $$dyncall.  This is possible because $$dyncall acts
8581 	     as the import stub in an indirect call.  */
8582 	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8583 	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8584 	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
8585 	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8586 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8587 	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8588 	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8589 	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8590 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8591 
8592 	  if (!val_14)
8593 	    {
8594 	      output_asm_insn ("addil L'%2,%%r26", xoperands);
8595 	      nbytes += 4;
8596 	    }
8597 
8598 	  if (TARGET_PA_20)
8599 	    {
8600 	      output_asm_insn ("bve (%%r22)", xoperands);
8601 	      nbytes += 36;
8602 	    }
8603 	  else if (TARGET_NO_SPACE_REGS)
8604 	    {
8605 	      output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8606 	      nbytes += 36;
8607 	    }
8608 	  else
8609 	    {
8610 	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8611 	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8612 	      output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8613 	      nbytes += 44;
8614 	    }
8615 
8616 	  if (val_14)
8617 	    output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8618 	  else
8619 	    output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8620 	}
8621       else if (flag_pic)
8622 	{
8623 	  rtx xop[4];
8624 
8625 	  /* Load function address into %r22.  */
8626 	  xop[0] = xoperands[0];
8627 	  xop[1] = gen_rtx_REG (Pmode, 1);
8628 	  xop[2] = gen_rtx_REG (Pmode, 22);
8629 	  pa_output_pic_pcrel_sequence (xop);
8630 
8631 	  if (!val_14)
8632 	    output_asm_insn ("addil L'%2,%%r26", xoperands);
8633 
8634 	  output_asm_insn ("bv %%r0(%%r22)", xoperands);
8635 
8636 	  if (val_14)
8637 	    {
8638 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8639 	      nbytes += 20;
8640 	    }
8641 	  else
8642 	    {
8643 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8644 	      nbytes += 24;
8645 	    }
8646 	}
8647       else
8648 	{
8649 	  if (!val_14)
8650 	    output_asm_insn ("addil L'%2,%%r26", xoperands);
8651 
8652 	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
8653 	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8654 
8655 	  if (val_14)
8656 	    {
8657 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8658 	      nbytes += 12;
8659 	    }
8660 	  else
8661 	    {
8662 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8663 	      nbytes += 16;
8664 	    }
8665 	}
8666     }
8667   else
8668     {
8669       rtx xop[4];
8670 
8671       /* Add DELTA to THIS.  */
8672       if (val_14)
8673 	{
8674 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8675 	  nbytes += 4;
8676 	}
8677       else
8678 	{
8679 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8680 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8681 	  nbytes += 8;
8682 	}
8683 
8684       if (TARGET_64BIT)
8685 	{
8686 	  /* Load *(THIS + DELTA) to %r1.  */
8687 	  output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);
8688 
8689 	  val_14 = VAL_14_BITS_P (vcall_offset);
8690 	  xoperands[2] = GEN_INT (vcall_offset);
8691 
8692 	  /* Load  *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
8693 	  if (val_14)
8694 	    {
8695 	      output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
8696 	      nbytes += 8;
8697 	    }
8698 	  else
8699 	    {
8700 	      output_asm_insn ("addil L'%2,%%r1", xoperands);
8701 	      output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
8702 	      nbytes += 12;
8703 	    }
8704 	}
8705       else
8706 	{
8707 	  /* Load *(THIS + DELTA) to %r1.  */
8708 	  output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);
8709 
8710 	  val_14 = VAL_14_BITS_P (vcall_offset);
8711 	  xoperands[2] = GEN_INT (vcall_offset);
8712 
8713 	  /* Load  *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
8714 	  if (val_14)
8715 	    {
8716 	      output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
8717 	      nbytes += 8;
8718 	    }
8719 	  else
8720 	    {
8721 	      output_asm_insn ("addil L'%2,%%r1", xoperands);
8722 	      output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
8723 	      nbytes += 12;
8724 	    }
8725 	}
8726 
8727       /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible.  */
8728       if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8729 	   && !(flag_pic && TREE_PUBLIC (function))
8730 	   && (TARGET_GAS || last_address < 262132))
8731 	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8732 	      && ((targetm_common.have_named_sections
8733 		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
8734 		   /* The GNU 64-bit linker has rather poor stub management.
8735 		      So, we use a long branch from thunks that aren't in
8736 		      the same section as the target function.  */
8737 		    && ((!TARGET_64BIT
8738 			 && (DECL_SECTION_NAME (thunk_fndecl)
8739 			     != DECL_SECTION_NAME (function)))
8740 			|| ((DECL_SECTION_NAME (thunk_fndecl)
8741 			     == DECL_SECTION_NAME (function))
8742 			    && last_address < 262132)))
8743 		  /* In this case, we need to be able to reach the start of
8744 		     the stub table even though the function is likely closer
8745 		     and can be jumped to directly.  */
8746 		  || (targetm_common.have_named_sections
8747 		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
8748 		      && DECL_SECTION_NAME (function) == NULL
8749 		      && total_code_bytes < MAX_PCREL17F_OFFSET)
8750 		  /* Likewise.  */
8751 		  || (!targetm_common.have_named_sections
8752 		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
8753 	{
8754 	  nbytes += 4;
8755 	  output_asm_insn ("b %0", xoperands);
8756 
8757 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8758 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8759 	}
8760       else if (TARGET_64BIT)
8761 	{
8762 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8763 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8764 
8765 	  /* Load function address into %r1.  */
8766 	  nbytes += 16;
8767 	  xop[0] = xoperands[0];
8768 	  xop[1] = gen_rtx_REG (Pmode, 1);
8769 	  xop[2] = xop[1];
8770 	  pa_output_pic_pcrel_sequence (xop);
8771 
8772 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8773 	}
8774       else if (TARGET_PORTABLE_RUNTIME)
8775 	{
8776 	  /* Load function address into %r22.  */
8777 	  nbytes += 12;
8778 	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
8779 	  output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);
8780 
8781 	  output_asm_insn ("bv %%r0(%%r22)", xoperands);
8782 
8783 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8784 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8785 	}
8786       else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8787 	{
8788 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8789 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8790 
8791 	  /* The function is accessible from outside this module.  The only
8792 	     way to avoid an import stub between the thunk and function is to
8793 	     call the function directly with an indirect sequence similar to
8794 	     that used by $$dyncall.  This is possible because $$dyncall acts
8795 	     as the import stub in an indirect call.  */
8796 	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8797 	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8798 	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
8799 	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8800 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8801 	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8802 	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8803 	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8804 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8805 
8806 	  if (TARGET_PA_20)
8807 	    {
8808 	      output_asm_insn ("bve,n (%%r22)", xoperands);
8809 	      nbytes += 32;
8810 	    }
8811 	  else if (TARGET_NO_SPACE_REGS)
8812 	    {
8813 	      output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
8814 	      nbytes += 32;
8815 	    }
8816 	  else
8817 	    {
8818 	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8819 	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8820 	      output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
8821 	      nbytes += 40;
8822 	    }
8823 	}
8824       else if (flag_pic)
8825 	{
8826 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8827 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8828 
8829 	  /* Load function address into %r1.  */
8830 	  nbytes += 16;
8831 	  xop[0] = xoperands[0];
8832 	  xop[1] = gen_rtx_REG (Pmode, 1);
8833 	  xop[2] = xop[1];
8834 	  pa_output_pic_pcrel_sequence (xop);
8835 
8836 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8837 	}
8838       else
8839 	{
8840 	  /* Load function address into %r22.  */
8841 	  nbytes += 8;
8842 	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
8843 	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8844 
8845 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8846 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8847 	}
8848     }
8849 
8850   final_end_function ();
8851 
8852   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8853     {
8854       switch_to_section (data_section);
8855       output_asm_insn (".align 4", xoperands);
8856       ASM_OUTPUT_LABEL (file, label);
8857       output_asm_insn (".word P'%0", xoperands);
8858     }
8859 
8860   current_thunk_number++;
8861   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8862 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8863   last_address += nbytes;
8864   if (old_last_address > last_address)
8865     last_address = UINT_MAX;
8866   update_total_code_bytes (nbytes);
8867   assemble_end_function (thunk_fndecl, fnname);
8868 }
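
/* Illustrative sketch (added commentary): for a non-PIC thunk whose
   delta fits in 14 bits and that needs no vcall offset, the final
   case above emits just

	ldil L'function,%r22
	be   R'function(%sr4,%r22)
	ldo  delta(%r26),%r26

   twelve bytes in all, with the adjustment of THIS (%r26) riding in
   the delay slot of the interspace branch.  */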
8869 
8870 /* Only direct calls to static functions are allowed to be sibling (tail)
8871    call optimized.
8872 
8873    This restriction is necessary because some linker generated stubs
8874    store the return pointer into rp' in some cases, which might clobber
8875    a live value already in rp'.
8876 
8877    In a sibcall the current function and the target function share stack
8878    space.  Thus if the path to the current function and the path to the
8879    target function save a value in rp', they save the value into the
8880    same stack slot, which has undesirable consequences.
8881 
8882    Because of the deferred binding nature of shared libraries any function
8883    with external scope could be in a different load module and thus require
8884    rp' to be saved when calling that function.  So sibcall optimizations
8885    can only be safe for static functions.
8886 
8887    Note that GCC never needs return value relocations, so we don't have to
8888    worry about static calls with return value relocations (which require
8889    saving rp').
8890 
8891    It is safe to perform a sibcall optimization when the target function
8892    will never return.  */
8893 static bool
8894 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8895 {
8896   /* Sibcalls are not ok because the arg pointer register is not a fixed
8897      register.  This prevents the sibcall optimization from occurring.  In
8898      addition, there are problems with stub placement using GNU ld.  This
8899      is because a normal sibcall branch uses a 17-bit relocation while
8900      a regular call branch uses a 22-bit relocation.  As a result, more
8901      care needs to be taken in the placement of long-branch stubs.  */
8902   if (TARGET_64BIT)
8903     return false;
8904 
8905   if (TARGET_PORTABLE_RUNTIME)
8906     return false;
8907 
8908   /* Sibcalls are only ok within a translation unit.  */
8909   return decl && targetm.binds_local_p (decl);
8910 }
8911 
8912 /* ??? Addition is not commutative on the PA due to the weird implicit
8913    space register selection rules for memory addresses.  Therefore, we
8914    don't consider a + b == b + a, as this might be inside a MEM.  */
8915 static bool
8916 pa_commutative_p (const_rtx x, int outer_code)
8917 {
8918   return (COMMUTATIVE_P (x)
8919 	  && (TARGET_NO_SPACE_REGS
8920 	      || (outer_code != UNKNOWN && outer_code != MEM)
8921 	      || GET_CODE (x) != PLUS));
8922 }
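
/* Example (added commentary): (mem:SI (plus (reg A) (reg B))) selects
   its space register from the base register, so swapping the PLUS
   operands inside a MEM may address a different space.  PLUS is
   therefore reported as commutative only when space registers are
   disabled or the context is known not to be a MEM.  */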
8923 
8924 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8925    use in fmpyadd instructions.  */
8926 int
8927 pa_fmpyaddoperands (rtx *operands)
8928 {
8929   machine_mode mode = GET_MODE (operands[0]);
8930 
8931   /* Must be a floating point mode.  */
8932   if (mode != SFmode && mode != DFmode)
8933     return 0;
8934 
8935   /* All modes must be the same.  */
8936   if (! (mode == GET_MODE (operands[1])
8937 	 && mode == GET_MODE (operands[2])
8938 	 && mode == GET_MODE (operands[3])
8939 	 && mode == GET_MODE (operands[4])
8940 	 && mode == GET_MODE (operands[5])))
8941     return 0;
8942 
8943   /* All operands must be registers.  */
8944   if (! (GET_CODE (operands[1]) == REG
8945 	 && GET_CODE (operands[2]) == REG
8946 	 && GET_CODE (operands[3]) == REG
8947 	 && GET_CODE (operands[4]) == REG
8948 	 && GET_CODE (operands[5]) == REG))
8949     return 0;
8950 
8951   /* Only 2 real operands to the addition.  One of the input operands must
8952      be the same as the output operand.  */
8953   if (! rtx_equal_p (operands[3], operands[4])
8954       && ! rtx_equal_p (operands[3], operands[5]))
8955     return 0;
8956 
8957   /* Inout operand of add cannot conflict with any operands from multiply.  */
8958   if (rtx_equal_p (operands[3], operands[0])
8959      || rtx_equal_p (operands[3], operands[1])
8960      || rtx_equal_p (operands[3], operands[2]))
8961     return 0;
8962 
8963   /* The multiply cannot feed into the addition operands.  */
8964   if (rtx_equal_p (operands[4], operands[0])
8965       || rtx_equal_p (operands[5], operands[0]))
8966     return 0;
8967 
8968   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8969   if (mode == SFmode
8970       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8971 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8972 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8973 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8974 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8975 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8976     return 0;
8977 
8978   /* Passed.  Operands are suitable for fmpyadd.  */
8979   return 1;
8980 }
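
/* Example (added commentary, DFmode so any FP register qualifies):
   operands[0..2] = {%fr6, %fr4, %fr5} for the multiply and
   operands[3..5] = {%fr7, %fr7, %fr8} for the add pass every test
   above, so the pair can be output as a single fmpyadd.  Using %fr6
   as operands[4] or operands[5] would fail the "multiply cannot feed
   into addition" check and return 0.  */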
8981 
8982 #if !defined(USE_COLLECT2)
8983 static void
8984 pa_asm_out_constructor (rtx symbol, int priority)
8985 {
8986   if (!function_label_operand (symbol, VOIDmode))
8987     pa_encode_label (symbol);
8988 
8989 #ifdef CTORS_SECTION_ASM_OP
8990   default_ctor_section_asm_out_constructor (symbol, priority);
8991 #else
8992 # ifdef TARGET_ASM_NAMED_SECTION
8993   default_named_section_asm_out_constructor (symbol, priority);
8994 # else
8995   default_stabs_asm_out_constructor (symbol, priority);
8996 # endif
8997 #endif
8998 }
8999 
9000 static void
9001 pa_asm_out_destructor (rtx symbol, int priority)
9002 {
9003   if (!function_label_operand (symbol, VOIDmode))
9004     pa_encode_label (symbol);
9005 
9006 #ifdef DTORS_SECTION_ASM_OP
9007   default_dtor_section_asm_out_destructor (symbol, priority);
9008 #else
9009 # ifdef TARGET_ASM_NAMED_SECTION
9010   default_named_section_asm_out_destructor (symbol, priority);
9011 # else
9012   default_stabs_asm_out_destructor (symbol, priority);
9013 # endif
9014 #endif
9015 }
9016 #endif
9017 
9018 /* This function places uninitialized global data in the bss section.
9019    The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9020    function on the SOM port to prevent uninitialized global data from
9021    being placed in the data section.  */
9022 
9023 void
9024 pa_asm_output_aligned_bss (FILE *stream,
9025 			   const char *name,
9026 			   unsigned HOST_WIDE_INT size,
9027 			   unsigned int align)
9028 {
9029   switch_to_section (bss_section);
9030 
9031 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9032   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
9033 #endif
9034 
9035 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9036   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
9037 #endif
9038 
9039   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9040   ASM_OUTPUT_LABEL (stream, name);
9041   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9042 }
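
/* Example (added commentary): a 16 byte object with 64-bit alignment
   emits ".align 8", the label for NAME and ".block 16" into the bss
   section, preceded by the .type and .size directives when the
   assembler supports them.  */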
9043 
9044 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9045    that doesn't allow the alignment of global common storage to be directly
9046    specified.  The SOM linker aligns common storage based on the rounded
9047    value of the NUM_BYTES parameter in the .comm directive.  It's not
9048    possible to use the .align directive as it doesn't affect the alignment
9049    of the label associated with a .comm directive.  */
9050 
9051 void
9052 pa_asm_output_aligned_common (FILE *stream,
9053 			      const char *name,
9054 			      unsigned HOST_WIDE_INT size,
9055 			      unsigned int align)
9056 {
9057   unsigned int max_common_align;
9058 
9059   max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9060   if (align > max_common_align)
9061     {
9062       warning (0, "alignment (%u) for %s exceeds maximum alignment "
9063 	       "for global common data.  Using %u",
9064 	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
9065       align = max_common_align;
9066     }
9067 
9068   switch_to_section (bss_section);
9069 
9070   assemble_name (stream, name);
9071   fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9072            MAX (size, align / BITS_PER_UNIT));
9073 }
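
/* Worked example (added commentary): a request for 8 bytes of common
   storage with 16 byte (128-bit) alignment on the 32-bit port emits
   ".comm 16" after the symbol name, since NUM_BYTES is rounded up to
   MAX (size, align / BITS_PER_UNIT) and the SOM linker then derives
   the required 16 byte alignment from the rounded size.  */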
9074 
9075 /* We can't use .comm for local common storage as the SOM linker effectively
9076    treats the symbol as universal and uses the same storage for local symbols
9077    with the same name in different object files.  The .block directive
9078    reserves an uninitialized block of storage.  However, it's not common
9079    storage.  Fortunately, GCC never requests common storage with the same
9080    name in any given translation unit.  */
9081 
9082 void
9083 pa_asm_output_aligned_local (FILE *stream,
9084 			     const char *name,
9085 			     unsigned HOST_WIDE_INT size,
9086 			     unsigned int align)
9087 {
9088   switch_to_section (bss_section);
9089   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9090 
9091 #ifdef LOCAL_ASM_OP
9092   fprintf (stream, "%s", LOCAL_ASM_OP);
9093   assemble_name (stream, name);
9094   fprintf (stream, "\n");
9095 #endif
9096 
9097   ASM_OUTPUT_LABEL (stream, name);
9098   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9099 }
9100 
9101 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9102    use in fmpysub instructions.  */
9103 int
9104 pa_fmpysuboperands (rtx *operands)
9105 {
9106   machine_mode mode = GET_MODE (operands[0]);
9107 
9108   /* Must be a floating point mode.  */
9109   if (mode != SFmode && mode != DFmode)
9110     return 0;
9111 
9112   /* All modes must be the same.  */
9113   if (! (mode == GET_MODE (operands[1])
9114 	 && mode == GET_MODE (operands[2])
9115 	 && mode == GET_MODE (operands[3])
9116 	 && mode == GET_MODE (operands[4])
9117 	 && mode == GET_MODE (operands[5])))
9118     return 0;
9119 
9120   /* All operands must be registers.  */
9121   if (! (GET_CODE (operands[1]) == REG
9122 	 && GET_CODE (operands[2]) == REG
9123 	 && GET_CODE (operands[3]) == REG
9124 	 && GET_CODE (operands[4]) == REG
9125 	 && GET_CODE (operands[5]) == REG))
9126     return 0;
9127 
9128   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
9129      operation, so operands[4] must be the same as operands[3].  */
9130   if (! rtx_equal_p (operands[3], operands[4]))
9131     return 0;
9132 
9133   /* The multiply cannot feed into the subtraction.  */
9134   if (rtx_equal_p (operands[5], operands[0]))
9135     return 0;
9136 
9137   /* Inout operand of sub cannot conflict with any operands from multiply.  */
9138   if (rtx_equal_p (operands[3], operands[0])
9139      || rtx_equal_p (operands[3], operands[1])
9140      || rtx_equal_p (operands[3], operands[2]))
9141     return 0;
9142 
9143   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
9144   if (mode == SFmode
9145       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9146 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9147 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9148 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9149 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9150 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9151     return 0;
9152 
9153   /* Passed.  Operands are suitable for fmpysub.  */
9154   return 1;
9155 }
9156 
9157 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
9158    constants for a MULT embedded inside a memory address.  */
9159 int
9160 pa_mem_shadd_constant_p (int val)
9161 {
9162   if (val == 2 || val == 4 || val == 8)
9163     return 1;
9164   else
9165     return 0;
9166 }
9167 
9168 /* Return 1 if the given constant is 1, 2, or 3.  These are the valid
9169    constants for shadd instructions.  */
9170 int
9171 pa_shadd_constant_p (int val)
9172 {
9173   if (val == 1 || val == 2 || val == 3)
9174     return 1;
9175   else
9176     return 0;
9177 }
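
/* Example (added commentary): both predicates describe the same
   scaled index at different stages.  An address of the form
   (plus (mult (reg) (const_int 8)) (reg)) uses the MULT constant 8
   accepted by pa_mem_shadd_constant_p, while the sh3add insn that
   implements it uses the shift count 3 accepted by
   pa_shadd_constant_p; the shift count is log2 of the multiplier.  */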
9178 
9179 /* Return TRUE if INSN branches forward.  */
9180 
9181 static bool
9182 forward_branch_p (rtx_insn *insn)
9183 {
9184   rtx lab = JUMP_LABEL (insn);
9185 
9186   /* The INSN must have a jump label.  */
9187   gcc_assert (lab != NULL_RTX);
9188 
9189   if (INSN_ADDRESSES_SET_P ())
9190     return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
9191 
9192   while (insn)
9193     {
9194       if (insn == lab)
9195 	return true;
9196       else
9197 	insn = NEXT_INSN (insn);
9198     }
9199 
9200   return false;
9201 }
9202 
9203 /* Output an unconditional move and branch insn.  */
9204 
9205 const char *
9206 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9207 {
9208   int length = get_attr_length (insn);
9209 
9210   /* These are the cases in which we win.  */
9211   if (length == 4)
9212     return "mov%I1b,tr %1,%0,%2";
9213 
9214   /* None of the following cases win, but they don't lose either.  */
9215   if (length == 8)
9216     {
9217       if (dbr_sequence_length () == 0)
9218 	{
9219 	  /* Nothing in the delay slot, fake it by putting the combined
9220 	     insn (the copy or add) in the delay slot of a bl.  */
9221 	  if (GET_CODE (operands[1]) == CONST_INT)
9222 	    return "b %2\n\tldi %1,%0";
9223 	  else
9224 	    return "b %2\n\tcopy %1,%0";
9225 	}
9226       else
9227 	{
9228 	  /* Something in the delay slot, but we've got a long branch.  */
9229 	  if (GET_CODE (operands[1]) == CONST_INT)
9230 	    return "ldi %1,%0\n\tb %2";
9231 	  else
9232 	    return "copy %1,%0\n\tb %2";
9233 	}
9234     }
9235 
9236   if (GET_CODE (operands[1]) == CONST_INT)
9237     output_asm_insn ("ldi %1,%0", operands);
9238   else
9239     output_asm_insn ("copy %1,%0", operands);
9240   return pa_output_lbranch (operands[2], insn, 1);
9241 }
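
/* Example (added commentary, hypothetical operands): in the length 4
   case above, "movb,tr %r5,%r3,L$17" copies %r5 into %r3 and
   branches to L$17 in one insn; the ",tr" completer makes the
   normally conditional branch unconditional.  */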
9242 
9243 /* Output an unconditional add and branch insn.  */
9244 
9245 const char *
9246 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9247 {
9248   int length = get_attr_length (insn);
9249 
9250   /* To make life easy we want operand0 to be the shared input/output
9251      operand and operand1 to be the readonly operand.  */
9252   if (operands[0] == operands[1])
9253     operands[1] = operands[2];
9254 
9255   /* These are the cases in which we win.  */
9256   if (length == 4)
9257     return "add%I1b,tr %1,%0,%3";
9258 
9259   /* None of the following cases win, but they don't lose either.  */
9260   if (length == 8)
9261     {
9262       if (dbr_sequence_length () == 0)
9263 	/* Nothing in the delay slot, fake it by putting the combined
9264 	   insn (the copy or add) in the delay slot of a bl.  */
9265 	return "b %3\n\tadd%I1 %1,%0,%0";
9266       else
9267 	/* Something in the delay slot, but we've got a long branch.  */
9268 	return "add%I1 %1,%0,%0\n\tb %3";
9269     }
9270 
9271   output_asm_insn ("add%I1 %1,%0,%0", operands);
9272   return pa_output_lbranch (operands[3], insn, 1);
9273 }
9274 
9275 /* We use this hook to perform a PA specific optimization which is difficult
9276    to do in earlier passes.  */
9277 
9278 static void
9279 pa_reorg (void)
9280 {
9281   remove_useless_addtr_insns (1);
9282 
9283   if (pa_cpu < PROCESSOR_8000)
9284     pa_combine_instructions ();
9285 }
9286 
9287 /* The PA has a number of odd instructions which can perform multiple
9288    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
9289    it may be profitable to combine two instructions into one instruction
9290    with two outputs.  It's not profitable on PA2.0 machines because the
9291    two outputs would take two slots in the reorder buffers.
9292 
9293    This routine finds instructions which can be combined and combines
9294    them.  We only support some of the potential combinations, and we
9295    only try common ways to find suitable instructions.
9296 
9297       * addb can add two registers or a register and a small integer
9298       and jump to a nearby (+-8k) location.  Normally the jump to the
9299       nearby location is conditional on the result of the add, but by
9300       using the "true" condition we can make the jump unconditional.
9301       Thus addb can perform two independent operations in one insn.
9302 
9303       * movb is similar to addb in that it can perform a reg->reg
9304       or small immediate->reg copy and jump to a nearby (+-8k) location.
9305 
9306       * fmpyadd and fmpysub can perform a FP multiply and either an
9307       FP add or FP sub if the operands of the multiply and add/sub are
9308       independent (there are other minor restrictions).  Note both
9309       the fmpy and fadd/fsub can in theory move to better spots according
9310       to data dependencies, but for now we require the fmpy stay at a
9311       fixed location.
9312 
9313       * Many of the memory operations can perform pre & post updates
9314       of index registers.  GCC's pre/post increment/decrement addressing
9315       is far too simple to take advantage of all the possibilities.  This
9316       pass may not be suitable since those insns may not be independent.
9317 
9318       * comclr can compare two ints or an int and a register, nullify
9319       the following instruction and zero some other register.  This
9320       is more difficult to use as it's harder to find an insn which
9321       will generate a comclr than finding something like an unconditional
9322       branch.  (conditional moves & long branches create comclr insns).
9323 
9324       * Most arithmetic operations can conditionally skip the next
9325       instruction.  They can be viewed as "perform this operation
9326       and conditionally jump to this nearby location" (where nearby
9327       is a few insns away).  These are difficult to use due to the
9328       branch length restrictions.  */
9329 
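/* Illustrative sketch (added commentary): on a PA 1.1 target the pass
   can fuse

	fmpy,dbl %fr4,%fr5,%fr6
	fadd,dbl %fr7,%fr8,%fr8

   into a single two-output fmpyadd insn once pa_can_combine_p shows
   the operands are independent; the precise operand order of the
   fused instruction is fixed by the fmpyadd patterns in pa.md.  */
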
9330 static void
9331 pa_combine_instructions (void)
9332 {
9333   rtx_insn *anchor;
9334 
9335   /* This can get expensive since the basic algorithm is on the
9336      order of O(n^2) (or worse).  Only do it for -O2 or higher
9337      levels of optimization.  */
9338   if (optimize < 2)
9339     return;
9340 
9341   /* Walk down the list of insns looking for "anchor" insns which
9342      may be combined with "floating" insns.  As the name implies,
9343      "anchor" instructions don't move, while "floating" insns may
9344      move around.  */
9345   rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9346   rtx_insn *new_rtx = make_insn_raw (par);
9347 
9348   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9349     {
9350       enum attr_pa_combine_type anchor_attr;
9351       enum attr_pa_combine_type floater_attr;
9352 
9353       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9354 	 Also ignore any special USE insns.  */
9355       if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9356 	  || GET_CODE (PATTERN (anchor)) == USE
9357 	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
9358 	continue;
9359 
9360       anchor_attr = get_attr_pa_combine_type (anchor);
9361       /* See if anchor is an insn suitable for combination.  */
9362       if (anchor_attr == PA_COMBINE_TYPE_FMPY
9363 	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9364 	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9365 	      && ! forward_branch_p (anchor)))
9366 	{
9367 	  rtx_insn *floater;
9368 
9369 	  for (floater = PREV_INSN (anchor);
9370 	       floater;
9371 	       floater = PREV_INSN (floater))
9372 	    {
9373 	      if (NOTE_P (floater)
9374 		  || (NONJUMP_INSN_P (floater)
9375 		      && (GET_CODE (PATTERN (floater)) == USE
9376 			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
9377 		continue;
9378 
9379 	      /* Anything except a regular INSN will stop our search.  */
9380 	      if (! NONJUMP_INSN_P (floater))
9381 		{
9382 		  floater = NULL;
9383 		  break;
9384 		}
9385 
9386 	      /* See if FLOATER is suitable for combination with the
9387 		 anchor.  */
9388 	      floater_attr = get_attr_pa_combine_type (floater);
9389 	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9390 		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9391 		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9392 		      && floater_attr == PA_COMBINE_TYPE_FMPY))
9393 		{
9394 		  /* If ANCHOR and FLOATER can be combined, then we're
9395 		     done with this pass.  */
9396 		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9397 					SET_DEST (PATTERN (floater)),
9398 					XEXP (SET_SRC (PATTERN (floater)), 0),
9399 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9400 		    break;
9401 		}
9402 
9403 	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9404 		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9405 		{
9406 		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9407 		    {
9408 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9409 					    SET_DEST (PATTERN (floater)),
9410 					XEXP (SET_SRC (PATTERN (floater)), 0),
9411 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9412 			break;
9413 		    }
9414 		  else
9415 		    {
9416 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9417 					    SET_DEST (PATTERN (floater)),
9418 					    SET_SRC (PATTERN (floater)),
9419 					    SET_SRC (PATTERN (floater))))
9420 			break;
9421 		    }
9422 		}
9423 	    }
9424 
9425 	  /* If we didn't find anything on the backwards scan try forwards.  */
9426 	  if (!floater
9427 	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
9428 		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9429 	    {
9430 	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
9431 		{
9432 		  if (NOTE_P (floater)
9433 		      || (NONJUMP_INSN_P (floater)
9434 			  && (GET_CODE (PATTERN (floater)) == USE
9435 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
9436 
9437 		    continue;
9438 
9439 		  /* Anything except a regular INSN will stop our search.  */
9440 		  if (! NONJUMP_INSN_P (floater))
9441 		    {
9442 		      floater = NULL;
9443 		      break;
9444 		    }
9445 
9446 		  /* See if FLOATER is suitable for combination with the
9447 		     anchor.  */
9448 		  floater_attr = get_attr_pa_combine_type (floater);
9449 		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9450 		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9451 		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9452 			  && floater_attr == PA_COMBINE_TYPE_FMPY))
9453 		    {
9454 		      /* If ANCHOR and FLOATER can be combined, then we're
9455 			 done with this pass.  */
9456 		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9457 					    SET_DEST (PATTERN (floater)),
9458 					    XEXP (SET_SRC (PATTERN (floater)),
9459 						  0),
9460 					    XEXP (SET_SRC (PATTERN (floater)),
9461 						  1)))
9462 			break;
9463 		    }
9464 		}
9465 	    }
9466 
9467 	  /* FLOATER will be nonzero if we found a suitable floating
9468 	     insn for combination with ANCHOR.  */
9469 	  if (floater
9470 	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9471 		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
9472 	    {
9473 	      /* Emit the new instruction and delete the old anchor.  */
9474 	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9475 				       copy_rtx (PATTERN (floater)));
9476 	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9477 	      emit_insn_before (temp, anchor);
9478 
9479 	      SET_INSN_DELETED (anchor);
9480 
9481 	      /* Emit a special USE insn for FLOATER, then delete
9482 		 the floating insn.  */
9483 	      temp = copy_rtx (PATTERN (floater));
9484 	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9485 	      delete_insn (floater);
9486 
9487 	      continue;
9488 	    }
9489 	  else if (floater
9490 		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9491 	    {
9492 	      /* Emit the new_jump instruction and delete the old anchor.  */
9493 	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9494 				       copy_rtx (PATTERN (floater)));
9495 	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9496 	      temp = emit_jump_insn_before (temp, anchor);
9497 
9498 	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9499 	      SET_INSN_DELETED (anchor);
9500 
9501 	      /* Emit a special USE insn for FLOATER, then delete
9502 		 the floating insn.  */
9503 	      temp = copy_rtx (PATTERN (floater));
9504 	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9505 	      delete_insn (floater);
9506 	      continue;
9507 	    }
9508 	}
9509     }
9510 }
9511 
9512 static int
9513 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9514 		  int reversed, rtx dest,
9515 		  rtx src1, rtx src2)
9516 {
9517   int insn_code_number;
9518   rtx_insn *start, *end;
9519 
9520   /* Create a PARALLEL with the patterns of ANCHOR and
9521      FLOATER, try to recognize it, then test constraints
9522      for the resulting pattern.
9523 
9524      If the pattern doesn't match or the constraints
9525      aren't met keep searching for a suitable floater
9526      insn.  */
9527   XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9528   XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9529   INSN_CODE (new_rtx) = -1;
9530   insn_code_number = recog_memoized (new_rtx);
9531   basic_block bb = BLOCK_FOR_INSN (anchor);
9532   if (insn_code_number < 0
9533       || (extract_insn (new_rtx),
9534 	  !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9535     return 0;
9536 
9537   if (reversed)
9538     {
9539       start = anchor;
9540       end = floater;
9541     }
9542   else
9543     {
9544       start = floater;
9545       end = anchor;
9546     }
9547 
9548   /* There are up to three operands to consider.  One
9549      output and two inputs.
9550 
9551      The output must not be used between FLOATER & ANCHOR
9552      exclusive.  The inputs must not be set between
9553      FLOATER and ANCHOR exclusive.  */
9554 
9555   if (reg_used_between_p (dest, start, end))
9556     return 0;
9557 
9558   if (reg_set_between_p (src1, start, end))
9559     return 0;
9560 
9561   if (reg_set_between_p (src2, start, end))
9562     return 0;
9563 
9564   /* If we get here, then everything is good.  */
9565   return 1;
9566 }
9567 
9568 /* Return nonzero if references for INSN are delayed.
9569 
9570    Millicode insns are actually function calls with some special
9571    constraints on arguments and register usage.
9572 
9573    Millicode calls always expect their arguments in the integer argument
9574    registers, and always return their result in %r29 (ret1).  They
9575    are expected to clobber their arguments, %r1, %r29, and the return
9576    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9577 
9578    This function tells reorg that a millicode call's references to its
9579    arguments do not appear to happen until after the millicode call.
9580    This allows reorg to put insns which set the argument registers into the
9581    delay slot of the millicode call -- thus they act more like traditional
9582    CALL_INSNs.
9583 
9584    Note we cannot consider side effects of the insn to be delayed because
9585    the branch and link insn will clobber the return pointer.  If we happened
9586    to use the return pointer in the delay slot of the call, then we lose.
9587 
9588    get_attr_type will try to recognize the given insn, so make sure to
9589    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9590    in particular.  */
9591 int
9592 pa_insn_refs_are_delayed (rtx_insn *insn)
9593 {
9594   return ((NONJUMP_INSN_P (insn)
9595 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
9596 	   && GET_CODE (PATTERN (insn)) != USE
9597 	   && GET_CODE (PATTERN (insn)) != CLOBBER
9598 	   && get_attr_type (insn) == TYPE_MILLI));
9599 }
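
/* Illustrative sketch (not actual compiler output): treating TYPE_MILLI
   references as delayed lets reorg rewrite

	ldi 10,%r26		; set up millicode argument
	bl $$mulI,%r31		; millicode call, empty delay slot
	nop

   as

	bl $$mulI,%r31		; millicode call
	ldi 10,%r26		; argument set up in the delay slot

   because the use of %r26 is treated as happening after the call.  */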
9600 
9601 /* Promote the return value, but not the arguments.  */
9602 
9603 static machine_mode
9604 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9605                           machine_mode mode,
9606                           int *punsignedp ATTRIBUTE_UNUSED,
9607                           const_tree fntype ATTRIBUTE_UNUSED,
9608                           int for_return)
9609 {
9610   if (for_return == 0)
9611     return mode;
9612   return promote_mode (type, mode, punsignedp);
9613 }
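
/* Example (a sketch; assumes a 32-bit target where word_mode is SImode):
   a function returning "short" has its HImode result promoted to SImode
   because for_return is nonzero, while a "short" argument keeps HImode
   since this hook is called with for_return == 0 for arguments.  */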
9614 
9615 /* On the HP-PA the value is found in register(s) 28(-29), unless
9616    the mode is SFmode or DFmode; then the value is returned in fr4 (32).
9617 
9618    This must perform the same promotions as PROMOTE_MODE, else promoting
9619    return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9620 
9621    Small structures must be returned in a PARALLEL on PA64 in order
9622    to match the HP Compiler ABI.  */
9623 
9624 static rtx
9625 pa_function_value (const_tree valtype,
9626                    const_tree func ATTRIBUTE_UNUSED,
9627                    bool outgoing ATTRIBUTE_UNUSED)
9628 {
9629   machine_mode valmode;
9630 
9631   if (AGGREGATE_TYPE_P (valtype)
9632       || TREE_CODE (valtype) == COMPLEX_TYPE
9633       || TREE_CODE (valtype) == VECTOR_TYPE)
9634     {
9635       HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9636 
9637       /* Handle aggregates that fit exactly in a word or double word.  */
9638       if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
9639 	return gen_rtx_REG (TYPE_MODE (valtype), 28);
9640 
9641       if (TARGET_64BIT)
9642 	{
9643           /* Aggregates with a size less than or equal to 128 bits are
9644 	     returned in GR 28(-29).  They are left justified.  The pad
9645 	     bits are undefined.  Larger aggregates are returned in
9646 	     memory.  */
9647 	  rtx loc[2];
9648 	  int i, offset = 0;
9649 	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9650 
9651 	  for (i = 0; i < ub; i++)
9652 	    {
9653 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9654 					  gen_rtx_REG (DImode, 28 + i),
9655 					  GEN_INT (offset));
9656 	      offset += 8;
9657 	    }
9658 
9659 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9660 	}
9661       else if (valsize > UNITS_PER_WORD)
9662 	{
9663 	  /* Aggregates 5 to 8 bytes in size are returned in general
9664 	     registers r28-r29 in the same manner as other
9665 	     non-floating-point objects.  The data is right-justified and
9666 	     zero-extended to 64 bits.  This is opposite to the normal
9667 	     justification used on big endian targets and requires
9668 	     special treatment.  */
9669 	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9670 				       gen_rtx_REG (DImode, 28), const0_rtx);
9671 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9672 	}
9673     }
9674 
9675   if ((INTEGRAL_TYPE_P (valtype)
9676        && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9677       || POINTER_TYPE_P (valtype))
9678     valmode = word_mode;
9679   else
9680     valmode = TYPE_MODE (valtype);
9681 
9682   if (TREE_CODE (valtype) == REAL_TYPE
9683       && !AGGREGATE_TYPE_P (valtype)
9684       && TYPE_MODE (valtype) != TFmode
9685       && !TARGET_SOFT_FLOAT)
9686     return gen_rtx_REG (valmode, 32);
9687 
9688   return gen_rtx_REG (valmode, 28);
9689 }
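
/* Concrete cases (an illustrative sketch, assuming the 32-bit runtime
   with hardware floating point): "int" and pointer results come back in
   %r28, widened to word_mode by the promotion above; "double" comes
   back in fr4, i.e. (reg:DF 32); and a 6-byte struct, being larger than
   a word, is returned right justified in %r28-%r29 via the single-entry
   PARALLEL built above.  */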
9690 
9691 /* Implement the TARGET_LIBCALL_VALUE hook.  */
9692 
9693 static rtx
9694 pa_libcall_value (machine_mode mode,
9695 		  const_rtx fun ATTRIBUTE_UNUSED)
9696 {
9697   if (! TARGET_SOFT_FLOAT
9698       && (mode == SFmode || mode == DFmode))
9699     return  gen_rtx_REG (mode, 32);
9700   else
9701     return  gen_rtx_REG (mode, 28);
9702 }
9703 
9704 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */
9705 
9706 static bool
9707 pa_function_value_regno_p (const unsigned int regno)
9708 {
9709   if (regno == 28
9710       || (! TARGET_SOFT_FLOAT &&  regno == 32))
9711     return true;
9712 
9713   return false;
9714 }
9715 
9716 /* Update the data in CUM to advance over argument ARG.  */
9717 
9718 static void
9719 pa_function_arg_advance (cumulative_args_t cum_v,
9720 			 const function_arg_info &arg)
9721 {
9722   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9723   int arg_size = pa_function_arg_size (arg.mode, arg.type);
9724 
9725   cum->nargs_prototype--;
9726   cum->words += (arg_size
9727 		 + ((cum->words & 01)
9728 		    && arg.type != NULL_TREE
9729 		    && arg_size > 1));
9730 }
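
/* Worked example (a sketch for the 32-bit runtime): with cum->words == 1,
   a "double" argument has arg_size == 2, so the expression above also
   skips the odd pad slot and cum->words becomes 1 + 2 + 1 == 4; had
   cum->words been even, no pad word would have been added.  */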
9731 
9732 /* Return the location of a parameter that is passed in a register or NULL
9733    if the parameter has any component that is passed in memory.
9734 
9735    This is new code and will be pushed into the net sources after
9736    further testing.
9737 
9738    ??? We might want to restructure this so that it looks more like other
9739    ports.  */
9740 static rtx
9741 pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
9742 {
9743   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9744   tree type = arg.type;
9745   machine_mode mode = arg.mode;
9746   int max_arg_words = (TARGET_64BIT ? 8 : 4);
9747   int alignment = 0;
9748   int arg_size;
9749   int fpr_reg_base;
9750   int gpr_reg_base;
9751   rtx retval;
9752 
9753   if (arg.end_marker_p ())
9754     return NULL_RTX;
9755 
9756   arg_size = pa_function_arg_size (mode, type);
9757 
9758   /* If this arg would be passed partially or totally on the stack, then
9759      this routine should return zero.  pa_arg_partial_bytes will
9760      handle arguments which are split between regs and stack slots if
9761      the ABI mandates split arguments.  */
9762   if (!TARGET_64BIT)
9763     {
9764       /* The 32-bit ABI does not split arguments.  */
9765       if (cum->words + arg_size > max_arg_words)
9766 	return NULL_RTX;
9767     }
9768   else
9769     {
9770       if (arg_size > 1)
9771 	alignment = cum->words & 1;
9772       if (cum->words + alignment >= max_arg_words)
9773 	return NULL_RTX;
9774     }
9775 
9776   /* The 32-bit ABIs and the 64-bit ABIs are rather different,
9777      particularly in their handling of FP registers.  We might
9778      be able to cleverly share code between them, but I'm not
9779      going to bother in the hope that splitting them up results
9780      in code that is more easily understood.  */
9781 
9782   if (TARGET_64BIT)
9783     {
9784       /* Advance the base registers to their current locations.
9785 
9786          Remember, gprs grow towards smaller register numbers while
9787 	 fprs grow to higher register numbers.  Also remember that
9788 	 although FP regs are 32-bit addressable, we pretend that
9789 	 the registers are 64-bits wide.  */
9790       gpr_reg_base = 26 - cum->words;
9791       fpr_reg_base = 32 + cum->words;
9792 
9793       /* Arguments wider than one word and small aggregates need special
9794 	 treatment.  */
9795       if (arg_size > 1
9796 	  || mode == BLKmode
9797 	  || (type && (AGGREGATE_TYPE_P (type)
9798 		       || TREE_CODE (type) == COMPLEX_TYPE
9799 		       || TREE_CODE (type) == VECTOR_TYPE)))
9800 	{
9801 	  /* Double-extended precision (80-bit), quad-precision (128-bit)
9802 	     and aggregates including complex numbers are aligned on
9803 	     128-bit boundaries.  The first eight 64-bit argument slots
9804 	     are associated one-to-one, with general registers r26
9805 	     through r19, and also with floating-point registers fr4
9806 	     through fr11.  Arguments larger than one word are always
9807 	     passed in general registers.
9808 
9809 	     Using a PARALLEL with a word mode register results in left
9810 	     justified data on a big-endian target.  */
9811 
9812 	  rtx loc[8];
9813 	  int i, offset = 0, ub = arg_size;
9814 
9815 	  /* Align the base register.  */
9816 	  gpr_reg_base -= alignment;
9817 
9818 	  ub = MIN (ub, max_arg_words - cum->words - alignment);
9819 	  for (i = 0; i < ub; i++)
9820 	    {
9821 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9822 					  gen_rtx_REG (DImode, gpr_reg_base),
9823 					  GEN_INT (offset));
9824 	      gpr_reg_base -= 1;
9825 	      offset += 8;
9826 	    }
9827 
9828 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9829 	}
9830      }
9831   else
9832     {
9833       /* If the argument is larger than a word, then we know precisely
9834 	 which registers we must use.  */
9835       if (arg_size > 1)
9836 	{
9837 	  if (cum->words)
9838 	    {
9839 	      gpr_reg_base = 23;
9840 	      fpr_reg_base = 38;
9841 	    }
9842 	  else
9843 	    {
9844 	      gpr_reg_base = 25;
9845 	      fpr_reg_base = 34;
9846 	    }
9847 
9848 	  /* Structures 5 to 8 bytes in size are passed in the general
9849 	     registers in the same manner as other non-floating-point
9850 	     objects.  The data is right-justified and zero-extended
9851 	     to 64 bits.  This is opposite to the normal justification
9852 	     used on big endian targets and requires special treatment.
9853 	     We now define BLOCK_REG_PADDING to pad these objects.
9854 	     Aggregates, complex and vector types are passed in the same
9855 	     manner as structures.  */
9856 	  if (mode == BLKmode
9857 	      || (type && (AGGREGATE_TYPE_P (type)
9858 			   || TREE_CODE (type) == COMPLEX_TYPE
9859 			   || TREE_CODE (type) == VECTOR_TYPE)))
9860 	    {
9861 	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9862 					   gen_rtx_REG (DImode, gpr_reg_base),
9863 					   const0_rtx);
9864 	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9865 	    }
9866 	}
9867       else
9868         {
9869 	   /* We have a single word (32 bits).  A simple computation
9870 	      will get us the register #s we need.  */
9871 	   gpr_reg_base = 26 - cum->words;
9872 	   fpr_reg_base = 32 + 2 * cum->words;
9873 	}
9874     }
9875 
9876   /* Determine if the argument needs to be passed in both general and
9877      floating point registers.  */
9878   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9879        /* If we are doing soft-float with portable runtime, then there
9880 	  is no need to worry about FP regs.  */
9881        && !TARGET_SOFT_FLOAT
9882        /* The parameter must be some kind of scalar float, else we just
9883 	  pass it in integer registers.  */
9884        && GET_MODE_CLASS (mode) == MODE_FLOAT
9885        /* The target function must not have a prototype.  */
9886        && cum->nargs_prototype <= 0
9887        /* libcalls do not need to pass items in both FP and general
9888 	  registers.  */
9889        && type != NULL_TREE
9890        /* All this hair applies to "outgoing" args only.  This includes
9891 	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
9892        && !cum->incoming)
9893       /* Also pass outgoing floating arguments in both registers in indirect
9894 	 calls with the 32-bit ABI and the HP assembler since there is no
9895 	 way to specify argument locations in static functions.  */
9896       || (!TARGET_64BIT
9897 	  && !TARGET_GAS
9898 	  && !cum->incoming
9899 	  && cum->indirect
9900 	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
9901     {
9902       retval
9903 	= gen_rtx_PARALLEL
9904 	    (mode,
9905 	     gen_rtvec (2,
9906 			gen_rtx_EXPR_LIST (VOIDmode,
9907 					   gen_rtx_REG (mode, fpr_reg_base),
9908 					   const0_rtx),
9909 			gen_rtx_EXPR_LIST (VOIDmode,
9910 					   gen_rtx_REG (mode, gpr_reg_base),
9911 					   const0_rtx)));
9912     }
9913   else
9914     {
9915       /* See if we should pass this parameter in a general register.  */
9916       if (TARGET_SOFT_FLOAT
9917 	  /* Indirect calls in the normal 32-bit ABI require all arguments
9918 	     to be passed in general registers.  */
9919 	  || (!TARGET_PORTABLE_RUNTIME
9920 	      && !TARGET_64BIT
9921 	      && !TARGET_ELF32
9922 	      && cum->indirect)
9923 	  /* If the parameter is not a scalar floating-point parameter,
9924 	     then it belongs in GPRs.  */
9925 	  || GET_MODE_CLASS (mode) != MODE_FLOAT
9926 	  /* Structure with single SFmode field belongs in GPR.  */
9927 	  || (type && AGGREGATE_TYPE_P (type)))
9928 	retval = gen_rtx_REG (mode, gpr_reg_base);
9929       else
9930 	retval = gen_rtx_REG (mode, fpr_reg_base);
9931     }
9932   return retval;
9933 }
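
/* Register selection sketch for the 32-bit runtime (illustrative):
   since gpr_reg_base == 26 - cum->words and fpr_reg_base == 32 +
   2 * cum->words, the first two single-word arguments land in %r26 and
   %r25 (or fr4 and fr5 when they go to the FP side).  A two-word
   argument uses %r25 and fr5 when it starts at slot 0, and %r23 and
   fr7 otherwise, matching the constants in the arg_size > 1 branch.  */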
9934 
9935 /* Arguments larger than one word are double word aligned.  */
9936 
9937 static unsigned int
9938 pa_function_arg_boundary (machine_mode mode, const_tree type)
9939 {
9940   bool singleword = (type
9941 		     ? (integer_zerop (TYPE_SIZE (type))
9942 			|| !TREE_CONSTANT (TYPE_SIZE (type))
9943 			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
9944 		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9945 
9946   return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9947 }
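
/* Example (a sketch): an "int" argument gets PARM_BOUNDARY (word)
   alignment, while a "double" or an 8-byte struct gets
   MAX_PARM_BOUNDARY (double-word) alignment; the latter is what
   creates the pad slots accounted for in pa_function_arg_advance.  */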
9948 
9949 /* If this arg would be passed totally in registers or totally on the stack,
9950    then this routine should return zero.  */
9951 
9952 static int
9953 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
9954 {
9955   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9956   unsigned int max_arg_words = 8;
9957   unsigned int offset = 0;
9958 
9959   if (!TARGET_64BIT)
9960     return 0;
9961 
9962   if (pa_function_arg_size (arg.mode, arg.type) > 1 && (cum->words & 1))
9963     offset = 1;
9964 
9965   if (cum->words + offset + pa_function_arg_size (arg.mode, arg.type)
9966       <= max_arg_words)
9967     /* Arg fits fully into registers.  */
9968     return 0;
9969   else if (cum->words + offset >= max_arg_words)
9970     /* Arg fully on the stack.  */
9971     return 0;
9972   else
9973     /* Arg is split.  */
9974     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9975 }
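
/* Worked example (a sketch, TARGET_64BIT): with cum->words == 6 and a
   4-word argument, offset stays 0; since 6 + 0 + 4 > 8 and 6 < 8, the
   argument is split and (8 - 6 - 0) * UNITS_PER_WORD == 16 bytes are
   passed in registers with the remainder on the stack.  */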
9976 
9977 
9978 /* A get_unnamed_section callback for switching to the text section.
9979 
9980    This function is only used with SOM.  Because we don't support
9981    named subspaces, we can only create a new subspace or switch back
9982    to the default text subspace.  */
9983 
9984 static void
9985 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9986 {
9987   gcc_assert (TARGET_SOM);
9988   if (TARGET_GAS)
9989     {
9990       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9991 	{
9992 	  /* We only want to emit a .nsubspa directive once at the
9993 	     start of the function.  */
9994 	  cfun->machine->in_nsubspa = 1;
9995 
9996 	  /* Create a new subspace for the text.  This provides
9997 	     better stub placement and one-only functions.  */
9998 	  if (cfun->decl
9999 	      && DECL_ONE_ONLY (cfun->decl)
10000 	      && !DECL_WEAK (cfun->decl))
10001 	    {
10002 	      output_section_asm_op ("\t.SPACE $TEXT$\n"
10003 				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
10004 				     "ACCESS=44,SORT=24,COMDAT");
10005 	      return;
10006 	    }
10007 	}
10008       else
10009 	{
10010 	  /* Either there isn't a current function or the body of the
10011 	     current function has been completed.  So, we are changing to the
10012 	     text section to output debugging information.  Thus, we
10013 	     need to forget that we are in the text section so that
10014 	     varasm.c will call us when text_section is selected again.  */
10015 	  gcc_assert (!cfun || !cfun->machine
10016 		      || cfun->machine->in_nsubspa == 2);
10017 	  in_section = NULL;
10018 	}
10019       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
10020       return;
10021     }
10022   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
10023 }
10024 
10025 /* A get_unnamed_section callback for switching to comdat data
10026    sections.  This function is only used with SOM.  */
10027 
10028 static void
10029 som_output_comdat_data_section_asm_op (const void *data)
10030 {
10031   in_section = NULL;
10032   output_section_asm_op (data);
10033 }
10034 
10035 /* Implement TARGET_ASM_INIT_SECTIONS.  */
10036 
10037 static void
10038 pa_som_asm_init_sections (void)
10039 {
10040   text_section
10041     = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
10042 
10043   /* SOM puts readonly data in the default $LIT$ subspace when PIC code
10044      is not being generated.  */
10045   som_readonly_data_section
10046     = get_unnamed_section (0, output_section_asm_op,
10047 			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
10048 
10049   /* When secondary definitions are not supported, SOM makes readonly
10050      data one-only by creating a new $LIT$ subspace in $TEXT$ with
10051      the comdat flag.  */
10052   som_one_only_readonly_data_section
10053     = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
10054 			   "\t.SPACE $TEXT$\n"
10055 			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
10056 			   "ACCESS=0x2c,SORT=16,COMDAT");
10057 
10058 
10059   /* When secondary definitions are not supported, SOM makes data one-only
10060      by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
10061   som_one_only_data_section
10062     = get_unnamed_section (SECTION_WRITE,
10063 			   som_output_comdat_data_section_asm_op,
10064 			   "\t.SPACE $PRIVATE$\n"
10065 			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
10066 			   "ACCESS=31,SORT=24,COMDAT");
10067 
10068   if (flag_tm)
10069     som_tm_clone_table_section
10070       = get_unnamed_section (0, output_section_asm_op,
10071 			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
10072 
10073   /* HPUX ld generates incorrect GOT entries for "T" fixups which
10074      reference data within the $TEXT$ space (for example constant
10075      strings in the $LIT$ subspace).
10076 
10077      The assemblers (GAS and HP as) both have problems with handling
10078      the difference of two symbols.  This is the other correct way to
10079      reference constant data during PIC code generation.
10080 
10081      Thus, we can't put constant data needing relocation in the $TEXT$
10082      space during PIC generation.
10083 
10084      Previously, we placed all constant data into the $DATA$ subspace
10085      when generating PIC code.  This reduces sharing, but it works
10086      correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
10087      This puts constant data not needing relocation into the $TEXT$ space.  */
10088   readonly_data_section = som_readonly_data_section;
10089 
10090   /* We must not have a reference to an external symbol defined in a
10091      shared library in a readonly section, else the SOM linker will
10092      complain.
10093 
10094      So, we force exception information into the data section.  */
10095   exception_section = data_section;
10096 }
10097 
10098 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
10099 
10100 static section *
10101 pa_som_tm_clone_table_section (void)
10102 {
10103   return som_tm_clone_table_section;
10104 }
10105 
10106 /* On hpux10, the linker will give an error if we have a reference
10107    in the read-only data section to a symbol defined in a shared
10108    library.  Therefore, expressions that might require a reloc
10109    cannot be placed in the read-only data section.  */
10110 
10111 static section *
10112 pa_select_section (tree exp, int reloc,
10113 		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
10114 {
10115   if (TREE_CODE (exp) == VAR_DECL
10116       && TREE_READONLY (exp)
10117       && !TREE_THIS_VOLATILE (exp)
10118       && DECL_INITIAL (exp)
10119       && (DECL_INITIAL (exp) == error_mark_node
10120           || TREE_CONSTANT (DECL_INITIAL (exp)))
10121       && !(reloc & pa_reloc_rw_mask ()))
10122     {
10123       if (TARGET_SOM
10124 	  && DECL_ONE_ONLY (exp)
10125 	  && !DECL_WEAK (exp))
10126 	return som_one_only_readonly_data_section;
10127       else
10128 	return readonly_data_section;
10129     }
10130   else if (CONSTANT_CLASS_P (exp)
10131 	   && !(reloc & pa_reloc_rw_mask ()))
10132     return readonly_data_section;
10133   else if (TARGET_SOM
10134 	   && TREE_CODE (exp) == VAR_DECL
10135 	   && DECL_ONE_ONLY (exp)
10136 	   && !DECL_WEAK (exp))
10137     return som_one_only_data_section;
10138   else
10139     return data_section;
10140 }
10141 
10142 /* Implement pa_elf_select_rtx_section.  If X is a function label operand
10143    and the function is in a COMDAT group, place the plabel reference in the
10144    .data.rel.ro.local section.  The linker ignores references to symbols in
10145    discarded sections from this section.  */
10146 
10147 static section *
10148 pa_elf_select_rtx_section (machine_mode mode, rtx x,
10149 			   unsigned HOST_WIDE_INT align)
10150 {
10151   if (function_label_operand (x, VOIDmode))
10152     {
10153       tree decl = SYMBOL_REF_DECL (x);
10154 
10155       if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
10156 	return get_named_section (NULL, ".data.rel.ro.local", 1);
10157     }
10158 
10159   return default_elf_select_rtx_section (mode, x, align);
10160 }
10161 
10162 /* Implement pa_reloc_rw_mask.  */
10163 
10164 static int
10165 pa_reloc_rw_mask (void)
10166 {
10167   if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
10168     return 3;
10169 
10170   /* HP linker does not support global relocs in readonly memory.  */
10171   return TARGET_SOM ? 2 : 0;
10172 }
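
/* Interpretation (a sketch, assuming varasm's convention that bit 0
   stands for relocations to local symbols and bit 1 for relocations to
   global symbols): 3 forces all relocated data into writable sections,
   2 only data with global relocs, and 0 none.  */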
10173 
10174 static void
10175 pa_globalize_label (FILE *stream, const char *name)
10176 {
10177   /* We only handle DATA objects here; functions are globalized in
10178      ASM_DECLARE_FUNCTION_NAME.  */
10179   if (! FUNCTION_NAME_P (name))
10180   {
10181     fputs ("\t.EXPORT ", stream);
10182     assemble_name (stream, name);
10183     fputs (",DATA\n", stream);
10184   }
10185 }
10186 
10187 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
10188 
10189 static rtx
10190 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
10191 		     int incoming ATTRIBUTE_UNUSED)
10192 {
10193   return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
10194 }
10195 
10196 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
10197 
10198 bool
10199 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10200 {
10201   /* SOM ABI says that objects larger than 64 bits are returned in memory.
10202      PA64 ABI says that objects larger than 128 bits are returned in memory.
10203      Note, int_size_in_bytes can return -1 if the size of the object is
10204      variable or larger than the maximum value that can be expressed as
10205      a HOST_WIDE_INT.   It can also return zero for an empty type.  The
10206      simplest way to handle variable and empty types is to pass them in
10207      memory.  This avoids problems in defining the boundaries of argument
10208      slots, allocating registers, etc.  */
10209   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
10210 	  || int_size_in_bytes (type) <= 0);
10211 }
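
/* Examples (a sketch): in the 32-bit runtime an 8-byte struct is
   returned in registers while a 12-byte struct is returned in memory;
   a variable-sized or empty type makes int_size_in_bytes return a
   value <= 0 and is therefore always returned in memory.  */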
10212 
10213 /* Structure to hold declaration and name of external symbols that are
10214    emitted by GCC.  We generate a vector of these symbols and output them
10215    at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
10216    This avoids putting out names that are never really used.  */
10217 
10218 typedef struct GTY(()) extern_symbol
10219 {
10220   tree decl;
10221   const char *name;
10222 } extern_symbol;
10223 
10224 /* Define gc'd vector type for extern_symbol.  */
10225 
10226 /* Vector of extern_symbol entries.  */
10227 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
10228 
10229 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10230 /* Mark DECL (name NAME) as an external reference (assembler output
10231    file FILE).  This saves the names to output at the end of the file
10232    if actually referenced.  */
10233 
10234 void
10235 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
10236 {
10237   gcc_assert (file == asm_out_file);
10238   extern_symbol p = {decl, name};
10239   vec_safe_push (extern_symbols, p);
10240 }
10241 #endif
10242 
10243 /* Output text required at the end of an assembler file.
10244    This includes deferred plabels and .import directives for
10245    all external symbols that were actually referenced.  */
10246 
10247 static void
10248 pa_file_end (void)
10249 {
10250 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10251   unsigned int i;
10252   extern_symbol *p;
10253 
10254   if (!NO_DEFERRED_PROFILE_COUNTERS)
10255     output_deferred_profile_counters ();
10256 #endif
10257 
10258   output_deferred_plabels ();
10259 
10260 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10261   for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
10262     {
10263       tree decl = p->decl;
10264 
10265       if (!TREE_ASM_WRITTEN (decl)
10266 	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
10267 	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
10268     }
10269 
10270   vec_free (extern_symbols);
10271 #endif
10272 
10273   if (NEED_INDICATE_EXEC_STACK)
10274     file_end_indicate_exec_stack ();
10275 }
10276 
10277 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
10278 
10279 static bool
10280 pa_can_change_mode_class (machine_mode from, machine_mode to,
10281 			  reg_class_t rclass)
10282 {
10283   if (from == to)
10284     return true;
10285 
10286   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10287     return true;
10288 
10289   /* Reject changes to/from modes with zero size.  */
10290   if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10291     return false;
10292 
10293   /* Reject changes to/from complex and vector modes.  */
10294   if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10295       || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10296     return false;
10297 
10298   /* There is no way to load QImode or HImode values directly from memory
10299      to a FP register.  SImode loads to the FP registers are not zero
10300      extended.  On the 64-bit target, this conflicts with the definition
10301      of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
10302      except for DImode to SImode on the 64-bit target.  It is handled by
10303      register renaming in pa_print_operand.  */
10304   if (MAYBE_FP_REG_CLASS_P (rclass))
10305     return TARGET_64BIT && from == DImode && to == SImode;
10306 
10307   /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10308      in specific sets of registers.  Thus, we cannot allow changing
10309      to a larger mode when it's larger than a word.  */
10310   if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10311       && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10312     return false;
10313 
10314   return true;
10315 }
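
/* Examples (a sketch): SFmode <-> SImode changes are always allowed
   (equal size).  In a floating-point register class, SImode -> DImode
   is rejected, as is any size-changing move involving QImode or
   HImode, while DImode -> SImode is allowed only when TARGET_64BIT.  */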
10316 
10317 /* Implement TARGET_MODES_TIEABLE_P.
10318 
10319    We should return FALSE for QImode and HImode because these modes
10320    are not ok in the floating-point registers.  However, this prevents
10321    tieing these modes to SImode and DImode in the general registers.
10322    So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
10323    TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10324    in the floating-point registers.  */
10325 
10326 static bool
10327 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10328 {
10329   /* Don't tie modes in different classes.  */
10330   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10331     return false;
10332 
10333   return true;
10334 }
10335 
10336 
10337 /* Length in units of the trampoline instruction code.  */
10338 
10339 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10340 
10341 
10342 /* Output assembler code for a block containing the constant parts
10343    of a trampoline, leaving space for the variable parts.
10344 
10345    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10346    and then branches to the specified routine.
10347 
10348    This code template is copied from the text segment to a stack
10349    location, patched by pa_trampoline_init to contain valid values,
10350    and then entered as a subroutine.
10351 
10352    It is best to keep this as small as possible to avoid having to
10353    flush multiple lines in the cache.  */
10354 
10355 static void
10356 pa_asm_trampoline_template (FILE *f)
10357 {
10358   if (!TARGET_64BIT)
10359     {
10360       if (TARGET_PA_20)
10361 	{
10362 	  fputs ("\tmfia	%r20\n", f);
10363 	  fputs ("\tldw		48(%r20),%r22\n", f);
10364 	  fputs ("\tcopy	%r22,%r21\n", f);
10365 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10366 	  fputs ("\tdepwi	0,31,2,%r22\n", f);
10367 	  fputs ("\tldw		0(%r22),%r21\n", f);
10368 	  fputs ("\tldw		4(%r22),%r19\n", f);
10369 	  fputs ("\tbve		(%r21)\n", f);
10370 	  fputs ("\tldw		52(%r1),%r29\n", f);
10371 	  fputs ("\t.word	0\n", f);
10372 	  fputs ("\t.word	0\n", f);
10373 	  fputs ("\t.word	0\n", f);
10374 	}
10375       else
10376 	{
10377 	  if (ASSEMBLER_DIALECT == 0)
10378 	    {
10379 	      fputs ("\tbl	.+8,%r20\n", f);
10380 	      fputs ("\tdepi	0,31,2,%r20\n", f);
10381 	    }
10382 	  else
10383 	    {
10384 	      fputs ("\tb,l	.+8,%r20\n", f);
10385 	      fputs ("\tdepwi	0,31,2,%r20\n", f);
10386 	    }
10387 	  fputs ("\tldw		40(%r20),%r22\n", f);
10388 	  fputs ("\tcopy	%r22,%r21\n", f);
10389 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10390 	  if (ASSEMBLER_DIALECT == 0)
10391 	    fputs ("\tdepi	0,31,2,%r22\n", f);
10392 	  else
10393 	    fputs ("\tdepwi	0,31,2,%r22\n", f);
10394 	  fputs ("\tldw		0(%r22),%r21\n", f);
10395 	  fputs ("\tldw		4(%r22),%r19\n", f);
10396 	  fputs ("\tldsid	(%r21),%r1\n", f);
10397 	  fputs ("\tmtsp	%r1,%sr0\n", f);
10398 	  fputs ("\tbe		0(%sr0,%r21)\n", f);
10399 	  fputs ("\tldw		44(%r20),%r29\n", f);
10400 	}
10401       fputs ("\t.word	0\n", f);
10402       fputs ("\t.word	0\n", f);
10403       fputs ("\t.word	0\n", f);
10404       fputs ("\t.word	0\n", f);
10405     }
10406   else
10407     {
10408       fputs ("\t.dword 0\n", f);
10409       fputs ("\t.dword 0\n", f);
10410       fputs ("\t.dword 0\n", f);
10411       fputs ("\t.dword 0\n", f);
10412       fputs ("\tmfia	%r31\n", f);
10413       fputs ("\tldd	24(%r31),%r27\n", f);
10414       fputs ("\tldd	32(%r31),%r31\n", f);
10415       fputs ("\tldd	16(%r27),%r1\n", f);
10416       fputs ("\tbve	(%r1)\n", f);
10417       fputs ("\tldd	24(%r27),%r27\n", f);
10418       fputs ("\t.dword 0  ; fptr\n", f);
10419       fputs ("\t.dword 0  ; static link\n", f);
10420     }
10421 }
10422 
10423 /* Emit RTL insns to initialize the variable parts of a trampoline.
10424    FNADDR is an RTX for the address of the function's pure code.
10425    CXT is an RTX for the static chain value for the function.
10426 
10427    Move the function address to the trampoline template at offset 48.
10428    Move the static chain value to the trampoline template at offset 52.
10429    Move the trampoline address to the trampoline template at offset 56.
10430    Move r19 to the trampoline template at offset 60.  The latter two
10431    words create a plabel for the indirect call to the trampoline.
10432 
10433    A similar sequence is used for the 64-bit port but the plabel is
10434    at the beginning of the trampoline.
10435 
10436    Finally, the cache entries for the trampoline code are flushed.
10437    This is necessary to ensure that the trampoline instruction sequence
10438    is written to memory prior to any attempts at prefetching the code
10439    sequence.  */
10440 
10441 static void
10442 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10443 {
10444   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10445   rtx start_addr = gen_reg_rtx (Pmode);
10446   rtx end_addr = gen_reg_rtx (Pmode);
10447   rtx line_length = gen_reg_rtx (Pmode);
10448   rtx r_tramp, tmp;
10449 
10450   emit_block_move (m_tramp, assemble_trampoline_template (),
10451 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10452   r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10453 
10454   if (!TARGET_64BIT)
10455     {
10456       tmp = adjust_address (m_tramp, Pmode, 48);
10457       emit_move_insn (tmp, fnaddr);
10458       tmp = adjust_address (m_tramp, Pmode, 52);
10459       emit_move_insn (tmp, chain_value);
10460 
10461       /* Create a fat pointer for the trampoline.  */
10462       tmp = adjust_address (m_tramp, Pmode, 56);
10463       emit_move_insn (tmp, r_tramp);
10464       tmp = adjust_address (m_tramp, Pmode, 60);
10465       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10466 
10467       /* fdc and fic only use registers for the address to flush,
10468 	 they do not accept integer displacements.  We align the
10469 	 start and end addresses to the beginning of their respective
10470 	 cache lines to minimize the number of lines flushed.  */
10471       emit_insn (gen_andsi3 (start_addr, r_tramp,
10472 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10473       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10474 					     TRAMPOLINE_CODE_SIZE-1));
10475       emit_insn (gen_andsi3 (end_addr, tmp,
10476 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10477       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10478       emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10479       emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10480 				    gen_reg_rtx (Pmode),
10481 				    gen_reg_rtx (Pmode)));
10482     }
10483   else
10484     {
10485       tmp = adjust_address (m_tramp, Pmode, 56);
10486       emit_move_insn (tmp, fnaddr);
10487       tmp = adjust_address (m_tramp, Pmode, 64);
10488       emit_move_insn (tmp, chain_value);
10489 
10490       /* Create a fat pointer for the trampoline.  */
10491       tmp = adjust_address (m_tramp, Pmode, 16);
10492       emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10493 							    r_tramp, 32)));
10494       tmp = adjust_address (m_tramp, Pmode, 24);
10495       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10496 
10497       /* fdc and fic only use registers for the address to flush,
10498 	 they do not accept integer displacements.  We align the
10499 	 start and end addresses to the beginning of their respective
10500 	 cache lines to minimize the number of lines flushed.  */
10501       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10502       emit_insn (gen_anddi3 (start_addr, tmp,
10503 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10504       tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10505 					     TRAMPOLINE_CODE_SIZE - 1));
10506       emit_insn (gen_anddi3 (end_addr, tmp,
10507 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10508       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10509       emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10510       emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10511 				    gen_reg_rtx (Pmode),
10512 				    gen_reg_rtx (Pmode)));
10513     }
10514 
10515 #ifdef HAVE_ENABLE_EXECUTE_STACK
10516   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10517 		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10518 #endif
10519 }
10520 
10521 /* Perform any machine-specific adjustment in the address of the trampoline.
10522    ADDR contains the address that was passed to pa_trampoline_init.
10523    Adjust the trampoline address to point to the plabel at offset 56.  */
10524 
10525 static rtx
10526 pa_trampoline_adjust_address (rtx addr)
10527 {
10528   if (!TARGET_64BIT)
10529     addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10530   return addr;
10531 }
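
/* Note (an inference from the trampoline template above): the offset is
   58 rather than 56 because adding 2 sets PA bit 30 of the word-aligned
   plabel address -- the same bit the template tests with
   "bb,>=,n %r22,30" to recognize a plabel-style function pointer.  */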
10532 
10533 static rtx
10534 pa_delegitimize_address (rtx orig_x)
10535 {
10536   rtx x = delegitimize_mem_from_attrs (orig_x);
10537 
10538   if (GET_CODE (x) == LO_SUM
10539       && GET_CODE (XEXP (x, 1)) == UNSPEC
10540       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10541     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10542   return x;
10543 }
10544 
10545 static rtx
10546 pa_internal_arg_pointer (void)
10547 {
10548   /* The argument pointer and the hard frame pointer are the same in
10549      the 32-bit runtime, so we don't need a copy.  */
10550   if (TARGET_64BIT)
10551     return copy_to_reg (virtual_incoming_args_rtx);
10552   else
10553     return virtual_incoming_args_rtx;
10554 }
10555 
10556 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10557    Frame pointer elimination is automatically handled.  */
10558 
10559 static bool
10560 pa_can_eliminate (const int from, const int to)
10561 {
10562   /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
10563   if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10564     return false;
10565 
10566   return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10567           ? ! frame_pointer_needed
10568           : true);
10569 }
10570 
10571 /* Define the offset between two registers, FROM to be eliminated and its
10572    replacement TO, at the start of a routine.  */
10573 HOST_WIDE_INT
10574 pa_initial_elimination_offset (int from, int to)
10575 {
10576   HOST_WIDE_INT offset;
10577 
10578   if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10579       && to == STACK_POINTER_REGNUM)
10580     offset = -pa_compute_frame_size (get_frame_size (), 0);
10581   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10582     offset = 0;
10583   else
10584     gcc_unreachable ();
10585 
10586   return offset;
10587 }
10588 
10589 static void
10590 pa_conditional_register_usage (void)
10591 {
10592   int i;
10593 
10594   if (!TARGET_64BIT && !TARGET_PA_11)
10595     {
10596       for (i = 56; i <= FP_REG_LAST; i++)
10597 	fixed_regs[i] = call_used_regs[i] = 1;
10598       for (i = 33; i < 56; i += 2)
10599 	fixed_regs[i] = call_used_regs[i] = 1;
10600     }
10601   if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10602     {
10603       for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10604 	fixed_regs[i] = call_used_regs[i] = 1;
10605     }
10606   if (flag_pic)
10607     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10608 }
10609 
10610 /* Target hook for c_mode_for_suffix.  */
10611 
10612 static machine_mode
10613 pa_c_mode_for_suffix (char suffix)
10614 {
10615   if (HPUX_LONG_DOUBLE_LIBRARY)
10616     {
10617       if (suffix == 'q')
10618 	return TFmode;
10619     }
10620 
10621   return VOIDmode;
10622 }
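
/* Example (a sketch): when HPUX_LONG_DOUBLE_LIBRARY is true, a literal
   such as 1.5q is given TFmode; returning VOIDmode otherwise tells the
   caller that the suffix is not supported.  */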
10623 
10624 /* Target hook for function_section.  */
10625 
10626 static section *
10627 pa_function_section (tree decl, enum node_frequency freq,
10628 		     bool startup, bool exit)
10629 {
10630   /* Put functions in text section if target doesn't have named sections.  */
10631   if (!targetm_common.have_named_sections)
10632     return text_section;
10633 
10634   /* Force nested functions into the same section as the containing
10635      function.  */
10636   if (decl
10637       && DECL_SECTION_NAME (decl) == NULL
10638       && DECL_CONTEXT (decl) != NULL_TREE
10639       && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10640       && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10641     return function_section (DECL_CONTEXT (decl));
10642 
10643   /* Otherwise, use the default function section.  */
10644   return default_function_section (decl, freq, startup, exit);
10645 }
10646 
10647 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10648 
10649    In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
10650    that need more than three instructions to load prior to reload.  This
10651    limit is somewhat arbitrary.  It takes three instructions to load a
10652    CONST_INT from memory but two are memory accesses.  It may be better
10653    to increase the allowed range for CONST_INTS.  We may also be able
10654    to handle CONST_DOUBLES.  */
10655 
10656 static bool
10657 pa_legitimate_constant_p (machine_mode mode, rtx x)
10658 {
10659   if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10660     return false;
10661 
10662   if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10663     return false;
10664 
10665   /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10666      legitimate constants.  The other variants can't be handled by
10667      the move patterns after reload starts.  */
10668   if (tls_referenced_p (x))
10669     return false;
10670 
10671   if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10672     return false;
10673 
10674   if (TARGET_64BIT
10675       && HOST_BITS_PER_WIDE_INT > 32
10676       && GET_CODE (x) == CONST_INT
10677       && !reload_in_progress
10678       && !reload_completed
10679       && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10680       && !pa_cint_ok_for_move (UINTVAL (x)))
10681     return false;
10682 
10683   if (function_label_operand (x, mode))
10684     return false;
10685 
10686   return true;
10687 }
10688 
10689 /* Implement TARGET_SECTION_TYPE_FLAGS.  */
10690 
10691 static unsigned int
10692 pa_section_type_flags (tree decl, const char *name, int reloc)
10693 {
10694   unsigned int flags;
10695 
10696   flags = default_section_type_flags (decl, name, reloc);
10697 
10698   /* Function labels are placed in the constant pool.  This can
10699      cause a section conflict if decls are put in ".data.rel.ro"
10700      or ".data.rel.ro.local" using the __attribute__ construct.  */
10701   if (strcmp (name, ".data.rel.ro") == 0
10702       || strcmp (name, ".data.rel.ro.local") == 0)
10703     flags |= SECTION_WRITE | SECTION_RELRO;
10704 
10705   return flags;
10706 }
10707 
10708 /* pa_legitimate_address_p recognizes an RTL expression that is a
10709    valid memory address for an instruction.  The MODE argument is the
10710    machine mode for the MEM expression that wants to use this address.
10711 
10712    On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10713    REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
10714    available with floating point loads and stores, and integer loads.
10715    We get better code by allowing indexed addresses in the initial
10716    RTL generation.
10717 
10718    The acceptance of indexed addresses as legitimate implies that we
10719    must provide patterns for doing indexed integer stores, or the move
10720    expanders must force the address of an indexed store to a register.
10721    We have adopted the latter approach.
10722 
10723    Another function of pa_legitimate_address_p is to ensure that
10724    the base register is a valid pointer for indexed instructions.
10725    On targets that have non-equivalent space registers, we have to
10726    know at the time of assembler output which register in a REG+REG
10727    pair is the base register.  The REG_POINTER flag is sometimes lost
10728    in reload and the following passes, so it can't be relied on during
10729    code generation.  Thus, we either have to canonicalize the order
10730    of the registers in REG+REG indexed addresses, or treat REG+REG
10731    addresses separately and provide patterns for both permutations.
10732 
10733    The latter approach requires several hundred additional lines of
10734    code in pa.md.  The downside to canonicalizing is that a PLUS
10735    in the wrong order can't combine to form a scaled indexed
10736    memory operand.  As we won't need to canonicalize the operands if
10737    the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10738 
10739    We initially break out scaled indexed addresses in canonical order
10740    in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
10741    scaled indexed addresses during RTL generation.  However, fold_rtx
10742    has its own opinion on how the operands of a PLUS should be ordered.
10743    If one of the operands is equivalent to a constant, it will make
10744    that operand the second operand.  As the base register is likely to
10745    be equivalent to a SYMBOL_REF, we have made it the second operand.
10746 
10747    pa_legitimate_address_p accepts REG+REG as legitimate when the
10748    operands are in the order INDEX+BASE on targets with non-equivalent
10749    space registers, and in any order on targets with equivalent space
10750    registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10751 
10752    We treat a SYMBOL_REF as legitimate if it is part of the current
10753    function's constant-pool, because such addresses can actually be
10754    output as REG+SMALLINT.  */
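
/* Examples of addresses accepted below (illustrative RTL sketches):

     (plus (reg) (const_int 12))		   REG+SMALLINT
     (plus (reg) (reg))				   REG+REG, base second
     (plus (mult (reg) (const_int 4)) (reg))	   scaled index, SImode

   The scaled form is accepted only when the multiplier equals the
   access mode's size.  */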
10755 
10756 static bool
10757 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10758 {
10759   if ((REG_P (x)
10760        && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10761 		  : REG_OK_FOR_BASE_P (x)))
10762       || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10763 	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10764 	  && REG_P (XEXP (x, 0))
10765 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10766 		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10767     return true;
10768 
10769   if (GET_CODE (x) == PLUS)
10770     {
10771       rtx base, index;
10772 
10773       /* For REG+REG, the base register should be in XEXP (x, 1),
10774 	 so check it first.  */
10775       if (REG_P (XEXP (x, 1))
10776 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10777 		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10778 	base = XEXP (x, 1), index = XEXP (x, 0);
10779       else if (REG_P (XEXP (x, 0))
10780 	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10781 			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10782 	base = XEXP (x, 0), index = XEXP (x, 1);
10783       else
10784 	return false;
10785 
10786       if (GET_CODE (index) == CONST_INT)
10787 	{
10788 	  if (INT_5_BITS (index))
10789 	    return true;
10790 
10791 	  /* When INT14_OK_STRICT is false, a secondary reload is needed
10792 	     to adjust the displacement of SImode and DImode floating point
10793 	     instructions but this may fail when the register also needs
10794 	     reloading.  So, we return false when STRICT is true.  We
10795 	     also reject long displacements for float mode addresses since
10796 	     the majority of accesses will use floating point instructions
10797 	     that don't support 14-bit offsets.  */
10798 	  if (!INT14_OK_STRICT
10799 	      && (strict || !(reload_in_progress || reload_completed))
10800 	      && mode != QImode
10801 	      && mode != HImode)
10802 	    return false;
10803 
10804 	  return base14_operand (index, mode);
10805 	}
10806 
10807       if (!TARGET_DISABLE_INDEXING
10808 	  /* Only accept the "canonical" INDEX+BASE operand order
10809 	     on targets with non-equivalent space registers.  */
10810 	  && (TARGET_NO_SPACE_REGS
10811 	      ? REG_P (index)
10812 	      : (base == XEXP (x, 1) && REG_P (index)
10813 		 && (reload_completed
10814 		     || (reload_in_progress && HARD_REGISTER_P (base))
10815 		     || REG_POINTER (base))
10816 		 && (reload_completed
10817 		     || (reload_in_progress && HARD_REGISTER_P (index))
10818 		     || !REG_POINTER (index))))
10819 	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10820 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10821 		     : REG_OK_FOR_INDEX_P (index))
10822 	  && borx_reg_operand (base, Pmode)
10823 	  && borx_reg_operand (index, Pmode))
10824 	return true;
10825 
10826       if (!TARGET_DISABLE_INDEXING
10827 	  && GET_CODE (index) == MULT
10828 	  /* Only accept base operands with the REG_POINTER flag prior to
10829 	     reload on targets with non-equivalent space registers.  */
10830 	  && (TARGET_NO_SPACE_REGS
10831 	      || (base == XEXP (x, 1)
10832 		  && (reload_completed
10833 		      || (reload_in_progress && HARD_REGISTER_P (base))
10834 		      || REG_POINTER (base))))
10835 	  && REG_P (XEXP (index, 0))
10836 	  && GET_MODE (XEXP (index, 0)) == Pmode
10837 	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10838 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10839 		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10840 	  && GET_CODE (XEXP (index, 1)) == CONST_INT
10841 	  && INTVAL (XEXP (index, 1))
10842 	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10843 	  && borx_reg_operand (base, Pmode))
10844 	return true;
10845 
10846       return false;
10847     }
10848 
10849   if (GET_CODE (x) == LO_SUM)
10850     {
10851       rtx y = XEXP (x, 0);
10852 
10853       if (GET_CODE (y) == SUBREG)
10854 	y = SUBREG_REG (y);
10855 
10856       if (REG_P (y)
10857 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10858 		     : REG_OK_FOR_BASE_P (y)))
10859 	{
10860 	  /* Needed for -fPIC */
10861 	  if (mode == Pmode
10862 	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
10863 	    return true;
10864 
10865 	  if (!INT14_OK_STRICT
10866 	      && (strict || !(reload_in_progress || reload_completed))
10867 	      && mode != QImode
10868 	      && mode != HImode)
10869 	    return false;
10870 
10871 	  if (CONSTANT_P (XEXP (x, 1)))
10872 	    return true;
10873 	}
10874       return false;
10875     }
10876 
10877   if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10878     return true;
10879 
10880   return false;
10881 }
10882 
10883 /* Look for machine dependent ways to make the invalid address AD a
10884    valid address.
10885 
10886    For the PA, transform:
10887 
10888         memory(X + <large int>)
10889 
10890    into:
10891 
10892         if (<large int> & mask) >= 16
10893           Y = (<large int> & ~mask) + mask + 1  Round up.
10894         else
10895           Y = (<large int> & ~mask)             Round down.
10896         Z = X + Y
10897         memory (Z + (<large int> - Y));
10898 
10899    This makes reload inheritance and reload_cse work better since Z
10900    can be reused.
10901 
10902    There may be more opportunities to improve code with this hook.  */
10903 
10904 rtx
10905 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10906 			      int opnum, int type,
10907 			      int ind_levels ATTRIBUTE_UNUSED)
10908 {
10909   long offset, newoffset, mask;
10910   rtx new_rtx, temp = NULL_RTX;
10911 
10912   mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10913 	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10914 
10915   if (optimize && GET_CODE (ad) == PLUS)
10916     temp = simplify_binary_operation (PLUS, Pmode,
10917 				      XEXP (ad, 0), XEXP (ad, 1));
10918 
10919   new_rtx = temp ? temp : ad;
10920 
10921   if (optimize
10922       && GET_CODE (new_rtx) == PLUS
10923       && GET_CODE (XEXP (new_rtx, 0)) == REG
10924       && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10925     {
10926       offset = INTVAL (XEXP ((new_rtx), 1));
10927 
10928       /* Choose rounding direction.  Round up if we are >= halfway.  */
10929       if ((offset & mask) >= ((mask + 1) / 2))
10930 	newoffset = (offset & ~mask) + mask + 1;
10931       else
10932 	newoffset = offset & ~mask;
10933 
10934       /* Ensure that long displacements are aligned.  */
10935       if (mask == 0x3fff
10936 	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
10937 	      || (TARGET_64BIT && (mode) == DImode)))
10938 	newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10939 
10940       if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10941 	{
10942 	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10943 			       GEN_INT (newoffset));
10944 	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10945 	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10946 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10947 		       opnum, (enum reload_type) type);
10948 	  return ad;
10949 	}
10950     }
10951 
10952   return NULL_RTX;
10953 }
10954 
10955 /* Output address vector.  */
10956 
10957 void
10958 pa_output_addr_vec (rtx lab, rtx body)
10959 {
10960   int idx, vlen = XVECLEN (body, 0);
10961 
10962   if (!TARGET_SOM)
10963     fputs ("\t.align 4\n", asm_out_file);
10964   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10965   if (TARGET_GAS)
10966     fputs ("\t.begin_brtab\n", asm_out_file);
10967   for (idx = 0; idx < vlen; idx++)
10968     {
10969       ASM_OUTPUT_ADDR_VEC_ELT
10970 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10971     }
10972   if (TARGET_GAS)
10973     fputs ("\t.end_brtab\n", asm_out_file);
10974 }
10975 
10976 /* Output address difference vector.  */
10977 
10978 void
10979 pa_output_addr_diff_vec (rtx lab, rtx body)
10980 {
10981   rtx base = XEXP (XEXP (body, 0), 0);
10982   int idx, vlen = XVECLEN (body, 1);
10983 
10984   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10985   if (TARGET_GAS)
10986     fputs ("\t.begin_brtab\n", asm_out_file);
10987   for (idx = 0; idx < vlen; idx++)
10988     {
10989       ASM_OUTPUT_ADDR_DIFF_ELT
10990 	(asm_out_file,
10991 	 body,
10992 	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10993 	 CODE_LABEL_NUMBER (base));
10994     }
10995   if (TARGET_GAS)
10996     fputs ("\t.end_brtab\n", asm_out_file);
10997 }
10998 
10999 /* This is a helper function for the other atomic operations.  This function
11000    emits a loop that contains SEQ that iterates until a compare-and-swap
11001    operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
11002    a set of instructions that takes a value from OLD_REG as an input and
11003    produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
11004    set to the current contents of MEM.  After SEQ, a compare-and-swap will
11005    attempt to update MEM with NEW_REG.  The function returns true when the
11006    loop was generated successfully.  */
11007 
11008 static bool
11009 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
11010 {
11011   machine_mode mode = GET_MODE (mem);
11012   rtx_code_label *label;
11013   rtx cmp_reg, success, oldval;
11014 
11015   /* The loop we want to generate looks like
11016 
11017         cmp_reg = mem;
11018       label:
11019         old_reg = cmp_reg;
11020         seq;
11021         (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
11022         if (!success)
11023           goto label;
11024 
11025      Note that we only do the plain load from memory once.  Subsequent
11026      iterations use the value loaded by the compare-and-swap pattern.  */
11027 
11028   label = gen_label_rtx ();
11029   cmp_reg = gen_reg_rtx (mode);
11030 
11031   emit_move_insn (cmp_reg, mem);
11032   emit_label (label);
11033   emit_move_insn (old_reg, cmp_reg);
11034   if (seq)
11035     emit_insn (seq);
11036 
11037   success = NULL_RTX;
11038   oldval = cmp_reg;
11039   if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
11040                                        new_reg, false, MEMMODEL_SYNC_SEQ_CST,
11041                                        MEMMODEL_RELAXED))
11042     return false;
11043 
11044   if (oldval != cmp_reg)
11045     emit_move_insn (cmp_reg, oldval);
11046 
11047   /* Mark this jump predicted not taken.  */
11048   emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
11049                            GET_MODE (success), 1, label,
11050 			   profile_probability::guessed_never ());
11051   return true;
11052 }
11053 
11054 /* This function tries to implement an atomic exchange operation using a
11055    compare_and_swap loop. VAL is written to *MEM.  The previous contents of
11056    *MEM are returned, using TARGET if possible.  No memory model is required
11057    since a compare_and_swap loop is seq-cst.  */
11058 
11059 rtx
11060 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
11061 {
11062   machine_mode mode = GET_MODE (mem);
11063 
11064   if (can_compare_and_swap_p (mode, true))
11065     {
11066       if (!target || !register_operand (target, mode))
11067         target = gen_reg_rtx (mode);
11068       if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
11069         return target;
11070     }
11071 
11072   return NULL_RTX;
11073 }
11074 
11075 /* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
11076    arguments passed by hidden reference in the 32-bit HP runtime.  Users
11077    can override this behavior for better compatibility with OpenMP at the
11078    risk of library incompatibilities.  Arguments are always passed by value
11079    in the 64-bit HP runtime.  */
11080 
11081 static bool
11082 pa_callee_copies (cumulative_args_t, const function_arg_info &)
11083 {
11084   return !TARGET_CALLER_COPIES;
11085 }
11086 
11087 /* Implement TARGET_HARD_REGNO_NREGS.  */
11088 
11089 static unsigned int
11090 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
11091 {
11092   return PA_HARD_REGNO_NREGS (regno, mode);
11093 }
11094 
11095 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
11096 
11097 static bool
11098 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
11099 {
11100   return PA_HARD_REGNO_MODE_OK (regno, mode);
11101 }
11102 
11103 /* Implement TARGET_STARTING_FRAME_OFFSET.
11104 
11105    On the 32-bit ports, we reserve one slot for the previous frame
11106    pointer and one fill slot.  The fill slot is for compatibility
11107    with HP compiled programs.  On the 64-bit ports, we reserve one
11108    slot for the previous frame pointer.  */
11109 
11110 static HOST_WIDE_INT
11111 pa_starting_frame_offset (void)
11112 {
11113   return 8;
11114 }
11115 
11116 /* Figure out the size in words of the function argument.  The size
11117    returned by this function should always be greater than zero because
11118    we pass variable and zero sized objects by reference.  */
11119 
11120 HOST_WIDE_INT
11121 pa_function_arg_size (machine_mode mode, const_tree type)
11122 {
11123   HOST_WIDE_INT size;
11124 
11125   size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
11126   return CEIL (size, UNITS_PER_WORD);
11127 }
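
/* Example (a sketch): in the 32-bit runtime a 10-byte BLKmode struct
   yields CEIL (10, 4) == 3 words and an SFmode scalar yields 1; the
   result is never zero because zero sized and variable sized objects
   are passed by reference.  */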
11128 
11129 #include "gt-pa.h"
11130