xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/pa/pa.c (revision d536862b7d93d77932ef5de7eebdc48d76921b77)
1 /* Subroutines for insn-output.c for HPPA.
2    Copyright (C) 1992-2019 Free Software Foundation, Inc.
3    Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #define IN_TARGET_CODE 1
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "attribs.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "varasm.h"
45 #include "calls.h"
46 #include "output.h"
47 #include "except.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "reload.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "cfgrtl.h"
54 #include "opts.h"
55 #include "builtins.h"
56 
57 /* This file should be included last.  */
58 #include "target-def.h"
59 
60 /* Return nonzero if there is a bypass for the output of
61    OUT_INSN and the fp store IN_INSN.  */
62 int
63 pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
64 {
65   machine_mode store_mode;
66   machine_mode other_mode;
67   rtx set;
68 
69   if (recog_memoized (in_insn) < 0
70       || (get_attr_type (in_insn) != TYPE_FPSTORE
71 	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
72       || recog_memoized (out_insn) < 0)
73     return 0;
74 
75   store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
76 
77   set = single_set (out_insn);
78   if (!set)
79     return 0;
80 
81   other_mode = GET_MODE (SET_SRC (set));
82 
83   return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
84 }
85 
86 
/* Unless DO_FRAME_NOTES has already been defined, default it to 1 when
   INCOMING_RETURN_ADDR_RTX is defined and to 0 otherwise.  */
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
94 
95 static void pa_option_override (void);
96 static void copy_reg_pointer (rtx, rtx);
97 static void fix_range (const char *);
98 static int hppa_register_move_cost (machine_mode mode, reg_class_t,
99 				    reg_class_t);
100 static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
101 static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
102 static inline rtx force_mode (machine_mode, rtx);
103 static void pa_reorg (void);
104 static void pa_combine_instructions (void);
105 static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
106 			     rtx, rtx);
107 static bool forward_branch_p (rtx_insn *);
108 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
109 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
110 static int compute_movmem_length (rtx_insn *);
111 static int compute_clrmem_length (rtx_insn *);
112 static bool pa_assemble_integer (rtx, unsigned int, int);
113 static void remove_useless_addtr_insns (int);
114 static void store_reg (int, HOST_WIDE_INT, int);
115 static void store_reg_modify (int, int, HOST_WIDE_INT);
116 static void load_reg (int, HOST_WIDE_INT, int);
117 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
118 static rtx pa_function_value (const_tree, const_tree, bool);
119 static rtx pa_libcall_value (machine_mode, const_rtx);
120 static bool pa_function_value_regno_p (const unsigned int);
121 static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
122 static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
123 static void update_total_code_bytes (unsigned int);
124 static void pa_output_function_epilogue (FILE *);
125 static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
126 static int pa_issue_rate (void);
127 static int pa_reloc_rw_mask (void);
128 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
129 static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
130 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
131      ATTRIBUTE_UNUSED;
132 static void pa_encode_section_info (tree, rtx, int);
133 static const char *pa_strip_name_encoding (const char *);
134 static bool pa_function_ok_for_sibcall (tree, tree);
135 static void pa_globalize_label (FILE *, const char *)
136      ATTRIBUTE_UNUSED;
137 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
138 				    HOST_WIDE_INT, tree);
139 #if !defined(USE_COLLECT2)
140 static void pa_asm_out_constructor (rtx, int);
141 static void pa_asm_out_destructor (rtx, int);
142 #endif
143 static void pa_init_builtins (void);
144 static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
145 static rtx hppa_builtin_saveregs (void);
146 static void hppa_va_start (tree, rtx);
147 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
148 static bool pa_scalar_mode_supported_p (scalar_mode);
149 static bool pa_commutative_p (const_rtx x, int outer_code);
150 static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
151 static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
152 static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
153 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
154 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
155 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
156 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
157 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
158 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
159 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
160 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
161 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
162 static void output_deferred_plabels (void);
163 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
164 static void pa_file_end (void);
165 static void pa_init_libfuncs (void);
166 static rtx pa_struct_value_rtx (tree, int);
167 static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
168 				  const_tree, bool);
169 static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
170 				 tree, bool);
171 static void pa_function_arg_advance (cumulative_args_t, machine_mode,
172 				     const_tree, bool);
173 static rtx pa_function_arg (cumulative_args_t, machine_mode,
174 			    const_tree, bool);
175 static pad_direction pa_function_arg_padding (machine_mode, const_tree);
176 static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
177 static struct machine_function * pa_init_machine_status (void);
178 static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
179 					machine_mode,
180 					secondary_reload_info *);
181 static bool pa_secondary_memory_needed (machine_mode,
182 					reg_class_t, reg_class_t);
183 static void pa_extra_live_on_entry (bitmap);
184 static machine_mode pa_promote_function_mode (const_tree,
185 						   machine_mode, int *,
186 						   const_tree, int);
187 
188 static void pa_asm_trampoline_template (FILE *);
189 static void pa_trampoline_init (rtx, tree, rtx);
190 static rtx pa_trampoline_adjust_address (rtx);
191 static rtx pa_delegitimize_address (rtx);
192 static bool pa_print_operand_punct_valid_p (unsigned char);
193 static rtx pa_internal_arg_pointer (void);
194 static bool pa_can_eliminate (const int, const int);
195 static void pa_conditional_register_usage (void);
196 static machine_mode pa_c_mode_for_suffix (char);
197 static section *pa_function_section (tree, enum node_frequency, bool, bool);
198 static bool pa_cannot_force_const_mem (machine_mode, rtx);
199 static bool pa_legitimate_constant_p (machine_mode, rtx);
200 static unsigned int pa_section_type_flags (tree, const char *, int);
201 static bool pa_legitimate_address_p (machine_mode, rtx, bool);
202 static bool pa_callee_copies (cumulative_args_t, machine_mode,
203 			      const_tree, bool);
204 static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
205 static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
206 static bool pa_modes_tieable_p (machine_mode, machine_mode);
207 static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
208 static HOST_WIDE_INT pa_starting_frame_offset (void);
209 static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;
210 
211 /* The following extra sections are only used for SOM.  */
212 static GTY(()) section *som_readonly_data_section;
213 static GTY(()) section *som_one_only_readonly_data_section;
214 static GTY(()) section *som_one_only_data_section;
215 static GTY(()) section *som_tm_clone_table_section;
216 
217 /* Counts for the number of callee-saved general and floating point
218    registers which were saved by the current function's prologue.  */
219 static int gr_saved, fr_saved;
220 
221 /* Boolean indicating whether the return pointer was saved by the
222    current function's prologue.  */
223 static bool rp_saved;
224 
225 static rtx find_addr_reg (rtx);
226 
227 /* Keep track of the number of bytes we have output in the CODE subspace
228    during this compilation so we'll know when to emit inline long-calls.  */
229 unsigned long total_code_bytes;
230 
231 /* The last address of the previous function plus the number of bytes in
232    associated thunks that have been output.  This is used to determine if
233    a thunk can use an IA-relative branch to reach its target function.  */
234 static unsigned int last_address;
235 
/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  /* NOTE(review): presumably the internal label under which the plabel
     is output -- confirm against the code that fills this in.  */
  rtx internal_label;
  /* NOTE(review): presumably the symbol the plabel refers to -- confirm
     against the code that fills this in.  */
  rtx symbol;
};
/* Array of deferred plabels.  The GTY length annotation names
   n_deferred_plabels as its element count.  */
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
/* Element count used by the GTY length annotation on deferred_plabels.  */
static size_t n_deferred_plabels = 0;
246 
247 /* Initialize the GCC target structure.  */
248 
249 #undef TARGET_OPTION_OVERRIDE
250 #define TARGET_OPTION_OVERRIDE pa_option_override
251 
252 #undef TARGET_ASM_ALIGNED_HI_OP
253 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
254 #undef TARGET_ASM_ALIGNED_SI_OP
255 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
256 #undef TARGET_ASM_ALIGNED_DI_OP
257 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
258 #undef TARGET_ASM_UNALIGNED_HI_OP
259 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
260 #undef TARGET_ASM_UNALIGNED_SI_OP
261 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
262 #undef TARGET_ASM_UNALIGNED_DI_OP
263 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
264 #undef TARGET_ASM_INTEGER
265 #define TARGET_ASM_INTEGER pa_assemble_integer
266 
267 #undef TARGET_ASM_FUNCTION_EPILOGUE
268 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
269 
270 #undef TARGET_FUNCTION_VALUE
271 #define TARGET_FUNCTION_VALUE pa_function_value
272 #undef TARGET_LIBCALL_VALUE
273 #define TARGET_LIBCALL_VALUE pa_libcall_value
274 #undef TARGET_FUNCTION_VALUE_REGNO_P
275 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
276 
277 #undef TARGET_LEGITIMIZE_ADDRESS
278 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
279 
280 #undef TARGET_SCHED_ADJUST_COST
281 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
282 #undef TARGET_SCHED_ISSUE_RATE
283 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
284 
285 #undef TARGET_ENCODE_SECTION_INFO
286 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
287 #undef TARGET_STRIP_NAME_ENCODING
288 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
289 
290 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
291 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
292 
293 #undef TARGET_COMMUTATIVE_P
294 #define TARGET_COMMUTATIVE_P pa_commutative_p
295 
296 #undef TARGET_ASM_OUTPUT_MI_THUNK
297 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
300 
301 #undef TARGET_ASM_FILE_END
302 #define TARGET_ASM_FILE_END pa_file_end
303 
304 #undef TARGET_ASM_RELOC_RW_MASK
305 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
306 
307 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
308 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
309 
310 #if !defined(USE_COLLECT2)
311 #undef TARGET_ASM_CONSTRUCTOR
312 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
313 #undef TARGET_ASM_DESTRUCTOR
314 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
315 #endif
316 
317 #undef TARGET_INIT_BUILTINS
318 #define TARGET_INIT_BUILTINS pa_init_builtins
319 
320 #undef TARGET_EXPAND_BUILTIN
321 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
322 
323 #undef TARGET_REGISTER_MOVE_COST
324 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
325 #undef TARGET_RTX_COSTS
326 #define TARGET_RTX_COSTS hppa_rtx_costs
327 #undef TARGET_ADDRESS_COST
328 #define TARGET_ADDRESS_COST hppa_address_cost
329 
330 #undef TARGET_MACHINE_DEPENDENT_REORG
331 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
332 
333 #undef TARGET_INIT_LIBFUNCS
334 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
335 
336 #undef TARGET_PROMOTE_FUNCTION_MODE
337 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
338 #undef TARGET_PROMOTE_PROTOTYPES
339 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
340 
341 #undef TARGET_STRUCT_VALUE_RTX
342 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
343 #undef TARGET_RETURN_IN_MEMORY
344 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
345 #undef TARGET_MUST_PASS_IN_STACK
346 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
347 #undef TARGET_PASS_BY_REFERENCE
348 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
349 #undef TARGET_CALLEE_COPIES
350 #define TARGET_CALLEE_COPIES pa_callee_copies
351 #undef TARGET_ARG_PARTIAL_BYTES
352 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
353 #undef TARGET_FUNCTION_ARG
354 #define TARGET_FUNCTION_ARG pa_function_arg
355 #undef TARGET_FUNCTION_ARG_ADVANCE
356 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
357 #undef TARGET_FUNCTION_ARG_PADDING
358 #define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
359 #undef TARGET_FUNCTION_ARG_BOUNDARY
360 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
361 
362 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
363 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
364 #undef TARGET_EXPAND_BUILTIN_VA_START
365 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
366 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
367 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
368 
369 #undef TARGET_SCALAR_MODE_SUPPORTED_P
370 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
371 
372 #undef TARGET_CANNOT_FORCE_CONST_MEM
373 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
374 
375 #undef TARGET_SECONDARY_RELOAD
376 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
377 #undef TARGET_SECONDARY_MEMORY_NEEDED
378 #define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed
379 
380 #undef TARGET_EXTRA_LIVE_ON_ENTRY
381 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
382 
383 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
384 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
385 #undef TARGET_TRAMPOLINE_INIT
386 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
387 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
388 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
389 #undef TARGET_DELEGITIMIZE_ADDRESS
390 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
391 #undef TARGET_INTERNAL_ARG_POINTER
392 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
393 #undef TARGET_CAN_ELIMINATE
394 #define TARGET_CAN_ELIMINATE pa_can_eliminate
395 #undef TARGET_CONDITIONAL_REGISTER_USAGE
396 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
397 #undef TARGET_C_MODE_FOR_SUFFIX
398 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
399 #undef TARGET_ASM_FUNCTION_SECTION
400 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
401 
402 #undef TARGET_LEGITIMATE_CONSTANT_P
403 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
404 #undef TARGET_SECTION_TYPE_FLAGS
405 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
406 #undef TARGET_LEGITIMATE_ADDRESS_P
407 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
408 
409 #undef TARGET_LRA_P
410 #define TARGET_LRA_P hook_bool_void_false
411 
412 #undef TARGET_HARD_REGNO_NREGS
413 #define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
414 #undef TARGET_HARD_REGNO_MODE_OK
415 #define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
416 #undef TARGET_MODES_TIEABLE_P
417 #define TARGET_MODES_TIEABLE_P pa_modes_tieable_p
418 
419 #undef TARGET_CAN_CHANGE_MODE_CLASS
420 #define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class
421 
422 #undef TARGET_CONSTANT_ALIGNMENT
423 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
424 
425 #undef TARGET_STARTING_FRAME_OFFSET
426 #define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset
427 
428 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
429 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
430 
431 struct gcc_target targetm = TARGET_INITIALIZER;
432 
433 /* Parse the -mfixed-range= option string.  */
434 
435 static void
436 fix_range (const char *const_str)
437 {
438   int i, first, last;
439   char *str, *dash, *comma;
440 
441   /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
442      REG2 are either register names or register numbers.  The effect
443      of this option is to mark the registers in the range from REG1 to
444      REG2 as ``fixed'' so they won't be used by the compiler.  This is
445      used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
446 
447   i = strlen (const_str);
448   str = (char *) alloca (i + 1);
449   memcpy (str, const_str, i + 1);
450 
451   while (1)
452     {
453       dash = strchr (str, '-');
454       if (!dash)
455 	{
456 	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
457 	  return;
458 	}
459       *dash = '\0';
460 
461       comma = strchr (dash + 1, ',');
462       if (comma)
463 	*comma = '\0';
464 
465       first = decode_reg_name (str);
466       if (first < 0)
467 	{
468 	  warning (0, "unknown register name: %s", str);
469 	  return;
470 	}
471 
472       last = decode_reg_name (dash + 1);
473       if (last < 0)
474 	{
475 	  warning (0, "unknown register name: %s", dash + 1);
476 	  return;
477 	}
478 
479       *dash = '-';
480 
481       if (first > last)
482 	{
483 	  warning (0, "%s-%s is an empty range", str, dash + 1);
484 	  return;
485 	}
486 
487       for (i = first; i <= last; ++i)
488 	fixed_regs[i] = call_used_regs[i] = 1;
489 
490       if (!comma)
491 	break;
492 
493       *comma = ',';
494       str = comma + 1;
495     }
496 
497   /* Check if all floating point registers have been fixed.  */
498   for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
499     if (!fixed_regs[i])
500       break;
501 
502   if (i > FP_REG_LAST)
503     target_flags |= MASK_DISABLE_FPREGS;
504 }
505 
506 /* Implement the TARGET_OPTION_OVERRIDE hook.  */
507 
508 static void
509 pa_option_override (void)
510 {
511   unsigned int i;
512   cl_deferred_option *opt;
513   vec<cl_deferred_option> *v
514     = (vec<cl_deferred_option> *) pa_deferred_options;
515 
516   if (v)
517     FOR_EACH_VEC_ELT (*v, i, opt)
518       {
519 	switch (opt->opt_index)
520 	  {
521 	  case OPT_mfixed_range_:
522 	    fix_range (opt->arg);
523 	    break;
524 
525 	  default:
526 	    gcc_unreachable ();
527 	  }
528       }
529 
530   if (flag_pic && TARGET_PORTABLE_RUNTIME)
531     {
532       warning (0, "PIC code generation is not supported in the portable runtime model");
533     }
534 
535   if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
536    {
537       warning (0, "PIC code generation is not compatible with fast indirect calls");
538    }
539 
540   if (! TARGET_GAS && write_symbols != NO_DEBUG)
541     {
542       warning (0, "%<-g%> is only supported when using GAS on this processor,");
543       warning (0, "%<-g%> option disabled");
544       write_symbols = NO_DEBUG;
545     }
546 
547   /* We only support the "big PIC" model now.  And we always generate PIC
548      code when in 64bit mode.  */
549   if (flag_pic == 1 || TARGET_64BIT)
550     flag_pic = 2;
551 
552   /* Disable -freorder-blocks-and-partition as we don't support hot and
553      cold partitioning.  */
554   if (flag_reorder_blocks_and_partition)
555     {
556       inform (input_location,
557 	      "%<-freorder-blocks-and-partition%> does not work "
558 	      "on this architecture");
559       flag_reorder_blocks_and_partition = 0;
560       flag_reorder_blocks = 1;
561     }
562 
563   /* We can't guarantee that .dword is available for 32-bit targets.  */
564   if (UNITS_PER_WORD == 4)
565     targetm.asm_out.aligned_op.di = NULL;
566 
567   /* The unaligned ops are only available when using GAS.  */
568   if (!TARGET_GAS)
569     {
570       targetm.asm_out.unaligned_op.hi = NULL;
571       targetm.asm_out.unaligned_op.si = NULL;
572       targetm.asm_out.unaligned_op.di = NULL;
573     }
574 
575   init_machine_status = pa_init_machine_status;
576 }
577 
/* Codes of the PA-specific builtin functions.  PA_BUILTIN_max is not a
   builtin itself; it follows the last code and is used as the size of
   the pa_builtins array.  */
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

/* Decls of the PA-specific builtins, indexed by enum pa_builtins code.  */
static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
588 
589 static void
590 pa_init_builtins (void)
591 {
592 #ifdef DONT_HAVE_FPUTC_UNLOCKED
593   {
594     tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
595     set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
596 		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
597   }
598 #endif
599 #if TARGET_HPUX_11
600   {
601     tree decl;
602 
603     if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
604       set_user_assembler_name (decl, "_Isfinite");
605     if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
606       set_user_assembler_name (decl, "_Isfinitef");
607   }
608 #endif
609 
610   if (HPUX_LONG_DOUBLE_LIBRARY)
611     {
612       tree decl, ftype;
613 
614       /* Under HPUX, the __float128 type is a synonym for "long double".  */
615       (*lang_hooks.types.register_builtin_type) (long_double_type_node,
616 						 "__float128");
617 
618       /* TFmode support builtins.  */
619       ftype = build_function_type_list (long_double_type_node,
620 					long_double_type_node,
621 					NULL_TREE);
622       decl = add_builtin_function ("__builtin_fabsq", ftype,
623 				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
624 				   "_U_Qfabs", NULL_TREE);
625       TREE_READONLY (decl) = 1;
626       pa_builtins[PA_BUILTIN_FABSQ] = decl;
627 
628       ftype = build_function_type_list (long_double_type_node,
629 					long_double_type_node,
630 					long_double_type_node,
631 					NULL_TREE);
632       decl = add_builtin_function ("__builtin_copysignq", ftype,
633 				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
634 				   "_U_Qfcopysign", NULL_TREE);
635       TREE_READONLY (decl) = 1;
636       pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
637 
638       ftype = build_function_type_list (long_double_type_node, NULL_TREE);
639       decl = add_builtin_function ("__builtin_infq", ftype,
640 				   PA_BUILTIN_INFQ, BUILT_IN_MD,
641 				   NULL, NULL_TREE);
642       pa_builtins[PA_BUILTIN_INFQ] = decl;
643 
644       decl = add_builtin_function ("__builtin_huge_valq", ftype,
645                                    PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
646                                    NULL, NULL_TREE);
647       pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
648     }
649 }
650 
651 static rtx
652 pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
653 		   machine_mode mode ATTRIBUTE_UNUSED,
654 		   int ignore ATTRIBUTE_UNUSED)
655 {
656   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
657   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
658 
659   switch (fcode)
660     {
661     case PA_BUILTIN_FABSQ:
662     case PA_BUILTIN_COPYSIGNQ:
663       return expand_call (exp, target, ignore);
664 
665     case PA_BUILTIN_INFQ:
666     case PA_BUILTIN_HUGE_VALQ:
667       {
668 	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
669 	REAL_VALUE_TYPE inf;
670 	rtx tmp;
671 
672 	real_inf (&inf);
673 	tmp = const_double_from_real_value (inf, target_mode);
674 
675 	tmp = validize_mem (force_const_mem (target_mode, tmp));
676 
677 	if (target == 0)
678 	  target = gen_reg_rtx (target_mode);
679 
680 	emit_move_insn (target, tmp);
681 	return target;
682       }
683 
684     default:
685       gcc_unreachable ();
686     }
687 
688   return NULL_RTX;
689 }
690 
691 /* Function to init struct machine_function.
692    This will be called, via a pointer variable,
693    from push_function_context.  */
694 
695 static struct machine_function *
696 pa_init_machine_status (void)
697 {
698   return ggc_cleared_alloc<machine_function> ();
699 }
700 
701 /* If FROM is a probable pointer register, mark TO as a probable
702    pointer register with the same pointer alignment as FROM.  */
703 
704 static void
705 copy_reg_pointer (rtx to, rtx from)
706 {
707   if (REG_POINTER (from))
708     mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
709 }
710 
711 /* Return 1 if X contains a symbolic expression.  We know these
712    expressions will have one of a few well defined forms, so
713    we need only check those forms.  */
714 int
715 pa_symbolic_expression_p (rtx x)
716 {
717 
718   /* Strip off any HIGH.  */
719   if (GET_CODE (x) == HIGH)
720     x = XEXP (x, 0);
721 
722   return symbolic_operand (x, VOIDmode);
723 }
724 
725 /* Accept any constant that can be moved in one instruction into a
726    general register.  */
727 int
728 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
729 {
730   /* OK if ldo, ldil, or zdepi, can be used.  */
731   return (VAL_14_BITS_P (ival)
732 	  || pa_ldil_cint_p (ival)
733 	  || pa_zdepi_cint_p (ival));
734 }
735 
736 /* True iff ldil can be used to load this CONST_INT.  The least
737    significant 11 bits of the value must be zero and the value must
738    not change sign when extended from 32 to 64 bits.  */
739 int
740 pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
741 {
742   unsigned HOST_WIDE_INT x;
743 
744   x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
745   return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
746 }
747 
748 /* True iff zdepi can be used to generate this CONST_INT.
749    zdepi first sign extends a 5-bit signed number to a given field
750    length, then places this field anywhere in a zero.  */
751 int
752 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
753 {
754   unsigned HOST_WIDE_INT lsb_mask, t;
755 
756   /* This might not be obvious, but it's at least fast.
757      This function is critical; we don't have the time loops would take.  */
758   lsb_mask = x & -x;
759   t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
760   /* Return true iff t is a power of two.  */
761   return ((t & (t - 1)) == 0);
762 }
763 
764 /* True iff depi or extru can be used to compute (reg & mask).
765    Accept bit pattern like these:
766    0....01....1
767    1....10....0
768    1..10..01..1  */
769 int
770 pa_and_mask_p (unsigned HOST_WIDE_INT mask)
771 {
772   mask = ~mask;
773   mask += mask & -mask;
774   return (mask & (mask - 1)) == 0;
775 }
776 
777 /* True iff depi can be used to compute (reg | MASK).  */
778 int
779 pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
780 {
781   mask += mask & -mask;
782   return (mask & (mask - 1)) == 0;
783 }
784 
785 /* Legitimize PIC addresses.  If the address is already
786    position-independent, we return ORIG.  Newly generated
787    position-independent addresses go to REG.  If we need more
788    than one register, we lose.  */
789 
790 static rtx
791 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
792 {
793   rtx pic_ref = orig;
794 
795   gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
796 
797   /* Labels need special handling.  */
798   if (pic_label_operand (orig, mode))
799     {
800       rtx_insn *insn;
801 
802       /* We do not want to go through the movXX expanders here since that
803 	 would create recursion.
804 
805 	 Nor do we really want to call a generator for a named pattern
806 	 since that requires multiple patterns if we want to support
807 	 multiple word sizes.
808 
809 	 So instead we just emit the raw set, which avoids the movXX
810 	 expanders completely.  */
811       mark_reg_pointer (reg, BITS_PER_UNIT);
812       insn = emit_insn (gen_rtx_SET (reg, orig));
813 
814       /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
815       add_reg_note (insn, REG_EQUAL, orig);
816 
817       /* During and after reload, we need to generate a REG_LABEL_OPERAND note
818 	 and update LABEL_NUSES because this is not done automatically.  */
819       if (reload_in_progress || reload_completed)
820 	{
821 	  /* Extract LABEL_REF.  */
822 	  if (GET_CODE (orig) == CONST)
823 	    orig = XEXP (XEXP (orig, 0), 0);
824 	  /* Extract CODE_LABEL.  */
825 	  orig = XEXP (orig, 0);
826 	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
827 	  /* Make sure we have label and not a note.  */
828 	  if (LABEL_P (orig))
829 	    LABEL_NUSES (orig)++;
830 	}
831       crtl->uses_pic_offset_table = 1;
832       return reg;
833     }
834   if (GET_CODE (orig) == SYMBOL_REF)
835     {
836       rtx_insn *insn;
837       rtx tmp_reg;
838 
839       gcc_assert (reg);
840 
841       /* Before reload, allocate a temporary register for the intermediate
842 	 result.  This allows the sequence to be deleted when the final
843 	 result is unused and the insns are trivially dead.  */
844       tmp_reg = ((reload_in_progress || reload_completed)
845 		 ? reg : gen_reg_rtx (Pmode));
846 
847       if (function_label_operand (orig, VOIDmode))
848 	{
849 	  /* Force function label into memory in word mode.  */
850 	  orig = XEXP (force_const_mem (word_mode, orig), 0);
851 	  /* Load plabel address from DLT.  */
852 	  emit_move_insn (tmp_reg,
853 			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
854 					gen_rtx_HIGH (word_mode, orig)));
855 	  pic_ref
856 	    = gen_const_mem (Pmode,
857 			     gen_rtx_LO_SUM (Pmode, tmp_reg,
858 					     gen_rtx_UNSPEC (Pmode,
859 						         gen_rtvec (1, orig),
860 						         UNSPEC_DLTIND14R)));
861 	  emit_move_insn (reg, pic_ref);
862 	  /* Now load address of function descriptor.  */
863 	  pic_ref = gen_rtx_MEM (Pmode, reg);
864 	}
865       else
866 	{
867 	  /* Load symbol reference from DLT.  */
868 	  emit_move_insn (tmp_reg,
869 			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
870 					gen_rtx_HIGH (word_mode, orig)));
871 	  pic_ref
872 	    = gen_const_mem (Pmode,
873 			     gen_rtx_LO_SUM (Pmode, tmp_reg,
874 					     gen_rtx_UNSPEC (Pmode,
875 						         gen_rtvec (1, orig),
876 						         UNSPEC_DLTIND14R)));
877 	}
878 
879       crtl->uses_pic_offset_table = 1;
880       mark_reg_pointer (reg, BITS_PER_UNIT);
881       insn = emit_move_insn (reg, pic_ref);
882 
883       /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
884       set_unique_reg_note (insn, REG_EQUAL, orig);
885 
886       return reg;
887     }
888   else if (GET_CODE (orig) == CONST)
889     {
890       rtx base;
891 
892       if (GET_CODE (XEXP (orig, 0)) == PLUS
893 	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
894 	return orig;
895 
896       gcc_assert (reg);
897       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
898 
899       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
900       orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
901 				     base == reg ? 0 : reg);
902 
903       if (GET_CODE (orig) == CONST_INT)
904 	{
905 	  if (INT_14_BITS (orig))
906 	    return plus_constant (Pmode, base, INTVAL (orig));
907 	  orig = force_reg (Pmode, orig);
908 	}
909       pic_ref = gen_rtx_PLUS (Pmode, base, orig);
910       /* Likewise, should we set special REG_NOTEs here?  */
911     }
912 
913   return pic_ref;
914 }
915 
916 static GTY(()) rtx gen_tls_tga;
917 
918 static rtx
919 gen_tls_get_addr (void)
920 {
921   if (!gen_tls_tga)
922     gen_tls_tga = init_one_libfunc ("__tls_get_addr");
923   return gen_tls_tga;
924 }
925 
926 static rtx
927 hppa_tls_call (rtx arg)
928 {
929   rtx ret;
930 
931   ret = gen_reg_rtx (Pmode);
932   emit_library_call_value (gen_tls_get_addr (), ret,
933 			   LCT_CONST, Pmode, arg, Pmode);
934 
935   return ret;
936 }
937 
938 static rtx
939 legitimize_tls_address (rtx addr)
940 {
941   rtx ret, tmp, t1, t2, tp;
942   rtx_insn *insn;
943 
944   /* Currently, we can't handle anything but a SYMBOL_REF.  */
945   if (GET_CODE (addr) != SYMBOL_REF)
946     return addr;
947 
948   switch (SYMBOL_REF_TLS_MODEL (addr))
949     {
950       case TLS_MODEL_GLOBAL_DYNAMIC:
951 	tmp = gen_reg_rtx (Pmode);
952 	if (flag_pic)
953 	  emit_insn (gen_tgd_load_pic (tmp, addr));
954 	else
955 	  emit_insn (gen_tgd_load (tmp, addr));
956 	ret = hppa_tls_call (tmp);
957 	break;
958 
959       case TLS_MODEL_LOCAL_DYNAMIC:
960 	ret = gen_reg_rtx (Pmode);
961 	tmp = gen_reg_rtx (Pmode);
962 	start_sequence ();
963 	if (flag_pic)
964 	  emit_insn (gen_tld_load_pic (tmp, addr));
965 	else
966 	  emit_insn (gen_tld_load (tmp, addr));
967 	t1 = hppa_tls_call (tmp);
968 	insn = get_insns ();
969 	end_sequence ();
970 	t2 = gen_reg_rtx (Pmode);
971 	emit_libcall_block (insn, t2, t1,
972 			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
973 				            UNSPEC_TLSLDBASE));
974 	emit_insn (gen_tld_offset_load (ret, addr, t2));
975 	break;
976 
977       case TLS_MODEL_INITIAL_EXEC:
978 	tp = gen_reg_rtx (Pmode);
979 	tmp = gen_reg_rtx (Pmode);
980 	ret = gen_reg_rtx (Pmode);
981 	emit_insn (gen_tp_load (tp));
982 	if (flag_pic)
983 	  emit_insn (gen_tie_load_pic (tmp, addr));
984 	else
985 	  emit_insn (gen_tie_load (tmp, addr));
986 	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
987 	break;
988 
989       case TLS_MODEL_LOCAL_EXEC:
990 	tp = gen_reg_rtx (Pmode);
991 	ret = gen_reg_rtx (Pmode);
992 	emit_insn (gen_tp_load (tp));
993 	emit_insn (gen_tle_load (ret, addr, tp));
994 	break;
995 
996       default:
997 	gcc_unreachable ();
998     }
999 
1000   return ret;
1001 }
1002 
1003 /* Helper for hppa_legitimize_address.  Given X, return true if it
1004    is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
1005 
1006    This respectively represent canonical shift-add rtxs or scaled
1007    memory addresses.  */
1008 static bool
1009 mem_shadd_or_shadd_rtx_p (rtx x)
1010 {
1011   return ((GET_CODE (x) == ASHIFT
1012 	   || GET_CODE (x) == MULT)
1013 	  && GET_CODE (XEXP (x, 1)) == CONST_INT
1014 	  && ((GET_CODE (x) == ASHIFT
1015 	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
1016 	      || (GET_CODE (x) == MULT
1017 		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
1018 }
1019 
1020 /* Try machine-dependent ways of modifying an illegitimate address
1021    to be legitimate.  If we find one, return the new, valid address.
1022    This macro is used in only one place: `memory_address' in explow.c.
1023 
1024    OLDX is the address as it was before break_out_memory_refs was called.
1025    In some cases it is useful to look at this to decide what needs to be done.
1026 
1027    It is always safe for this macro to do nothing.  It exists to recognize
1028    opportunities to optimize the output.
1029 
1030    For the PA, transform:
1031 
1032 	memory(X + <large int>)
1033 
1034    into:
	if (<large int> & mask) >= (mask + 1) / 2
1036 	if (<large int> & mask) >= 16
1037 	  Y = (<large int> & ~mask) + mask + 1	Round up.
1038 	else
1039 	  Y = (<large int> & ~mask)		Round down.
1040 	Z = X + Y
1041 	memory (Z + (<large int> - Y));
1042 
1043    This is for CSE to find several similar references, and only use one Z.
1044 
1045    X can either be a SYMBOL_REF or REG, but because combine cannot
1046    perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1047    D will not fit in 14 bits.
1048 
1049    MODE_FLOAT references allow displacements which fit in 5 bits, so use
1050    0x1f as the mask.
1051 
1052    MODE_INT references allow displacements which fit in 14 bits, so use
1053    0x3fff as the mask.
1054 
1055    This relies on the fact that most mode MODE_FLOAT references will use FP
1056    registers and most mode MODE_INT references will use integer registers.
1057    (In the rare case of an FP register used in an integer MODE, we depend
1058    on secondary reloads to clean things up.)
1059 
1060 
1061    It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1062    manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
1063    addressing modes to be used).
1064 
1065    Note that the addresses passed into hppa_legitimize_address always
1066    come from a MEM, so we only have to match the MULT form on incoming
1067    addresses.  But to be future proof we also match the ASHIFT form.
1068 
1069    However, this routine always places those shift-add sequences into
1070    registers, so we have to generate the ASHIFT form as our output.
1071 
1072    Put X and Z into registers.  Then put the entire expression into
1073    a register.  */
1074 
1075 rtx
1076 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1077 			 machine_mode mode)
1078 {
1079   rtx orig = x;
1080 
1081   /* We need to canonicalize the order of operands in unscaled indexed
1082      addresses since the code that checks if an address is valid doesn't
1083      always try both orders.  */
1084   if (!TARGET_NO_SPACE_REGS
1085       && GET_CODE (x) == PLUS
1086       && GET_MODE (x) == Pmode
1087       && REG_P (XEXP (x, 0))
1088       && REG_P (XEXP (x, 1))
1089       && REG_POINTER (XEXP (x, 0))
1090       && !REG_POINTER (XEXP (x, 1)))
1091     return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1092 
1093   if (tls_referenced_p (x))
1094     return legitimize_tls_address (x);
1095   else if (flag_pic)
1096     return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1097 
1098   /* Strip off CONST.  */
1099   if (GET_CODE (x) == CONST)
1100     x = XEXP (x, 0);
1101 
1102   /* Special case.  Get the SYMBOL_REF into a register and use indexing.
1103      That should always be safe.  */
1104   if (GET_CODE (x) == PLUS
1105       && GET_CODE (XEXP (x, 0)) == REG
1106       && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1107     {
1108       rtx reg = force_reg (Pmode, XEXP (x, 1));
1109       return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1110     }
1111 
1112   /* Note we must reject symbols which represent function addresses
1113      since the assembler/linker can't handle arithmetic on plabels.  */
1114   if (GET_CODE (x) == PLUS
1115       && GET_CODE (XEXP (x, 1)) == CONST_INT
1116       && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1117 	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1118 	  || GET_CODE (XEXP (x, 0)) == REG))
1119     {
1120       rtx int_part, ptr_reg;
1121       HOST_WIDE_INT newoffset;
1122       HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
1123       HOST_WIDE_INT mask;
1124 
1125       mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1126 	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);
1127 
1128       /* Choose which way to round the offset.  Round up if we
1129 	 are >= halfway to the next boundary.  */
1130       if ((offset & mask) >= ((mask + 1) / 2))
1131 	newoffset = (offset & ~ mask) + mask + 1;
1132       else
1133 	newoffset = (offset & ~ mask);
1134 
1135       /* If the newoffset will not fit in 14 bits (ldo), then
1136 	 handling this would take 4 or 5 instructions (2 to load
1137 	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1138 	 add the new offset and the SYMBOL_REF.)  Combine cannot
1139 	 handle 4->2 or 5->2 combinations, so do not create
1140 	 them.  */
1141       if (! VAL_14_BITS_P (newoffset)
1142 	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1143 	{
1144 	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
1145 	  rtx tmp_reg
1146 	    = force_reg (Pmode,
1147 			 gen_rtx_HIGH (Pmode, const_part));
1148 	  ptr_reg
1149 	    = force_reg (Pmode,
1150 			 gen_rtx_LO_SUM (Pmode,
1151 					 tmp_reg, const_part));
1152 	}
1153       else
1154 	{
1155 	  if (! VAL_14_BITS_P (newoffset))
1156 	    int_part = force_reg (Pmode, GEN_INT (newoffset));
1157 	  else
1158 	    int_part = GEN_INT (newoffset);
1159 
1160 	  ptr_reg = force_reg (Pmode,
1161 			       gen_rtx_PLUS (Pmode,
1162 					     force_reg (Pmode, XEXP (x, 0)),
1163 					     int_part));
1164 	}
1165       return plus_constant (Pmode, ptr_reg, offset - newoffset);
1166     }
1167 
1168   /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */
1169 
1170   if (GET_CODE (x) == PLUS
1171       && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1172       && (OBJECT_P (XEXP (x, 1))
1173 	  || GET_CODE (XEXP (x, 1)) == SUBREG)
1174       && GET_CODE (XEXP (x, 1)) != CONST)
1175     {
1176       /* If we were given a MULT, we must fix the constant
1177 	 as we're going to create the ASHIFT form.  */
1178       HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1179       if (GET_CODE (XEXP (x, 0)) == MULT)
1180 	shift_val = exact_log2 (shift_val);
1181 
1182       rtx reg1, reg2;
1183       reg1 = XEXP (x, 1);
1184       if (GET_CODE (reg1) != REG)
1185 	reg1 = force_reg (Pmode, force_operand (reg1, 0));
1186 
1187       reg2 = XEXP (XEXP (x, 0), 0);
1188       if (GET_CODE (reg2) != REG)
1189         reg2 = force_reg (Pmode, force_operand (reg2, 0));
1190 
1191       return force_reg (Pmode,
1192 			gen_rtx_PLUS (Pmode,
1193 				      gen_rtx_ASHIFT (Pmode, reg2,
1194 						      GEN_INT (shift_val)),
1195 				      reg1));
1196     }
1197 
1198   /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
1199 
1200      Only do so for floating point modes since this is more speculative
1201      and we lose if it's an integer store.  */
1202   if (GET_CODE (x) == PLUS
1203       && GET_CODE (XEXP (x, 0)) == PLUS
1204       && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
1205       && (mode == SFmode || mode == DFmode))
1206     {
1207       int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
1208 
1209       /* If we were given a MULT, we must fix the constant
1210 	 as we're going to create the ASHIFT form.  */
1211       if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
1212 	shift_val = exact_log2 (shift_val);
1213 
1214       /* Try and figure out what to use as a base register.  */
1215       rtx reg1, reg2, base, idx;
1216 
1217       reg1 = XEXP (XEXP (x, 0), 1);
1218       reg2 = XEXP (x, 1);
1219       base = NULL_RTX;
1220       idx = NULL_RTX;
1221 
1222       /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
1223 	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
1224 	 it's a base register below.  */
1225       if (GET_CODE (reg1) != REG)
1226 	reg1 = force_reg (Pmode, force_operand (reg1, 0));
1227 
1228       if (GET_CODE (reg2) != REG)
1229 	reg2 = force_reg (Pmode, force_operand (reg2, 0));
1230 
1231       /* Figure out what the base and index are.  */
1232 
1233       if (GET_CODE (reg1) == REG
1234 	  && REG_POINTER (reg1))
1235 	{
1236 	  base = reg1;
1237 	  idx = gen_rtx_PLUS (Pmode,
1238 			      gen_rtx_ASHIFT (Pmode,
1239 					      XEXP (XEXP (XEXP (x, 0), 0), 0),
1240 					      GEN_INT (shift_val)),
1241 			      XEXP (x, 1));
1242 	}
1243       else if (GET_CODE (reg2) == REG
1244 	       && REG_POINTER (reg2))
1245 	{
1246 	  base = reg2;
1247 	  idx = XEXP (x, 0);
1248 	}
1249 
1250       if (base == 0)
1251 	return orig;
1252 
1253       /* If the index adds a large constant, try to scale the
1254 	 constant so that it can be loaded with only one insn.  */
1255       if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1256 	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1257 			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
1258 	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1259 	{
1260 	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
1261 	  HOST_WIDE_INT val = INTVAL (XEXP (idx, 1));
1262 	  val /= (1 << shift_val);
1263 
1264 	  reg1 = XEXP (XEXP (idx, 0), 0);
1265 	  if (GET_CODE (reg1) != REG)
1266 	    reg1 = force_reg (Pmode, force_operand (reg1, 0));
1267 
1268 	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1269 
1270 	  /* We can now generate a simple scaled indexed address.  */
1271 	  return
1272 	    force_reg
1273 	      (Pmode, gen_rtx_PLUS (Pmode,
1274 				    gen_rtx_ASHIFT (Pmode, reg1,
1275 						    GEN_INT (shift_val)),
1276 				    base));
1277 	}
1278 
1279       /* If B + C is still a valid base register, then add them.  */
1280       if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1281 	  && INTVAL (XEXP (idx, 1)) <= 4096
1282 	  && INTVAL (XEXP (idx, 1)) >= -4096)
1283 	{
1284 	  rtx reg1, reg2;
1285 
1286 	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1287 
1288 	  reg2 = XEXP (XEXP (idx, 0), 0);
1289 	  if (GET_CODE (reg2) != CONST_INT)
1290 	    reg2 = force_reg (Pmode, force_operand (reg2, 0));
1291 
1292 	  return force_reg (Pmode,
1293 			    gen_rtx_PLUS (Pmode,
1294 					  gen_rtx_ASHIFT (Pmode, reg2,
1295 							  GEN_INT (shift_val)),
1296 					  reg1));
1297 	}
1298 
1299       /* Get the index into a register, then add the base + index and
1300 	 return a register holding the result.  */
1301 
1302       /* First get A into a register.  */
1303       reg1 = XEXP (XEXP (idx, 0), 0);
1304       if (GET_CODE (reg1) != REG)
1305 	reg1 = force_reg (Pmode, force_operand (reg1, 0));
1306 
1307       /* And get B into a register.  */
1308       reg2 = XEXP (idx, 1);
1309       if (GET_CODE (reg2) != REG)
1310 	reg2 = force_reg (Pmode, force_operand (reg2, 0));
1311 
1312       reg1 = force_reg (Pmode,
1313 			gen_rtx_PLUS (Pmode,
1314 				      gen_rtx_ASHIFT (Pmode, reg1,
1315 						      GEN_INT (shift_val)),
1316 				      reg2));
1317 
1318       /* Add the result to our base register and return.  */
1319       return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1320 
1321     }
1322 
1323   /* Uh-oh.  We might have an address for x[n-100000].  This needs
1324      special handling to avoid creating an indexed memory address
1325      with x-100000 as the base.
1326 
1327      If the constant part is small enough, then it's still safe because
1328      there is a guard page at the beginning and end of the data segment.
1329 
1330      Scaled references are common enough that we want to try and rearrange the
1331      terms so that we can use indexing for these addresses too.  Only
1332      do the optimization for floatint point modes.  */
1333 
1334   if (GET_CODE (x) == PLUS
1335       && pa_symbolic_expression_p (XEXP (x, 1)))
1336     {
1337       /* Ugly.  We modify things here so that the address offset specified
1338 	 by the index expression is computed first, then added to x to form
1339 	 the entire address.  */
1340 
1341       rtx regx1, regx2, regy1, regy2, y;
1342 
1343       /* Strip off any CONST.  */
1344       y = XEXP (x, 1);
1345       if (GET_CODE (y) == CONST)
1346 	y = XEXP (y, 0);
1347 
1348       if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1349 	{
1350 	  /* See if this looks like
1351 		(plus (mult (reg) (mem_shadd_const))
1352 		      (const (plus (symbol_ref) (const_int))))
1353 
1354 	     Where const_int is small.  In that case the const
1355 	     expression is a valid pointer for indexing.
1356 
1357 	     If const_int is big, but can be divided evenly by shadd_const
1358 	     and added to (reg).  This allows more scaled indexed addresses.  */
1359 	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1360 	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1361 	      && GET_CODE (XEXP (y, 1)) == CONST_INT
1362 	      && INTVAL (XEXP (y, 1)) >= -4096
1363 	      && INTVAL (XEXP (y, 1)) <= 4095)
1364 	    {
1365 	      HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1366 
1367 	      /* If we were given a MULT, we must fix the constant
1368 		 as we're going to create the ASHIFT form.  */
1369 	      if (GET_CODE (XEXP (x, 0)) == MULT)
1370 		shift_val = exact_log2 (shift_val);
1371 
1372 	      rtx reg1, reg2;
1373 
1374 	      reg1 = XEXP (x, 1);
1375 	      if (GET_CODE (reg1) != REG)
1376 		reg1 = force_reg (Pmode, force_operand (reg1, 0));
1377 
1378 	      reg2 = XEXP (XEXP (x, 0), 0);
1379 	      if (GET_CODE (reg2) != REG)
1380 	        reg2 = force_reg (Pmode, force_operand (reg2, 0));
1381 
1382 	      return
1383 		force_reg (Pmode,
1384 			   gen_rtx_PLUS (Pmode,
1385 					 gen_rtx_ASHIFT (Pmode,
1386 							 reg2,
1387 							 GEN_INT (shift_val)),
1388 					 reg1));
1389 	    }
1390 	  else if ((mode == DFmode || mode == SFmode)
1391 		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1392 		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1393 		   && GET_CODE (XEXP (y, 1)) == CONST_INT
1394 		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
1395 	    {
1396 	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1397 
1398 	      /* If we were given a MULT, we must fix the constant
1399 		 as we're going to create the ASHIFT form.  */
1400 	      if (GET_CODE (XEXP (x, 0)) == MULT)
1401 		shift_val = exact_log2 (shift_val);
1402 
1403 	      regx1
1404 		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1405 					     / INTVAL (XEXP (XEXP (x, 0), 1))));
1406 	      regx2 = XEXP (XEXP (x, 0), 0);
1407 	      if (GET_CODE (regx2) != REG)
1408 		regx2 = force_reg (Pmode, force_operand (regx2, 0));
1409 	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1410 							regx2, regx1));
1411 	      return
1412 		force_reg (Pmode,
1413 			   gen_rtx_PLUS (Pmode,
1414 					 gen_rtx_ASHIFT (Pmode, regx2,
1415 						         GEN_INT (shift_val)),
1416 					 force_reg (Pmode, XEXP (y, 0))));
1417 	    }
1418 	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1419 		   && INTVAL (XEXP (y, 1)) >= -4096
1420 		   && INTVAL (XEXP (y, 1)) <= 4095)
1421 	    {
1422 	      /* This is safe because of the guard page at the
1423 		 beginning and end of the data space.  Just
1424 		 return the original address.  */
1425 	      return orig;
1426 	    }
1427 	  else
1428 	    {
1429 	      /* Doesn't look like one we can optimize.  */
1430 	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1431 	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1432 	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1433 	      regx1 = force_reg (Pmode,
1434 				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1435 						 regx1, regy2));
1436 	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1437 	    }
1438 	}
1439     }
1440 
1441   return orig;
1442 }
1443 
1444 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1445 
1446    Compute extra cost of moving data between one register class
1447    and another.
1448 
1449    Make moves from SAR so expensive they should never happen.  We used to
1450    have 0xffff here, but that generates overflow in rare cases.
1451 
1452    Copies involving a FP register and a non-FP register are relatively
1453    expensive because they must go through memory.
1454 
1455    Other copies are reasonably cheap.  */
1456 
1457 static int
1458 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
1459 			 reg_class_t from, reg_class_t to)
1460 {
1461   if (from == SHIFT_REGS)
1462     return 0x100;
1463   else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1464     return 18;
1465   else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1466            || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1467     return 16;
1468   else
1469     return 2;
1470 }
1471 
1472 /* For the HPPA, REG and REG+CONST is cost 0
1473    and addresses involving symbolic constants are cost 2.
1474 
1475    PIC addresses are very expensive.
1476 
1477    It is no coincidence that this has the same structure
1478    as pa_legitimate_address_p.  */
1479 
1480 static int
1481 hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
1482 		   addr_space_t as ATTRIBUTE_UNUSED,
1483 		   bool speed ATTRIBUTE_UNUSED)
1484 {
1485   switch (GET_CODE (X))
1486     {
1487     case REG:
1488     case PLUS:
1489     case LO_SUM:
1490       return 1;
1491     case HIGH:
1492       return 2;
1493     default:
1494       return 4;
1495     }
1496 }
1497 
1498 /* Compute a (partial) cost for rtx X.  Return true if the complete
1499    cost has been computed, and false if subexpressions should be
1500    scanned.  In either case, *TOTAL contains the cost result.  */
1501 
1502 static bool
1503 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1504 		int opno ATTRIBUTE_UNUSED,
1505 		int *total, bool speed ATTRIBUTE_UNUSED)
1506 {
1507   int factor;
1508   int code = GET_CODE (x);
1509 
1510   switch (code)
1511     {
1512     case CONST_INT:
1513       if (INTVAL (x) == 0)
1514 	*total = 0;
1515       else if (INT_14_BITS (x))
1516 	*total = 1;
1517       else
1518 	*total = 2;
1519       return true;
1520 
1521     case HIGH:
1522       *total = 2;
1523       return true;
1524 
1525     case CONST:
1526     case LABEL_REF:
1527     case SYMBOL_REF:
1528       *total = 4;
1529       return true;
1530 
1531     case CONST_DOUBLE:
1532       if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1533 	  && outer_code != SET)
1534 	*total = 0;
1535       else
1536         *total = 8;
1537       return true;
1538 
1539     case MULT:
1540       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1541 	{
1542 	  *total = COSTS_N_INSNS (3);
1543 	  return true;
1544 	}
1545 
1546       /* A mode size N times larger than SImode needs O(N*N) more insns.  */
1547       factor = GET_MODE_SIZE (mode) / 4;
1548       if (factor == 0)
1549 	factor = 1;
1550 
1551       if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1552 	*total = factor * factor * COSTS_N_INSNS (8);
1553       else
1554 	*total = factor * factor * COSTS_N_INSNS (20);
1555       return true;
1556 
1557     case DIV:
1558       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1559 	{
1560 	  *total = COSTS_N_INSNS (14);
1561 	  return true;
1562 	}
1563       /* FALLTHRU */
1564 
1565     case UDIV:
1566     case MOD:
1567     case UMOD:
1568       /* A mode size N times larger than SImode needs O(N*N) more insns.  */
1569       factor = GET_MODE_SIZE (mode) / 4;
1570       if (factor == 0)
1571 	factor = 1;
1572 
1573       *total = factor * factor * COSTS_N_INSNS (60);
1574       return true;
1575 
1576     case PLUS: /* this includes shNadd insns */
1577     case MINUS:
1578       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1579 	{
1580 	  *total = COSTS_N_INSNS (3);
1581 	  return true;
1582 	}
1583 
1584       /* A size N times larger than UNITS_PER_WORD needs N times as
1585 	 many insns, taking N times as long.  */
1586       factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
1587       if (factor == 0)
1588 	factor = 1;
1589       *total = factor * COSTS_N_INSNS (1);
1590       return true;
1591 
1592     case ASHIFT:
1593     case ASHIFTRT:
1594     case LSHIFTRT:
1595       *total = COSTS_N_INSNS (1);
1596       return true;
1597 
1598     default:
1599       return false;
1600     }
1601 }
1602 
1603 /* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1604    new rtx with the correct mode.  */
1605 static inline rtx
1606 force_mode (machine_mode mode, rtx orig)
1607 {
1608   if (mode == GET_MODE (orig))
1609     return orig;
1610 
1611   gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1612 
1613   return gen_rtx_REG (mode, REGNO (orig));
1614 }
1615 
1616 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
1617 
1618 static bool
1619 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1620 {
1621   return tls_referenced_p (x);
1622 }
1623 
1624 /* Emit insns to move operands[1] into operands[0].
1625 
1626    Return 1 if we have written out everything that needs to be done to
1627    do the move.  Otherwise, return 0 and the caller will emit the move
1628    normally.
1629 
1630    Note SCRATCH_REG may not be in the proper mode depending on how it
1631    will be used.  This routine is responsible for creating a new copy
1632    of SCRATCH_REG in the proper mode.  */
1633 
1634 int
1635 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1636 {
1637   register rtx operand0 = operands[0];
1638   register rtx operand1 = operands[1];
1639   register rtx tem;
1640 
1641   /* We can only handle indexed addresses in the destination operand
1642      of floating point stores.  Thus, we need to break out indexed
1643      addresses from the destination operand.  */
1644   if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1645     {
1646       gcc_assert (can_create_pseudo_p ());
1647 
1648       tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1649       operand0 = replace_equiv_address (operand0, tem);
1650     }
1651 
1652   /* On targets with non-equivalent space registers, break out unscaled
1653      indexed addresses from the source operand before the final CSE.
1654      We have to do this because the REG_POINTER flag is not correctly
1655      carried through various optimization passes and CSE may substitute
1656      a pseudo without the pointer set for one with the pointer set.  As
1657      a result, we loose various opportunities to create insns with
1658      unscaled indexed addresses.  */
1659   if (!TARGET_NO_SPACE_REGS
1660       && !cse_not_expected
1661       && GET_CODE (operand1) == MEM
1662       && GET_CODE (XEXP (operand1, 0)) == PLUS
1663       && REG_P (XEXP (XEXP (operand1, 0), 0))
1664       && REG_P (XEXP (XEXP (operand1, 0), 1)))
1665     operand1
1666       = replace_equiv_address (operand1,
1667 			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1668 
1669   if (scratch_reg
1670       && reload_in_progress && GET_CODE (operand0) == REG
1671       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1672     operand0 = reg_equiv_mem (REGNO (operand0));
1673   else if (scratch_reg
1674 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1675 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1676 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1677     {
1678      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1679 	the code which tracks sets/uses for delete_output_reload.  */
1680       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1681 				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1682 				 SUBREG_BYTE (operand0));
1683       operand0 = alter_subreg (&temp, true);
1684     }
1685 
1686   if (scratch_reg
1687       && reload_in_progress && GET_CODE (operand1) == REG
1688       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1689     operand1 = reg_equiv_mem (REGNO (operand1));
1690   else if (scratch_reg
1691 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1692 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1693 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1694     {
1695      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1696 	the code which tracks sets/uses for delete_output_reload.  */
1697       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1698 				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1699 				 SUBREG_BYTE (operand1));
1700       operand1 = alter_subreg (&temp, true);
1701     }
1702 
1703   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1704       && ((tem = find_replacement (&XEXP (operand0, 0)))
1705 	  != XEXP (operand0, 0)))
1706     operand0 = replace_equiv_address (operand0, tem);
1707 
1708   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1709       && ((tem = find_replacement (&XEXP (operand1, 0)))
1710 	  != XEXP (operand1, 0)))
1711     operand1 = replace_equiv_address (operand1, tem);
1712 
1713   /* Handle secondary reloads for loads/stores of FP registers from
1714      REG+D addresses where D does not fit in 5 or 14 bits, including
1715      (subreg (mem (addr))) cases, and reloads for other unsupported
1716      memory operands.  */
1717   if (scratch_reg
1718       && FP_REG_P (operand0)
1719       && (MEM_P (operand1)
1720 	  || (GET_CODE (operand1) == SUBREG
1721 	      && MEM_P (XEXP (operand1, 0)))))
1722     {
1723       rtx op1 = operand1;
1724 
1725       if (GET_CODE (op1) == SUBREG)
1726 	op1 = XEXP (op1, 0);
1727 
1728       if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1729 	{
1730 	  if (!(TARGET_PA_20
1731 		&& !TARGET_ELF32
1732 		&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1733 	      && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1734 	    {
1735 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1736 		 We want it in WORD_MODE regardless of what mode it was
1737 		 originally given to us.  */
1738 	      scratch_reg = force_mode (word_mode, scratch_reg);
1739 
1740 	      /* D might not fit in 14 bits either; for such cases load D
1741 		 into scratch reg.  */
1742 	      if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1743 		{
1744 		  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1745 		  emit_move_insn (scratch_reg,
1746 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1747 						  Pmode,
1748 						  XEXP (XEXP (op1, 0), 0),
1749 						  scratch_reg));
1750 		}
1751 	      else
1752 		emit_move_insn (scratch_reg, XEXP (op1, 0));
1753 	      op1 = replace_equiv_address (op1, scratch_reg);
1754 	    }
1755 	}
1756       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1757 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1758 	       || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1759 	{
1760 	  /* Load memory address into SCRATCH_REG.  */
1761 	  scratch_reg = force_mode (word_mode, scratch_reg);
1762 	  emit_move_insn (scratch_reg, XEXP (op1, 0));
1763 	  op1 = replace_equiv_address (op1, scratch_reg);
1764 	}
1765       emit_insn (gen_rtx_SET (operand0, op1));
1766       return 1;
1767     }
1768   else if (scratch_reg
1769 	   && FP_REG_P (operand1)
1770 	   && (MEM_P (operand0)
1771 	       || (GET_CODE (operand0) == SUBREG
1772 		   && MEM_P (XEXP (operand0, 0)))))
1773     {
1774       rtx op0 = operand0;
1775 
1776       if (GET_CODE (op0) == SUBREG)
1777 	op0 = XEXP (op0, 0);
1778 
1779       if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1780 	{
1781 	  if (!(TARGET_PA_20
1782 		&& !TARGET_ELF32
1783 		&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1784 	      && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1785 	    {
1786 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1787 		 We want it in WORD_MODE regardless of what mode it was
1788 		 originally given to us.  */
1789 	      scratch_reg = force_mode (word_mode, scratch_reg);
1790 
1791 	      /* D might not fit in 14 bits either; for such cases load D
1792 		 into scratch reg.  */
1793 	      if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1794 		{
1795 		  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1796 		  emit_move_insn (scratch_reg,
1797 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1798 						  Pmode,
1799 						  XEXP (XEXP (op0, 0), 0),
1800 						  scratch_reg));
1801 		}
1802 	      else
1803 		emit_move_insn (scratch_reg, XEXP (op0, 0));
1804 	      op0 = replace_equiv_address (op0, scratch_reg);
1805 	    }
1806 	}
1807       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1808 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1809 	       || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1810 	{
1811 	  /* Load memory address into SCRATCH_REG.  */
1812 	  scratch_reg = force_mode (word_mode, scratch_reg);
1813 	  emit_move_insn (scratch_reg, XEXP (op0, 0));
1814 	  op0 = replace_equiv_address (op0, scratch_reg);
1815 	}
1816       emit_insn (gen_rtx_SET (op0, operand1));
1817       return 1;
1818     }
1819   /* Handle secondary reloads for loads of FP registers from constant
1820      expressions by forcing the constant into memory.  For the most part,
1821      this is only necessary for SImode and DImode.
1822 
1823      Use scratch_reg to hold the address of the memory location.  */
1824   else if (scratch_reg
1825 	   && CONSTANT_P (operand1)
1826 	   && FP_REG_P (operand0))
1827     {
1828       rtx const_mem, xoperands[2];
1829 
1830       if (operand1 == CONST0_RTX (mode))
1831 	{
1832 	  emit_insn (gen_rtx_SET (operand0, operand1));
1833 	  return 1;
1834 	}
1835 
1836       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1837 	 it in WORD_MODE regardless of what mode it was originally given
1838 	 to us.  */
1839       scratch_reg = force_mode (word_mode, scratch_reg);
1840 
1841       /* Force the constant into memory and put the address of the
1842 	 memory location into scratch_reg.  */
1843       const_mem = force_const_mem (mode, operand1);
1844       xoperands[0] = scratch_reg;
1845       xoperands[1] = XEXP (const_mem, 0);
1846       pa_emit_move_sequence (xoperands, Pmode, 0);
1847 
1848       /* Now load the destination register.  */
1849       emit_insn (gen_rtx_SET (operand0,
1850 			      replace_equiv_address (const_mem, scratch_reg)));
1851       return 1;
1852     }
1853   /* Handle secondary reloads for SAR.  These occur when trying to load
1854      the SAR from memory or a constant.  */
1855   else if (scratch_reg
1856 	   && GET_CODE (operand0) == REG
1857 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1858 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1859 	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1860     {
1861       /* D might not fit in 14 bits either; for such cases load D into
1862 	 scratch reg.  */
1863       if (GET_CODE (operand1) == MEM
1864 	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1865 	{
1866 	  /* We are reloading the address into the scratch register, so we
1867 	     want to make sure the scratch register is a full register.  */
1868 	  scratch_reg = force_mode (word_mode, scratch_reg);
1869 
1870 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1871 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1872 								        0)),
1873 						       Pmode,
1874 						       XEXP (XEXP (operand1, 0),
1875 						       0),
1876 						       scratch_reg));
1877 
1878 	  /* Now we are going to load the scratch register from memory,
1879 	     we want to load it in the same width as the original MEM,
1880 	     which must be the same as the width of the ultimate destination,
1881 	     OPERAND0.  */
1882 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1883 
1884 	  emit_move_insn (scratch_reg,
1885 			  replace_equiv_address (operand1, scratch_reg));
1886 	}
1887       else
1888 	{
1889 	  /* We want to load the scratch register using the same mode as
1890 	     the ultimate destination.  */
1891 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1892 
1893 	  emit_move_insn (scratch_reg, operand1);
1894 	}
1895 
1896       /* And emit the insn to set the ultimate destination.  We know that
1897 	 the scratch register has the same mode as the destination at this
1898 	 point.  */
1899       emit_move_insn (operand0, scratch_reg);
1900       return 1;
1901     }
1902 
1903   /* Handle the most common case: storing into a register.  */
1904   if (register_operand (operand0, mode))
1905     {
1906       /* Legitimize TLS symbol references.  This happens for references
1907 	 that aren't a legitimate constant.  */
1908       if (PA_SYMBOL_REF_TLS_P (operand1))
1909 	operand1 = legitimize_tls_address (operand1);
1910 
1911       if (register_operand (operand1, mode)
1912 	  || (GET_CODE (operand1) == CONST_INT
1913 	      && pa_cint_ok_for_move (UINTVAL (operand1)))
1914 	  || (operand1 == CONST0_RTX (mode))
1915 	  || (GET_CODE (operand1) == HIGH
1916 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1917 	  /* Only `general_operands' can come here, so MEM is ok.  */
1918 	  || GET_CODE (operand1) == MEM)
1919 	{
1920 	  /* Various sets are created during RTL generation which don't
1921 	     have the REG_POINTER flag correctly set.  After the CSE pass,
1922 	     instruction recognition can fail if we don't consistently
1923 	     set this flag when performing register copies.  This should
1924 	     also improve the opportunities for creating insns that use
1925 	     unscaled indexing.  */
1926 	  if (REG_P (operand0) && REG_P (operand1))
1927 	    {
1928 	      if (REG_POINTER (operand1)
1929 		  && !REG_POINTER (operand0)
1930 		  && !HARD_REGISTER_P (operand0))
1931 		copy_reg_pointer (operand0, operand1);
1932 	    }
1933 
1934 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
1935 	     get set.  In some cases, we can set the REG_POINTER flag
1936 	     from the declaration for the MEM.  */
1937 	  if (REG_P (operand0)
1938 	      && GET_CODE (operand1) == MEM
1939 	      && !REG_POINTER (operand0))
1940 	    {
1941 	      tree decl = MEM_EXPR (operand1);
1942 
1943 	      /* Set the register pointer flag and register alignment
1944 		 if the declaration for this memory reference is a
1945 		 pointer type.  */
1946 	      if (decl)
1947 		{
1948 		  tree type;
1949 
1950 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
1951 		     tree operand 1.  */
1952 		  if (TREE_CODE (decl) == COMPONENT_REF)
1953 		    decl = TREE_OPERAND (decl, 1);
1954 
1955 		  type = TREE_TYPE (decl);
1956 		  type = strip_array_types (type);
1957 
1958 		  if (POINTER_TYPE_P (type))
1959 		    mark_reg_pointer (operand0, BITS_PER_UNIT);
1960 		}
1961 	    }
1962 
1963 	  emit_insn (gen_rtx_SET (operand0, operand1));
1964 	  return 1;
1965 	}
1966     }
1967   else if (GET_CODE (operand0) == MEM)
1968     {
1969       if (mode == DFmode && operand1 == CONST0_RTX (mode)
1970 	  && !(reload_in_progress || reload_completed))
1971 	{
1972 	  rtx temp = gen_reg_rtx (DFmode);
1973 
1974 	  emit_insn (gen_rtx_SET (temp, operand1));
1975 	  emit_insn (gen_rtx_SET (operand0, temp));
1976 	  return 1;
1977 	}
1978       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1979 	{
1980 	  /* Run this case quickly.  */
1981 	  emit_insn (gen_rtx_SET (operand0, operand1));
1982 	  return 1;
1983 	}
1984       if (! (reload_in_progress || reload_completed))
1985 	{
1986 	  operands[0] = validize_mem (operand0);
1987 	  operands[1] = operand1 = force_reg (mode, operand1);
1988 	}
1989     }
1990 
1991   /* Simplify the source if we need to.
1992      Note we do have to handle function labels here, even though we do
1993      not consider them legitimate constants.  Loop optimizations can
1994      call the emit_move_xxx with one as a source.  */
1995   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1996       || (GET_CODE (operand1) == HIGH
1997 	  && symbolic_operand (XEXP (operand1, 0), mode))
1998       || function_label_operand (operand1, VOIDmode)
1999       || tls_referenced_p (operand1))
2000     {
2001       int ishighonly = 0;
2002 
2003       if (GET_CODE (operand1) == HIGH)
2004 	{
2005 	  ishighonly = 1;
2006 	  operand1 = XEXP (operand1, 0);
2007 	}
2008       if (symbolic_operand (operand1, mode))
2009 	{
2010 	  /* Argh.  The assembler and linker can't handle arithmetic
2011 	     involving plabels.
2012 
2013 	     So we force the plabel into memory, load operand0 from
2014 	     the memory location, then add in the constant part.  */
2015 	  if ((GET_CODE (operand1) == CONST
2016 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
2017 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2018 					  VOIDmode))
2019 	      || function_label_operand (operand1, VOIDmode))
2020 	    {
2021 	      rtx temp, const_part;
2022 
2023 	      /* Figure out what (if any) scratch register to use.  */
2024 	      if (reload_in_progress || reload_completed)
2025 		{
2026 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
2027 		  /* SCRATCH_REG will hold an address and maybe the actual
2028 		     data.  We want it in WORD_MODE regardless of what mode it
2029 		     was originally given to us.  */
2030 		  scratch_reg = force_mode (word_mode, scratch_reg);
2031 		}
2032 	      else if (flag_pic)
2033 		scratch_reg = gen_reg_rtx (Pmode);
2034 
2035 	      if (GET_CODE (operand1) == CONST)
2036 		{
2037 		  /* Save away the constant part of the expression.  */
2038 		  const_part = XEXP (XEXP (operand1, 0), 1);
2039 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
2040 
2041 		  /* Force the function label into memory.  */
2042 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2043 		}
2044 	      else
2045 		{
2046 		  /* No constant part.  */
2047 		  const_part = NULL_RTX;
2048 
2049 		  /* Force the function label into memory.  */
2050 		  temp = force_const_mem (mode, operand1);
2051 		}
2052 
2053 
2054 	      /* Get the address of the memory location.  PIC-ify it if
2055 		 necessary.  */
2056 	      temp = XEXP (temp, 0);
2057 	      if (flag_pic)
2058 		temp = legitimize_pic_address (temp, mode, scratch_reg);
2059 
2060 	      /* Put the address of the memory location into our destination
2061 		 register.  */
2062 	      operands[1] = temp;
2063 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2064 
2065 	      /* Now load from the memory location into our destination
2066 		 register.  */
2067 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2068 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2069 
2070 	      /* And add back in the constant part.  */
2071 	      if (const_part != NULL_RTX)
2072 		expand_inc (operand0, const_part);
2073 
2074 	      return 1;
2075 	    }
2076 
2077 	  if (flag_pic)
2078 	    {
2079 	      rtx_insn *insn;
2080 	      rtx temp;
2081 
2082 	      if (reload_in_progress || reload_completed)
2083 		{
2084 		  temp = scratch_reg ? scratch_reg : operand0;
2085 		  /* TEMP will hold an address and maybe the actual
2086 		     data.  We want it in WORD_MODE regardless of what mode it
2087 		     was originally given to us.  */
2088 		  temp = force_mode (word_mode, temp);
2089 		}
2090 	      else
2091 		temp = gen_reg_rtx (Pmode);
2092 
2093 	      /* Force (const (plus (symbol) (const_int))) to memory
2094 	         if the const_int will not fit in 14 bits.  Although
2095 		 this requires a relocation, the instruction sequence
2096 		 needed to load the value is shorter.  */
2097 	      if (GET_CODE (operand1) == CONST
2098 		       && GET_CODE (XEXP (operand1, 0)) == PLUS
2099 		       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2100 		       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2101 		{
2102 		  rtx x, m = force_const_mem (mode, operand1);
2103 
2104 		  x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2105 		  x = replace_equiv_address (m, x);
2106 		  insn = emit_move_insn (operand0, x);
2107 		}
2108 	      else
2109 		{
2110 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
2111 		  if (REG_P (operand0) && REG_P (operands[1]))
2112 		    copy_reg_pointer (operand0, operands[1]);
2113 		  insn = emit_move_insn (operand0, operands[1]);
2114 		}
2115 
2116 	      /* Put a REG_EQUAL note on this insn.  */
2117 	      set_unique_reg_note (insn, REG_EQUAL, operand1);
2118 	    }
2119 	  /* On the HPPA, references to data space are supposed to use dp,
2120 	     register 27, but showing it in the RTL inhibits various cse
2121 	     and loop optimizations.  */
2122 	  else
2123 	    {
2124 	      rtx temp, set;
2125 
2126 	      if (reload_in_progress || reload_completed)
2127 		{
2128 		  temp = scratch_reg ? scratch_reg : operand0;
2129 		  /* TEMP will hold an address and maybe the actual
2130 		     data.  We want it in WORD_MODE regardless of what mode it
2131 		     was originally given to us.  */
2132 		  temp = force_mode (word_mode, temp);
2133 		}
2134 	      else
2135 		temp = gen_reg_rtx (mode);
2136 
2137 	      /* Loading a SYMBOL_REF into a register makes that register
2138 		 safe to be used as the base in an indexed address.
2139 
2140 		 Don't mark hard registers though.  That loses.  */
2141 	      if (GET_CODE (operand0) == REG
2142 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2143 		mark_reg_pointer (operand0, BITS_PER_UNIT);
2144 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2145 		mark_reg_pointer (temp, BITS_PER_UNIT);
2146 
2147 	      if (ishighonly)
2148 		set = gen_rtx_SET (operand0, temp);
2149 	      else
2150 		set = gen_rtx_SET (operand0,
2151 				   gen_rtx_LO_SUM (mode, temp, operand1));
2152 
2153 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2154 	      emit_insn (set);
2155 
2156 	    }
2157 	  return 1;
2158 	}
2159       else if (tls_referenced_p (operand1))
2160 	{
2161 	  rtx tmp = operand1;
2162 	  rtx addend = NULL;
2163 
2164 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2165 	    {
2166 	      addend = XEXP (XEXP (tmp, 0), 1);
2167 	      tmp = XEXP (XEXP (tmp, 0), 0);
2168 	    }
2169 
2170 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2171 	  tmp = legitimize_tls_address (tmp);
2172 	  if (addend)
2173 	    {
2174 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
2175 	      tmp = force_operand (tmp, operands[0]);
2176 	    }
2177 	  operands[1] = tmp;
2178 	}
2179       else if (GET_CODE (operand1) != CONST_INT
2180 	       || !pa_cint_ok_for_move (UINTVAL (operand1)))
2181 	{
2182 	  rtx temp;
2183 	  rtx_insn *insn;
2184 	  rtx op1 = operand1;
2185 	  HOST_WIDE_INT value = 0;
2186 	  HOST_WIDE_INT insv = 0;
2187 	  int insert = 0;
2188 
2189 	  if (GET_CODE (operand1) == CONST_INT)
2190 	    value = INTVAL (operand1);
2191 
2192 	  if (TARGET_64BIT
2193 	      && GET_CODE (operand1) == CONST_INT
2194 	      && HOST_BITS_PER_WIDE_INT > 32
2195 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2196 	    {
2197 	      HOST_WIDE_INT nval;
2198 
2199 	      /* Extract the low order 32 bits of the value and sign extend.
2200 		 If the new value is the same as the original value, we can
2201 		 can use the original value as-is.  If the new value is
2202 		 different, we use it and insert the most-significant 32-bits
2203 		 of the original value into the final result.  */
2204 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2205 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2206 	      if (value != nval)
2207 		{
2208 #if HOST_BITS_PER_WIDE_INT > 32
2209 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2210 #endif
2211 		  insert = 1;
2212 		  value = nval;
2213 		  operand1 = GEN_INT (nval);
2214 		}
2215 	    }
2216 
2217 	  if (reload_in_progress || reload_completed)
2218 	    temp = scratch_reg ? scratch_reg : operand0;
2219 	  else
2220 	    temp = gen_reg_rtx (mode);
2221 
2222 	  /* We don't directly split DImode constants on 32-bit targets
2223 	     because PLUS uses an 11-bit immediate and the insn sequence
2224 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2225 	  if (GET_CODE (operand1) == CONST_INT
2226 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2227 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2228 	      && !insert)
2229 	    {
2230 	      /* Directly break constant into high and low parts.  This
2231 		 provides better optimization opportunities because various
2232 		 passes recognize constants split with PLUS but not LO_SUM.
2233 		 We use a 14-bit signed low part except when the addition
2234 		 of 0x4000 to the high part might change the sign of the
2235 		 high part.  */
2236 	      HOST_WIDE_INT low = value & 0x3fff;
2237 	      HOST_WIDE_INT high = value & ~ 0x3fff;
2238 
2239 	      if (low >= 0x2000)
2240 		{
2241 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2242 		    high += 0x2000;
2243 		  else
2244 		    high += 0x4000;
2245 		}
2246 
2247 	      low = value - high;
2248 
2249 	      emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2250 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2251 	    }
2252 	  else
2253 	    {
2254 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2255 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2256 	    }
2257 
2258 	  insn = emit_move_insn (operands[0], operands[1]);
2259 
2260 	  /* Now insert the most significant 32 bits of the value
2261 	     into the register.  When we don't have a second register
2262 	     available, it could take up to nine instructions to load
2263 	     a 64-bit integer constant.  Prior to reload, we force
2264 	     constants that would take more than three instructions
2265 	     to load to the constant pool.  During and after reload,
2266 	     we have to handle all possible values.  */
2267 	  if (insert)
2268 	    {
2269 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2270 		 register and the value to be inserted is outside the
2271 		 range that can be loaded with three depdi instructions.  */
2272 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2273 		{
2274 		  operand1 = GEN_INT (insv);
2275 
2276 		  emit_insn (gen_rtx_SET (temp,
2277 					  gen_rtx_HIGH (mode, operand1)));
2278 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2279 		  if (mode == DImode)
2280 		    insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2281 						  const0_rtx, temp));
2282 		  else
2283 		    insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2284 						  const0_rtx, temp));
2285 		}
2286 	      else
2287 		{
2288 		  int len = 5, pos = 27;
2289 
2290 		  /* Insert the bits using the depdi instruction.  */
2291 		  while (pos >= 0)
2292 		    {
2293 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2294 		      HOST_WIDE_INT sign = v5 < 0;
2295 
2296 		      /* Left extend the insertion.  */
2297 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2298 		      while (pos > 0 && (insv & 1) == sign)
2299 			{
2300 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2301 			  len += 1;
2302 			  pos -= 1;
2303 			}
2304 
2305 		      if (mode == DImode)
2306 			insn = emit_insn (gen_insvdi (operand0,
2307 						      GEN_INT (len),
2308 						      GEN_INT (pos),
2309 						      GEN_INT (v5)));
2310 		      else
2311 			insn = emit_insn (gen_insvsi (operand0,
2312 						      GEN_INT (len),
2313 						      GEN_INT (pos),
2314 						      GEN_INT (v5)));
2315 
2316 		      len = pos > 0 && pos < 5 ? pos : 5;
2317 		      pos -= len;
2318 		    }
2319 		}
2320 	    }
2321 
2322 	  set_unique_reg_note (insn, REG_EQUAL, op1);
2323 
2324 	  return 1;
2325 	}
2326     }
2327   /* Now have insn-emit do whatever it normally does.  */
2328   return 0;
2329 }
2330 
2331 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2332    it will need a link/runtime reloc).  */
2333 
2334 int
2335 pa_reloc_needed (tree exp)
2336 {
2337   int reloc = 0;
2338 
2339   switch (TREE_CODE (exp))
2340     {
2341     case ADDR_EXPR:
2342       return 1;
2343 
2344     case POINTER_PLUS_EXPR:
2345     case PLUS_EXPR:
2346     case MINUS_EXPR:
2347       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2348       reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2349       break;
2350 
2351     CASE_CONVERT:
2352     case NON_LVALUE_EXPR:
2353       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2354       break;
2355 
2356     case CONSTRUCTOR:
2357       {
2358 	tree value;
2359 	unsigned HOST_WIDE_INT ix;
2360 
2361 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2362 	  if (value)
2363 	    reloc |= pa_reloc_needed (value);
2364       }
2365       break;
2366 
2367     case ERROR_MARK:
2368       break;
2369 
2370     default:
2371       break;
2372     }
2373   return reloc;
2374 }
2375 
2376 
2377 /* Return the best assembler insn template
2378    for moving operands[1] into operands[0] as a fullword.  */
2379 const char *
2380 pa_singlemove_string (rtx *operands)
2381 {
2382   HOST_WIDE_INT intval;
2383 
2384   if (GET_CODE (operands[0]) == MEM)
2385     return "stw %r1,%0";
2386   if (GET_CODE (operands[1]) == MEM)
2387     return "ldw %1,%0";
2388   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2389     {
2390       long i;
2391 
2392       gcc_assert (GET_MODE (operands[1]) == SFmode);
2393 
2394       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2395 	 bit pattern.  */
2396       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2397 
2398       operands[1] = GEN_INT (i);
2399       /* Fall through to CONST_INT case.  */
2400     }
2401   if (GET_CODE (operands[1]) == CONST_INT)
2402     {
2403       intval = INTVAL (operands[1]);
2404 
2405       if (VAL_14_BITS_P (intval))
2406 	return "ldi %1,%0";
2407       else if ((intval & 0x7ff) == 0)
2408 	return "ldil L'%1,%0";
2409       else if (pa_zdepi_cint_p (intval))
2410 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2411       else
2412 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2413     }
2414   return "copy %1,%0";
2415 }
2416 
2417 
2418 /* Compute position (in OP[1]) and width (in OP[2])
2419    useful for copying IMM to a register using the zdepi
2420    instructions.  Store the immediate value to insert in OP[0].  */
2421 static void
2422 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2423 {
2424   int lsb, len;
2425 
2426   /* Find the least significant set bit in IMM.  */
2427   for (lsb = 0; lsb < 32; lsb++)
2428     {
2429       if ((imm & 1) != 0)
2430         break;
2431       imm >>= 1;
2432     }
2433 
2434   /* Choose variants based on *sign* of the 5-bit field.  */
2435   if ((imm & 0x10) == 0)
2436     len = (lsb <= 28) ? 4 : 32 - lsb;
2437   else
2438     {
2439       /* Find the width of the bitstring in IMM.  */
2440       for (len = 5; len < 32 - lsb; len++)
2441 	{
2442 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2443 	    break;
2444 	}
2445 
2446       /* Sign extend IMM as a 5-bit value.  */
2447       imm = (imm & 0xf) - 0x10;
2448     }
2449 
2450   op[0] = imm;
2451   op[1] = 31 - lsb;
2452   op[2] = len;
2453 }
2454 
2455 /* Compute position (in OP[1]) and width (in OP[2])
2456    useful for copying IMM to a register using the depdi,z
2457    instructions.  Store the immediate value to insert in OP[0].  */
2458 
2459 static void
2460 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2461 {
2462   int lsb, len, maxlen;
2463 
2464   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2465 
2466   /* Find the least significant set bit in IMM.  */
2467   for (lsb = 0; lsb < maxlen; lsb++)
2468     {
2469       if ((imm & 1) != 0)
2470         break;
2471       imm >>= 1;
2472     }
2473 
2474   /* Choose variants based on *sign* of the 5-bit field.  */
2475   if ((imm & 0x10) == 0)
2476     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2477   else
2478     {
2479       /* Find the width of the bitstring in IMM.  */
2480       for (len = 5; len < maxlen - lsb; len++)
2481 	{
2482 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2483 	    break;
2484 	}
2485 
2486       /* Extend length if host is narrow and IMM is negative.  */
2487       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2488 	len += 32;
2489 
2490       /* Sign extend IMM as a 5-bit value.  */
2491       imm = (imm & 0xf) - 0x10;
2492     }
2493 
2494   op[0] = imm;
2495   op[1] = 63 - lsb;
2496   op[2] = len;
2497 }
2498 
2499 /* Output assembler code to perform a doubleword move insn
2500    with operands OPERANDS.  */
2501 
2502 const char *
2503 pa_output_move_double (rtx *operands)
2504 {
2505   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2506   rtx latehalf[2];
2507   rtx addreg0 = 0, addreg1 = 0;
2508   int highonly = 0;
2509 
2510   /* First classify both operands.  */
2511 
2512   if (REG_P (operands[0]))
2513     optype0 = REGOP;
2514   else if (offsettable_memref_p (operands[0]))
2515     optype0 = OFFSOP;
2516   else if (GET_CODE (operands[0]) == MEM)
2517     optype0 = MEMOP;
2518   else
2519     optype0 = RNDOP;
2520 
2521   if (REG_P (operands[1]))
2522     optype1 = REGOP;
2523   else if (CONSTANT_P (operands[1]))
2524     optype1 = CNSTOP;
2525   else if (offsettable_memref_p (operands[1]))
2526     optype1 = OFFSOP;
2527   else if (GET_CODE (operands[1]) == MEM)
2528     optype1 = MEMOP;
2529   else
2530     optype1 = RNDOP;
2531 
2532   /* Check for the cases that the operand constraints are not
2533      supposed to allow to happen.  */
2534   gcc_assert (optype0 == REGOP || optype1 == REGOP);
2535 
2536   /* Handle copies between general and floating registers.  */
2537 
2538   if (optype0 == REGOP && optype1 == REGOP
2539       && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2540     {
2541       if (FP_REG_P (operands[0]))
2542 	{
2543 	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2544 	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2545 	  return "{fldds|fldd} -16(%%sp),%0";
2546 	}
2547       else
2548 	{
2549 	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2550 	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2551 	  return "{ldws|ldw} -12(%%sp),%R0";
2552 	}
2553     }
2554 
2555    /* Handle auto decrementing and incrementing loads and stores
2556      specifically, since the structure of the function doesn't work
2557      for them without major modification.  Do it better when we learn
2558      this port about the general inc/dec addressing of PA.
2559      (This was written by tege.  Chide him if it doesn't work.)  */
2560 
2561   if (optype0 == MEMOP)
2562     {
2563       /* We have to output the address syntax ourselves, since print_operand
2564 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2565 
2566       rtx addr = XEXP (operands[0], 0);
2567       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2568 	{
2569 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2570 
2571 	  operands[0] = XEXP (addr, 0);
2572 	  gcc_assert (GET_CODE (operands[1]) == REG
2573 		      && GET_CODE (operands[0]) == REG);
2574 
2575 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2576 
2577 	  /* No overlap between high target register and address
2578 	     register.  (We do this in a non-obvious way to
2579 	     save a register file writeback)  */
2580 	  if (GET_CODE (addr) == POST_INC)
2581 	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2582 	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2583 	}
2584       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2585 	{
2586 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2587 
2588 	  operands[0] = XEXP (addr, 0);
2589 	  gcc_assert (GET_CODE (operands[1]) == REG
2590 		      && GET_CODE (operands[0]) == REG);
2591 
2592 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2593 	  /* No overlap between high target register and address
2594 	     register.  (We do this in a non-obvious way to save a
2595 	     register file writeback)  */
2596 	  if (GET_CODE (addr) == PRE_INC)
2597 	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2598 	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2599 	}
2600     }
2601   if (optype1 == MEMOP)
2602     {
2603       /* We have to output the address syntax ourselves, since print_operand
2604 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2605 
2606       rtx addr = XEXP (operands[1], 0);
2607       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2608 	{
2609 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2610 
2611 	  operands[1] = XEXP (addr, 0);
2612 	  gcc_assert (GET_CODE (operands[0]) == REG
2613 		      && GET_CODE (operands[1]) == REG);
2614 
2615 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2616 	    {
2617 	      /* No overlap between high target register and address
2618 		 register.  (We do this in a non-obvious way to
2619 		 save a register file writeback)  */
2620 	      if (GET_CODE (addr) == POST_INC)
2621 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2622 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2623 	    }
2624 	  else
2625 	    {
2626 	      /* This is an undefined situation.  We should load into the
2627 		 address register *and* update that register.  Probably
2628 		 we don't need to handle this at all.  */
2629 	      if (GET_CODE (addr) == POST_INC)
2630 		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2631 	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2632 	    }
2633 	}
2634       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2635 	{
2636 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2637 
2638 	  operands[1] = XEXP (addr, 0);
2639 	  gcc_assert (GET_CODE (operands[0]) == REG
2640 		      && GET_CODE (operands[1]) == REG);
2641 
2642 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2643 	    {
2644 	      /* No overlap between high target register and address
2645 		 register.  (We do this in a non-obvious way to
2646 		 save a register file writeback)  */
2647 	      if (GET_CODE (addr) == PRE_INC)
2648 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2649 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2650 	    }
2651 	  else
2652 	    {
2653 	      /* This is an undefined situation.  We should load into the
2654 		 address register *and* update that register.  Probably
2655 		 we don't need to handle this at all.  */
2656 	      if (GET_CODE (addr) == PRE_INC)
2657 		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2658 	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2659 	    }
2660 	}
2661       else if (GET_CODE (addr) == PLUS
2662 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2663 	{
2664 	  rtx xoperands[4];
2665 
2666 	  /* Load address into left half of destination register.  */
2667 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2668 	  xoperands[1] = XEXP (addr, 1);
2669 	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
2670 	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
2671 	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2672 			   xoperands);
2673 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2674 	}
2675       else if (GET_CODE (addr) == PLUS
2676 	       && REG_P (XEXP (addr, 0))
2677 	       && REG_P (XEXP (addr, 1)))
2678 	{
2679 	  rtx xoperands[3];
2680 
2681 	  /* Load address into left half of destination register.  */
2682 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2683 	  xoperands[1] = XEXP (addr, 0);
2684 	  xoperands[2] = XEXP (addr, 1);
2685 	  output_asm_insn ("{addl|add,l} %1,%2,%0",
2686 			   xoperands);
2687 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2688 	}
2689     }
2690 
2691   /* If an operand is an unoffsettable memory ref, find a register
2692      we can increment temporarily to make it refer to the second word.  */
2693 
2694   if (optype0 == MEMOP)
2695     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2696 
2697   if (optype1 == MEMOP)
2698     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2699 
2700   /* Ok, we can do one word at a time.
2701      Normally we do the low-numbered word first.
2702 
2703      In either case, set up in LATEHALF the operands to use
2704      for the high-numbered word and in some cases alter the
2705      operands in OPERANDS to be suitable for the low-numbered word.  */
2706 
2707   if (optype0 == REGOP)
2708     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2709   else if (optype0 == OFFSOP)
2710     latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2711   else
2712     latehalf[0] = operands[0];
2713 
2714   if (optype1 == REGOP)
2715     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2716   else if (optype1 == OFFSOP)
2717     latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2718   else if (optype1 == CNSTOP)
2719     {
2720       if (GET_CODE (operands[1]) == HIGH)
2721 	{
2722 	  operands[1] = XEXP (operands[1], 0);
2723 	  highonly = 1;
2724 	}
2725       split_double (operands[1], &operands[1], &latehalf[1]);
2726     }
2727   else
2728     latehalf[1] = operands[1];
2729 
2730   /* If the first move would clobber the source of the second one,
2731      do them in the other order.
2732 
2733      This can happen in two cases:
2734 
2735 	mem -> register where the first half of the destination register
2736  	is the same register used in the memory's address.  Reload
2737 	can create such insns.
2738 
2739 	mem in this case will be either register indirect or register
2740 	indirect plus a valid offset.
2741 
2742 	register -> register move where REGNO(dst) == REGNO(src + 1)
2743 	someone (Tim/Tege?) claimed this can happen for parameter loads.
2744 
2745      Handle mem -> register case first.  */
2746   if (optype0 == REGOP
2747       && (optype1 == MEMOP || optype1 == OFFSOP)
2748       && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2749     {
2750       /* Do the late half first.  */
2751       if (addreg1)
2752 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2753       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2754 
2755       /* Then clobber.  */
2756       if (addreg1)
2757 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2758       return pa_singlemove_string (operands);
2759     }
2760 
2761   /* Now handle register -> register case.  */
2762   if (optype0 == REGOP && optype1 == REGOP
2763       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2764     {
2765       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2766       return pa_singlemove_string (operands);
2767     }
2768 
2769   /* Normal case: do the two words, low-numbered first.  */
2770 
2771   output_asm_insn (pa_singlemove_string (operands), operands);
2772 
2773   /* Make any unoffsettable addresses point at high-numbered word.  */
2774   if (addreg0)
2775     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2776   if (addreg1)
2777     output_asm_insn ("ldo 4(%0),%0", &addreg1);
2778 
2779   /* Do high-numbered word.  */
2780   if (highonly)
2781     output_asm_insn ("ldil L'%1,%0", latehalf);
2782   else
2783     output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2784 
2785   /* Undo the adds we just did.  */
2786   if (addreg0)
2787     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2788   if (addreg1)
2789     output_asm_insn ("ldo -4(%0),%0", &addreg1);
2790 
2791   return "";
2792 }
2793 
2794 const char *
2795 pa_output_fp_move_double (rtx *operands)
2796 {
2797   if (FP_REG_P (operands[0]))
2798     {
2799       if (FP_REG_P (operands[1])
2800 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2801 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2802       else
2803 	output_asm_insn ("fldd%F1 %1,%0", operands);
2804     }
2805   else if (FP_REG_P (operands[1]))
2806     {
2807       output_asm_insn ("fstd%F0 %1,%0", operands);
2808     }
2809   else
2810     {
2811       rtx xoperands[2];
2812 
2813       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2814 
2815       /* This is a pain.  You have to be prepared to deal with an
2816 	 arbitrary address here including pre/post increment/decrement.
2817 
2818 	 so avoid this in the MD.  */
2819       gcc_assert (GET_CODE (operands[0]) == REG);
2820 
2821       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2822       xoperands[0] = operands[0];
2823       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2824     }
2825   return "";
2826 }
2827 
2828 /* Return a REG that occurs in ADDR with coefficient 1.
2829    ADDR can be effectively incremented by incrementing REG.  */
2830 
2831 static rtx
2832 find_addr_reg (rtx addr)
2833 {
2834   while (GET_CODE (addr) == PLUS)
2835     {
2836       if (GET_CODE (XEXP (addr, 0)) == REG)
2837 	addr = XEXP (addr, 0);
2838       else if (GET_CODE (XEXP (addr, 1)) == REG)
2839 	addr = XEXP (addr, 1);
2840       else if (CONSTANT_P (XEXP (addr, 0)))
2841 	addr = XEXP (addr, 1);
2842       else if (CONSTANT_P (XEXP (addr, 1)))
2843 	addr = XEXP (addr, 0);
2844       else
2845 	gcc_unreachable ();
2846     }
2847   gcc_assert (GET_CODE (addr) == REG);
2848   return addr;
2849 }
2850 
2851 /* Emit code to perform a block move.
2852 
2853    OPERANDS[0] is the destination pointer as a REG, clobbered.
2854    OPERANDS[1] is the source pointer as a REG, clobbered.
2855    OPERANDS[2] is a register for temporary storage.
2856    OPERANDS[3] is a register for temporary storage.
2857    OPERANDS[4] is the size as a CONST_INT
2858    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2859    OPERANDS[6] is another temporary register.  */
2860 
2861 const char *
2862 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2863 {
2864   HOST_WIDE_INT align = INTVAL (operands[5]);
2865   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]);
2866 
2867   /* We can't move more than a word at a time because the PA
2868      has no longer integer move insns.  (Could use fp mem ops?)  */
2869   if (align > (TARGET_64BIT ? 8 : 4))
2870     align = (TARGET_64BIT ? 8 : 4);
2871 
2872   /* Note that we know each loop below will execute at least twice
2873      (else we would have open-coded the copy).  */
2874   switch (align)
2875     {
2876       case 8:
2877 	/* Pre-adjust the loop counter.  */
2878 	operands[4] = GEN_INT (n_bytes - 16);
2879 	output_asm_insn ("ldi %4,%2", operands);
2880 
2881 	/* Copying loop.  */
2882 	output_asm_insn ("ldd,ma 8(%1),%3", operands);
2883 	output_asm_insn ("ldd,ma 8(%1),%6", operands);
2884 	output_asm_insn ("std,ma %3,8(%0)", operands);
2885 	output_asm_insn ("addib,>= -16,%2,.-12", operands);
2886 	output_asm_insn ("std,ma %6,8(%0)", operands);
2887 
2888 	/* Handle the residual.  There could be up to 7 bytes of
2889 	   residual to copy!  */
2890 	if (n_bytes % 16 != 0)
2891 	  {
2892 	    operands[4] = GEN_INT (n_bytes % 8);
2893 	    if (n_bytes % 16 >= 8)
2894 	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
2895 	    if (n_bytes % 8 != 0)
2896 	      output_asm_insn ("ldd 0(%1),%6", operands);
2897 	    if (n_bytes % 16 >= 8)
2898 	      output_asm_insn ("std,ma %3,8(%0)", operands);
2899 	    if (n_bytes % 8 != 0)
2900 	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
2901 	  }
2902 	return "";
2903 
2904       case 4:
2905 	/* Pre-adjust the loop counter.  */
2906 	operands[4] = GEN_INT (n_bytes - 8);
2907 	output_asm_insn ("ldi %4,%2", operands);
2908 
2909 	/* Copying loop.  */
2910 	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2911 	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2912 	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2913 	output_asm_insn ("addib,>= -8,%2,.-12", operands);
2914 	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2915 
2916 	/* Handle the residual.  There could be up to 7 bytes of
2917 	   residual to copy!  */
2918 	if (n_bytes % 8 != 0)
2919 	  {
2920 	    operands[4] = GEN_INT (n_bytes % 4);
2921 	    if (n_bytes % 8 >= 4)
2922 	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2923 	    if (n_bytes % 4 != 0)
2924 	      output_asm_insn ("ldw 0(%1),%6", operands);
2925 	    if (n_bytes % 8 >= 4)
2926 	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2927 	    if (n_bytes % 4 != 0)
2928 	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2929 	  }
2930 	return "";
2931 
2932       case 2:
2933 	/* Pre-adjust the loop counter.  */
2934 	operands[4] = GEN_INT (n_bytes - 4);
2935 	output_asm_insn ("ldi %4,%2", operands);
2936 
2937 	/* Copying loop.  */
2938 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2939 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2940 	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2941 	output_asm_insn ("addib,>= -4,%2,.-12", operands);
2942 	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2943 
2944 	/* Handle the residual.  */
2945 	if (n_bytes % 4 != 0)
2946 	  {
2947 	    if (n_bytes % 4 >= 2)
2948 	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2949 	    if (n_bytes % 2 != 0)
2950 	      output_asm_insn ("ldb 0(%1),%6", operands);
2951 	    if (n_bytes % 4 >= 2)
2952 	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2953 	    if (n_bytes % 2 != 0)
2954 	      output_asm_insn ("stb %6,0(%0)", operands);
2955 	  }
2956 	return "";
2957 
2958       case 1:
2959 	/* Pre-adjust the loop counter.  */
2960 	operands[4] = GEN_INT (n_bytes - 2);
2961 	output_asm_insn ("ldi %4,%2", operands);
2962 
2963 	/* Copying loop.  */
2964 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2965 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2966 	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2967 	output_asm_insn ("addib,>= -2,%2,.-12", operands);
2968 	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2969 
2970 	/* Handle the residual.  */
2971 	if (n_bytes % 2 != 0)
2972 	  {
2973 	    output_asm_insn ("ldb 0(%1),%3", operands);
2974 	    output_asm_insn ("stb %3,0(%0)", operands);
2975 	  }
2976 	return "";
2977 
2978       default:
2979 	gcc_unreachable ();
2980     }
2981 }
2982 
2983 /* Count the number of insns necessary to handle this block move.
2984 
2985    Basic structure is the same as emit_block_move, except that we
2986    count insns rather than emit them.  */
2987 
2988 static int
2989 compute_movmem_length (rtx_insn *insn)
2990 {
2991   rtx pat = PATTERN (insn);
2992   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2993   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2994   unsigned int n_insns = 0;
2995 
2996   /* We can't move more than four bytes at a time because the PA
2997      has no longer integer move insns.  (Could use fp mem ops?)  */
2998   if (align > (TARGET_64BIT ? 8 : 4))
2999     align = (TARGET_64BIT ? 8 : 4);
3000 
3001   /* The basic copying loop.  */
3002   n_insns = 6;
3003 
3004   /* Residuals.  */
3005   if (n_bytes % (2 * align) != 0)
3006     {
3007       if ((n_bytes % (2 * align)) >= align)
3008 	n_insns += 2;
3009 
3010       if ((n_bytes % align) != 0)
3011 	n_insns += 2;
3012     }
3013 
3014   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3015   return n_insns * 4;
3016 }
3017 
3018 /* Emit code to perform a block clear.
3019 
3020    OPERANDS[0] is the destination pointer as a REG, clobbered.
3021    OPERANDS[1] is a register for temporary storage.
3022    OPERANDS[2] is the size as a CONST_INT
3023    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
3024 
3025 const char *
3026 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3027 {
3028   HOST_WIDE_INT align = INTVAL (operands[3]);
3029   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]);
3030 
3031   /* We can't clear more than a word at a time because the PA
3032      has no longer integer move insns.  */
3033   if (align > (TARGET_64BIT ? 8 : 4))
3034     align = (TARGET_64BIT ? 8 : 4);
3035 
3036   /* Note that we know each loop below will execute at least twice
3037      (else we would have open-coded the copy).  */
3038   switch (align)
3039     {
3040       case 8:
3041 	/* Pre-adjust the loop counter.  */
3042 	operands[2] = GEN_INT (n_bytes - 16);
3043 	output_asm_insn ("ldi %2,%1", operands);
3044 
3045 	/* Loop.  */
3046 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3047 	output_asm_insn ("addib,>= -16,%1,.-4", operands);
3048 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3049 
3050 	/* Handle the residual.  There could be up to 7 bytes of
3051 	   residual to copy!  */
3052 	if (n_bytes % 16 != 0)
3053 	  {
3054 	    operands[2] = GEN_INT (n_bytes % 8);
3055 	    if (n_bytes % 16 >= 8)
3056 	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
3057 	    if (n_bytes % 8 != 0)
3058 	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3059 	  }
3060 	return "";
3061 
3062       case 4:
3063 	/* Pre-adjust the loop counter.  */
3064 	operands[2] = GEN_INT (n_bytes - 8);
3065 	output_asm_insn ("ldi %2,%1", operands);
3066 
3067 	/* Loop.  */
3068 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3069 	output_asm_insn ("addib,>= -8,%1,.-4", operands);
3070 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3071 
3072 	/* Handle the residual.  There could be up to 7 bytes of
3073 	   residual to copy!  */
3074 	if (n_bytes % 8 != 0)
3075 	  {
3076 	    operands[2] = GEN_INT (n_bytes % 4);
3077 	    if (n_bytes % 8 >= 4)
3078 	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3079 	    if (n_bytes % 4 != 0)
3080 	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3081 	  }
3082 	return "";
3083 
3084       case 2:
3085 	/* Pre-adjust the loop counter.  */
3086 	operands[2] = GEN_INT (n_bytes - 4);
3087 	output_asm_insn ("ldi %2,%1", operands);
3088 
3089 	/* Loop.  */
3090 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3091 	output_asm_insn ("addib,>= -4,%1,.-4", operands);
3092 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3093 
3094 	/* Handle the residual.  */
3095 	if (n_bytes % 4 != 0)
3096 	  {
3097 	    if (n_bytes % 4 >= 2)
3098 	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3099 	    if (n_bytes % 2 != 0)
3100 	      output_asm_insn ("stb %%r0,0(%0)", operands);
3101 	  }
3102 	return "";
3103 
3104       case 1:
3105 	/* Pre-adjust the loop counter.  */
3106 	operands[2] = GEN_INT (n_bytes - 2);
3107 	output_asm_insn ("ldi %2,%1", operands);
3108 
3109 	/* Loop.  */
3110 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3111 	output_asm_insn ("addib,>= -2,%1,.-4", operands);
3112 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3113 
3114 	/* Handle the residual.  */
3115 	if (n_bytes % 2 != 0)
3116 	  output_asm_insn ("stb %%r0,0(%0)", operands);
3117 
3118 	return "";
3119 
3120       default:
3121 	gcc_unreachable ();
3122     }
3123 }
3124 
3125 /* Count the number of insns necessary to handle this block move.
3126 
3127    Basic structure is the same as emit_block_move, except that we
3128    count insns rather than emit them.  */
3129 
3130 static int
3131 compute_clrmem_length (rtx_insn *insn)
3132 {
3133   rtx pat = PATTERN (insn);
3134   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3135   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3136   unsigned int n_insns = 0;
3137 
3138   /* We can't clear more than a word at a time because the PA
3139      has no longer integer move insns.  */
3140   if (align > (TARGET_64BIT ? 8 : 4))
3141     align = (TARGET_64BIT ? 8 : 4);
3142 
3143   /* The basic loop.  */
3144   n_insns = 4;
3145 
3146   /* Residuals.  */
3147   if (n_bytes % (2 * align) != 0)
3148     {
3149       if ((n_bytes % (2 * align)) >= align)
3150 	n_insns++;
3151 
3152       if ((n_bytes % align) != 0)
3153 	n_insns++;
3154     }
3155 
3156   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3157   return n_insns * 4;
3158 }
3159 
3160 
3161 const char *
3162 pa_output_and (rtx *operands)
3163 {
3164   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3165     {
3166       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3167       int ls0, ls1, ms0, p, len;
3168 
3169       for (ls0 = 0; ls0 < 32; ls0++)
3170 	if ((mask & (1 << ls0)) == 0)
3171 	  break;
3172 
3173       for (ls1 = ls0; ls1 < 32; ls1++)
3174 	if ((mask & (1 << ls1)) != 0)
3175 	  break;
3176 
3177       for (ms0 = ls1; ms0 < 32; ms0++)
3178 	if ((mask & (1 << ms0)) == 0)
3179 	  break;
3180 
3181       gcc_assert (ms0 == 32);
3182 
3183       if (ls1 == 32)
3184 	{
3185 	  len = ls0;
3186 
3187 	  gcc_assert (len);
3188 
3189 	  operands[2] = GEN_INT (len);
3190 	  return "{extru|extrw,u} %1,31,%2,%0";
3191 	}
3192       else
3193 	{
3194 	  /* We could use this `depi' for the case above as well, but `depi'
3195 	     requires one more register file access than an `extru'.  */
3196 
3197 	  p = 31 - ls0;
3198 	  len = ls1 - ls0;
3199 
3200 	  operands[2] = GEN_INT (p);
3201 	  operands[3] = GEN_INT (len);
3202 	  return "{depi|depwi} 0,%2,%3,%0";
3203 	}
3204     }
3205   else
3206     return "and %1,%2,%0";
3207 }
3208 
3209 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3210    storing the result in operands[0].  */
3211 const char *
3212 pa_output_64bit_and (rtx *operands)
3213 {
3214   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3215     {
3216       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3217       int ls0, ls1, ms0, p, len;
3218 
3219       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3220 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3221 	  break;
3222 
3223       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3224 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3225 	  break;
3226 
3227       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3228 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3229 	  break;
3230 
3231       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3232 
3233       if (ls1 == HOST_BITS_PER_WIDE_INT)
3234 	{
3235 	  len = ls0;
3236 
3237 	  gcc_assert (len);
3238 
3239 	  operands[2] = GEN_INT (len);
3240 	  return "extrd,u %1,63,%2,%0";
3241 	}
3242       else
3243 	{
3244 	  /* We could use this `depi' for the case above as well, but `depi'
3245 	     requires one more register file access than an `extru'.  */
3246 
3247 	  p = 63 - ls0;
3248 	  len = ls1 - ls0;
3249 
3250 	  operands[2] = GEN_INT (p);
3251 	  operands[3] = GEN_INT (len);
3252 	  return "depdi 0,%2,%3,%0";
3253 	}
3254     }
3255   else
3256     return "and %1,%2,%0";
3257 }
3258 
3259 const char *
3260 pa_output_ior (rtx *operands)
3261 {
3262   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3263   int bs0, bs1, p, len;
3264 
3265   if (INTVAL (operands[2]) == 0)
3266     return "copy %1,%0";
3267 
3268   for (bs0 = 0; bs0 < 32; bs0++)
3269     if ((mask & (1 << bs0)) != 0)
3270       break;
3271 
3272   for (bs1 = bs0; bs1 < 32; bs1++)
3273     if ((mask & (1 << bs1)) == 0)
3274       break;
3275 
3276   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3277 
3278   p = 31 - bs0;
3279   len = bs1 - bs0;
3280 
3281   operands[2] = GEN_INT (p);
3282   operands[3] = GEN_INT (len);
3283   return "{depi|depwi} -1,%2,%3,%0";
3284 }
3285 
3286 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3287    storing the result in operands[0].  */
3288 const char *
3289 pa_output_64bit_ior (rtx *operands)
3290 {
3291   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3292   int bs0, bs1, p, len;
3293 
3294   if (INTVAL (operands[2]) == 0)
3295     return "copy %1,%0";
3296 
3297   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3298     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3299       break;
3300 
3301   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3302     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3303       break;
3304 
3305   gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3306 	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3307 
3308   p = 63 - bs0;
3309   len = bs1 - bs0;
3310 
3311   operands[2] = GEN_INT (p);
3312   operands[3] = GEN_INT (len);
3313   return "depdi -1,%2,%3,%0";
3314 }
3315 
3316 /* Target hook for assembling integer objects.  This code handles
3317    aligned SI and DI integers specially since function references
3318    must be preceded by P%.  */
3319 
3320 static bool
3321 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3322 {
3323   bool result;
3324   tree decl = NULL;
3325 
3326   /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3327      call assemble_external and set the SYMBOL_REF_DECL to NULL before
3328      calling output_addr_const.  Otherwise, it may call assemble_external
3329      in the midst of outputing the assembler code for the SYMBOL_REF.
3330      We restore the SYMBOL_REF_DECL after the output is done.  */
3331   if (GET_CODE (x) == SYMBOL_REF)
3332     {
3333       decl = SYMBOL_REF_DECL (x);
3334       if (decl)
3335 	{
3336 	  assemble_external (decl);
3337 	  SET_SYMBOL_REF_DECL (x, NULL);
3338 	}
3339     }
3340 
3341   if (size == UNITS_PER_WORD
3342       && aligned_p
3343       && function_label_operand (x, VOIDmode))
3344     {
3345       fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3346 
3347       /* We don't want an OPD when generating fast indirect calls.  */
3348       if (!TARGET_FAST_INDIRECT_CALLS)
3349 	fputs ("P%", asm_out_file);
3350 
3351       output_addr_const (asm_out_file, x);
3352       fputc ('\n', asm_out_file);
3353       result = true;
3354     }
3355   else
3356     result = default_assemble_integer (x, size, aligned_p);
3357 
3358   if (decl)
3359     SET_SYMBOL_REF_DECL (x, decl);
3360 
3361   return result;
3362 }
3363 
/* Output an ascii string.

   Write the SIZE bytes starting at P to FILE as one or more .STRING
   directives.  Bytes from space up to but not including 0177 are
   written as themselves, with a backslash in front of a double quote
   or a backslash; every other byte is written as \x followed by two
   lower-case hex digits.  */
void
pa_output_ascii (FILE *file, const char *p, int size)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned char chunk[16];	/* Max space 4 chars can occupy.  */
  int line_len = 0;
  int start;

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  /* Work in groups of up to four input bytes so that a directive is
     only ever broken between groups.  */
  for (start = 0; start < size; start += 4)
    {
      int group = size - start;
      int used = 0;
      int k;

      if (group > 4)
	group = 4;

      for (k = 0; k < group; k++)
	{
	  unsigned int c = (unsigned char) p[start + k];

	  if (c < ' ' || c >= 0177)
	    {
	      /* Not printable: emit \xNN.  */
	      chunk[used++] = '\\';
	      chunk[used++] = 'x';
	      chunk[used++] = hex_digits[c / 16];
	      chunk[used++] = hex_digits[c % 16];
	    }
	  else
	    {
	      if (c == '\"' || c == '\\')
		chunk[used++] = '\\';
	      chunk[used++] = c;
	    }
	}

      /* Start a fresh directive when this group would take the
	 current one past 243 output characters.  */
      if (line_len + used > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  line_len = 0;
	}

      fwrite (chunk, 1, (size_t) used, file);
      line_len += used;
    }

  fputs ("\"\n", file);
}
3419 
3420 /* Try to rewrite floating point comparisons & branches to avoid
3421    useless add,tr insns.
3422 
3423    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3424    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3425    first attempt to remove useless add,tr insns.  It is zero
3426    for the second pass as reorg sometimes leaves bogus REG_DEAD
3427    notes lying around.
3428 
3429    When CHECK_NOTES is zero we can only eliminate add,tr insns
3430    when there's a 1:1 correspondence between fcmp and ftest/fbranch
3431    instructions.  */
3432 static void
3433 remove_useless_addtr_insns (int check_notes)
3434 {
3435   rtx_insn *insn;
3436   static int pass = 0;
3437 
3438   /* This is fairly cheap, so always run it when optimizing.  */
3439   if (optimize > 0)
3440     {
3441       int fcmp_count = 0;
3442       int fbranch_count = 0;
3443 
3444       /* Walk all the insns in this function looking for fcmp & fbranch
3445 	 instructions.  Keep track of how many of each we find.  */
3446       for (insn = get_insns (); insn; insn = next_insn (insn))
3447 	{
3448 	  rtx tmp;
3449 
3450 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3451 	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3452 	    continue;
3453 
3454 	  tmp = PATTERN (insn);
3455 
3456 	  /* It must be a set.  */
3457 	  if (GET_CODE (tmp) != SET)
3458 	    continue;
3459 
3460 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3461 	  tmp = SET_DEST (tmp);
3462 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3463 	    {
3464 	      fcmp_count++;
3465 	      continue;
3466 	    }
3467 
3468 	  tmp = PATTERN (insn);
3469 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3470 	  if (GET_CODE (tmp) == SET
3471 	      && SET_DEST (tmp) == pc_rtx
3472 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3473 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3474 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3475 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3476 	    {
3477 	      fbranch_count++;
3478 	      continue;
3479 	    }
3480 	}
3481 
3482 
3483       /* Find all floating point compare + branch insns.  If possible,
3484 	 reverse the comparison & the branch to avoid add,tr insns.  */
3485       for (insn = get_insns (); insn; insn = next_insn (insn))
3486 	{
3487 	  rtx tmp;
3488 	  rtx_insn *next;
3489 
3490 	  /* Ignore anything that isn't an INSN.  */
3491 	  if (! NONJUMP_INSN_P (insn))
3492 	    continue;
3493 
3494 	  tmp = PATTERN (insn);
3495 
3496 	  /* It must be a set.  */
3497 	  if (GET_CODE (tmp) != SET)
3498 	    continue;
3499 
3500 	  /* The destination must be CCFP, which is register zero.  */
3501 	  tmp = SET_DEST (tmp);
3502 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3503 	    continue;
3504 
3505 	  /* INSN should be a set of CCFP.
3506 
3507 	     See if the result of this insn is used in a reversed FP
3508 	     conditional branch.  If so, reverse our condition and
3509 	     the branch.  Doing so avoids useless add,tr insns.  */
3510 	  next = next_insn (insn);
3511 	  while (next)
3512 	    {
3513 	      /* Jumps, calls and labels stop our search.  */
3514 	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3515 		break;
3516 
3517 	      /* As does another fcmp insn.  */
3518 	      if (NONJUMP_INSN_P (next)
3519 		  && GET_CODE (PATTERN (next)) == SET
3520 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3521 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3522 		break;
3523 
3524 	      next = next_insn (next);
3525 	    }
3526 
3527 	  /* Is NEXT_INSN a branch?  */
3528 	  if (next && JUMP_P (next))
3529 	    {
3530 	      rtx pattern = PATTERN (next);
3531 
3532 	      /* If it a reversed fp conditional branch (e.g. uses add,tr)
3533 		 and CCFP dies, then reverse our conditional and the branch
3534 		 to avoid the add,tr.  */
3535 	      if (GET_CODE (pattern) == SET
3536 		  && SET_DEST (pattern) == pc_rtx
3537 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3538 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3539 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3540 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3541 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3542 		  && (fcmp_count == fbranch_count
3543 		      || (check_notes
3544 			  && find_regno_note (next, REG_DEAD, 0))))
3545 		{
3546 		  /* Reverse the branch.  */
3547 		  tmp = XEXP (SET_SRC (pattern), 1);
3548 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3549 		  XEXP (SET_SRC (pattern), 2) = tmp;
3550 		  INSN_CODE (next) = -1;
3551 
3552 		  /* Reverse our condition.  */
3553 		  tmp = PATTERN (insn);
3554 		  PUT_CODE (XEXP (tmp, 1),
3555 			    (reverse_condition_maybe_unordered
3556 			     (GET_CODE (XEXP (tmp, 1)))));
3557 		}
3558 	    }
3559 	}
3560     }
3561 
3562   pass = !pass;
3563 
3564 }
3565 
3566 /* You may have trouble believing this, but this is the 32 bit HP-PA
3567    stack layout.  Wow.
3568 
3569    Offset		Contents
3570 
3571    Variable arguments	(optional; any number may be allocated)
3572 
3573    SP-(4*(N+9))		arg word N
3574    	:		    :
3575       SP-56		arg word 5
3576       SP-52		arg word 4
3577 
3578    Fixed arguments	(must be allocated; may remain unused)
3579 
3580       SP-48		arg word 3
3581       SP-44		arg word 2
3582       SP-40		arg word 1
3583       SP-36		arg word 0
3584 
3585    Frame Marker
3586 
3587       SP-32		External Data Pointer (DP)
3588       SP-28		External sr4
3589       SP-24		External/stub RP (RP')
3590       SP-20		Current RP
3591       SP-16		Static Link
3592       SP-12		Clean up
3593       SP-8		Calling Stub RP (RP'')
3594       SP-4		Previous SP
3595 
3596    Top of Frame
3597 
3598       SP-0		Stack Pointer (points to next available address)
3599 
3600 */
3601 
3602 /* This function saves registers as follows.  Registers marked with ' are
3603    this function's registers (as opposed to the previous function's).
3604    If a frame_pointer isn't needed, r4 is saved as a general register;
3605    the space for the frame pointer is still allocated, though, to keep
3606    things simple.
3607 
3608 
3609    Top of Frame
3610 
3611        SP (FP')		Previous FP
3612        SP + 4		Alignment filler (sigh)
3613        SP + 8		Space for locals reserved here.
3614        .
3615        .
3616        .
3617        SP + n		All call saved register used.
3618        .
3619        .
3620        .
3621        SP + o		All call saved fp registers used.
3622        .
3623        .
3624        .
3625        SP + p (SP')	points to next available address.
3626 
3627 */
3628 
3629 /* Global variables set by output_function_prologue().  */
3630 /* Size of frame.  Need to know this to emit return insns from
3631    leaf procedures.  */
3632 static HOST_WIDE_INT actual_fsize, local_fsize;
3633 static int save_fregs;
3634 
3635 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3636    Handle case where DISP > 8k by using the add_high_const patterns.
3637 
3638    Note in DISP > 8k case, we will leave the high part of the address
3639    in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3640 
3641 static void
3642 store_reg (int reg, HOST_WIDE_INT disp, int base)
3643 {
3644   rtx dest, src, basereg;
3645   rtx_insn *insn;
3646 
3647   src = gen_rtx_REG (word_mode, reg);
3648   basereg = gen_rtx_REG (Pmode, base);
3649   if (VAL_14_BITS_P (disp))
3650     {
3651       dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3652       insn = emit_move_insn (dest, src);
3653     }
3654   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3655     {
3656       rtx delta = GEN_INT (disp);
3657       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3658 
3659       emit_move_insn (tmpreg, delta);
3660       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3661       if (DO_FRAME_NOTES)
3662 	{
3663 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3664 			gen_rtx_SET (tmpreg,
3665 				     gen_rtx_PLUS (Pmode, basereg, delta)));
3666 	  RTX_FRAME_RELATED_P (insn) = 1;
3667 	}
3668       dest = gen_rtx_MEM (word_mode, tmpreg);
3669       insn = emit_move_insn (dest, src);
3670     }
3671   else
3672     {
3673       rtx delta = GEN_INT (disp);
3674       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3675       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3676 
3677       emit_move_insn (tmpreg, high);
3678       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3679       insn = emit_move_insn (dest, src);
3680       if (DO_FRAME_NOTES)
3681 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3682 		      gen_rtx_SET (gen_rtx_MEM (word_mode,
3683 						gen_rtx_PLUS (word_mode,
3684 							      basereg,
3685 							      delta)),
3686 				   src));
3687     }
3688 
3689   if (DO_FRAME_NOTES)
3690     RTX_FRAME_RELATED_P (insn) = 1;
3691 }
3692 
3693 /* Emit RTL to store REG at the memory location specified by BASE and then
3694    add MOD to BASE.  MOD must be <= 8k.  */
3695 
3696 static void
3697 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3698 {
3699   rtx basereg, srcreg, delta;
3700   rtx_insn *insn;
3701 
3702   gcc_assert (VAL_14_BITS_P (mod));
3703 
3704   basereg = gen_rtx_REG (Pmode, base);
3705   srcreg = gen_rtx_REG (word_mode, reg);
3706   delta = GEN_INT (mod);
3707 
3708   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3709   if (DO_FRAME_NOTES)
3710     {
3711       RTX_FRAME_RELATED_P (insn) = 1;
3712 
3713       /* RTX_FRAME_RELATED_P must be set on each frame related set
3714 	 in a parallel with more than one element.  */
3715       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3716       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3717     }
3718 }
3719 
3720 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3721    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3722    whether to add a frame note or not.
3723 
3724    In the DISP > 8k case, we leave the high part of the address in %r1.
3725    There is code in expand_hppa_{prologue,epilogue} that knows about this.  */
3726 
3727 static void
3728 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3729 {
3730   rtx_insn *insn;
3731 
3732   if (VAL_14_BITS_P (disp))
3733     {
3734       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3735 			     plus_constant (Pmode,
3736 					    gen_rtx_REG (Pmode, base), disp));
3737     }
3738   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3739     {
3740       rtx basereg = gen_rtx_REG (Pmode, base);
3741       rtx delta = GEN_INT (disp);
3742       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3743 
3744       emit_move_insn (tmpreg, delta);
3745       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3746 			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3747       if (DO_FRAME_NOTES)
3748 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3749 		      gen_rtx_SET (tmpreg,
3750 				   gen_rtx_PLUS (Pmode, basereg, delta)));
3751     }
3752   else
3753     {
3754       rtx basereg = gen_rtx_REG (Pmode, base);
3755       rtx delta = GEN_INT (disp);
3756       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3757 
3758       emit_move_insn (tmpreg,
3759 		      gen_rtx_PLUS (Pmode, basereg,
3760 				    gen_rtx_HIGH (Pmode, delta)));
3761       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3762 			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3763     }
3764 
3765   if (DO_FRAME_NOTES && note)
3766     RTX_FRAME_RELATED_P (insn) = 1;
3767 }
3768 
3769 HOST_WIDE_INT
3770 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3771 {
3772   int freg_saved = 0;
3773   int i, j;
3774 
3775   /* The code in pa_expand_prologue and pa_expand_epilogue must
3776      be consistent with the rounding and size calculation done here.
3777      Change them at the same time.  */
3778 
3779   /* We do our own stack alignment.  First, round the size of the
3780      stack locals up to a word boundary.  */
3781   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3782 
3783   /* Space for previous frame pointer + filler.  If any frame is
3784      allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET.  We
3785      waste some space here for the sake of HP compatibility.  The
3786      first slot is only used when the frame pointer is needed.  */
3787   if (size || frame_pointer_needed)
3788     size += pa_starting_frame_offset ();
3789 
3790   /* If the current function calls __builtin_eh_return, then we need
3791      to allocate stack space for registers that will hold data for
3792      the exception handler.  */
3793   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3794     {
3795       unsigned int i;
3796 
3797       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3798 	continue;
3799       size += i * UNITS_PER_WORD;
3800     }
3801 
3802   /* Account for space used by the callee general register saves.  */
3803   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3804     if (df_regs_ever_live_p (i))
3805       size += UNITS_PER_WORD;
3806 
3807   /* Account for space used by the callee floating point register saves.  */
3808   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3809     if (df_regs_ever_live_p (i)
3810 	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3811       {
3812 	freg_saved = 1;
3813 
3814 	/* We always save both halves of the FP register, so always
3815 	   increment the frame size by 8 bytes.  */
3816 	size += 8;
3817       }
3818 
3819   /* If any of the floating registers are saved, account for the
3820      alignment needed for the floating point register save block.  */
3821   if (freg_saved)
3822     {
3823       size = (size + 7) & ~7;
3824       if (fregs_live)
3825 	*fregs_live = 1;
3826     }
3827 
3828   /* The various ABIs include space for the outgoing parameters in the
3829      size of the current function's stack frame.  We don't need to align
3830      for the outgoing arguments as their alignment is set by the final
3831      rounding for the frame as a whole.  */
3832   size += crtl->outgoing_args_size;
3833 
3834   /* Allocate space for the fixed frame marker.  This space must be
3835      allocated for any function that makes calls or allocates
3836      stack space.  */
3837   if (!crtl->is_leaf || size)
3838     size += TARGET_64BIT ? 48 : 32;
3839 
3840   /* Finally, round to the preferred stack boundary.  */
3841   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3842 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3843 }
3844 
3845 /* Output function label, and associated .PROC and .CALLINFO statements.  */
3846 
3847 void
3848 pa_output_function_label (FILE *file)
3849 {
3850   /* The function's label and associated .PROC must never be
3851      separated and must be output *after* any profiling declarations
3852      to avoid changing spaces/subspaces within a procedure.  */
3853   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3854   fputs ("\t.PROC\n", file);
3855 
3856   /* pa_expand_prologue does the dirty work now.  We just need
3857      to output the assembler directives which denote the start
3858      of a function.  */
3859   fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3860   if (crtl->is_leaf)
3861     fputs (",NO_CALLS", file);
3862   else
3863     fputs (",CALLS", file);
3864   if (rp_saved)
3865     fputs (",SAVE_RP", file);
3866 
3867   /* The SAVE_SP flag is used to indicate that register %r3 is stored
3868      at the beginning of the frame and that it is used as the frame
3869      pointer for the frame.  We do this because our current frame
3870      layout doesn't conform to that specified in the HP runtime
3871      documentation and we need a way to indicate to programs such as
3872      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3873      isn't used by HP compilers but is supported by the assembler.
3874      However, SAVE_SP is supposed to indicate that the previous stack
3875      pointer has been saved in the frame marker.  */
3876   if (frame_pointer_needed)
3877     fputs (",SAVE_SP", file);
3878 
3879   /* Pass on information about the number of callee register saves
3880      performed in the prologue.
3881 
3882      The compiler is supposed to pass the highest register number
3883      saved, the assembler then has to adjust that number before
3884      entering it into the unwind descriptor (to account for any
3885      caller saved registers with lower register numbers than the
3886      first callee saved register).  */
3887   if (gr_saved)
3888     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3889 
3890   if (fr_saved)
3891     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3892 
3893   fputs ("\n\t.ENTRY\n", file);
3894 }
3895 
3896 /* Output function prologue.  */
3897 
3898 static void
3899 pa_output_function_prologue (FILE *file)
3900 {
3901   pa_output_function_label (file);
3902   remove_useless_addtr_insns (0);
3903 }
3904 
3905 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux.  */
3906 
3907 static void
3908 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
3909 {
3910   remove_useless_addtr_insns (0);
3911 }
3912 
/* Emit the RTL for the prologue of the current function.

   In order: save RP in the caller's frame, allocate the frame (setting up
   the frame pointer when one is needed), save the EH return data
   registers and the callee-saved general registers, store the PIC offset
   table register for 32-bit PIC code, and save the callee-saved floating
   point registers.

   Side effects: sets the file-level ACTUAL_FSIZE, LOCAL_FSIZE,
   SAVE_FREGS, GR_SAVED, FR_SAVED and RP_SAVED, which
   pa_output_function_label and pa_expand_epilogue read later.  */
3913 void
3914 pa_expand_prologue (void)
3915 {
3916   int merge_sp_adjust_with_store = 0;
3917   HOST_WIDE_INT size = get_frame_size ();
3918   HOST_WIDE_INT offset;
3919   int i;
3920   rtx tmpreg;
3921   rtx_insn *insn;
3922 
3923   gr_saved = 0;
3924   fr_saved = 0;
3925   save_fregs = 0;
3926 
3927   /* Compute total size for frame pointer, filler, locals and rounding to
3928      the next word boundary.  Similar code appears in pa_compute_frame_size
3929      and must be changed in tandem with this code.  */
3930   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3931   if (local_fsize || frame_pointer_needed)
3932     local_fsize += pa_starting_frame_offset ();
3933 
3934   actual_fsize = pa_compute_frame_size (size, &save_fregs);
3935   if (flag_stack_usage_info)
3936     current_function_static_stack_size = actual_fsize;
3937 
3938   /* Compute a few things we will use often.  */
3939   tmpreg = gen_rtx_REG (word_mode, 1);
3940 
3941   /* Save RP first.  The calling conventions manual states RP will
3942      always be stored into the caller's frame at sp - 20 or sp - 16
3943      depending on which ABI is in use.  */
3944   if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3945     {
3946       store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3947       rp_saved = true;
3948     }
3949   else
3950     rp_saved = false;
3951 
3952   /* Allocate the local frame and set up the frame pointer if needed.  */
3953   if (actual_fsize != 0)
3954     {
3955       if (frame_pointer_needed)
3956 	{
3957 	  /* Copy the old frame pointer temporarily into %r1.  Set up the
3958 	     new stack pointer, then store away the saved old frame pointer
3959 	     into the stack at sp and at the same time update the stack
3960 	     pointer by actual_fsize bytes.  Two versions, first
3961 	     handles small (<8k) frames.  The second handles large (>=8k)
3962 	     frames.  */
3963 	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3964 	  if (DO_FRAME_NOTES)
3965 	    RTX_FRAME_RELATED_P (insn) = 1;
3966 
3967 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3968 	  if (DO_FRAME_NOTES)
3969 	    RTX_FRAME_RELATED_P (insn) = 1;
3970 
3971 	  if (VAL_14_BITS_P (actual_fsize))
3972 	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3973 	  else
3974 	    {
3975 	      /* It is incorrect to store the saved frame pointer at *sp,
3976 		 then increment sp (writes beyond the current stack boundary).
3977 
3978 		 So instead use stwm to store at *sp and post-increment the
3979 		 stack pointer as an atomic operation.  Then increment sp to
3980 		 finish allocating the new frame.  */
	      /* ADJUST1 is the part of the frame allocated by the
		 post-modify store (store_reg_modify asserts that it
		 satisfies VAL_14_BITS_P); ADJUST2 is the remainder.  */
3981 	      HOST_WIDE_INT adjust1 = 8192 - 64;
3982 	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3983 
3984 	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3985 	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3986 			      adjust2, 1);
3987 	    }
3988 
3989 	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3990 	     we need to store the previous stack pointer (frame pointer)
3991 	     into the frame marker on targets that use the HP unwind
3992 	     library.  This allows the HP unwind library to be used to
3993 	     unwind GCC frames.  However, we are not fully compatible
3994 	     with the HP library because our frame layout differs from
3995 	     that specified in the HP runtime specification.
3996 
3997 	     We don't want a frame note on this instruction as the frame
3998 	     marker moves during dynamic stack allocation.
3999 
4000 	     This instruction also serves as a blockage to prevent
4001 	     register spills from being scheduled before the stack
4002 	     pointer is raised.  This is necessary as we store
4003 	     registers using the frame pointer as a base register,
4004 	     and the frame pointer is set before sp is raised.  */
4005 	  if (TARGET_HPUX_UNWIND_LIBRARY)
4006 	    {
4007 	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4008 				       GEN_INT (TARGET_64BIT ? -8 : -4));
4009 
4010 	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
4011 			      hard_frame_pointer_rtx);
4012 	    }
4013 	  else
4014 	    emit_insn (gen_blockage ());
4015 	}
4016       /* no frame pointer needed.  */
4017       else
4018 	{
4019 	  /* In some cases we can perform the first callee register save
4020 	     and allocating the stack frame at the same time.   If so, just
4021 	     make a note of it and defer allocating the frame until saving
4022 	     the callee registers.  */
4023 	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4024 	    merge_sp_adjust_with_store = 1;
4025 	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
4026 	     bytes.  */
4027 	  else
4028 	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4029 			    actual_fsize, 1);
4030 	}
4031     }
4032 
4033   /* Normal register save.
4034 
4035      Do not save the frame pointer in the frame_pointer_needed case.  It
4036      was done earlier.  */
4037   if (frame_pointer_needed)
4038     {
      /* Save slots are addressed upwards from the frame pointer, starting
	 right after the locals.  */
4039       offset = local_fsize;
4040 
4041       /* Saving the EH return data registers in the frame is the simplest
4042 	 way to get the frame unwind information emitted.  We put them
4043 	 just before the general registers.  */
4044       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4045 	{
4046 	  unsigned int i, regno;
4047 
4048 	  for (i = 0; ; ++i)
4049 	    {
4050 	      regno = EH_RETURN_DATA_REGNO (i);
4051 	      if (regno == INVALID_REGNUM)
4052 		break;
4053 
4054 	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4055 	      offset += UNITS_PER_WORD;
4056 	    }
4057 	}
4058 
4059       for (i = 18; i >= 4; i--)
4060 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4061 	  {
4062 	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4063 	    offset += UNITS_PER_WORD;
4064 	    gr_saved++;
4065 	  }
4066       /* Account for %r3 which is saved in a special place.  */
4067       gr_saved++;
4068     }
4069   /* No frame pointer needed.  */
4070   else
4071     {
      /* Save slots are addressed relative to the raised stack pointer, so
	 OFFSET starts out as minus the distance from the first slot to the
	 top of the frame.  */
4072       offset = local_fsize - actual_fsize;
4073 
4074       /* Saving the EH return data registers in the frame is the simplest
4075          way to get the frame unwind information emitted.  */
4076       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4077 	{
4078 	  unsigned int i, regno;
4079 
4080 	  for (i = 0; ; ++i)
4081 	    {
4082 	      regno = EH_RETURN_DATA_REGNO (i);
4083 	      if (regno == INVALID_REGNUM)
4084 		break;
4085 
4086 	      /* If merge_sp_adjust_with_store is nonzero, then we can
4087 		 optimize the first save.  */
	      /* The merge is only requested when LOCAL_FSIZE is 0, so
		 -OFFSET equals ACTUAL_FSIZE here: the post-modify store
		 allocates the whole frame.  */
4088 	      if (merge_sp_adjust_with_store)
4089 		{
4090 		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4091 		  merge_sp_adjust_with_store = 0;
4092 		}
4093 	      else
4094 		store_reg (regno, offset, STACK_POINTER_REGNUM);
4095 	      offset += UNITS_PER_WORD;
4096 	    }
4097 	}
4098 
4099       for (i = 18; i >= 3; i--)
4100       	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4101 	  {
4102 	    /* If merge_sp_adjust_with_store is nonzero, then we can
4103 	       optimize the first GR save.  */
4104 	    if (merge_sp_adjust_with_store)
4105 	      {
4106 		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4107 		merge_sp_adjust_with_store = 0;
4108 	      }
4109 	    else
4110 	      store_reg (i, offset, STACK_POINTER_REGNUM);
4111 	    offset += UNITS_PER_WORD;
4112 	    gr_saved++;
4113 	  }
4114 
4115       /* If we wanted to merge the SP adjustment with a GR save, but we never
4116 	 did any GR saves, then just emit the adjustment here.  */
4117       if (merge_sp_adjust_with_store)
4118 	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4119 			actual_fsize, 1);
4120     }
4121 
4122   /* The hppa calling conventions say that %r19, the pic offset
4123      register, is saved at sp - 32 (in this function's frame)
4124      when generating PIC code.  FIXME:  What is the correct thing
4125      to do for functions which make no calls and allocate no
4126      frame?  Do we need to allocate a frame, or can we just omit
4127      the save?   For now we'll just omit the save.
4128 
4129      We don't want a note on this insn as the frame marker can
4130      move if there is a dynamic stack allocation.  */
4131   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4132     {
4133       rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4134 
4135       emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4136 
4137     }
4138 
4139   /* Align pointer properly (doubleword boundary).  */
4140   offset = (offset + 7) & ~7;
4141 
4142   /* Floating point register store.  */
4143   if (save_fregs)
4144     {
      /* BASE (together with OFFSET) is only used to describe each save
	 slot in the frame notes; the stores themselves go through %r1.  */
4145       rtx base;
4146 
4147       /* First get the frame or stack pointer to the start of the FP register
4148 	 save area.  */
4149       if (frame_pointer_needed)
4150 	{
4151 	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4152 	  base = hard_frame_pointer_rtx;
4153 	}
4154       else
4155 	{
4156 	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4157 	  base = stack_pointer_rtx;
4158 	}
4159 
4160       /* Now actually save the FP registers.  */
4161       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4162 	{
4163 	  if (df_regs_ever_live_p (i)
4164 	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4165 	    {
4166 	      rtx addr, reg;
4167 	      rtx_insn *insn;
	      /* Store through %r1 with post-increment, so %r1 walks
		 through the save area one DFmode slot at a time.  */
4168 	      addr = gen_rtx_MEM (DFmode,
4169 				  gen_rtx_POST_INC (word_mode, tmpreg));
4170 	      reg = gen_rtx_REG (DFmode, i);
4171 	      insn = emit_move_insn (addr, reg);
4172 	      if (DO_FRAME_NOTES)
4173 		{
4174 		  RTX_FRAME_RELATED_P (insn) = 1;
4175 		  if (TARGET_64BIT)
4176 		    {
4177 		      rtx mem = gen_rtx_MEM (DFmode,
4178 					     plus_constant (Pmode, base,
4179 							    offset));
4180 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4181 				    gen_rtx_SET (mem, reg));
4182 		    }
4183 		  else
4184 		    {
		      /* On 32-bit targets the note describes the save as
			 two SFmode stores, registers I and I + 1 at OFFSET
			 and OFFSET + 4.  */
4185 		      rtx meml = gen_rtx_MEM (SFmode,
4186 					      plus_constant (Pmode, base,
4187 							     offset));
4188 		      rtx memr = gen_rtx_MEM (SFmode,
4189 					      plus_constant (Pmode, base,
4190 							     offset + 4));
4191 		      rtx regl = gen_rtx_REG (SFmode, i);
4192 		      rtx regr = gen_rtx_REG (SFmode, i + 1);
4193 		      rtx setl = gen_rtx_SET (meml, regl);
4194 		      rtx setr = gen_rtx_SET (memr, regr);
4195 		      rtvec vec;
4196 
4197 		      RTX_FRAME_RELATED_P (setl) = 1;
4198 		      RTX_FRAME_RELATED_P (setr) = 1;
4199 		      vec = gen_rtvec (2, setl, setr);
4200 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4201 				    gen_rtx_SEQUENCE (VOIDmode, vec));
4202 		    }
4203 		}
4204 	      offset += GET_MODE_SIZE (DFmode);
4205 	      fr_saved++;
4206 	    }
4207 	}
4208     }
4209 }
4210 
4211 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4212    Handle case where DISP > 8k by using the add_high_const patterns.  */
4213 
4214 static void
4215 load_reg (int reg, HOST_WIDE_INT disp, int base)
4216 {
4217   rtx dest = gen_rtx_REG (word_mode, reg);
4218   rtx basereg = gen_rtx_REG (Pmode, base);
4219   rtx src;
4220 
4221   if (VAL_14_BITS_P (disp))
4222     src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4223   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4224     {
4225       rtx delta = GEN_INT (disp);
4226       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4227 
4228       emit_move_insn (tmpreg, delta);
4229       if (TARGET_DISABLE_INDEXING)
4230 	{
4231 	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4232 	  src = gen_rtx_MEM (word_mode, tmpreg);
4233 	}
4234       else
4235 	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4236     }
4237   else
4238     {
4239       rtx delta = GEN_INT (disp);
4240       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4241       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4242 
4243       emit_move_insn (tmpreg, high);
4244       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4245     }
4246 
4247   emit_move_insn (dest, src);
4248 }
4249 
4250 /* Update the total code bytes output to the text section.  */
4251 
4252 static void
4253 update_total_code_bytes (unsigned int nbytes)
4254 {
4255   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4256       && !IN_NAMED_SECTION_P (cfun->decl))
4257     {
4258       unsigned int old_total = total_code_bytes;
4259 
4260       total_code_bytes += nbytes;
4261 
4262       /* Be prepared to handle overflows.  */
4263       if (old_total > total_code_bytes)
4264         total_code_bytes = UINT_MAX;
4265     }
4266 }
4267 
4268 /* This function generates the assembly code for function exit.
4269    Args are as for output_function_prologue ().
4270 
4271    The function epilogue should not depend on the current stack
4272    pointer!  It should use the frame pointer only.  This is mandatory
4273    because of alloca; we also take advantage of it to omit stack
4274    adjustments before returning.  */
4275 
4276 static void
4277 pa_output_function_epilogue (FILE *file)
4278 {
4279   rtx_insn *insn = get_last_insn ();
4280   bool extra_nop;
4281 
4282   /* pa_expand_epilogue does the dirty work now.  We just need
4283      to output the assembler directives which denote the end
4284      of a function.
4285 
4286      To make debuggers happy, emit a nop if the epilogue was completely
4287      eliminated due to a volatile call as the last insn in the
4288      current function.  That way the return address (in %r2) will
4289      always point to a valid instruction in the current function.  */
4290 
4291   /* Get the last real insn.  */
4292   if (NOTE_P (insn))
4293     insn = prev_real_insn (insn);
4294 
4295   /* If it is a sequence, then look inside.  */
4296   if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4297     insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0);
4298 
4299   /* If insn is a CALL_INSN, then it must be a call to a volatile
4300      function (otherwise there would be epilogue insns).  */
4301   if (insn && CALL_P (insn))
4302     {
4303       fputs ("\tnop\n", file);
4304       extra_nop = true;
4305     }
4306   else
4307     extra_nop = false;
4308 
4309   fputs ("\t.EXIT\n\t.PROCEND\n", file);
4310 
4311   if (TARGET_SOM && TARGET_GAS)
4312     {
4313       /* We are done with this subspace except possibly for some additional
4314 	 debug information.  Forget that we are in this subspace to ensure
4315 	 that the next function is output in its own subspace.  */
4316       in_section = NULL;
4317       cfun->machine->in_nsubspa = 2;
4318     }
4319 
4320   /* Thunks do their own insn accounting.  */
4321   if (cfun->is_thunk)
4322     return;
4323 
4324   if (INSN_ADDRESSES_SET_P ())
4325     {
4326       last_address = extra_nop ? 4 : 0;
4327       insn = get_last_nonnote_insn ();
4328       if (insn)
4329 	{
4330 	  last_address += INSN_ADDRESSES (INSN_UID (insn));
4331 	  if (INSN_P (insn))
4332 	    last_address += insn_default_length (insn);
4333 	}
4334       last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4335 		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4336     }
4337   else
4338     last_address = UINT_MAX;
4339 
4340   /* Finally, update the total number of code bytes output so far.  */
4341   update_total_code_bytes (last_address);
4342 }
4343 
/* Emit the RTL for the epilogue of the current function.

   Restores RP, the EH return data registers, the callee-saved general and
   floating point registers, then cuts the stack back (restoring the frame
   pointer when one was used).  For functions that call
   __builtin_eh_return, the stack pointer is finally adjusted by
   EH_RETURN_STACKADJ_RTX.

   Relies on the file-level ACTUAL_FSIZE, LOCAL_FSIZE, SAVE_FREGS and
   RP_SAVED, and walks the registers with the same conditions and the same
   offsets as the save code in pa_expand_prologue.  */
4344 void
4345 pa_expand_epilogue (void)
4346 {
4347   rtx tmpreg;
4348   HOST_WIDE_INT offset;
  /* RET_OFF stays nonzero when the restore of RP has to be deferred until
     after the stack has been cut back.  */
4349   HOST_WIDE_INT ret_off = 0;
4350   int i;
  /* Zero, or the number of the register whose load is deferred so that it
     can be merged with the stack pointer adjustment.  */
4351   int merge_sp_adjust_with_load = 0;
4352 
4353   /* We will use this often.  */
4354   tmpreg = gen_rtx_REG (word_mode, 1);
4355 
4356   /* Try to restore RP early to avoid load/use interlocks when
4357      RP gets used in the return (bv) instruction.  This appears to still
4358      be necessary even when we schedule the prologue and epilogue.  */
4359   if (rp_saved)
4360     {
4361       ret_off = TARGET_64BIT ? -16 : -20;
4362       if (frame_pointer_needed)
4363 	{
4364 	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4365 	  ret_off = 0;
4366 	}
4367       else
4368 	{
4369 	  /* No frame pointer, and stack is smaller than 8k.  */
4370 	  if (VAL_14_BITS_P (ret_off - actual_fsize))
4371 	    {
4372 	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4373 	      ret_off = 0;
4374 	    }
4375 	}
4376     }
4377 
4378   /* General register restores.  */
4379   if (frame_pointer_needed)
4380     {
4381       offset = local_fsize;
4382 
4383       /* If the current function calls __builtin_eh_return, then we need
4384          to restore the saved EH data registers.  */
4385       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4386 	{
4387 	  unsigned int i, regno;
4388 
4389 	  for (i = 0; ; ++i)
4390 	    {
4391 	      regno = EH_RETURN_DATA_REGNO (i);
4392 	      if (regno == INVALID_REGNUM)
4393 		break;
4394 
4395 	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4396 	      offset += UNITS_PER_WORD;
4397 	    }
4398 	}
4399 
4400       for (i = 18; i >= 4; i--)
4401 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4402 	  {
4403 	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4404 	    offset += UNITS_PER_WORD;
4405 	  }
4406     }
4407   else
4408     {
4409       offset = local_fsize - actual_fsize;
4410 
4411       /* If the current function calls __builtin_eh_return, then we need
4412          to restore the saved EH data registers.  */
4413       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4414 	{
4415 	  unsigned int i, regno;
4416 
4417 	  for (i = 0; ; ++i)
4418 	    {
4419 	      regno = EH_RETURN_DATA_REGNO (i);
4420 	      if (regno == INVALID_REGNUM)
4421 		break;
4422 
4423 	      /* Only for the first load.
4424 	         merge_sp_adjust_with_load holds the register load
4425 	         with which we will merge the sp adjustment.  */
4426 	      if (merge_sp_adjust_with_load == 0
4427 		  && local_fsize == 0
4428 		  && VAL_14_BITS_P (-actual_fsize))
4429 	        merge_sp_adjust_with_load = regno;
4430 	      else
4431 		load_reg (regno, offset, STACK_POINTER_REGNUM);
4432 	      offset += UNITS_PER_WORD;
4433 	    }
4434 	}
4435 
4436       for (i = 18; i >= 3; i--)
4437 	{
4438 	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4439 	    {
4440 	      /* Only for the first load.
4441 	         merge_sp_adjust_with_load holds the register load
4442 	         with which we will merge the sp adjustment.  */
4443 	      if (merge_sp_adjust_with_load == 0
4444 		  && local_fsize == 0
4445 		  && VAL_14_BITS_P (-actual_fsize))
4446 	        merge_sp_adjust_with_load = i;
4447 	      else
4448 		load_reg (i, offset, STACK_POINTER_REGNUM);
4449 	      offset += UNITS_PER_WORD;
4450 	    }
4451 	}
4452     }
4453 
4454   /* Align pointer properly (doubleword boundary).  */
4455   offset = (offset + 7) & ~7;
4456 
4457   /* FP register restores.  */
4458   if (save_fregs)
4459     {
4460       /* Adjust the register to index off of.  */
4461       if (frame_pointer_needed)
4462 	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4463       else
4464 	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4465 
4466       /* Actually do the restores now.  */
      /* Each load goes through %r1 with post-increment, in the same
	 register order as the saves in pa_expand_prologue.  */
4467       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4468 	if (df_regs_ever_live_p (i)
4469 	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4470 	  {
4471 	    rtx src = gen_rtx_MEM (DFmode,
4472 				   gen_rtx_POST_INC (word_mode, tmpreg));
4473 	    rtx dest = gen_rtx_REG (DFmode, i);
4474 	    emit_move_insn (dest, src);
4475 	  }
4476     }
4477 
4478   /* Emit a blockage insn here to keep these insns from being moved to
4479      an earlier spot in the epilogue, or into the main instruction stream.
4480 
4481      This is necessary as we must not cut the stack back before all the
4482      restores are finished.  */
4483   emit_insn (gen_blockage ());
4484 
4485   /* Reset stack pointer (and possibly frame pointer).  The stack
4486      pointer is initially set to fp + 64 to avoid a race condition.  */
4487   if (frame_pointer_needed)
4488     {
4489       rtx delta = GEN_INT (-64);
4490 
4491       set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
      /* NOTE(review): presumably pre_load moves SP back by 64 to the slot
	 where the prologue stored the old frame pointer and reloads it
	 from there -- confirm against the pre_load pattern in pa.md.  */
4492       emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4493 			       stack_pointer_rtx, delta));
4494     }
4495   /* If we were deferring a callee register restore, do it now.  */
4496   else if (merge_sp_adjust_with_load)
4497     {
4498       rtx delta = GEN_INT (-actual_fsize);
4499       rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4500 
4501       emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4502     }
4503   else if (actual_fsize != 0)
4504     set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4505 		    - actual_fsize, 0);
4506 
4507   /* If we haven't restored %r2 yet (no frame pointer, and a stack
4508      frame greater than 8k), do so now.  */
4509   if (ret_off != 0)
4510     load_reg (2, ret_off, STACK_POINTER_REGNUM);
4511 
4512   if (DO_FRAME_NOTES && crtl->calls_eh_return)
4513     {
4514       rtx sa = EH_RETURN_STACKADJ_RTX;
4515 
      /* Subtract the EH stack adjustment from SP; the blockage is emitted
	 first, right after the restores above.  */
4516       emit_insn (gen_blockage ());
4517       emit_insn (TARGET_64BIT
4518 		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4519 		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4520     }
4521 }
4522 
4523 bool
4524 pa_can_use_return_insn (void)
4525 {
4526   if (!reload_completed)
4527     return false;
4528 
4529   if (frame_pointer_needed)
4530     return false;
4531 
4532   if (df_regs_ever_live_p (2))
4533     return false;
4534 
4535   if (crtl->profile)
4536     return false;
4537 
4538   return pa_compute_frame_size (get_frame_size (), 0) == 0;
4539 }
4540 
4541 rtx
4542 hppa_pic_save_rtx (void)
4543 {
4544   return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4545 }
4546 
/* Default NO_DEFERRED_PROFILE_COUNTERS to 0 when nothing has defined it
   already.  hppa_profile_hook records a counter in FUNCDEF_NOS only when
   it is zero.  */
4547 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4548 #define NO_DEFERRED_PROFILE_COUNTERS 0
4549 #endif
4550 
4551 
4552 /* Vector of funcdef numbers.  hppa_profile_hook pushes a label number for
   each deferred counter; output_deferred_profile_counters emits one "LP"
   label per entry and then releases the vector.  */
4553 static vec<int> funcdef_nos;
4554 
4555 /* Output deferred profile counters.  */
4556 static void
4557 output_deferred_profile_counters (void)
4558 {
4559   unsigned int i;
4560   int align, n;
4561 
4562   if (funcdef_nos.is_empty ())
4563    return;
4564 
4565   switch_to_section (data_section);
4566   align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4567   ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4568 
4569   for (i = 0; funcdef_nos.iterate (i, &n); i++)
4570     {
4571       targetm.asm_out.internal_label (asm_out_file, "LP", n);
4572       assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4573     }
4574 
4575   funcdef_nos.release ();
4576 }
4577 
4578 void
4579 hppa_profile_hook (int label_no)
4580 {
4581   rtx_code_label *label_rtx = gen_label_rtx ();
4582   int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4583   rtx arg_bytes, begin_label_rtx, mcount, sym;
4584   rtx_insn *call_insn;
4585   char begin_label_name[16];
4586   bool use_mcount_pcrel_call;
4587 
4588   /* Set up call destination.  */
4589   sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4590   pa_encode_label (sym);
4591   mcount = gen_rtx_MEM (Pmode, sym);
4592 
4593   /* If we can reach _mcount with a pc-relative call, we can optimize
4594      loading the address of the current function.  This requires linker
4595      long branch stub support.  */
4596   if (!TARGET_PORTABLE_RUNTIME
4597       && !TARGET_LONG_CALLS
4598       && (TARGET_SOM || flag_function_sections))
4599     use_mcount_pcrel_call = TRUE;
4600   else
4601     use_mcount_pcrel_call = FALSE;
4602 
4603   ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4604 			       label_no);
4605   begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4606 
4607   emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4608 
4609   if (!use_mcount_pcrel_call)
4610     {
4611       /* The address of the function is loaded into %r25 with an instruction-
4612 	 relative sequence that avoids the use of relocations.  We use SImode
4613 	 for the address of the function in both 32 and 64-bit code to avoid
4614 	 having to provide DImode versions of the lcla2 pattern.  */
4615       if (TARGET_PA_20)
4616 	emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4617       else
4618 	emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4619     }
4620 
4621   if (!NO_DEFERRED_PROFILE_COUNTERS)
4622     {
4623       rtx count_label_rtx, addr, r24;
4624       char count_label_name[16];
4625 
4626       funcdef_nos.safe_push (label_no);
4627       ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4628       count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4629 					    ggc_strdup (count_label_name));
4630 
4631       addr = force_reg (Pmode, count_label_rtx);
4632       r24 = gen_rtx_REG (Pmode, 24);
4633       emit_move_insn (r24, addr);
4634 
4635       arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4636       if (use_mcount_pcrel_call)
4637 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4638 						     begin_label_rtx));
4639       else
4640 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4641 
4642       use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4643     }
4644   else
4645     {
4646       arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4647       if (use_mcount_pcrel_call)
4648 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4649 						     begin_label_rtx));
4650       else
4651 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4652     }
4653 
4654   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4655   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4656 
4657   /* Indicate the _mcount call cannot throw, nor will it execute a
4658      non-local goto.  */
4659   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4660 
4661   /* Allocate space for fixed arguments.  */
4662   if (reg_parm_stack_space > crtl->outgoing_args_size)
4663     crtl->outgoing_args_size = reg_parm_stack_space;
4664 }
4665 
4666 /* Fetch the return address for the frame COUNT steps up from
4667    the current frame, after the prologue.  FRAMEADDR is the
4668    frame pointer of the COUNT frame.
4669 
4670    We want to ignore any export stub remnants here.  To handle this,
4671    we examine the code at the return address, and if it is an export
4672    stub, we return a memory rtx for the stub return address stored
4673    at frame-24.
4674 
4675    The value returned is used in two different ways:
4676 
4677 	1. To find a function's caller.
4678 
4679 	2. To change the return address for a function.
4680 
4681    This function handles most instances of case 1; however, it will
4682    fail if there are two levels of stubs to execute on the return
4683    path.  The only way I believe that can happen is if the return value
4684    needs a parameter relocation, which never happens for C code.
4685 
4686    This function handles most instances of case 2; however, it will
4687    fail if we did not originally have stub code on the return path
4688    but will need stub code on the new return path.  This can happen if
4689    the caller & callee are both in the main program, but the new
4690    return location is in a shared library.  */
4691 
4692 rtx
4693 pa_return_addr_rtx (int count, rtx frameaddr)
4694 {
4695   rtx label;
4696   rtx rp;
4697   rtx saved_rp;
4698   rtx ins;
4699 
4700   /* The instruction stream at the return address of a PA1.X export stub is:
4701 
4702 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4703 	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4704 	0x00011820 | stub+16:  mtsp r1,sr0
4705 	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4706 
4707      0xe0400002 must be specified as -532676606 so that it won't be
4708      rejected as an invalid immediate operand on 64-bit hosts.
4709 
4710      The instruction stream at the return address of a PA2.0 export stub is:
4711 
4712 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4713 	0xe840d002 | stub+12:  bve,n (rp)
4714   */
4715 
4716   HOST_WIDE_INT insns[4];
4717   int i, len;
4718 
4719   if (count != 0)
4720     return NULL_RTX;
4721 
4722   rp = get_hard_reg_initial_val (Pmode, 2);
4723 
4724   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4725     return rp;
4726 
4727   /* If there is no export stub then just use the value saved from
4728      the return pointer register.  */
4729 
4730   saved_rp = gen_reg_rtx (Pmode);
4731   emit_move_insn (saved_rp, rp);
4732 
4733   /* Get pointer to the instruction stream.  We have to mask out the
4734      privilege level from the two low order bits of the return address
4735      pointer here so that ins will point to the start of the first
4736      instruction that would have been executed if we returned.  */
4737   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4738   label = gen_label_rtx ();
4739 
4740   if (TARGET_PA_20)
4741     {
4742       insns[0] = 0x4bc23fd1;
4743       insns[1] = -398405630;
4744       len = 2;
4745     }
4746   else
4747     {
4748       insns[0] = 0x4bc23fd1;
4749       insns[1] = 0x004010a1;
4750       insns[2] = 0x00011820;
4751       insns[3] = -532676606;
4752       len = 4;
4753     }
4754 
4755   /* Check the instruction stream at the normal return address for the
4756      export stub.  If it is an export stub, than our return address is
4757      really in -24[frameaddr].  */
4758 
4759   for (i = 0; i < len; i++)
4760     {
4761       rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4762       rtx op1 = GEN_INT (insns[i]);
4763       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4764     }
4765 
4766   /* Here we know that our return address points to an export
4767      stub.  We don't want to return the address of the export stub,
4768      but rather the return address of the export stub.  That return
4769      address is stored at -24[frameaddr].  */
4770 
4771   emit_move_insn (saved_rp,
4772 		  gen_rtx_MEM (Pmode,
4773 			       memory_address (Pmode,
4774 					       plus_constant (Pmode, frameaddr,
4775 							      -24))));
4776 
4777   emit_label (label);
4778 
4779   return saved_rp;
4780 }
4781 
4782 void
4783 pa_emit_bcond_fp (rtx operands[])
4784 {
4785   enum rtx_code code = GET_CODE (operands[0]);
4786   rtx operand0 = operands[1];
4787   rtx operand1 = operands[2];
4788   rtx label = operands[3];
4789 
4790   emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4791 		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4792 
4793   emit_jump_insn (gen_rtx_SET (pc_rtx,
4794 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4795 						     gen_rtx_fmt_ee (NE,
4796 							      VOIDmode,
4797 							      gen_rtx_REG (CCFPmode, 0),
4798 							      const0_rtx),
4799 						     gen_rtx_LABEL_REF (VOIDmode, label),
4800 						     pc_rtx)));
4801 
4802 }
4803 
4804 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4805    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4806 
4807 static int
4808 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4809 		unsigned int)
4810 {
4811   enum attr_type attr_type;
4812 
4813   /* Don't adjust costs for a pa8000 chip, also do not adjust any
4814      true dependencies as they are described with bypasses now.  */
4815   if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4816     return cost;
4817 
4818   if (! recog_memoized (insn))
4819     return 0;
4820 
4821   attr_type = get_attr_type (insn);
4822 
4823   switch (dep_type)
4824     {
4825     case REG_DEP_ANTI:
4826       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4827 	 cycles later.  */
4828 
4829       if (attr_type == TYPE_FPLOAD)
4830 	{
4831 	  rtx pat = PATTERN (insn);
4832 	  rtx dep_pat = PATTERN (dep_insn);
4833 	  if (GET_CODE (pat) == PARALLEL)
4834 	    {
4835 	      /* This happens for the fldXs,mb patterns.  */
4836 	      pat = XVECEXP (pat, 0, 0);
4837 	    }
4838 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4839 	    /* If this happens, we have to extend this to schedule
4840 	       optimally.  Return 0 for now.  */
4841 	  return 0;
4842 
4843 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4844 	    {
4845 	      if (! recog_memoized (dep_insn))
4846 		return 0;
4847 	      switch (get_attr_type (dep_insn))
4848 		{
4849 		case TYPE_FPALU:
4850 		case TYPE_FPMULSGL:
4851 		case TYPE_FPMULDBL:
4852 		case TYPE_FPDIVSGL:
4853 		case TYPE_FPDIVDBL:
4854 		case TYPE_FPSQRTSGL:
4855 		case TYPE_FPSQRTDBL:
4856 		  /* A fpload can't be issued until one cycle before a
4857 		     preceding arithmetic operation has finished if
4858 		     the target of the fpload is any of the sources
4859 		     (or destination) of the arithmetic operation.  */
4860 		  return insn_default_latency (dep_insn) - 1;
4861 
4862 		default:
4863 		  return 0;
4864 		}
4865 	    }
4866 	}
4867       else if (attr_type == TYPE_FPALU)
4868 	{
4869 	  rtx pat = PATTERN (insn);
4870 	  rtx dep_pat = PATTERN (dep_insn);
4871 	  if (GET_CODE (pat) == PARALLEL)
4872 	    {
4873 	      /* This happens for the fldXs,mb patterns.  */
4874 	      pat = XVECEXP (pat, 0, 0);
4875 	    }
4876 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4877 	    /* If this happens, we have to extend this to schedule
4878 	       optimally.  Return 0 for now.  */
4879 	  return 0;
4880 
4881 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4882 	    {
4883 	      if (! recog_memoized (dep_insn))
4884 		return 0;
4885 	      switch (get_attr_type (dep_insn))
4886 		{
4887 		case TYPE_FPDIVSGL:
4888 		case TYPE_FPDIVDBL:
4889 		case TYPE_FPSQRTSGL:
4890 		case TYPE_FPSQRTDBL:
4891 		  /* An ALU flop can't be issued until two cycles before a
4892 		     preceding divide or sqrt operation has finished if
4893 		     the target of the ALU flop is any of the sources
4894 		     (or destination) of the divide or sqrt operation.  */
4895 		  return insn_default_latency (dep_insn) - 2;
4896 
4897 		default:
4898 		  return 0;
4899 		}
4900 	    }
4901 	}
4902 
4903       /* For other anti dependencies, the cost is 0.  */
4904       return 0;
4905 
4906     case REG_DEP_OUTPUT:
4907       /* Output dependency; DEP_INSN writes a register that INSN writes some
4908 	 cycles later.  */
4909       if (attr_type == TYPE_FPLOAD)
4910 	{
4911 	  rtx pat = PATTERN (insn);
4912 	  rtx dep_pat = PATTERN (dep_insn);
4913 	  if (GET_CODE (pat) == PARALLEL)
4914 	    {
4915 	      /* This happens for the fldXs,mb patterns.  */
4916 	      pat = XVECEXP (pat, 0, 0);
4917 	    }
4918 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4919 	    /* If this happens, we have to extend this to schedule
4920 	       optimally.  Return 0 for now.  */
4921 	  return 0;
4922 
4923 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4924 	    {
4925 	      if (! recog_memoized (dep_insn))
4926 		return 0;
4927 	      switch (get_attr_type (dep_insn))
4928 		{
4929 		case TYPE_FPALU:
4930 		case TYPE_FPMULSGL:
4931 		case TYPE_FPMULDBL:
4932 		case TYPE_FPDIVSGL:
4933 		case TYPE_FPDIVDBL:
4934 		case TYPE_FPSQRTSGL:
4935 		case TYPE_FPSQRTDBL:
4936 		  /* A fpload can't be issued until one cycle before a
4937 		     preceding arithmetic operation has finished if
4938 		     the target of the fpload is the destination of the
4939 		     arithmetic operation.
4940 
4941 		     Exception: For PA7100LC, PA7200 and PA7300, the cost
4942 		     is 3 cycles, unless they bundle together.   We also
4943 		     pay the penalty if the second insn is a fpload.  */
4944 		  return insn_default_latency (dep_insn) - 1;
4945 
4946 		default:
4947 		  return 0;
4948 		}
4949 	    }
4950 	}
4951       else if (attr_type == TYPE_FPALU)
4952 	{
4953 	  rtx pat = PATTERN (insn);
4954 	  rtx dep_pat = PATTERN (dep_insn);
4955 	  if (GET_CODE (pat) == PARALLEL)
4956 	    {
4957 	      /* This happens for the fldXs,mb patterns.  */
4958 	      pat = XVECEXP (pat, 0, 0);
4959 	    }
4960 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4961 	    /* If this happens, we have to extend this to schedule
4962 	       optimally.  Return 0 for now.  */
4963 	  return 0;
4964 
4965 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4966 	    {
4967 	      if (! recog_memoized (dep_insn))
4968 		return 0;
4969 	      switch (get_attr_type (dep_insn))
4970 		{
4971 		case TYPE_FPDIVSGL:
4972 		case TYPE_FPDIVDBL:
4973 		case TYPE_FPSQRTSGL:
4974 		case TYPE_FPSQRTDBL:
4975 		  /* An ALU flop can't be issued until two cycles before a
4976 		     preceding divide or sqrt operation has finished if
4977 		     the target of the ALU flop is also the target of
4978 		     the divide or sqrt operation.  */
4979 		  return insn_default_latency (dep_insn) - 2;
4980 
4981 		default:
4982 		  return 0;
4983 		}
4984 	    }
4985 	}
4986 
4987       /* For other output dependencies, the cost is 0.  */
4988       return 0;
4989 
4990     default:
4991       gcc_unreachable ();
4992     }
4993 }
4994 
4995 /* The 700 can only issue a single insn at a time.
4996    The 7XXX processors can issue two insns at a time.
4997    The 8000 can issue 4 insns at a time.  */
4998 static int
4999 pa_issue_rate (void)
5000 {
5001   switch (pa_cpu)
5002     {
5003     case PROCESSOR_700:		return 1;
5004     case PROCESSOR_7100:	return 2;
5005     case PROCESSOR_7100LC:	return 2;
5006     case PROCESSOR_7200:	return 2;
5007     case PROCESSOR_7300:	return 2;
5008     case PROCESSOR_8000:	return 4;
5009 
5010     default:
5011       gcc_unreachable ();
5012     }
5013 }
5014 
5015 
5016 
5017 /* Return any length plus adjustment needed by INSN which already has
5018    its length computed as LENGTH.   Return LENGTH if no adjustment is
5019    necessary.
5020 
5021    Also compute the length of an inline block move here as it is too
5022    complicated to express as a length attribute in pa.md.  */
5023 int
5024 pa_adjust_insn_length (rtx_insn *insn, int length)
5025 {
5026   rtx pat = PATTERN (insn);
5027 
5028   /* If length is negative or undefined, provide initial length.  */
5029   if ((unsigned int) length >= INT_MAX)
5030     {
5031       if (GET_CODE (pat) == SEQUENCE)
5032 	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5033 
5034       switch (get_attr_type (insn))
5035 	{
5036 	case TYPE_MILLI:
5037 	  length = pa_attr_length_millicode_call (insn);
5038 	  break;
5039 	case TYPE_CALL:
5040 	  length = pa_attr_length_call (insn, 0);
5041 	  break;
5042 	case TYPE_SIBCALL:
5043 	  length = pa_attr_length_call (insn, 1);
5044 	  break;
5045 	case TYPE_DYNCALL:
5046 	  length = pa_attr_length_indirect_call (insn);
5047 	  break;
5048 	case TYPE_SH_FUNC_ADRS:
5049 	  length = pa_attr_length_millicode_call (insn) + 20;
5050 	  break;
5051 	default:
5052 	  gcc_unreachable ();
5053 	}
5054     }
5055 
5056   /* Block move pattern.  */
5057   if (NONJUMP_INSN_P (insn)
5058       && GET_CODE (pat) == PARALLEL
5059       && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5060       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5061       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5062       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5063       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5064     length += compute_movmem_length (insn) - 4;
5065   /* Block clear pattern.  */
5066   else if (NONJUMP_INSN_P (insn)
5067 	   && GET_CODE (pat) == PARALLEL
5068 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5069 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5070 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5071 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5072     length += compute_clrmem_length (insn) - 4;
5073   /* Conditional branch with an unfilled delay slot.  */
5074   else if (JUMP_P (insn) && ! simplejump_p (insn))
5075     {
5076       /* Adjust a short backwards conditional with an unfilled delay slot.  */
5077       if (GET_CODE (pat) == SET
5078 	  && length == 4
5079 	  && JUMP_LABEL (insn) != NULL_RTX
5080 	  && ! forward_branch_p (insn))
5081 	length += 4;
5082       else if (GET_CODE (pat) == PARALLEL
5083 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5084 	       && length == 4)
5085 	length += 4;
5086       /* Adjust dbra insn with short backwards conditional branch with
5087 	 unfilled delay slot -- only for case where counter is in a
5088 	 general register register.  */
5089       else if (GET_CODE (pat) == PARALLEL
5090 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5091 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5092  	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5093 	       && length == 4
5094 	       && ! forward_branch_p (insn))
5095 	length += 4;
5096     }
5097   return length;
5098 }
5099 
/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  Return true
   for the punctuation characters '@', '#', '*' and '^', and false for
   every other CODE.  */

static bool
pa_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '@':
    case '#':
    case '*':
    case '^':
      return true;

    default:
      return false;
    }
}
5113 
5114 /* Print operand X (an rtx) in assembler syntax to file FILE.
5115    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5116    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5117 
5118 void
5119 pa_print_operand (FILE *file, rtx x, int code)
5120 {
5121   switch (code)
5122     {
5123     case '#':
5124       /* Output a 'nop' if there's nothing for the delay slot.  */
5125       if (dbr_sequence_length () == 0)
5126 	fputs ("\n\tnop", file);
5127       return;
5128     case '*':
5129       /* Output a nullification completer if there's nothing for the */
5130       /* delay slot or nullification is requested.  */
5131       if (dbr_sequence_length () == 0 ||
5132 	  (final_sequence &&
5133 	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5134         fputs (",n", file);
5135       return;
5136     case 'R':
5137       /* Print out the second register name of a register pair.
5138 	 I.e., R (6) => 7.  */
5139       fputs (reg_names[REGNO (x) + 1], file);
5140       return;
5141     case 'r':
5142       /* A register or zero.  */
5143       if (x == const0_rtx
5144 	  || (x == CONST0_RTX (DFmode))
5145 	  || (x == CONST0_RTX (SFmode)))
5146 	{
5147 	  fputs ("%r0", file);
5148 	  return;
5149 	}
5150       else
5151 	break;
5152     case 'f':
5153       /* A register or zero (floating point).  */
5154       if (x == const0_rtx
5155 	  || (x == CONST0_RTX (DFmode))
5156 	  || (x == CONST0_RTX (SFmode)))
5157 	{
5158 	  fputs ("%fr0", file);
5159 	  return;
5160 	}
5161       else
5162 	break;
5163     case 'A':
5164       {
5165 	rtx xoperands[2];
5166 
5167 	xoperands[0] = XEXP (XEXP (x, 0), 0);
5168 	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5169 	pa_output_global_address (file, xoperands[1], 0);
5170         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5171 	return;
5172       }
5173 
5174     case 'C':			/* Plain (C)ondition */
5175     case 'X':
5176       switch (GET_CODE (x))
5177 	{
5178 	case EQ:
5179 	  fputs ("=", file);  break;
5180 	case NE:
5181 	  fputs ("<>", file);  break;
5182 	case GT:
5183 	  fputs (">", file);  break;
5184 	case GE:
5185 	  fputs (">=", file);  break;
5186 	case GEU:
5187 	  fputs (">>=", file);  break;
5188 	case GTU:
5189 	  fputs (">>", file);  break;
5190 	case LT:
5191 	  fputs ("<", file);  break;
5192 	case LE:
5193 	  fputs ("<=", file);  break;
5194 	case LEU:
5195 	  fputs ("<<=", file);  break;
5196 	case LTU:
5197 	  fputs ("<<", file);  break;
5198 	default:
5199 	  gcc_unreachable ();
5200 	}
5201       return;
5202     case 'N':			/* Condition, (N)egated */
5203       switch (GET_CODE (x))
5204 	{
5205 	case EQ:
5206 	  fputs ("<>", file);  break;
5207 	case NE:
5208 	  fputs ("=", file);  break;
5209 	case GT:
5210 	  fputs ("<=", file);  break;
5211 	case GE:
5212 	  fputs ("<", file);  break;
5213 	case GEU:
5214 	  fputs ("<<", file);  break;
5215 	case GTU:
5216 	  fputs ("<<=", file);  break;
5217 	case LT:
5218 	  fputs (">=", file);  break;
5219 	case LE:
5220 	  fputs (">", file);  break;
5221 	case LEU:
5222 	  fputs (">>", file);  break;
5223 	case LTU:
5224 	  fputs (">>=", file);  break;
5225 	default:
5226 	  gcc_unreachable ();
5227 	}
5228       return;
5229     /* For floating point comparisons.  Note that the output
5230        predicates are the complement of the desired mode.  The
5231        conditions for GT, GE, LT, LE and LTGT cause an invalid
5232        operation exception if the result is unordered and this
5233        exception is enabled in the floating-point status register.  */
5234     case 'Y':
5235       switch (GET_CODE (x))
5236 	{
5237 	case EQ:
5238 	  fputs ("!=", file);  break;
5239 	case NE:
5240 	  fputs ("=", file);  break;
5241 	case GT:
5242 	  fputs ("!>", file);  break;
5243 	case GE:
5244 	  fputs ("!>=", file);  break;
5245 	case LT:
5246 	  fputs ("!<", file);  break;
5247 	case LE:
5248 	  fputs ("!<=", file);  break;
5249 	case LTGT:
5250 	  fputs ("!<>", file);  break;
5251 	case UNLE:
5252 	  fputs ("!?<=", file);  break;
5253 	case UNLT:
5254 	  fputs ("!?<", file);  break;
5255 	case UNGE:
5256 	  fputs ("!?>=", file);  break;
5257 	case UNGT:
5258 	  fputs ("!?>", file);  break;
5259 	case UNEQ:
5260 	  fputs ("!?=", file);  break;
5261 	case UNORDERED:
5262 	  fputs ("!?", file);  break;
5263 	case ORDERED:
5264 	  fputs ("?", file);  break;
5265 	default:
5266 	  gcc_unreachable ();
5267 	}
5268       return;
5269     case 'S':			/* Condition, operands are (S)wapped.  */
5270       switch (GET_CODE (x))
5271 	{
5272 	case EQ:
5273 	  fputs ("=", file);  break;
5274 	case NE:
5275 	  fputs ("<>", file);  break;
5276 	case GT:
5277 	  fputs ("<", file);  break;
5278 	case GE:
5279 	  fputs ("<=", file);  break;
5280 	case GEU:
5281 	  fputs ("<<=", file);  break;
5282 	case GTU:
5283 	  fputs ("<<", file);  break;
5284 	case LT:
5285 	  fputs (">", file);  break;
5286 	case LE:
5287 	  fputs (">=", file);  break;
5288 	case LEU:
5289 	  fputs (">>=", file);  break;
5290 	case LTU:
5291 	  fputs (">>", file);  break;
5292 	default:
5293 	  gcc_unreachable ();
5294 	}
5295       return;
5296     case 'B':			/* Condition, (B)oth swapped and negate.  */
5297       switch (GET_CODE (x))
5298 	{
5299 	case EQ:
5300 	  fputs ("<>", file);  break;
5301 	case NE:
5302 	  fputs ("=", file);  break;
5303 	case GT:
5304 	  fputs (">=", file);  break;
5305 	case GE:
5306 	  fputs (">", file);  break;
5307 	case GEU:
5308 	  fputs (">>", file);  break;
5309 	case GTU:
5310 	  fputs (">>=", file);  break;
5311 	case LT:
5312 	  fputs ("<=", file);  break;
5313 	case LE:
5314 	  fputs ("<", file);  break;
5315 	case LEU:
5316 	  fputs ("<<", file);  break;
5317 	case LTU:
5318 	  fputs ("<<=", file);  break;
5319 	default:
5320 	  gcc_unreachable ();
5321 	}
5322       return;
5323     case 'k':
5324       gcc_assert (GET_CODE (x) == CONST_INT);
5325       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5326       return;
5327     case 'Q':
5328       gcc_assert (GET_CODE (x) == CONST_INT);
5329       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5330       return;
5331     case 'L':
5332       gcc_assert (GET_CODE (x) == CONST_INT);
5333       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5334       return;
5335     case 'o':
5336       gcc_assert (GET_CODE (x) == CONST_INT
5337 		  && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5338       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5339       return;
5340     case 'O':
5341       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5342       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5343       return;
5344     case 'p':
5345       gcc_assert (GET_CODE (x) == CONST_INT);
5346       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5347       return;
5348     case 'P':
5349       gcc_assert (GET_CODE (x) == CONST_INT);
5350       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5351       return;
5352     case 'I':
5353       if (GET_CODE (x) == CONST_INT)
5354 	fputs ("i", file);
5355       return;
5356     case 'M':
5357     case 'F':
5358       switch (GET_CODE (XEXP (x, 0)))
5359 	{
5360 	case PRE_DEC:
5361 	case PRE_INC:
5362 	  if (ASSEMBLER_DIALECT == 0)
5363 	    fputs ("s,mb", file);
5364 	  else
5365 	    fputs (",mb", file);
5366 	  break;
5367 	case POST_DEC:
5368 	case POST_INC:
5369 	  if (ASSEMBLER_DIALECT == 0)
5370 	    fputs ("s,ma", file);
5371 	  else
5372 	    fputs (",ma", file);
5373 	  break;
5374 	case PLUS:
5375 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5376 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5377 	    {
5378 	      if (ASSEMBLER_DIALECT == 0)
5379 		fputs ("x", file);
5380 	    }
5381 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5382 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5383 	    {
5384 	      if (ASSEMBLER_DIALECT == 0)
5385 		fputs ("x,s", file);
5386 	      else
5387 		fputs (",s", file);
5388 	    }
5389 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5390 	    fputs ("s", file);
5391 	  break;
5392 	default:
5393 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5394 	    fputs ("s", file);
5395 	  break;
5396 	}
5397       return;
5398     case 'G':
5399       pa_output_global_address (file, x, 0);
5400       return;
5401     case 'H':
5402       pa_output_global_address (file, x, 1);
5403       return;
5404     case 0:			/* Don't do anything special */
5405       break;
5406     case 'Z':
5407       {
5408 	unsigned op[3];
5409 	compute_zdepwi_operands (INTVAL (x), op);
5410 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5411 	return;
5412       }
5413     case 'z':
5414       {
5415 	unsigned op[3];
5416 	compute_zdepdi_operands (INTVAL (x), op);
5417 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5418 	return;
5419       }
5420     case 'c':
5421       /* We can get here from a .vtable_inherit due to our
5422 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5423 	 addresses.  */
5424       break;
5425     default:
5426       gcc_unreachable ();
5427     }
5428   if (GET_CODE (x) == REG)
5429     {
5430       fputs (reg_names [REGNO (x)], file);
5431       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5432 	{
5433 	  fputs ("R", file);
5434 	  return;
5435 	}
5436       if (FP_REG_P (x)
5437 	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5438 	  && (REGNO (x) & 1) == 0)
5439 	fputs ("L", file);
5440     }
5441   else if (GET_CODE (x) == MEM)
5442     {
5443       int size = GET_MODE_SIZE (GET_MODE (x));
5444       rtx base = NULL_RTX;
5445       switch (GET_CODE (XEXP (x, 0)))
5446 	{
5447 	case PRE_DEC:
5448 	case POST_DEC:
5449           base = XEXP (XEXP (x, 0), 0);
5450 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5451 	  break;
5452 	case PRE_INC:
5453 	case POST_INC:
5454           base = XEXP (XEXP (x, 0), 0);
5455 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5456 	  break;
5457 	case PLUS:
5458 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5459 	    fprintf (file, "%s(%s)",
5460 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5461 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5462 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5463 	    fprintf (file, "%s(%s)",
5464 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5465 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5466 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5467 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5468 	    {
5469 	      /* Because the REG_POINTER flag can get lost during reload,
5470 		 pa_legitimate_address_p canonicalizes the order of the
5471 		 index and base registers in the combined move patterns.  */
5472 	      rtx base = XEXP (XEXP (x, 0), 1);
5473 	      rtx index = XEXP (XEXP (x, 0), 0);
5474 
5475 	      fprintf (file, "%s(%s)",
5476 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5477 	    }
5478 	  else
5479 	    output_address (GET_MODE (x), XEXP (x, 0));
5480 	  break;
5481 	default:
5482 	  output_address (GET_MODE (x), XEXP (x, 0));
5483 	  break;
5484 	}
5485     }
5486   else
5487     output_addr_const (file, x);
5488 }
5489 
5490 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5491 
5492 void
5493 pa_output_global_address (FILE *file, rtx x, int round_constant)
5494 {
5495 
5496   /* Imagine  (high (const (plus ...))).  */
5497   if (GET_CODE (x) == HIGH)
5498     x = XEXP (x, 0);
5499 
5500   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5501     output_addr_const (file, x);
5502   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5503     {
5504       output_addr_const (file, x);
5505       fputs ("-$global$", file);
5506     }
5507   else if (GET_CODE (x) == CONST)
5508     {
5509       const char *sep = "";
5510       int offset = 0;		/* assembler wants -$global$ at end */
5511       rtx base = NULL_RTX;
5512 
5513       switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5514 	{
5515 	case LABEL_REF:
5516 	case SYMBOL_REF:
5517 	  base = XEXP (XEXP (x, 0), 0);
5518 	  output_addr_const (file, base);
5519 	  break;
5520 	case CONST_INT:
5521 	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5522 	  break;
5523 	default:
5524 	  gcc_unreachable ();
5525 	}
5526 
5527       switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5528 	{
5529 	case LABEL_REF:
5530 	case SYMBOL_REF:
5531 	  base = XEXP (XEXP (x, 0), 1);
5532 	  output_addr_const (file, base);
5533 	  break;
5534 	case CONST_INT:
5535 	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5536 	  break;
5537 	default:
5538 	  gcc_unreachable ();
5539 	}
5540 
5541       /* How bogus.  The compiler is apparently responsible for
5542 	 rounding the constant if it uses an LR field selector.
5543 
5544 	 The linker and/or assembler seem a better place since
5545 	 they have to do this kind of thing already.
5546 
5547 	 If we fail to do this, HP's optimizing linker may eliminate
5548 	 an addil, but not update the ldw/stw/ldo instruction that
5549 	 uses the result of the addil.  */
5550       if (round_constant)
5551 	offset = ((offset + 0x1000) & ~0x1fff);
5552 
5553       switch (GET_CODE (XEXP (x, 0)))
5554 	{
5555 	case PLUS:
5556 	  if (offset < 0)
5557 	    {
5558 	      offset = -offset;
5559 	      sep = "-";
5560 	    }
5561 	  else
5562 	    sep = "+";
5563 	  break;
5564 
5565 	case MINUS:
5566 	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5567 	  sep = "-";
5568 	  break;
5569 
5570 	default:
5571 	  gcc_unreachable ();
5572 	}
5573 
5574       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5575 	fputs ("-$global$", file);
5576       if (offset)
5577 	fprintf (file, "%s%d", sep, offset);
5578     }
5579   else
5580     output_addr_const (file, x);
5581 }
5582 
5583 /* Output boilerplate text to appear at the beginning of the file.
5584    There are several possible versions.  */
5585 #define aputs(x) fputs(x, asm_out_file)
5586 static inline void
5587 pa_file_start_level (void)
5588 {
5589   if (TARGET_64BIT)
5590     aputs ("\t.LEVEL 2.0w\n");
5591   else if (TARGET_PA_20)
5592     aputs ("\t.LEVEL 2.0\n");
5593   else if (TARGET_PA_11)
5594     aputs ("\t.LEVEL 1.1\n");
5595   else
5596     aputs ("\t.LEVEL 1.0\n");
5597 }
5598 
5599 static inline void
5600 pa_file_start_space (int sortspace)
5601 {
5602   aputs ("\t.SPACE $PRIVATE$");
5603   if (sortspace)
5604     aputs (",SORT=16");
5605   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5606   if (flag_tm)
5607     aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5608   aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5609 	 "\n\t.SPACE $TEXT$");
5610   if (sortspace)
5611     aputs (",SORT=8");
5612   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5613 	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5614 }
5615 
5616 static inline void
5617 pa_file_start_file (int want_version)
5618 {
5619   if (write_symbols != NO_DEBUG)
5620     {
5621       output_file_directive (asm_out_file, main_input_filename);
5622       if (want_version)
5623 	aputs ("\t.version\t\"01.01\"\n");
5624     }
5625 }
5626 
5627 static inline void
5628 pa_file_start_mcount (const char *aswhat)
5629 {
5630   if (profile_flag)
5631     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5632 }
5633 
/* File-start variant that writes, in order: the .LEVEL directive, the
   _mcount import (as ENTRY, only when profiling) and the file
   directive without a .version line.  Presumably used for the ELF
   targets, going by its name.  */
static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}
5641 
/* File-start variant that writes, in order: the .LEVEL directive, the
   space/subspace declarations without SORT keys, imports of $global$
   and the $$dyncall millicode routine, the _mcount import (as CODE,
   only when profiling) and the file directive without a .version
   line.  Presumably used for the SOM targets, going by its name.  */
static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n"
         "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5652 
/* File-start variant that writes the file directive first (without a
   .version line), then the .LEVEL directive and the _mcount import
   (as CODE, only when profiling).  Presumably used for the Linux
   targets, going by its name.  */
static void
pa_linux_file_start (void)
{
  pa_file_start_file (0);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}
5660 
/* File-start variant that writes the .LEVEL directive, then, when
   profiling and ASM_OUTPUT_TYPE_DIRECTIVE is available, a type
   directive declaring _mcount as a function, and finally the file
   directive including the .version line.  Presumably used for 64-bit
   HP-UX with GAS, going by its name.  */
static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}
5671 
/* File-start variant that writes, in order: the .LEVEL directive, the
   space/subspace declarations with SORT keys, the _mcount import (as
   CODE, only when profiling) and the file directive without a
   .version line.  Presumably used for 64-bit HP-UX with the HP
   assembler, going by its name.  */
static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5680 #undef aputs
5681 
5682 /* Search the deferred plabel list for SYMBOL and return its internal
5683    label.  If an entry for SYMBOL is not found, a new entry is created.  */
5684 
5685 rtx
5686 pa_get_deferred_plabel (rtx symbol)
5687 {
5688   const char *fname = XSTR (symbol, 0);
5689   size_t i;
5690 
5691   /* See if we have already put this function on the list of deferred
5692      plabels.  This list is generally small, so a liner search is not
5693      too ugly.  If it proves too slow replace it with something faster.  */
5694   for (i = 0; i < n_deferred_plabels; i++)
5695     if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5696       break;
5697 
5698   /* If the deferred plabel list is empty, or this entry was not found
5699      on the list, create a new entry on the list.  */
5700   if (deferred_plabels == NULL || i == n_deferred_plabels)
5701     {
5702       tree id;
5703 
5704       if (deferred_plabels == 0)
5705 	deferred_plabels =  ggc_alloc<deferred_plabel> ();
5706       else
5707         deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5708                                           deferred_plabels,
5709                                           n_deferred_plabels + 1);
5710 
5711       i = n_deferred_plabels++;
5712       deferred_plabels[i].internal_label = gen_label_rtx ();
5713       deferred_plabels[i].symbol = symbol;
5714 
5715       /* Gross.  We have just implicitly taken the address of this
5716 	 function.  Mark it in the same manner as assemble_name.  */
5717       id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5718       if (id)
5719 	mark_referenced (id);
5720     }
5721 
5722   return deferred_plabels[i].internal_label;
5723 }
5724 
5725 static void
5726 output_deferred_plabels (void)
5727 {
5728   size_t i;
5729 
5730   /* If we have some deferred plabels, then we need to switch into the
5731      data or readonly data section, and align it to a 4 byte boundary
5732      before outputting the deferred plabels.  */
5733   if (n_deferred_plabels)
5734     {
5735       switch_to_section (flag_pic ? data_section : readonly_data_section);
5736       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5737     }
5738 
5739   /* Now output the deferred plabels.  */
5740   for (i = 0; i < n_deferred_plabels; i++)
5741     {
5742       targetm.asm_out.internal_label (asm_out_file, "L",
5743 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5744       assemble_integer (deferred_plabels[i].symbol,
5745 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5746     }
5747 }
5748 
/* Initialize optabs to point to emulation routines.

   When HPUX_LONG_DOUBLE_LIBRARY is set, the TFmode arithmetic,
   comparison and conversion optabs are pointed at the "_U_Q..."
   library routines named below.  Independently of that, when
   TARGET_SYNC_LIBCALL is set, the sync libfuncs are initialized with
   an argument of 8.  */

static void
pa_init_libfuncs (void)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      /* TFmode arithmetic.
	 NOTE(review): every other routine here uses the "_U_Qf"
	 prefix; "_U_Qmin" may be a typo for "_U_Qfmin" -- confirm
	 against the HP-UX library before changing it.  */
      set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
      set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
      set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
      set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
      set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
      set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
      set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
      set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
      set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

      /* TFmode comparisons.  */
      set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
      set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
      set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
      set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
      set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
      set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
      set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");

      /* Conversions between TFmode and the narrower float modes.  */
      set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");

      /* TFmode to integer.  The signed SImode routine has a different
	 name (two leading underscores) in the 64-bit case.  */
      set_conv_libfunc (sfix_optab, SImode, TFmode,
			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
				     : "_U_Qfcnvfxt_quad_to_sgl");
      set_conv_libfunc (sfix_optab, DImode, TFmode,
			"_U_Qfcnvfxt_quad_to_dbl");
      set_conv_libfunc (ufix_optab, SImode, TFmode,
			"_U_Qfcnvfxt_quad_to_usgl");
      set_conv_libfunc (ufix_optab, DImode, TFmode,
			"_U_Qfcnvfxt_quad_to_udbl");

      /* Integer to TFmode.  */
      set_conv_libfunc (sfloat_optab, TFmode, SImode,
			"_U_Qfcnvxf_sgl_to_quad");
      set_conv_libfunc (sfloat_optab, TFmode, DImode,
			"_U_Qfcnvxf_dbl_to_quad");
      set_conv_libfunc (ufloat_optab, TFmode, SImode,
			"_U_Qfcnvxf_usgl_to_quad");
      set_conv_libfunc (ufloat_optab, TFmode, DImode,
			"_U_Qfcnvxf_udbl_to_quad");
    }

  if (TARGET_SYNC_LIBCALL)
    init_sync_libfuncs (8);
}
5802 
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.

   IMPORTED holds one flag per millicode routine, indexed by the
   enumerator; END1000 only serves as the element count.  MILLI_NAMES
   gives the four-character routine name for each enumerator.
   IMPORT_STRING is the directive template; MILLI_START is the offset
   of its "...." placeholder (the length of ".IMPORT $$"), which is
   overwritten with the routine name.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
static char imported[(int) end1000];
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
static const char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10
5812 
5813 static void
5814 import_milli (enum millicodes code)
5815 {
5816   char str[sizeof (import_string)];
5817 
5818   if (!imported[(int) code])
5819     {
5820       imported[(int) code] = 1;
5821       strcpy (str, import_string);
5822       strncpy (str + MILLI_START, milli_names[(int) code], 4);
5823       output_asm_insn (str, 0);
5824     }
5825 }
5826 
5827 /* The register constraints have put the operands and return value in
5828    the proper registers.  */
5829 
5830 const char *
5831 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5832 {
5833   import_milli (mulI);
5834   return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5835 }
5836 
5837 /* Emit the rtl for doing a division by a constant.  */
5838 
/* Do magic division millicodes exist for this value?  Indexed by the
   divisor (0..15); the nonzero entries are 3, 5, 6, 7, 9, 10, 12, 14
   and 15.  */
const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  N is the divisor.  */

static int div_milli[16][2];
5847 
5848 int
5849 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5850 {
5851   if (GET_CODE (operands[2]) == CONST_INT
5852       && INTVAL (operands[2]) > 0
5853       && INTVAL (operands[2]) < 16
5854       && pa_magic_milli[INTVAL (operands[2])])
5855     {
5856       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5857 
5858       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5859       emit
5860 	(gen_rtx_PARALLEL
5861 	 (VOIDmode,
5862 	  gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5863 				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5864 						     SImode,
5865 						     gen_rtx_REG (SImode, 26),
5866 						     operands[2])),
5867 		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5868 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
5869 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5870 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5871 		     gen_rtx_CLOBBER (VOIDmode, ret))));
5872       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5873       return 1;
5874     }
5875   return 0;
5876 }
5877 
5878 const char *
5879 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5880 {
5881   HOST_WIDE_INT divisor;
5882 
5883   /* If the divisor is a constant, try to use one of the special
5884      opcodes .*/
5885   if (GET_CODE (operands[0]) == CONST_INT)
5886     {
5887       static char buf[100];
5888       divisor = INTVAL (operands[0]);
5889       if (!div_milli[divisor][unsignedp])
5890 	{
5891 	  div_milli[divisor][unsignedp] = 1;
5892 	  if (unsignedp)
5893 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5894 	  else
5895 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5896 	}
5897       if (unsignedp)
5898 	{
5899 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5900 		   INTVAL (operands[0]));
5901 	  return pa_output_millicode_call (insn,
5902 					   gen_rtx_SYMBOL_REF (SImode, buf));
5903 	}
5904       else
5905 	{
5906 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5907 		   INTVAL (operands[0]));
5908 	  return pa_output_millicode_call (insn,
5909 					   gen_rtx_SYMBOL_REF (SImode, buf));
5910 	}
5911     }
5912   /* Divisor isn't a special constant.  */
5913   else
5914     {
5915       if (unsignedp)
5916 	{
5917 	  import_milli (divU);
5918 	  return pa_output_millicode_call (insn,
5919 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5920 	}
5921       else
5922 	{
5923 	  import_milli (divI);
5924 	  return pa_output_millicode_call (insn,
5925 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5926 	}
5927     }
5928 }
5929 
5930 /* Output a $$rem millicode to do mod.  */
5931 
5932 const char *
5933 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5934 {
5935   if (unsignedp)
5936     {
5937       import_milli (remU);
5938       return pa_output_millicode_call (insn,
5939 				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5940     }
5941   else
5942     {
5943       import_milli (remI);
5944       return pa_output_millicode_call (insn,
5945 				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5946     }
5947 }
5948 
5949 void
5950 pa_output_arg_descriptor (rtx_insn *call_insn)
5951 {
5952   const char *arg_regs[4];
5953   machine_mode arg_mode;
5954   rtx link;
5955   int i, output_flag = 0;
5956   int regno;
5957 
5958   /* We neither need nor want argument location descriptors for the
5959      64bit runtime environment or the ELF32 environment.  */
5960   if (TARGET_64BIT || TARGET_ELF32)
5961     return;
5962 
5963   for (i = 0; i < 4; i++)
5964     arg_regs[i] = 0;
5965 
5966   /* Specify explicitly that no argument relocations should take place
5967      if using the portable runtime calling conventions.  */
5968   if (TARGET_PORTABLE_RUNTIME)
5969     {
5970       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5971 	     asm_out_file);
5972       return;
5973     }
5974 
5975   gcc_assert (CALL_P (call_insn));
5976   for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5977        link; link = XEXP (link, 1))
5978     {
5979       rtx use = XEXP (link, 0);
5980 
5981       if (! (GET_CODE (use) == USE
5982 	     && GET_CODE (XEXP (use, 0)) == REG
5983 	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5984 	continue;
5985 
5986       arg_mode = GET_MODE (XEXP (use, 0));
5987       regno = REGNO (XEXP (use, 0));
5988       if (regno >= 23 && regno <= 26)
5989 	{
5990 	  arg_regs[26 - regno] = "GR";
5991 	  if (arg_mode == DImode)
5992 	    arg_regs[25 - regno] = "GR";
5993 	}
5994       else if (regno >= 32 && regno <= 39)
5995 	{
5996 	  if (arg_mode == SFmode)
5997 	    arg_regs[(regno - 32) / 2] = "FR";
5998 	  else
5999 	    {
6000 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6001 	      arg_regs[(regno - 34) / 2] = "FR";
6002 	      arg_regs[(regno - 34) / 2 + 1] = "FU";
6003 #else
6004 	      arg_regs[(regno - 34) / 2] = "FU";
6005 	      arg_regs[(regno - 34) / 2 + 1] = "FR";
6006 #endif
6007 	    }
6008 	}
6009     }
6010   fputs ("\t.CALL ", asm_out_file);
6011   for (i = 0; i < 4; i++)
6012     {
6013       if (arg_regs[i])
6014 	{
6015 	  if (output_flag++)
6016 	    fputc (',', asm_out_file);
6017 	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6018 	}
6019     }
6020   fputc ('\n', asm_out_file);
6021 }
6022 
/* Inform reload about cases where moving X with a mode MODE to or from
   a register in RCLASS requires an extra scratch or immediate register.
   Return the class needed for the immediate register.

   IN_P is true for a reload into RCLASS_I and false for a reload out
   of it; it selects between the reload_in and reload_out patterns.
   When a scratch register is needed rather than an intermediate one,
   the reload pattern to use is stored in SRI->icode and NO_REGS is
   returned.  GENERAL_REGS is returned only for copies between the
   shift register class and the floating-point registers.  */

static reg_class_t
pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
		     machine_mode mode, secondary_reload_info *sri)
{
  int regno;
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Handle the easy stuff first.  */
  if (rclass == R1_REGS)
    return NO_REGS;

  /* REGNO is the register number of X, or -1 when X is not a REG.  */
  if (REG_P (x))
    {
      regno = REGNO (x);
      if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
	return NO_REGS;
    }
  else
    regno = -1;

  /* If we have something like (mem (mem (...)), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
    return NO_REGS;

  /* Trying to load a constant into a FP register during PIC code
     generation requires %r1 as a scratch register.  For float modes,
     the only legitimate constant is CONST0_RTX.  However, there are
     a few patterns that accept constant double operands.  */
  if (flag_pic
      && FP_REG_CLASS_P (rclass)
      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
    {
      switch (mode)
	{
	case E_SImode:
	  sri->icode = CODE_FOR_reload_insi_r1;
	  break;

	case E_DImode:
	  sri->icode = CODE_FOR_reload_indi_r1;
	  break;

	case E_SFmode:
	  sri->icode = CODE_FOR_reload_insf_r1;
	  break;

	case E_DFmode:
	  sri->icode = CODE_FOR_reload_indf_r1;
	  break;

	default:
	  gcc_unreachable ();
	}
      return NO_REGS;
    }

  /* Secondary reloads of symbolic expressions require %r1 as a scratch
     register when we're generating PIC code or when the operand isn't
     readonly.  */
  if (pa_symbolic_expression_p (x))
    {
      /* Note that X stays stripped of its HIGH wrapper for the rest of
	 the function.  */
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      if (flag_pic || !read_only_operand (x, VOIDmode))
	{
	  switch (mode)
	    {
	    case E_SImode:
	      sri->icode = CODE_FOR_reload_insi_r1;
	      break;

	    case E_DImode:
	      sri->icode = CODE_FOR_reload_indi_r1;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  return NO_REGS;
	}
    }

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside pa_secondary_reload_class.
     So true_regnum is only called for pseudos and SUBREGs.
     (pa_secondary_reload_class is presumably an earlier name of this
     function.)  */
  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  /* Handle reloads for floating point loads and stores.  */
  if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
      && FP_REG_CLASS_P (rclass))
    {
      if (MEM_P (x))
	{
	  x = XEXP (x, 0);

	  /* We don't need a secondary reload for indexed memory addresses.

	     When INT14_OK_STRICT is true, it might appear that we could
	     directly allow register indirect memory addresses.  However,
	     this doesn't work because we don't support SUBREGs in
	     floating-point register copies and reload doesn't tell us
	     when it's going to use a SUBREG.  */
	  if (IS_INDEX_ADDR_P (x))
	    return NO_REGS;
	}

      /* Request a secondary reload with a general scratch register
	 for everything else.  ??? Could symbolic operands be handled
	 directly when generating non-pic PA 2.0 code?  */
      sri->icode = (in_p
		    ? direct_optab_handler (reload_in_optab, mode)
		    : direct_optab_handler (reload_out_optab, mode));
      return NO_REGS;
    }

  /* A SAR<->FP register copy requires an intermediate general register
     and secondary memory.  We need a secondary reload with a general
     scratch register for spills.  */
  if (rclass == SHIFT_REGS)
    {
      /* Handle spill.  */
      if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
	{
	  sri->icode = (in_p
			? direct_optab_handler (reload_in_optab, mode)
			: direct_optab_handler (reload_out_optab, mode));
	  return NO_REGS;
	}

      /* Handle FP copy.  */
      if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
	return GENERAL_REGS;
    }

  /* The reverse direction: X is a hard register in SHIFT_REGS being
     moved to or from a floating-point class.  */
  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
      && REGNO_REG_CLASS (regno) == SHIFT_REGS
      && FP_REG_CLASS_P (rclass))
    return GENERAL_REGS;

  return NO_REGS;
}
6171 
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.

   The decision is delegated to the PA_SECONDARY_MEMORY_NEEDED macro
   when it is defined; otherwise secondary memory is never needed.
   The parameters are marked unused because they are only referenced
   when the macro is defined.  */

static bool
pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
			    reg_class_t class1 ATTRIBUTE_UNUSED,
			    reg_class_t class2 ATTRIBUTE_UNUSED)
{
#ifdef PA_SECONDARY_MEMORY_NEEDED
  return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
#else
  return false;
#endif
}
6185 
6186 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6187    is only marked as live on entry by df-scan when it is a fixed
6188    register.  It isn't a fixed register in the 64-bit runtime,
6189    so we need to mark it here.  */
6190 
6191 static void
6192 pa_extra_live_on_entry (bitmap regs)
6193 {
6194   if (TARGET_64BIT)
6195     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6196 }
6197 
6198 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6199    to prevent it from being deleted.  */
6200 
6201 rtx
6202 pa_eh_return_handler_rtx (void)
6203 {
6204   rtx tmp;
6205 
6206   tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6207 		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6208   tmp = gen_rtx_MEM (word_mode, tmp);
6209   tmp->volatil = 1;
6210   return tmp;
6211 }
6212 
6213 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6214    by invisible reference.  As a GCC extension, we also pass anything
6215    with a zero or variable size by reference.
6216 
6217    The 64-bit runtime does not describe passing any types by invisible
6218    reference.  The internals of GCC can't currently handle passing
6219    empty structures, and zero or variable length arrays when they are
6220    not passed entirely on the stack or by reference.  Thus, as a GCC
6221    extension, we pass these types by reference.  The HP compiler doesn't
6222    support these types, so hopefully there shouldn't be any compatibility
6223    issues.  This may have to be revisited when HP releases a C99 compiler
6224    or updates the ABI.  */
6225 
6226 static bool
6227 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6228 		      machine_mode mode, const_tree type,
6229 		      bool named ATTRIBUTE_UNUSED)
6230 {
6231   HOST_WIDE_INT size;
6232 
6233   if (type)
6234     size = int_size_in_bytes (type);
6235   else
6236     size = GET_MODE_SIZE (mode);
6237 
6238   if (TARGET_64BIT)
6239     return size <= 0;
6240   else
6241     return size <= 0 || size > 8;
6242 }
6243 
6244 /* Implement TARGET_FUNCTION_ARG_PADDING.  */
6245 
6246 static pad_direction
6247 pa_function_arg_padding (machine_mode mode, const_tree type)
6248 {
6249   if (mode == BLKmode
6250       || (TARGET_64BIT
6251 	  && type
6252 	  && (AGGREGATE_TYPE_P (type)
6253 	      || TREE_CODE (type) == COMPLEX_TYPE
6254 	      || TREE_CODE (type) == VECTOR_TYPE)))
6255     {
6256       /* Return PAD_NONE if justification is not required.  */
6257       if (type
6258 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6259 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6260 	return PAD_NONE;
6261 
6262       /* The directions set here are ignored when a BLKmode argument larger
6263 	 than a word is placed in a register.  Different code is used for
6264 	 the stack and registers.  This makes it difficult to have a
6265 	 consistent data representation for both the stack and registers.
6266 	 For both runtimes, the justification and padding for arguments on
6267 	 the stack and in registers should be identical.  */
6268       if (TARGET_64BIT)
6269 	/* The 64-bit runtime specifies left justification for aggregates.  */
6270 	return PAD_UPWARD;
6271       else
6272 	/* The 32-bit runtime architecture specifies right justification.
6273 	   When the argument is passed on the stack, the argument is padded
6274 	   with garbage on the left.  The HP compiler pads with zeros.  */
6275 	return PAD_DOWNWARD;
6276     }
6277 
6278   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6279     return PAD_DOWNWARD;
6280   else
6281     return PAD_NONE;
6282 }
6283 
6284 
6285 /* Do what is necessary for `va_start'.  We look at the current function
6286    to determine if stdargs or varargs is used and fill in an initial
6287    va_list.  A pointer to this constructor is returned.  */
6288 
6289 static rtx
6290 hppa_builtin_saveregs (void)
6291 {
6292   rtx offset, dest;
6293   tree fntype = TREE_TYPE (current_function_decl);
6294   int argadj = ((!stdarg_p (fntype))
6295 		? UNITS_PER_WORD : 0);
6296 
6297   if (argadj)
6298     offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6299   else
6300     offset = crtl->args.arg_offset_rtx;
6301 
6302   if (TARGET_64BIT)
6303     {
6304       int i, off;
6305 
6306       /* Adjust for varargs/stdarg differences.  */
6307       if (argadj)
6308 	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6309       else
6310 	offset = crtl->args.arg_offset_rtx;
6311 
6312       /* We need to save %r26 .. %r19 inclusive starting at offset -64
6313 	 from the incoming arg pointer and growing to larger addresses.  */
6314       for (i = 26, off = -64; i >= 19; i--, off += 8)
6315 	emit_move_insn (gen_rtx_MEM (word_mode,
6316 				     plus_constant (Pmode,
6317 						    arg_pointer_rtx, off)),
6318 			gen_rtx_REG (word_mode, i));
6319 
6320       /* The incoming args pointer points just beyond the flushback area;
6321 	 normally this is not a serious concern.  However, when we are doing
6322 	 varargs/stdargs we want to make the arg pointer point to the start
6323 	 of the incoming argument area.  */
6324       emit_move_insn (virtual_incoming_args_rtx,
6325 		      plus_constant (Pmode, arg_pointer_rtx, -64));
6326 
6327       /* Now return a pointer to the first anonymous argument.  */
6328       return copy_to_reg (expand_binop (Pmode, add_optab,
6329 					virtual_incoming_args_rtx,
6330 					offset, 0, 0, OPTAB_LIB_WIDEN));
6331     }
6332 
6333   /* Store general registers on the stack.  */
6334   dest = gen_rtx_MEM (BLKmode,
6335 		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
6336 				     -16));
6337   set_mem_alias_set (dest, get_varargs_alias_set ());
6338   set_mem_align (dest, BITS_PER_WORD);
6339   move_block_from_reg (23, dest, 4);
6340 
6341   /* move_block_from_reg will emit code to store the argument registers
6342      individually as scalar stores.
6343 
6344      However, other insns may later load from the same addresses for
6345      a structure load (passing a struct to a varargs routine).
6346 
6347      The alias code assumes that such aliasing can never happen, so we
6348      have to keep memory referencing insns from moving up beyond the
6349      last argument register store.  So we emit a blockage insn here.  */
6350   emit_insn (gen_blockage ());
6351 
6352   return copy_to_reg (expand_binop (Pmode, add_optab,
6353 				    crtl->args.internal_arg_pointer,
6354 				    offset, 0, 0, OPTAB_LIB_WIDEN));
6355 }
6356 
/* Expand va_start for VALIST.  The incoming NEXTARG value is not
   used: it is replaced by the result of expand_builtin_saveregs
   before the standard va_start expansion is invoked.  */
static void
hppa_va_start (tree valist, rtx nextarg)
{
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}
6363 
6364 static tree
6365 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6366 			   gimple_seq *post_p)
6367 {
6368   if (TARGET_64BIT)
6369     {
6370       /* Args grow upward.  We can use the generic routines.  */
6371       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6372     }
6373   else /* !TARGET_64BIT */
6374     {
6375       tree ptr = build_pointer_type (type);
6376       tree valist_type;
6377       tree t, u;
6378       unsigned int size, ofs;
6379       bool indirect;
6380 
6381       indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6382       if (indirect)
6383 	{
6384 	  type = ptr;
6385 	  ptr = build_pointer_type (type);
6386 	}
6387       size = int_size_in_bytes (type);
6388       valist_type = TREE_TYPE (valist);
6389 
6390       /* Args grow down.  Not handled by generic routines.  */
6391 
6392       u = fold_convert (sizetype, size_in_bytes (type));
6393       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6394       t = fold_build_pointer_plus (valist, u);
6395 
6396       /* Align to 4 or 8 byte boundary depending on argument size.  */
6397 
6398       u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6399       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6400       t = fold_convert (valist_type, t);
6401 
6402       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6403 
6404       ofs = (8 - size) % 4;
6405       if (ofs != 0)
6406 	t = fold_build_pointer_plus_hwi (t, ofs);
6407 
6408       t = fold_convert (ptr, t);
6409       t = build_va_arg_indirect_ref (t);
6410 
6411       if (indirect)
6412 	t = build_va_arg_indirect_ref (t);
6413 
6414       return t;
6415     }
6416 }
6417 
6418 /* True if MODE is valid for the target.  By "valid", we mean able to
6419    be manipulated in non-trivial ways.  In particular, this means all
6420    the arithmetic is supported.
6421 
6422    Currently, TImode is not valid as the HP 64-bit runtime documentation
6423    doesn't document the alignment and calling conventions for this type.
6424    Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6425    2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE.  */
6426 
6427 static bool
6428 pa_scalar_mode_supported_p (scalar_mode mode)
6429 {
6430   int precision = GET_MODE_PRECISION (mode);
6431 
6432   switch (GET_MODE_CLASS (mode))
6433     {
6434     case MODE_PARTIAL_INT:
6435     case MODE_INT:
6436       if (precision == CHAR_TYPE_SIZE)
6437 	return true;
6438       if (precision == SHORT_TYPE_SIZE)
6439 	return true;
6440       if (precision == INT_TYPE_SIZE)
6441 	return true;
6442       if (precision == LONG_TYPE_SIZE)
6443 	return true;
6444       if (precision == LONG_LONG_TYPE_SIZE)
6445 	return true;
6446       return false;
6447 
6448     case MODE_FLOAT:
6449       if (precision == FLOAT_TYPE_SIZE)
6450 	return true;
6451       if (precision == DOUBLE_TYPE_SIZE)
6452 	return true;
6453       if (precision == LONG_DOUBLE_TYPE_SIZE)
6454 	return true;
6455       return false;
6456 
6457     case MODE_DECIMAL_FLOAT:
6458       return false;
6459 
6460     default:
6461       gcc_unreachable ();
6462     }
6463 }
6464 
6465 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6466    it branches into the delay slot.  Otherwise, return FALSE.  */
6467 
6468 static bool
6469 branch_to_delay_slot_p (rtx_insn *insn)
6470 {
6471   rtx_insn *jump_insn;
6472 
6473   if (dbr_sequence_length ())
6474     return FALSE;
6475 
6476   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6477   while (insn)
6478     {
6479       insn = next_active_insn (insn);
6480       if (jump_insn == insn)
6481 	return TRUE;
6482 
6483       /* We can't rely on the length of asms.  So, we return FALSE when
6484 	 the branch is followed by an asm.  */
6485       if (!insn
6486 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6487 	  || asm_noperands (PATTERN (insn)) >= 0
6488 	  || get_attr_length (insn) > 0)
6489 	break;
6490     }
6491 
6492   return FALSE;
6493 }
6494 
6495 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6496 
6497    This occurs when INSN has an unfilled delay slot and is followed
6498    by an asm.  Disaster can occur if the asm is empty and the jump
6499    branches into the delay slot.  So, we add a nop in the delay slot
6500    when this occurs.  */
6501 
6502 static bool
6503 branch_needs_nop_p (rtx_insn *insn)
6504 {
6505   rtx_insn *jump_insn;
6506 
6507   if (dbr_sequence_length ())
6508     return FALSE;
6509 
6510   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6511   while (insn)
6512     {
6513       insn = next_active_insn (insn);
6514       if (!insn || jump_insn == insn)
6515 	return TRUE;
6516 
6517       if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6518 	   || asm_noperands (PATTERN (insn)) >= 0)
6519 	  && get_attr_length (insn) > 0)
6520 	break;
6521     }
6522 
6523   return FALSE;
6524 }
6525 
6526 /* Return TRUE if INSN, a forward jump insn, can use nullification
6527    to skip the following instruction.  This avoids an extra cycle due
6528    to a mis-predicted branch when we fall through.  */
6529 
6530 static bool
6531 use_skip_p (rtx_insn *insn)
6532 {
6533   rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6534 
6535   while (insn)
6536     {
6537       insn = next_active_insn (insn);
6538 
6539       /* We can't rely on the length of asms, so we can't skip asms.  */
6540       if (!insn
6541 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6542 	  || asm_noperands (PATTERN (insn)) >= 0)
6543 	break;
6544       if (get_attr_length (insn) == 4
6545 	  && jump_insn == next_active_insn (insn))
6546 	return TRUE;
6547       if (get_attr_length (insn) > 0)
6548 	break;
6549     }
6550 
6551   return FALSE;
6552 }
6553 
6554 /* This routine handles all the normal conditional branch sequences we
6555    might need to generate.  It handles compare immediate vs compare
6556    register, nullification of delay slots, varying length branches,
6557    negated branches, and all combinations of the above.  It returns the
6558    output appropriate to emit the branch corresponding to all given
6559    parameters.  */
     /* Operand usage, as seen in the templates below: OPERANDS[0] is the
        branch target, OPERANDS[1] and OPERANDS[2] are the two values being
        compared, and OPERANDS[3] supplies the condition through the %S3 and
        %B3 modifiers.  NEGATED selects which of %S3/%B3 is used.
        NOTE(review): %S3/%B3 presumably print the condition and its
        reverse -- confirm against the operand-printing code.

        OPERANDS[1] or OPERANDS[2] may be replaced by a DImode register 0
        when the other operand is DImode, and OPERANDS[4] is overwritten
        with the displacement of the reversed branch when the length is
        neither 4 nor 8.

        The result is one of: the literal "nop", the static buffer BUF
        (valid until the next call), or the template returned by
        pa_output_lbranch.  */
6560 
6561 const char *
6562 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6563 {
     /* BUF is static because it is handed back to the caller for the
        4- and 8-byte cases.  */
6564   static char buf[100];
6565   bool useskip;
6566   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6567   int length = get_attr_length (insn);
6568   int xdelay;
6569 
6570   /* A conditional branch to the following instruction (e.g. the delay slot)
6571      is asking for a disaster.  This can happen when not optimizing and
6572      when jump optimization fails.
6573 
6574      While it is usually safe to emit nothing, this can fail if the
6575      preceding instruction is a nullified branch with an empty delay
6576      slot and the same branch target as this branch.  We could check
6577      for this but jump optimization should eliminate nop jumps.  It
6578      is always safe to emit a nop.  */
6579   if (branch_to_delay_slot_p (insn))
6580     return "nop";
6581 
6582   /* The doubleword form of the cmpib instruction doesn't have the LEU
6583      and GTU conditions while the cmpb instruction does.  Since we accept
6584      zero for cmpb, we must ensure that we use cmpb for the comparison.  */
6585   if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6586     operands[2] = gen_rtx_REG (DImode, 0);
6587   if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6588     operands[1] = gen_rtx_REG (DImode, 0);
6589 
6590   /* If this is a long branch with its delay slot unfilled, set `nullify'
6591      as it can nullify the delay slot and save a nop.  */
6592   if (length == 8 && dbr_sequence_length () == 0)
6593     nullify = 1;
6594 
6595   /* If this is a short forward conditional branch which did not get
6596      its delay slot filled, the delay slot can still be nullified.  */
6597   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6598     nullify = forward_branch_p (insn);
6599 
6600   /* A forward branch over a single nullified insn can be done with a
6601      comclr instruction.  This avoids a single cycle penalty due to
6602      mis-predicted branch if we fall through (branch not taken).  */
6603   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6604 
6605   switch (length)
6606     {
6607       /* All short conditional branches except backwards with an unfilled
6608 	 delay slot.  */
6609       case 4:
6610 	if (useskip)
6611 	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6612 	else
6613 	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
6614 	if (GET_MODE (operands[1]) == DImode)
6615 	  strcat (buf, "*");
6616 	if (negated)
6617 	  strcat (buf, "%B3");
6618 	else
6619 	  strcat (buf, "%S3");
6620 	if (useskip)
6621 	  strcat (buf, " %2,%r1,%%r0");
6622 	else if (nullify)
6623 	  {
     	    /* "%#" is appended only when branch_needs_nop_p says the
     	       delay slot needs a nop.  */
6624 	    if (branch_needs_nop_p (insn))
6625 	      strcat (buf, ",n %2,%r1,%0%#");
6626 	    else
6627 	      strcat (buf, ",n %2,%r1,%0");
6628 	  }
6629 	else
6630 	  strcat (buf, " %2,%r1,%0");
6631 	break;
6632 
6633      /* All long conditionals.  Note a short backward branch with an
6634 	unfilled delay slot is treated just like a long backward branch
6635 	with an unfilled delay slot.  */
6636       case 8:
6637 	/* Handle weird backwards branch with a filled delay slot
6638 	   which is nullified.  */
6639 	if (dbr_sequence_length () != 0
6640 	    && ! forward_branch_p (insn)
6641 	    && nullify)
6642 	  {
6643 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6644 	    if (GET_MODE (operands[1]) == DImode)
6645 	      strcat (buf, "*");
     	    /* The %S3/%B3 choice is the opposite of the short case: this
     	       compare branches around the unconditional "b %0".  */
6646 	    if (negated)
6647 	      strcat (buf, "%S3");
6648 	    else
6649 	      strcat (buf, "%B3");
6650 	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6651 	  }
6652 	/* Handle short backwards branch with an unfilled delay slot.
6653 	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6654 	   taken and untaken branches.  */
6655 	else if (dbr_sequence_length () == 0
6656 		 && ! forward_branch_p (insn)
6657 		 && INSN_ADDRESSES_SET_P ()
6658 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6659 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6660 	  {
6661 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6662 	    if (GET_MODE (operands[1]) == DImode)
6663 	      strcat (buf, "*");
6664 	    if (negated)
6665 	      strcat (buf, "%B3 %2,%r1,%0%#");
6666 	    else
6667 	      strcat (buf, "%S3 %2,%r1,%0%#");
6668 	  }
6669 	else
6670 	  {
6671 	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6672 	    if (GET_MODE (operands[1]) == DImode)
6673 	      strcat (buf, "*");
6674 	    if (negated)
6675 	      strcat (buf, "%S3");
6676 	    else
6677 	      strcat (buf, "%B3");
6678 	    if (nullify)
6679 	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6680 	    else
6681 	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
6682 	  }
6683 	break;
6684 
6685       default:
6686 	/* The reversed conditional branch must branch over one additional
6687 	   instruction if the delay slot is filled and needs to be extracted
6688 	   by pa_output_lbranch.  If the delay slot is empty or this is a
6689 	   nullified forward branch, the instruction after the reversed
6690 	   condition branch must be nullified.  */
6691 	if (dbr_sequence_length () == 0
6692 	    || (nullify && forward_branch_p (insn)))
6693 	  {
6694 	    nullify = 1;
6695 	    xdelay = 0;
6696 	    operands[4] = GEN_INT (length);
6697 	  }
6698 	else
6699 	  {
6700 	    xdelay = 1;
6701 	    operands[4] = GEN_INT (length + 4);
6702 	  }
6703 
6704 	/* Create a reversed conditional branch which branches around
6705 	   the following insns.  */
6706 	if (GET_MODE (operands[1]) != DImode)
6707 	  {
6708 	    if (nullify)
6709 	      {
6710 		if (negated)
6711 		  strcpy (buf,
6712 		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6713 		else
6714 		  strcpy (buf,
6715 		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6716 	      }
6717 	    else
6718 	      {
6719 		if (negated)
6720 		  strcpy (buf,
6721 		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6722 		else
6723 		  strcpy (buf,
6724 		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6725 	      }
6726 	  }
6727 	else
6728 	  {
6729 	    if (nullify)
6730 	      {
6731 		if (negated)
6732 		  strcpy (buf,
6733 		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6734 		else
6735 		  strcpy (buf,
6736 		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6737 	      }
6738 	    else
6739 	      {
6740 		if (negated)
6741 		  strcpy (buf,
6742 		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6743 		else
6744 		  strcpy (buf,
6745 		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6746 	      }
6747 	  }
6748 
     	/* The reversed branch is emitted immediately; the long branch
     	   itself, and the template finally returned, come from
     	   pa_output_lbranch.  */
6749 	output_asm_insn (buf, operands);
6750 	return pa_output_lbranch (operands[0], insn, xdelay);
6751     }
6752   return buf;
6753 }
6754 
6755 /* Output a PIC pc-relative instruction sequence to load the address of
6756    OPERANDS[0] to register OPERANDS[2].  OPERANDS[0] is a symbol ref
6757    or a code label.  OPERANDS[1] specifies the register to use to load
6758    the program counter.  OPERANDS[3] may be used for label generation
6759    The sequence is always three instructions in length.  The program
6760    counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6761    Register %r1 is clobbered.  */
6762 
6763 static void
6764 pa_output_pic_pcrel_sequence (rtx *operands)
6765 {
6766   gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6767   if (TARGET_PA_20)
6768     {
6769       /* We can use mfia to determine the current program counter.  */
6770       if (TARGET_SOM || !TARGET_GAS)
6771 	{
6772 	  operands[3] = gen_label_rtx ();
6773 	  targetm.asm_out.internal_label (asm_out_file, "L",
6774 					  CODE_LABEL_NUMBER (operands[3]));
6775 	  output_asm_insn ("mfia %1", operands);
6776 	  output_asm_insn ("addil L'%0-%l3,%1", operands);
6777 	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6778 	}
6779       else
6780 	{
6781 	  output_asm_insn ("mfia %1", operands);
6782 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6783 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6784 	}
6785     }
6786   else
6787     {
6788       /* We need to use a branch to determine the current program counter.  */
6789       output_asm_insn ("{bl|b,l} .+8,%1", operands);
6790       if (TARGET_SOM || !TARGET_GAS)
6791 	{
6792 	  operands[3] = gen_label_rtx ();
6793 	  output_asm_insn ("addil L'%0-%l3,%1", operands);
6794 	  targetm.asm_out.internal_label (asm_out_file, "L",
6795 					  CODE_LABEL_NUMBER (operands[3]));
6796 	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6797 	}
6798       else
6799 	{
6800 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6801 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6802 	}
6803     }
6804 }
6805 
6806 /* This routine handles output of long unconditional branches that
6807    exceed the maximum range of a simple branch instruction.  Since
6808    we don't have a register available for the branch, we save register
6809    %r1 in the frame marker, load the branch destination DEST into %r1,
6810    execute the branch, and restore %r1 in the delay slot of the branch.
6811 
6812    Since long branches may have an insn in the delay slot and the
6813    delay slot is used to restore %r1, we in general need to extract
6814    this insn and execute it before the branch.  However, to facilitate
6815    use of this function by conditional branches, we also provide an
6816    option to not extract the delay insn so that it will be emitted
6817    after the long branch.  So, if there is an insn in the delay slot,
6818    it is extracted if XDELAY is nonzero.
6819 
6820    The lengths of the various long-branch sequences are 20, 16 and 24
6821    bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
6822 
6823 const char *
6824 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6825 {
6826   rtx xoperands[4];
6827 
6828   xoperands[0] = dest;
6829 
6830   /* First, free up the delay slot.  */
6831   if (xdelay && dbr_sequence_length () != 0)
6832     {
6833       /* We can't handle a jump in the delay slot.  */
6834       gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6835 
6836       final_scan_insn (NEXT_INSN (insn), asm_out_file,
6837 		       optimize, 0, NULL);
6838 
6839       /* Now delete the delay insn.  */
6840       SET_INSN_DELETED (NEXT_INSN (insn));
6841     }
6842 
6843   /* Output an insn to save %r1.  The runtime documentation doesn't
6844      specify whether the "Clean Up" slot in the callers frame can
6845      be clobbered by the callee.  It isn't copied by HP's builtin
6846      alloca, so this suggests that it can be clobbered if necessary.
6847      The "Static Link" location is copied by HP builtin alloca, so
6848      we avoid using it.  Using the cleanup slot might be a problem
6849      if we have to interoperate with languages that pass cleanup
6850      information.  However, it should be possible to handle these
6851      situations with GCC's asm feature.
6852 
6853      The "Current RP" slot is reserved for the called procedure, so
6854      we try to use it when we don't have a frame of our own.  It's
6855      rather unlikely that we won't have a frame when we need to emit
6856      a very long branch.
6857 
6858      Really the way to go long term is a register scavenger; goto
6859      the target of the jump and find a register which we can use
6860      as a scratch to hold the value in %r1.  Then, we wouldn't have
6861      to free up the delay slot or clobber a slot that may be needed
6862      for other purposes.  */
6863   if (TARGET_64BIT)
6864     {
6865       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6866 	/* Use the return pointer slot in the frame marker.  */
6867 	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6868       else
6869 	/* Use the slot at -40 in the frame marker since HP builtin
6870 	   alloca doesn't copy it.  */
6871 	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6872     }
6873   else
6874     {
6875       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6876 	/* Use the return pointer slot in the frame marker.  */
6877 	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6878       else
6879 	/* Use the "Clean Up" slot in the frame marker.  In GCC,
6880 	   the only other use of this location is for copying a
6881 	   floating point double argument from a floating-point
6882 	   register to two general registers.  The copy is done
6883 	   as an "atomic" operation when outputting a call, so it
6884 	   won't interfere with our using the location here.  */
6885 	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6886     }
6887 
6888   if (TARGET_PORTABLE_RUNTIME)
6889     {
6890       output_asm_insn ("ldil L'%0,%%r1", xoperands);
6891       output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6892       output_asm_insn ("bv %%r0(%%r1)", xoperands);
6893     }
6894   else if (flag_pic)
6895     {
6896       xoperands[1] = gen_rtx_REG (Pmode, 1);
6897       xoperands[2] = xoperands[1];
6898       pa_output_pic_pcrel_sequence (xoperands);
6899       output_asm_insn ("bv %%r0(%%r1)", xoperands);
6900     }
6901   else
6902     /* Now output a very long branch to the original target.  */
6903     output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6904 
6905   /* Now restore the value of %r1 in the delay slot.  */
6906   if (TARGET_64BIT)
6907     {
6908       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6909 	return "ldd -16(%%r30),%%r1";
6910       else
6911 	return "ldd -40(%%r30),%%r1";
6912     }
6913   else
6914     {
6915       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6916 	return "ldw -20(%%r30),%%r1";
6917       else
6918 	return "ldw -12(%%r30),%%r1";
6919     }
6920 }
6921 
6922 /* This routine handles all the branch-on-bit conditional branch sequences we
6923    might need to generate.  It handles nullification of delay slots,
6924    varying length branches, negated branches and all combinations of the
6925    above.  It returns the appropriate output template to emit the branch.  */
     /* Operand usage, as seen in the templates below: OPERANDS[0] and
        OPERANDS[1] are the first two arguments of the bb/extrs
        instruction, and the mode of OPERANDS[0] selects the doubleword
        forms.  NOTE(review): presumably OPERANDS[0] is the value tested
        and OPERANDS[1] the bit position -- confirm against the insn
        pattern.  The branch target is OPERANDS[3] when NEGATED is nonzero
        and OPERANDS[2] otherwise.  WHICH (0 or 1) combines with NEGATED to
        choose between the "<" and ">=" conditions.  OPERANDS[4] is
        overwritten with a displacement when the length is neither 4
        nor 8.

        NOTE(review): OPERANDS is marked ATTRIBUTE_UNUSED although it is
        both read and written below.

        The result is the literal "nop", the static buffer BUF (valid
        until the next call), or the template returned by
        pa_output_lbranch.  */
6926 
6927 const char *
6928 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6929 {
6930   static char buf[100];
6931   bool useskip;
6932   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6933   int length = get_attr_length (insn);
6934   int xdelay;
6935 
6936   /* A conditional branch to the following instruction (e.g. the delay slot) is
6937      asking for a disaster.  I do not think this can happen as this pattern
6938      is only used when optimizing; jump optimization should eliminate the
6939      jump.  But be prepared just in case.  */
6940 
6941   if (branch_to_delay_slot_p (insn))
6942     return "nop";
6943 
6944   /* If this is a long branch with its delay slot unfilled, set `nullify'
6945      as it can nullify the delay slot and save a nop.  */
6946   if (length == 8 && dbr_sequence_length () == 0)
6947     nullify = 1;
6948 
6949   /* If this is a short forward conditional branch which did not get
6950      its delay slot filled, the delay slot can still be nullified.  */
6951   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6952     nullify = forward_branch_p (insn);
6953 
6954   /* A forward branch over a single nullified insn can be done with an
6955      extrs instruction.  This avoids a single cycle penalty due to
6956      mis-predicted branch if we fall through (branch not taken).  */
6957   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6958 
6959   switch (length)
6960     {
6961 
6962       /* All short conditional branches except backwards with an unfilled
6963 	 delay slot.  */
6964       case 4:
6965 	if (useskip)
6966 	  strcpy (buf, "{extrs,|extrw,s,}");
6967 	else
6968 	  strcpy (buf, "bb,");
     	/* For DImode the opcode just written is replaced (strcpy, not
     	   strcat) by the doubleword form.  */
6969 	if (useskip && GET_MODE (operands[0]) == DImode)
6970 	  strcpy (buf, "extrd,s,*");
6971 	else if (GET_MODE (operands[0]) == DImode)
6972 	  strcpy (buf, "bb,*");
6973 	if ((which == 0 && negated)
6974 	     || (which == 1 && ! negated))
6975 	  strcat (buf, ">=");
6976 	else
6977 	  strcat (buf, "<");
6978 	if (useskip)
6979 	  strcat (buf, " %0,%1,1,%%r0");
6980 	else if (nullify && negated)
6981 	  {
6982 	    if (branch_needs_nop_p (insn))
6983 	      strcat (buf, ",n %0,%1,%3%#");
6984 	    else
6985 	      strcat (buf, ",n %0,%1,%3");
6986 	  }
6987 	else if (nullify && ! negated)
6988 	  {
6989 	    if (branch_needs_nop_p (insn))
6990 	      strcat (buf, ",n %0,%1,%2%#");
6991 	    else
6992 	      strcat (buf, ",n %0,%1,%2");
6993 	  }
6994 	else if (! nullify && negated)
6995 	  strcat (buf, " %0,%1,%3");
6996 	else if (! nullify && ! negated)
6997 	  strcat (buf, " %0,%1,%2");
6998 	break;
6999 
7000      /* All long conditionals.  Note a short backward branch with an
7001 	unfilled delay slot is treated just like a long backward branch
7002 	with an unfilled delay slot.  */
7003       case 8:
7004 	/* Handle weird backwards branch with a filled delay slot
7005 	   which is nullified.  */
7006 	if (dbr_sequence_length () != 0
7007 	    && ! forward_branch_p (insn)
7008 	    && nullify)
7009 	  {
7010 	    strcpy (buf, "bb,");
7011 	    if (GET_MODE (operands[0]) == DImode)
7012 	      strcat (buf, "*");
     	    /* The condition is the opposite of the short case: this bb
     	       branches around the unconditional "b".  */
7013 	    if ((which == 0 && negated)
7014 		|| (which == 1 && ! negated))
7015 	      strcat (buf, "<");
7016 	    else
7017 	      strcat (buf, ">=");
7018 	    if (negated)
7019 	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
7020 	    else
7021 	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
7022 	  }
7023 	/* Handle short backwards branch with an unfilled delay slot.
7024 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
7025 	   taken and untaken branches.  */
7026 	else if (dbr_sequence_length () == 0
7027 		 && ! forward_branch_p (insn)
7028 		 && INSN_ADDRESSES_SET_P ()
7029 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7030 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7031 	  {
7032 	    strcpy (buf, "bb,");
7033 	    if (GET_MODE (operands[0]) == DImode)
7034 	      strcat (buf, "*");
7035 	    if ((which == 0 && negated)
7036 		|| (which == 1 && ! negated))
7037 	      strcat (buf, ">=");
7038 	    else
7039 	      strcat (buf, "<");
7040 	    if (negated)
7041 	      strcat (buf, " %0,%1,%3%#");
7042 	    else
7043 	      strcat (buf, " %0,%1,%2%#");
7044 	  }
7045 	else
7046 	  {
7047 	    if (GET_MODE (operands[0]) == DImode)
7048 	      strcpy (buf, "extrd,s,*");
7049 	    else
7050 	      strcpy (buf, "{extrs,|extrw,s,}");
7051 	    if ((which == 0 && negated)
7052 		|| (which == 1 && ! negated))
7053 	      strcat (buf, "<");
7054 	    else
7055 	      strcat (buf, ">=");
7056 	    if (nullify && negated)
7057 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7058 	    else if (nullify && ! negated)
7059 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7060 	    else if (negated)
7061 	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7062 	    else
7063 	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7064 	  }
7065 	break;
7066 
7067       default:
7068 	/* The reversed conditional branch must branch over one additional
7069 	   instruction if the delay slot is filled and needs to be extracted
7070 	   by pa_output_lbranch.  If the delay slot is empty or this is a
7071 	   nullified forward branch, the instruction after the reversed
7072 	   condition branch must be nullified.  */
7073 	if (dbr_sequence_length () == 0
7074 	    || (nullify && forward_branch_p (insn)))
7075 	  {
7076 	    nullify = 1;
7077 	    xdelay = 0;
7078 	    operands[4] = GEN_INT (length);
7079 	  }
7080 	else
7081 	  {
7082 	    xdelay = 1;
7083 	    operands[4] = GEN_INT (length + 4);
7084 	  }
7085 
7086 	if (GET_MODE (operands[0]) == DImode)
7087 	  strcpy (buf, "bb,*");
7088 	else
7089 	  strcpy (buf, "bb,");
7090 	if ((which == 0 && negated)
7091 	    || (which == 1 && !negated))
7092 	  strcat (buf, "<");
7093 	else
7094 	  strcat (buf, ">=");
7095 	if (nullify)
7096 	  strcat (buf, ",n %0,%1,.+%4");
7097 	else
7098 	  strcat (buf, " %0,%1,.+%4");
     	/* The reversed branch is emitted immediately; the long branch
     	   and the returned template come from pa_output_lbranch.  */
7099 	output_asm_insn (buf, operands);
7100 	return pa_output_lbranch (negated ? operands[3] : operands[2],
7101 				  insn, xdelay);
7102     }
7103   return buf;
7104 }
7105 
7106 /* This routine handles all the branch-on-variable-bit conditional branch
7107    sequences we might need to generate.  It handles nullification of delay
7108    slots, varying length branches, negated branches and all combinations
7109    of the above.  It returns the appropriate output template to emit the
7110    branch.  */
     /* Operand usage, as seen in the templates below: OPERANDS[0] is the
        first argument of the bvb/bb/extract instruction and its mode
        selects the doubleword forms; the second assembler dialect names
        %sar explicitly where the first dialect has no such argument.
        NOTE(review): presumably OPERANDS[0] is the value tested and %sar
        holds the bit position -- confirm against the insn pattern.  The
        branch target is OPERANDS[3] when NEGATED is nonzero and
        OPERANDS[2] otherwise.  WHICH (0 or 1) combines with NEGATED to
        choose between the "<" and ">=" conditions.  OPERANDS[4] is
        overwritten with a displacement when the length is neither 4
        nor 8.

        NOTE(review): OPERANDS is marked ATTRIBUTE_UNUSED although it is
        both read and written below.

        The result is the literal "nop", the static buffer BUF (valid
        until the next call), or the template returned by
        pa_output_lbranch.  */
7111 
7112 const char *
7113 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7114 	       int which)
7115 {
7116   static char buf[100];
7117   bool useskip;
7118   int nullify = INSN_ANNULLED_BRANCH_P (insn);
7119   int length = get_attr_length (insn);
7120   int xdelay;
7121 
7122   /* A conditional branch to the following instruction (e.g. the delay slot) is
7123      asking for a disaster.  I do not think this can happen as this pattern
7124      is only used when optimizing; jump optimization should eliminate the
7125      jump.  But be prepared just in case.  */
7126 
7127   if (branch_to_delay_slot_p (insn))
7128     return "nop";
7129 
7130   /* If this is a long branch with its delay slot unfilled, set `nullify'
7131      as it can nullify the delay slot and save a nop.  */
7132   if (length == 8 && dbr_sequence_length () == 0)
7133     nullify = 1;
7134 
7135   /* If this is a short forward conditional branch which did not get
7136      its delay slot filled, the delay slot can still be nullified.  */
7137   if (! nullify && length == 4 && dbr_sequence_length () == 0)
7138     nullify = forward_branch_p (insn);
7139 
7140   /* A forward branch over a single nullified insn can be done with an
7141      extrs instruction.  This avoids a single cycle penalty due to
7142      mis-predicted branch if we fall through (branch not taken).  */
7143   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7144 
7145   switch (length)
7146     {
7147 
7148       /* All short conditional branches except backwards with an unfilled
7149 	 delay slot.  */
7150       case 4:
7151 	if (useskip)
7152 	  strcpy (buf, "{vextrs,|extrw,s,}");
7153 	else
7154 	  strcpy (buf, "{bvb,|bb,}");
     	/* For DImode the opcode just written is replaced (strcpy, not
     	   strcat) by the doubleword form.  */
7155 	if (useskip && GET_MODE (operands[0]) == DImode)
7156 	  strcpy (buf, "extrd,s,*");
7157 	else if (GET_MODE (operands[0]) == DImode)
7158 	  strcpy (buf, "bb,*");
7159 	if ((which == 0 && negated)
7160 	     || (which == 1 && ! negated))
7161 	  strcat (buf, ">=");
7162 	else
7163 	  strcat (buf, "<");
7164 	if (useskip)
7165 	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7166 	else if (nullify && negated)
7167 	  {
7168 	    if (branch_needs_nop_p (insn))
7169 	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7170 	    else
7171 	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7172 	  }
7173 	else if (nullify && ! negated)
7174 	  {
7175 	    if (branch_needs_nop_p (insn))
7176 	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7177 	    else
7178 	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7179 	  }
7180 	else if (! nullify && negated)
7181 	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7182 	else if (! nullify && ! negated)
7183 	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7184 	break;
7185 
7186      /* All long conditionals.  Note a short backward branch with an
7187 	unfilled delay slot is treated just like a long backward branch
7188 	with an unfilled delay slot.  */
7189       case 8:
7190 	/* Handle weird backwards branch with a filled delay slot
7191 	   which is nullified.  */
7192 	if (dbr_sequence_length () != 0
7193 	    && ! forward_branch_p (insn)
7194 	    && nullify)
7195 	  {
7196 	    strcpy (buf, "{bvb,|bb,}");
7197 	    if (GET_MODE (operands[0]) == DImode)
7198 	      strcat (buf, "*");
     	    /* The condition is the opposite of the short case: this
     	       branch goes around the unconditional "b".  */
7199 	    if ((which == 0 && negated)
7200 		|| (which == 1 && ! negated))
7201 	      strcat (buf, "<");
7202 	    else
7203 	      strcat (buf, ">=");
7204 	    if (negated)
7205 	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7206 	    else
7207 	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7208 	  }
7209 	/* Handle short backwards branch with an unfilled delay slot.
7210 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
7211 	   taken and untaken branches.  */
7212 	else if (dbr_sequence_length () == 0
7213 		 && ! forward_branch_p (insn)
7214 		 && INSN_ADDRESSES_SET_P ()
7215 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7216 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7217 	  {
7218 	    strcpy (buf, "{bvb,|bb,}");
7219 	    if (GET_MODE (operands[0]) == DImode)
7220 	      strcat (buf, "*");
7221 	    if ((which == 0 && negated)
7222 		|| (which == 1 && ! negated))
7223 	      strcat (buf, ">=");
7224 	    else
7225 	      strcat (buf, "<");
7226 	    if (negated)
7227 	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7228 	    else
7229 	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7230 	  }
7231 	else
7232 	  {
7233 	    strcpy (buf, "{vextrs,|extrw,s,}");
7234 	    if (GET_MODE (operands[0]) == DImode)
7235 	      strcpy (buf, "extrd,s,*");
7236 	    if ((which == 0 && negated)
7237 		|| (which == 1 && ! negated))
7238 	      strcat (buf, "<");
7239 	    else
7240 	      strcat (buf, ">=");
7241 	    if (nullify && negated)
7242 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7243 	    else if (nullify && ! negated)
7244 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7245 	    else if (negated)
7246 	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7247 	    else
7248 	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7249 	  }
7250 	break;
7251 
7252       default:
7253 	/* The reversed conditional branch must branch over one additional
7254 	   instruction if the delay slot is filled and needs to be extracted
7255 	   by pa_output_lbranch.  If the delay slot is empty or this is a
7256 	   nullified forward branch, the instruction after the reversed
7257 	   condition branch must be nullified.  */
7258 	if (dbr_sequence_length () == 0
7259 	    || (nullify && forward_branch_p (insn)))
7260 	  {
7261 	    nullify = 1;
7262 	    xdelay = 0;
7263 	    operands[4] = GEN_INT (length);
7264 	  }
7265 	else
7266 	  {
7267 	    xdelay = 1;
7268 	    operands[4] = GEN_INT (length + 4);
7269 	  }
7270 
7271 	if (GET_MODE (operands[0]) == DImode)
7272 	  strcpy (buf, "bb,*");
7273 	else
7274 	  strcpy (buf, "{bvb,|bb,}");
7275 	if ((which == 0 && negated)
7276 	    || (which == 1 && !negated))
7277 	  strcat (buf, "<");
7278 	else
7279 	  strcat (buf, ">=");
7280 	if (nullify)
7281 	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7282 	else
7283 	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
     	/* The reversed branch is emitted immediately; the long branch
     	   and the returned template come from pa_output_lbranch.  */
7284 	output_asm_insn (buf, operands);
7285 	return pa_output_lbranch (negated ? operands[3] : operands[2],
7286 				  insn, xdelay);
7287     }
7288   return buf;
7289 }
7290 
7291 /* Return the output template for emitting a dbra type insn.
7292 
7293    Note it may perform some output operations on its own before
7294    returning the final output string.  */
     /* Operand usage, as seen in the templates below: OPERANDS[0] is the
        loop counter, OPERANDS[1] is the amount added to it, OPERANDS[2]
        supplies the condition through the %C2/%N2 (or %S2/%B2) modifiers
        and OPERANDS[3] is the branch target.  OPERANDS[4] is used as a
        scratch general register in alternatives 1 and 2 and is
        overwritten with a displacement in the long form of alternative 0;
        OPERANDS[5] is overwritten with a displacement in the long forms
        of alternatives 1 and 2.

        WHICH_ALTERNATIVE 0 operates on the counter directly
        (NOTE(review): presumably a general register -- confirm against
        the insn pattern), 1 is the reload-from-FP-register case and any
        other value is the reload-from-memory case.  */
7295 const char *
7296 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7297 {
7298   int length = get_attr_length (insn);
7299 
7300   /* A conditional branch to the following instruction (e.g. the delay slot) is
7301      asking for a disaster.  Be prepared!  */
7302 
     /* In that situation only the counter update is emitted and the
        branch itself is dropped.  */
7303   if (branch_to_delay_slot_p (insn))
7304     {
7305       if (which_alternative == 0)
7306 	return "ldo %1(%0),%0";
7307       else if (which_alternative == 1)
7308 	{
7309 	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7310 	  output_asm_insn ("ldw -16(%%r30),%4", operands);
7311 	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7312 	  return "{fldws|fldw} -16(%%r30),%0";
7313 	}
7314       else
7315 	{
7316 	  output_asm_insn ("ldw %0,%4", operands);
7317 	  return "ldo %1(%4),%4\n\tstw %4,%0";
7318 	}
7319     }
7320 
7321   if (which_alternative == 0)
7322     {
7323       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7324       int xdelay;
7325 
7326       /* If this is a long branch with its delay slot unfilled, set `nullify'
7327 	 as it can nullify the delay slot and save a nop.  */
7328       if (length == 8 && dbr_sequence_length () == 0)
7329 	nullify = 1;
7330 
7331       /* If this is a short forward conditional branch which did not get
7332 	 its delay slot filled, the delay slot can still be nullified.  */
7333       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7334 	nullify = forward_branch_p (insn);
7335 
7336       switch (length)
7337 	{
7338 	case 4:
7339 	  if (nullify)
7340 	    {
7341 	      if (branch_needs_nop_p (insn))
7342 		return "addib,%C2,n %1,%0,%3%#";
7343 	      else
7344 		return "addib,%C2,n %1,%0,%3";
7345 	    }
7346 	  else
7347 	    return "addib,%C2 %1,%0,%3";
7348 
7349 	case 8:
7350 	  /* Handle weird backwards branch with a filled delay slot
7351 	     which is nullified.  */
7352 	  if (dbr_sequence_length () != 0
7353 	      && ! forward_branch_p (insn)
7354 	      && nullify)
7355 	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
7356 	  /* Handle short backwards branch with an unfilled delay slot.
7357 	     Using a addb;nop rather than addi;bl saves 1 cycle for both
7358 	     taken and untaken branches.  */
7359 	  else if (dbr_sequence_length () == 0
7360 		   && ! forward_branch_p (insn)
7361 		   && INSN_ADDRESSES_SET_P ()
7362 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7363 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7364 	      return "addib,%C2 %1,%0,%3%#";
7365 
7366 	  /* Handle normal cases.  */
7367 	  if (nullify)
7368 	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
7369 	  else
7370 	    return "addi,%N2 %1,%0,%0\n\tb %3";
7371 
7372 	default:
7373 	  /* The reversed conditional branch must branch over one additional
7374 	     instruction if the delay slot is filled and needs to be extracted
7375 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7376 	     nullified forward branch, the instruction after the reversed
7377 	     condition branch must be nullified.  */
7378 	  if (dbr_sequence_length () == 0
7379 	      || (nullify && forward_branch_p (insn)))
7380 	    {
7381 	      nullify = 1;
7382 	      xdelay = 0;
7383 	      operands[4] = GEN_INT (length);
7384 	    }
7385 	  else
7386 	    {
7387 	      xdelay = 1;
7388 	      operands[4] = GEN_INT (length + 4);
7389 	    }
7390 
7391 	  if (nullify)
7392 	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7393 	  else
7394 	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7395 
7396 	  return pa_output_lbranch (operands[3], insn, xdelay);
7397 	}
7398 
7399     }
7400   /* Deal with gross reload from FP register case.  */
7401   else if (which_alternative == 1)
7402     {
7403       /* Move loop counter from FP register to MEM then into a GR,
7404 	 increment the GR, store the GR into MEM, and finally reload
7405 	 the FP register from MEM from within the branch's delay slot.  */
7406       output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7407 		       operands);
7408       output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
     	/* LENGTH is the length of the whole sequence, including the
     	   insns already output above; 24 and 28 select the two short
     	   forms.  */
7409       if (length == 24)
7410 	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7411       else if (length == 28)
7412 	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7413       else
7414 	{
7415 	  operands[5] = GEN_INT (length - 16);
7416 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7417 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7418 	  return pa_output_lbranch (operands[3], insn, 0);
7419 	}
7420     }
7421   /* Deal with gross reload from memory case.  */
7422   else
7423     {
7424       /* Reload loop counter from memory, the store back to memory
7425 	 happens in the branch's delay slot.  */
7426       output_asm_insn ("ldw %0,%4", operands);
7427       if (length == 12)
7428 	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7429       else if (length == 16)
7430 	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7431       else
7432 	{
7433 	  operands[5] = GEN_INT (length - 4);
7434 	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7435 	  return pa_output_lbranch (operands[3], insn, 0);
7436 	}
7437     }
7438 }
7439 
7440 /* Return the output template for emitting a movb type insn.
7441 
7442    Note it may perform some output operations on its own before
7443    returning the final output string.  */
7444 const char *
7445 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7446 	     int reverse_comparison)
7447 {
7448   int length = get_attr_length (insn);
7449 
7450   /* A conditional branch to the following instruction (e.g. the delay slot) is
7451      asking for a disaster.  Be prepared!  */
7452 
7453   if (branch_to_delay_slot_p (insn))
7454     {
7455       if (which_alternative == 0)
7456 	return "copy %1,%0";
7457       else if (which_alternative == 1)
7458 	{
7459 	  output_asm_insn ("stw %1,-16(%%r30)", operands);
7460 	  return "{fldws|fldw} -16(%%r30),%0";
7461 	}
7462       else if (which_alternative == 2)
7463 	return "stw %1,%0";
7464       else
7465 	return "mtsar %r1";
7466     }
7467 
7468   /* Support the second variant.  */
7469   if (reverse_comparison)
7470     PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7471 
7472   if (which_alternative == 0)
7473     {
7474       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7475       int xdelay;
7476 
7477       /* If this is a long branch with its delay slot unfilled, set `nullify'
7478 	 as it can nullify the delay slot and save a nop.  */
7479       if (length == 8 && dbr_sequence_length () == 0)
7480 	nullify = 1;
7481 
7482       /* If this is a short forward conditional branch which did not get
7483 	 its delay slot filled, the delay slot can still be nullified.  */
7484       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7485 	nullify = forward_branch_p (insn);
7486 
7487       switch (length)
7488 	{
7489 	case 4:
7490 	  if (nullify)
7491 	    {
7492 	      if (branch_needs_nop_p (insn))
7493 		return "movb,%C2,n %1,%0,%3%#";
7494 	      else
7495 		return "movb,%C2,n %1,%0,%3";
7496 	    }
7497 	  else
7498 	    return "movb,%C2 %1,%0,%3";
7499 
7500 	case 8:
7501 	  /* Handle weird backwards branch with a filled delay slot
7502 	     which is nullified.  */
7503 	  if (dbr_sequence_length () != 0
7504 	      && ! forward_branch_p (insn)
7505 	      && nullify)
7506 	    return "movb,%N2,n %1,%0,.+12\n\tb %3";
7507 
7508 	  /* Handle short backwards branch with an unfilled delay slot.
7509 	     Using a movb;nop rather than or;bl saves 1 cycle for both
7510 	     taken and untaken branches.  */
7511 	  else if (dbr_sequence_length () == 0
7512 		   && ! forward_branch_p (insn)
7513 		   && INSN_ADDRESSES_SET_P ()
7514 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7515 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7516 	    return "movb,%C2 %1,%0,%3%#";
7517 	  /* Handle normal cases.  */
7518 	  if (nullify)
7519 	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7520 	  else
7521 	    return "or,%N2 %1,%%r0,%0\n\tb %3";
7522 
7523 	default:
7524 	  /* The reversed conditional branch must branch over one additional
7525 	     instruction if the delay slot is filled and needs to be extracted
7526 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7527 	     nullified forward branch, the instruction after the reversed
7528 	     condition branch must be nullified.  */
7529 	  if (dbr_sequence_length () == 0
7530 	      || (nullify && forward_branch_p (insn)))
7531 	    {
7532 	      nullify = 1;
7533 	      xdelay = 0;
7534 	      operands[4] = GEN_INT (length);
7535 	    }
7536 	  else
7537 	    {
7538 	      xdelay = 1;
7539 	      operands[4] = GEN_INT (length + 4);
7540 	    }
7541 
7542 	  if (nullify)
7543 	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7544 	  else
7545 	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7546 
7547 	  return pa_output_lbranch (operands[3], insn, xdelay);
7548 	}
7549     }
7550   /* Deal with gross reload for FP destination register case.  */
7551   else if (which_alternative == 1)
7552     {
7553       /* Move source register to MEM, perform the branch test, then
7554 	 finally load the FP register from MEM from within the branch's
7555 	 delay slot.  */
7556       output_asm_insn ("stw %1,-16(%%r30)", operands);
7557       if (length == 12)
7558 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7559       else if (length == 16)
7560 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7561       else
7562 	{
7563 	  operands[4] = GEN_INT (length - 4);
7564 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7565 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7566 	  return pa_output_lbranch (operands[3], insn, 0);
7567 	}
7568     }
7569   /* Deal with gross reload from memory case.  */
7570   else if (which_alternative == 2)
7571     {
7572       /* Reload loop counter from memory, the store back to memory
7573 	 happens in the branch's delay slot.  */
7574       if (length == 8)
7575 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7576       else if (length == 12)
7577 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7578       else
7579 	{
7580 	  operands[4] = GEN_INT (length);
7581 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7582 			   operands);
7583 	  return pa_output_lbranch (operands[3], insn, 0);
7584 	}
7585     }
7586   /* Handle SAR as a destination.  */
7587   else
7588     {
7589       if (length == 8)
7590 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7591       else if (length == 12)
7592 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7593       else
7594 	{
7595 	  operands[4] = GEN_INT (length);
7596 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7597 			   operands);
7598 	  return pa_output_lbranch (operands[3], insn, 0);
7599 	}
7600     }
7601 }
7602 
7603 /* Copy any FP arguments in INSN into integer registers.  */
7604 static void
7605 copy_fp_args (rtx_insn *insn)
7606 {
7607   rtx link;
7608   rtx xoperands[2];
7609 
7610   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7611     {
7612       int arg_mode, regno;
7613       rtx use = XEXP (link, 0);
7614 
7615       if (! (GET_CODE (use) == USE
7616 	  && GET_CODE (XEXP (use, 0)) == REG
7617 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7618 	continue;
7619 
7620       arg_mode = GET_MODE (XEXP (use, 0));
7621       regno = REGNO (XEXP (use, 0));
7622 
7623       /* Is it a floating point register?  */
7624       if (regno >= 32 && regno <= 39)
7625 	{
7626 	  /* Copy the FP register into an integer register via memory.  */
7627 	  if (arg_mode == SFmode)
7628 	    {
7629 	      xoperands[0] = XEXP (use, 0);
7630 	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7631 	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7632 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7633 	    }
7634 	  else
7635 	    {
7636 	      xoperands[0] = XEXP (use, 0);
7637 	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7638 	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7639 	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7640 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7641 	    }
7642 	}
7643     }
7644 }
7645 
7646 /* Compute length of the FP argument copy sequence for INSN.  */
7647 static int
7648 length_fp_args (rtx_insn *insn)
7649 {
7650   int length = 0;
7651   rtx link;
7652 
7653   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7654     {
7655       int arg_mode, regno;
7656       rtx use = XEXP (link, 0);
7657 
7658       if (! (GET_CODE (use) == USE
7659 	  && GET_CODE (XEXP (use, 0)) == REG
7660 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7661 	continue;
7662 
7663       arg_mode = GET_MODE (XEXP (use, 0));
7664       regno = REGNO (XEXP (use, 0));
7665 
7666       /* Is it a floating point register?  */
7667       if (regno >= 32 && regno <= 39)
7668 	{
7669 	  if (arg_mode == SFmode)
7670 	    length += 8;
7671 	  else
7672 	    length += 12;
7673 	}
7674     }
7675 
7676   return length;
7677 }
7678 
7679 /* Return the attribute length for the millicode call instruction INSN.
7680    The length must match the code generated by pa_output_millicode_call.
7681    We include the delay slot in the returned length as it is better to
7682    over estimate the length than to under estimate it.  */
7683 
7684 int
7685 pa_attr_length_millicode_call (rtx_insn *insn)
7686 {
7687   unsigned long distance = -1;
7688   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7689 
7690   if (INSN_ADDRESSES_SET_P ())
7691     {
7692       distance = (total + insn_current_reference_address (insn));
7693       if (distance < total)
7694 	distance = -1;
7695     }
7696 
7697   if (TARGET_64BIT)
7698     {
7699       if (!TARGET_LONG_CALLS && distance < 7600000)
7700 	return 8;
7701 
7702       return 20;
7703     }
7704   else if (TARGET_PORTABLE_RUNTIME)
7705     return 24;
7706   else
7707     {
7708       if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7709 	return 8;
7710 
7711       if (!flag_pic)
7712 	return 12;
7713 
7714       return 24;
7715     }
7716 }
7717 
7718 /* INSN is a function call.
7719 
7720    CALL_DEST is the routine we are calling.  */
7721 
7722 const char *
7723 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7724 {
7725   int attr_length = get_attr_length (insn);
7726   int seq_length = dbr_sequence_length ();
7727   rtx xoperands[4];
7728 
7729   xoperands[0] = call_dest;
7730 
7731   /* Handle the common case where we are sure that the branch will
7732      reach the beginning of the $CODE$ subspace.  The within reach
7733      form of the $$sh_func_adrs call has a length of 28.  Because it
7734      has an attribute type of sh_func_adrs, it never has a nonzero
7735      sequence length (i.e., the delay slot is never filled).  */
7736   if (!TARGET_LONG_CALLS
7737       && (attr_length == 8
7738 	  || (attr_length == 28
7739 	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7740     {
7741       xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7742       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7743     }
7744   else
7745     {
7746       if (TARGET_64BIT)
7747 	{
7748 	  /* It might seem that one insn could be saved by accessing
7749 	     the millicode function using the linkage table.  However,
7750 	     this doesn't work in shared libraries and other dynamically
7751 	     loaded objects.  Using a pc-relative sequence also avoids
7752 	     problems related to the implicit use of the gp register.  */
7753 	  xoperands[1] = gen_rtx_REG (Pmode, 1);
7754 	  xoperands[2] = xoperands[1];
7755 	  pa_output_pic_pcrel_sequence (xoperands);
7756 	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7757 	}
7758       else if (TARGET_PORTABLE_RUNTIME)
7759 	{
7760 	  /* Pure portable runtime doesn't allow be/ble; we also don't
7761 	     have PIC support in the assembler/linker, so this sequence
7762 	     is needed.  */
7763 
7764 	  /* Get the address of our target into %r1.  */
7765 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7766 	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7767 
7768 	  /* Get our return address into %r31.  */
7769 	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7770 	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7771 
7772 	  /* Jump to our target address in %r1.  */
7773 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7774 	}
7775       else if (!flag_pic)
7776 	{
7777 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7778 	  if (TARGET_PA_20)
7779 	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7780 	  else
7781 	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7782 	}
7783       else
7784 	{
7785 	  xoperands[1] = gen_rtx_REG (Pmode, 31);
7786 	  xoperands[2] = gen_rtx_REG (Pmode, 1);
7787 	  pa_output_pic_pcrel_sequence (xoperands);
7788 
7789 	  /* Adjust return address.  */
7790 	  output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7791 
7792 	  /* Jump to our target address in %r1.  */
7793 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7794 	}
7795     }
7796 
7797   if (seq_length == 0)
7798     output_asm_insn ("nop", xoperands);
7799 
7800   return "";
7801 }
7802 
7803 /* Return the attribute length of the call instruction INSN.  The SIBCALL
7804    flag indicates whether INSN is a regular call or a sibling call.  The
7805    length returned must be longer than the code actually generated by
7806    pa_output_call.  Since branch shortening is done before delay branch
7807    sequencing, there is no way to determine whether or not the delay
7808    slot will be filled during branch shortening.  Even when the delay
7809    slot is filled, we may have to add a nop if the delay slot contains
7810    a branch that can't reach its target.  Thus, we always have to include
7811    the delay slot in the length estimate.  This used to be done in
7812    pa_adjust_insn_length but we do it here now as some sequences always
7813    fill the delay slot and we can save four bytes in the estimate for
7814    these sequences.  */
7815 
7816 int
7817 pa_attr_length_call (rtx_insn *insn, int sibcall)
7818 {
7819   int local_call;
7820   rtx call, call_dest;
7821   tree call_decl;
7822   int length = 0;
7823   rtx pat = PATTERN (insn);
7824   unsigned long distance = -1;
7825 
7826   gcc_assert (CALL_P (insn));
7827 
7828   if (INSN_ADDRESSES_SET_P ())
7829     {
7830       unsigned long total;
7831 
7832       total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7833       distance = (total + insn_current_reference_address (insn));
7834       if (distance < total)
7835 	distance = -1;
7836     }
7837 
7838   gcc_assert (GET_CODE (pat) == PARALLEL);
7839 
7840   /* Get the call rtx.  */
7841   call = XVECEXP (pat, 0, 0);
7842   if (GET_CODE (call) == SET)
7843     call = SET_SRC (call);
7844 
7845   gcc_assert (GET_CODE (call) == CALL);
7846 
7847   /* Determine if this is a local call.  */
7848   call_dest = XEXP (XEXP (call, 0), 0);
7849   call_decl = SYMBOL_REF_DECL (call_dest);
7850   local_call = call_decl && targetm.binds_local_p (call_decl);
7851 
7852   /* pc-relative branch.  */
7853   if (!TARGET_LONG_CALLS
7854       && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7855 	  || distance < MAX_PCREL17F_OFFSET))
7856     length += 8;
7857 
7858   /* 64-bit plabel sequence.  */
7859   else if (TARGET_64BIT && !local_call)
7860     length += 24;
7861 
7862   /* non-pic long absolute branch sequence.  */
7863   else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7864     length += 12;
7865 
7866   /* long pc-relative branch sequence.  */
7867   else if (TARGET_LONG_PIC_SDIFF_CALL
7868 	   || (TARGET_GAS && !TARGET_SOM && local_call))
7869     {
7870       length += 20;
7871 
7872       if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7873 	length += 8;
7874     }
7875 
7876   /* 32-bit plabel sequence.  */
7877   else
7878     {
7879       length += 32;
7880 
7881       if (TARGET_SOM)
7882 	length += length_fp_args (insn);
7883 
7884       if (flag_pic)
7885 	length += 4;
7886 
7887       if (!TARGET_PA_20)
7888 	{
7889 	  if (!sibcall)
7890 	    length += 8;
7891 
7892 	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7893 	    length += 8;
7894 	}
7895     }
7896 
7897   return length;
7898 }
7899 
/* Output the assembler code for the function call INSN.

   CALL_DEST is the routine we are calling and SIBCALL is nonzero when
   INSN is a sibling call.  The complete sequence is written with
   output_asm_insn, so the template returned is always the empty string.

   The short pc-relative form is used when pa_attr_length_call reports
   a length of 8; otherwise one of several long call sequences is
   emitted.  Where the selected sequence cannot keep an insn in the
   delay slot of the call, that insn is output ahead of the sequence
   and then deleted.  A nop is added at the end if the delay slot is
   still unused.  */

const char *
pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
{
  int seq_length = dbr_sequence_length ();
  tree call_decl = SYMBOL_REF_DECL (call_dest);
  int local_call = call_decl && targetm.binds_local_p (call_decl);
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we're sure that the branch will reach
     the beginning of the "$CODE$" subspace.  This is the beginning of
     the current function if we are in a named section.  */
  if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
    {
      /* A sibling call links through %r0, a regular call through %r2.  */
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT && !local_call)
	{
	  /* ??? As far as I can tell, the HP linker doesn't support the
	     long pc-relative sequence described in the 64-bit runtime
	     architecture.  So, we use a slightly longer indirect call.  */
	  xoperands[0] = pa_get_deferred_plabel (call_dest);
	  xoperands[1] = gen_label_rtx ();

	  /* Put the load of %r27 into the delay slot.  We don't need to
	     do anything when generating fast indirect calls.  */
	  if (seq_length != 0)
	    {
	      /* The insn that was in the delay slot is output first.  */
	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
			       optimize, 0, NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	    }

	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
	  output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
	  output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	  /* The ldd above occupies the delay slot; no nop is needed.  */
	  seq_length = 1;
	}
      else
	{
	  int indirect_call = 0;

	  /* Emit a long call.  There are several different sequences
	     of increasing length and complexity.  In most cases,
	     they don't allow an instruction in the delay slot.  */
	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	      && !TARGET_LONG_PIC_SDIFF_CALL
	      && !(TARGET_GAS && !TARGET_SOM && local_call)
	      && !TARGET_64BIT)
	    indirect_call = 1;

	  if (seq_length != 0
	      && !sibcall
	      && (!TARGET_PA_20
		  || indirect_call
		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
	    {
	      /* A non-jump insn in the delay slot.  By definition we can
		 emit this insn before the call (and in fact before
		 argument relocating).  */
	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
			       NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	      seq_length = 0;
	    }

	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	    {
	      /* This is the best sequence for making long calls in
		 non-pic code.  Unfortunately, GNU ld doesn't provide
		 the stub needed for external calls, and GAS's support
		 for this with the SOM linker is buggy.  It is safe
		 to use this for local calls.  */
	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
	      if (sibcall)
		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
	      else
		{
		  if (TARGET_PA_20)
		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
				     xoperands);
		  else
		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

		  /* The copy of the link register %r31 into %r2 takes
		     the delay slot.  */
		  output_asm_insn ("copy %%r31,%%r2", xoperands);
		  seq_length = 1;
		}
	    }
	  else
	    {
	      /* The HP assembler and linker can handle relocations for
		 the difference of two symbols.  The HP assembler
		 recognizes the sequence as a pc-relative call and
		 the linker provides stubs when needed.  */

	      /* GAS currently can't generate the relocations that
		 are needed for the SOM linker under HP-UX using this
		 sequence.  The GNU linker doesn't generate the stubs
		 that are needed for external calls on TARGET_ELF32
		 with this sequence.  For now, we have to use a longer
		 plabel sequence when using GAS for non local calls.  */
	      if (TARGET_LONG_PIC_SDIFF_CALL
		  || (TARGET_GAS && !TARGET_SOM && local_call))
		{
		  xoperands[1] = gen_rtx_REG (Pmode, 1);
		  xoperands[2] = xoperands[1];
		  pa_output_pic_pcrel_sequence (xoperands);
		}
	      else
		{
		  /* Emit a long plabel-based call sequence.  This is
		     essentially an inline implementation of $$dyncall.
		     We don't actually try to call $$dyncall as this is
		     as difficult as calling the function itself.  */
		  xoperands[0] = pa_get_deferred_plabel (call_dest);
		  xoperands[1] = gen_label_rtx ();

		  /* Since the call is indirect, FP arguments in registers
		     need to be copied to the general registers.  Then, the
		     argument relocation stub will copy them back.  */
		  if (TARGET_SOM)
		    copy_fp_args (insn);

		  if (flag_pic)
		    {
		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
		      output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
		    }
		  else
		    {
		      output_asm_insn ("addil LR'%0-$global$,%%r27",
				       xoperands);
		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
				       xoperands);
		    }

		  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
		  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
		  /* Should this be an ordered load to ensure the target
		     address is loaded before the global pointer?  */
		  output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
		  output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);

		  /* Without PA 2.0, a regular call computes its return
		     address in %r2 here; the offset added depends on
		     whether the ldsid/mtsp pair is emitted below.  */
		  if (!sibcall && !TARGET_PA_20)
		    {
		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
		      else
			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
		    }
		}

	      if (TARGET_PA_20)
		{
		  if (sibcall)
		    output_asm_insn ("bve (%%r1)", xoperands);
		  else
		    {
		      if (indirect_call)
			{
			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
			  /* The store of %r2 takes the delay slot.  */
			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
			  seq_length = 1;
			}
		      else
			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
		    }
		}
	      else
		{
		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
				     xoperands);

		  if (sibcall)
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
		    }
		  else
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

		      /* The insn following the ble takes the delay slot.  */
		      if (indirect_call)
			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
		      else
			output_asm_insn ("copy %%r31,%%r2", xoperands);
		      seq_length = 1;
		    }
		}
	    }
	}
    }

  /* Nothing ended up in the delay slot, so fill it with a nop.  */
  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}
8121 
8122 /* Return the attribute length of the indirect call instruction INSN.
8123    The length must match the code generated by output_indirect call.
8124    The returned length includes the delay slot.  Currently, the delay
8125    slot of an indirect call sequence is not exposed and it is used by
8126    the sequence itself.  */
8127 
8128 int
8129 pa_attr_length_indirect_call (rtx_insn *insn)
8130 {
8131   unsigned long distance = -1;
8132   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8133 
8134   if (INSN_ADDRESSES_SET_P ())
8135     {
8136       distance = (total + insn_current_reference_address (insn));
8137       if (distance < total)
8138 	distance = -1;
8139     }
8140 
8141   if (TARGET_64BIT)
8142     return 12;
8143 
8144   if (TARGET_FAST_INDIRECT_CALLS)
8145     return 8;
8146 
8147   if (TARGET_PORTABLE_RUNTIME)
8148     return 16;
8149 
8150   if (!TARGET_LONG_CALLS
8151       && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8152 	  || distance < MAX_PCREL17F_OFFSET))
8153     return 8;
8154 
8155   /* Out of reach, can use ble.  */
8156   if (!flag_pic)
8157     return 12;
8158 
8159   /* Inline versions of $$dyncall.  */
8160   if (!optimize_size)
8161     {
8162       if (TARGET_NO_SPACE_REGS)
8163 	return 28;
8164 
8165       if (TARGET_PA_20)
8166 	return 32;
8167     }
8168 
8169   /* Long PIC pc-relative call.  */
8170   return 20;
8171 }
8172 
8173 const char *
8174 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8175 {
8176   rtx xoperands[4];
8177   int length;
8178 
8179   if (TARGET_64BIT)
8180     {
8181       xoperands[0] = call_dest;
8182       output_asm_insn ("ldd 16(%0),%%r2\n\t"
8183 		       "bve,l (%%r2),%%r2\n\t"
8184 		       "ldd 24(%0),%%r27", xoperands);
8185       return "";
8186     }
8187 
8188   /* First the special case for kernels, level 0 systems, etc.  */
8189   if (TARGET_FAST_INDIRECT_CALLS)
8190     {
8191       pa_output_arg_descriptor (insn);
8192       if (TARGET_PA_20)
8193 	return "bve,l,n (%%r22),%%r2\n\tnop";
8194       return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8195     }
8196 
8197   if (TARGET_PORTABLE_RUNTIME)
8198     {
8199       output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8200 		       "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8201       pa_output_arg_descriptor (insn);
8202       return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8203     }
8204 
8205   /* Now the normal case -- we can reach $$dyncall directly or
8206      we're sure that we can get there via a long-branch stub.
8207 
8208      No need to check target flags as the length uniquely identifies
8209      the remaining cases.  */
8210   length = pa_attr_length_indirect_call (insn);
8211   if (length == 8)
8212     {
8213       pa_output_arg_descriptor (insn);
8214 
8215       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8216 	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8217 	 variant of the B,L instruction can't be used on the SOM target.  */
8218       if (TARGET_PA_20 && !TARGET_SOM)
8219 	return "b,l,n $$dyncall,%%r2\n\tnop";
8220       else
8221 	return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8222     }
8223 
8224   /* Long millicode call, but we are not generating PIC or portable runtime
8225      code.  */
8226   if (length == 12)
8227     {
8228       output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8229       pa_output_arg_descriptor (insn);
8230       return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8231     }
8232 
8233   /* The long PIC pc-relative call sequence is five instructions.  So,
8234      let's use an inline version of $$dyncall when the calling sequence
8235      has a roughly similar number of instructions and we are not optimizing
8236      for size.  We need two instructions to load the return pointer plus
8237      the $$dyncall implementation.  */
8238   if (!optimize_size)
8239     {
8240       if (TARGET_NO_SPACE_REGS)
8241 	{
8242 	  pa_output_arg_descriptor (insn);
8243 	  output_asm_insn ("bl .+8,%%r2\n\t"
8244 			   "ldo 20(%%r2),%%r2\n\t"
8245 			   "extru,<> %%r22,30,1,%%r0\n\t"
8246 			   "bv,n %%r0(%%r22)\n\t"
8247 			   "ldw -2(%%r22),%%r21\n\t"
8248 			   "bv %%r0(%%r21)\n\t"
8249 			   "ldw 2(%%r22),%%r19", xoperands);
8250 	  return "";
8251 	}
8252       if (TARGET_PA_20)
8253 	{
8254 	  pa_output_arg_descriptor (insn);
8255 	  output_asm_insn ("bl .+8,%%r2\n\t"
8256 			   "ldo 24(%%r2),%%r2\n\t"
8257 			   "stw %%r2,-24(%%sp)\n\t"
8258 			   "extru,<> %r22,30,1,%%r0\n\t"
8259 			   "bve,n (%%r22)\n\t"
8260 			   "ldw -2(%%r22),%%r21\n\t"
8261 			   "bve (%%r21)\n\t"
8262 			   "ldw 2(%%r22),%%r19", xoperands);
8263 	  return "";
8264 	}
8265     }
8266 
8267   /* We need a long PIC call to $$dyncall.  */
8268   xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8269   xoperands[1] = gen_rtx_REG (Pmode, 2);
8270   xoperands[2] = gen_rtx_REG (Pmode, 1);
8271   pa_output_pic_pcrel_sequence (xoperands);
8272   pa_output_arg_descriptor (insn);
8273   return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8274 }
8275 
8276 /* In HPUX 8.0's shared library scheme, special relocations are needed
8277    for function labels if they might be passed to a function
8278    in a shared library (because shared libraries don't live in code
8279    space), and special magic is needed to construct their address.  */
8280 
8281 void
8282 pa_encode_label (rtx sym)
8283 {
8284   const char *str = XSTR (sym, 0);
8285   int len = strlen (str) + 1;
8286   char *newstr, *p;
8287 
8288   p = newstr = XALLOCAVEC (char, len + 1);
8289   *p++ = '@';
8290   strcpy (p, str);
8291 
8292   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8293 }
8294 
8295 static void
8296 pa_encode_section_info (tree decl, rtx rtl, int first)
8297 {
8298   int old_referenced = 0;
8299 
8300   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8301     old_referenced
8302       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8303 
8304   default_encode_section_info (decl, rtl, first);
8305 
8306   if (first && TEXT_SPACE_P (decl))
8307     {
8308       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8309       if (TREE_CODE (decl) == FUNCTION_DECL)
8310 	pa_encode_label (XEXP (rtl, 0));
8311     }
8312   else if (old_referenced)
8313     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8314 }
8315 
8316 /* This is sort of inverse to pa_encode_section_info.  */
8317 
/* Return a pointer into STR that skips a leading '@' and then a
   leading '*', each only if present and only in that order.  */
static const char *
pa_strip_name_encoding (const char *str)
{
  if (str[0] == '@')
    str++;
  if (str[0] == '*')
    str++;
  return str;
}
8325 
8326 /* Returns 1 if OP is a function label involved in a simple addition
8327    with a constant.  Used to keep certain patterns from matching
8328    during instruction combination.  */
8329 int
8330 pa_is_function_label_plus_const (rtx op)
8331 {
8332   /* Strip off any CONST.  */
8333   if (GET_CODE (op) == CONST)
8334     op = XEXP (op, 0);
8335 
8336   return (GET_CODE (op) == PLUS
8337 	  && function_label_operand (XEXP (op, 0), VOIDmode)
8338 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8339 }
8340 
8341 /* Output assembly code for a thunk to FUNCTION.  */
8342 
8343 static void
8344 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8345 			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8346 			tree function)
8347 {
8348   static unsigned int current_thunk_number;
8349   int val_14 = VAL_14_BITS_P (delta);
8350   unsigned int old_last_address = last_address, nbytes = 0;
8351   char label[17];
8352   rtx xoperands[4];
8353 
8354   xoperands[0] = XEXP (DECL_RTL (function), 0);
8355   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8356   xoperands[2] = GEN_INT (delta);
8357 
8358   final_start_function (emit_barrier (), file, 1);
8359 
8360   /* Output the thunk.  We know that the function is in the same
8361      translation unit (i.e., the same space) as the thunk, and that
8362      thunks are output after their method.  Thus, we don't need an
8363      external branch to reach the function.  With SOM and GAS,
8364      functions and thunks are effectively in different sections.
8365      Thus, we can always use a IA-relative branch and the linker
8366      will add a long branch stub if necessary.
8367 
8368      However, we have to be careful when generating PIC code on the
8369      SOM port to ensure that the sequence does not transfer to an
8370      import stub for the target function as this could clobber the
8371      return value saved at SP-24.  This would also apply to the
8372      32-bit linux port if the multi-space model is implemented.  */
8373   if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8374        && !(flag_pic && TREE_PUBLIC (function))
8375        && (TARGET_GAS || last_address < 262132))
8376       || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8377 	  && ((targetm_common.have_named_sections
8378 	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
8379 	       /* The GNU 64-bit linker has rather poor stub management.
8380 		  So, we use a long branch from thunks that aren't in
8381 		  the same section as the target function.  */
8382 	       && ((!TARGET_64BIT
8383 		    && (DECL_SECTION_NAME (thunk_fndecl)
8384 			!= DECL_SECTION_NAME (function)))
8385 		   || ((DECL_SECTION_NAME (thunk_fndecl)
8386 			== DECL_SECTION_NAME (function))
8387 		       && last_address < 262132)))
8388 	      /* In this case, we need to be able to reach the start of
8389 		 the stub table even though the function is likely closer
8390 		 and can be jumped to directly.  */
8391 	      || (targetm_common.have_named_sections
8392 		  && DECL_SECTION_NAME (thunk_fndecl) == NULL
8393 		  && DECL_SECTION_NAME (function) == NULL
8394 		  && total_code_bytes < MAX_PCREL17F_OFFSET)
8395 	      /* Likewise.  */
8396 	      || (!targetm_common.have_named_sections
8397 		  && total_code_bytes < MAX_PCREL17F_OFFSET))))
8398     {
8399       if (!val_14)
8400 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8401 
8402       output_asm_insn ("b %0", xoperands);
8403 
8404       if (val_14)
8405 	{
8406 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8407 	  nbytes += 8;
8408 	}
8409       else
8410 	{
8411 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8412 	  nbytes += 12;
8413 	}
8414     }
8415   else if (TARGET_64BIT)
8416     {
8417       rtx xop[4];
8418 
8419       /* We only have one call-clobbered scratch register, so we can't
8420          make use of the delay slot if delta doesn't fit in 14 bits.  */
8421       if (!val_14)
8422 	{
8423 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8424 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8425 	}
8426 
8427       /* Load function address into %r1.  */
8428       xop[0] = xoperands[0];
8429       xop[1] = gen_rtx_REG (Pmode, 1);
8430       xop[2] = xop[1];
8431       pa_output_pic_pcrel_sequence (xop);
8432 
8433       if (val_14)
8434 	{
8435 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
8436 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8437 	  nbytes += 20;
8438 	}
8439       else
8440 	{
8441 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8442 	  nbytes += 24;
8443 	}
8444     }
8445   else if (TARGET_PORTABLE_RUNTIME)
8446     {
8447       output_asm_insn ("ldil L'%0,%%r1", xoperands);
8448       output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8449 
8450       if (!val_14)
8451 	output_asm_insn ("ldil L'%2,%%r26", xoperands);
8452 
8453       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8454 
8455       if (val_14)
8456 	{
8457 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8458 	  nbytes += 16;
8459 	}
8460       else
8461 	{
8462 	  output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8463 	  nbytes += 20;
8464 	}
8465     }
8466   else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8467     {
8468       /* The function is accessible from outside this module.  The only
8469 	 way to avoid an import stub between the thunk and function is to
8470 	 call the function directly with an indirect sequence similar to
8471 	 that used by $$dyncall.  This is possible because $$dyncall acts
8472 	 as the import stub in an indirect call.  */
8473       ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8474       xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8475       output_asm_insn ("addil LT'%3,%%r19", xoperands);
8476       output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8477       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8478       output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8479       output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8480       output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8481       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8482 
8483       if (!val_14)
8484 	{
8485 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8486 	  nbytes += 4;
8487 	}
8488 
8489       if (TARGET_PA_20)
8490 	{
8491 	  output_asm_insn ("bve (%%r22)", xoperands);
8492 	  nbytes += 36;
8493 	}
8494       else if (TARGET_NO_SPACE_REGS)
8495 	{
8496 	  output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8497 	  nbytes += 36;
8498 	}
8499       else
8500 	{
8501 	  output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8502 	  output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8503 	  output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8504 	  nbytes += 44;
8505 	}
8506 
8507       if (val_14)
8508 	output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8509       else
8510 	output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8511     }
8512   else if (flag_pic)
8513     {
8514       rtx xop[4];
8515 
8516       /* Load function address into %r22.  */
8517       xop[0] = xoperands[0];
8518       xop[1] = gen_rtx_REG (Pmode, 1);
8519       xop[2] = gen_rtx_REG (Pmode, 22);
8520       pa_output_pic_pcrel_sequence (xop);
8521 
8522       if (!val_14)
8523 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8524 
8525       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8526 
8527       if (val_14)
8528 	{
8529 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8530 	  nbytes += 20;
8531 	}
8532       else
8533 	{
8534 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8535 	  nbytes += 24;
8536 	}
8537     }
8538   else
8539     {
8540       if (!val_14)
8541 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8542 
8543       output_asm_insn ("ldil L'%0,%%r22", xoperands);
8544       output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8545 
8546       if (val_14)
8547 	{
8548 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8549 	  nbytes += 12;
8550 	}
8551       else
8552 	{
8553 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8554 	  nbytes += 16;
8555 	}
8556     }
8557 
8558   final_end_function ();
8559 
8560   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8561     {
8562       switch_to_section (data_section);
8563       output_asm_insn (".align 4", xoperands);
8564       ASM_OUTPUT_LABEL (file, label);
8565       output_asm_insn (".word P'%0", xoperands);
8566     }
8567 
8568   current_thunk_number++;
8569   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8570 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8571   last_address += nbytes;
8572   if (old_last_address > last_address)
8573     last_address = UINT_MAX;
8574   update_total_code_bytes (nbytes);
8575 }
8576 
8577 /* Only direct calls to static functions are allowed to be sibling (tail)
8578    call optimized.
8579 
8580    This restriction is necessary because some linker generated stubs will
8581    store return pointers into rp' in some cases which might clobber a
8582    live value already in rp'.
8583 
8584    In a sibcall the current function and the target function share stack
8585    space.  Thus if the path to the current function and the path to the
8586    target function save a value in rp', they save the value into the
8587    same stack slot, which has undesirable consequences.
8588 
8589    Because of the deferred binding nature of shared libraries any function
8590    with external scope could be in a different load module and thus require
8591    rp' to be saved when calling that function.  So sibcall optimizations
8592    can only be safe for static function.
8593 
8594    Note that GCC never needs return value relocations, so we don't have to
8595    worry about static calls with return value relocations (which require
8596    saving rp').
8597 
8598    It is safe to perform a sibcall optimization when the target function
8599    will never return.  */
8600 static bool
8601 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8602 {
8603   /* Sibcalls are not ok because the arg pointer register is not a fixed
8604      register.  This prevents the sibcall optimization from occurring.  In
8605      addition, there are problems with stub placement using GNU ld.  This
8606      is because a normal sibcall branch uses a 17-bit relocation while
8607      a regular call branch uses a 22-bit relocation.  As a result, more
8608      care needs to be taken in the placement of long-branch stubs.  */
8609   if (TARGET_64BIT)
8610     return false;
8611 
8612   if (TARGET_PORTABLE_RUNTIME)
8613     return false;
8614 
8615   /* Sibcalls are only ok within a translation unit.  */
8616   return decl && targetm.binds_local_p (decl);
8617 }
8618 
8619 /* ??? Addition is not commutative on the PA due to the weird implicit
8620    space register selection rules for memory addresses.  Therefore, we
8621    don't consider a + b == b + a, as this might be inside a MEM.  */
8622 static bool
8623 pa_commutative_p (const_rtx x, int outer_code)
8624 {
8625   return (COMMUTATIVE_P (x)
8626 	  && (TARGET_NO_SPACE_REGS
8627 	      || (outer_code != UNKNOWN && outer_code != MEM)
8628 	      || GET_CODE (x) != PLUS));
8629 }
8630 
8631 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8632    use in fmpyadd instructions.  */
8633 int
8634 pa_fmpyaddoperands (rtx *operands)
8635 {
8636   machine_mode mode = GET_MODE (operands[0]);
8637 
8638   /* Must be a floating point mode.  */
8639   if (mode != SFmode && mode != DFmode)
8640     return 0;
8641 
8642   /* All modes must be the same.  */
8643   if (! (mode == GET_MODE (operands[1])
8644 	 && mode == GET_MODE (operands[2])
8645 	 && mode == GET_MODE (operands[3])
8646 	 && mode == GET_MODE (operands[4])
8647 	 && mode == GET_MODE (operands[5])))
8648     return 0;
8649 
8650   /* All operands must be registers.  */
8651   if (! (GET_CODE (operands[1]) == REG
8652 	 && GET_CODE (operands[2]) == REG
8653 	 && GET_CODE (operands[3]) == REG
8654 	 && GET_CODE (operands[4]) == REG
8655 	 && GET_CODE (operands[5]) == REG))
8656     return 0;
8657 
8658   /* Only 2 real operands to the addition.  One of the input operands must
8659      be the same as the output operand.  */
8660   if (! rtx_equal_p (operands[3], operands[4])
8661       && ! rtx_equal_p (operands[3], operands[5]))
8662     return 0;
8663 
8664   /* Inout operand of add cannot conflict with any operands from multiply.  */
8665   if (rtx_equal_p (operands[3], operands[0])
8666      || rtx_equal_p (operands[3], operands[1])
8667      || rtx_equal_p (operands[3], operands[2]))
8668     return 0;
8669 
8670   /* multiply cannot feed into addition operands.  */
8671   if (rtx_equal_p (operands[4], operands[0])
8672       || rtx_equal_p (operands[5], operands[0]))
8673     return 0;
8674 
8675   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8676   if (mode == SFmode
8677       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8678 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8679 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8680 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8681 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8682 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8683     return 0;
8684 
8685   /* Passed.  Operands are suitable for fmpyadd.  */
8686   return 1;
8687 }
8688 
8689 #if !defined(USE_COLLECT2)
8690 static void
8691 pa_asm_out_constructor (rtx symbol, int priority)
8692 {
8693   if (!function_label_operand (symbol, VOIDmode))
8694     pa_encode_label (symbol);
8695 
8696 #ifdef CTORS_SECTION_ASM_OP
8697   default_ctor_section_asm_out_constructor (symbol, priority);
8698 #else
8699 # ifdef TARGET_ASM_NAMED_SECTION
8700   default_named_section_asm_out_constructor (symbol, priority);
8701 # else
8702   default_stabs_asm_out_constructor (symbol, priority);
8703 # endif
8704 #endif
8705 }
8706 
8707 static void
8708 pa_asm_out_destructor (rtx symbol, int priority)
8709 {
8710   if (!function_label_operand (symbol, VOIDmode))
8711     pa_encode_label (symbol);
8712 
8713 #ifdef DTORS_SECTION_ASM_OP
8714   default_dtor_section_asm_out_destructor (symbol, priority);
8715 #else
8716 # ifdef TARGET_ASM_NAMED_SECTION
8717   default_named_section_asm_out_destructor (symbol, priority);
8718 # else
8719   default_stabs_asm_out_destructor (symbol, priority);
8720 # endif
8721 #endif
8722 }
8723 #endif
8724 
8725 /* This function places uninitialized global data in the bss section.
8726    The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8727    function on the SOM port to prevent uninitialized global data from
8728    being placed in the data section.  */
8729 
8730 void
8731 pa_asm_output_aligned_bss (FILE *stream,
8732 			   const char *name,
8733 			   unsigned HOST_WIDE_INT size,
8734 			   unsigned int align)
8735 {
8736   switch_to_section (bss_section);
8737   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8738 
8739 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8740   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8741 #endif
8742 
8743 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8744   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8745 #endif
8746 
8747   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8748   ASM_OUTPUT_LABEL (stream, name);
8749   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8750 }
8751 
8752 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8753    that doesn't allow the alignment of global common storage to be directly
8754    specified.  The SOM linker aligns common storage based on the rounded
8755    value of the NUM_BYTES parameter in the .comm directive.  It's not
8756    possible to use the .align directive as it doesn't affect the alignment
8757    of the label associated with a .comm directive.  */
8758 
8759 void
8760 pa_asm_output_aligned_common (FILE *stream,
8761 			      const char *name,
8762 			      unsigned HOST_WIDE_INT size,
8763 			      unsigned int align)
8764 {
8765   unsigned int max_common_align;
8766 
8767   max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8768   if (align > max_common_align)
8769     {
8770       warning (0, "alignment (%u) for %s exceeds maximum alignment "
8771 	       "for global common data.  Using %u",
8772 	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8773       align = max_common_align;
8774     }
8775 
8776   switch_to_section (bss_section);
8777 
8778   assemble_name (stream, name);
8779   fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8780            MAX (size, align / BITS_PER_UNIT));
8781 }
8782 
8783 /* We can't use .comm for local common storage as the SOM linker effectively
8784    treats the symbol as universal and uses the same storage for local symbols
8785    with the same name in different object files.  The .block directive
8786    reserves an uninitialized block of storage.  However, it's not common
8787    storage.  Fortunately, GCC never requests common storage with the same
8788    name in any given translation unit.  */
8789 
8790 void
8791 pa_asm_output_aligned_local (FILE *stream,
8792 			     const char *name,
8793 			     unsigned HOST_WIDE_INT size,
8794 			     unsigned int align)
8795 {
8796   switch_to_section (bss_section);
8797   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8798 
8799 #ifdef LOCAL_ASM_OP
8800   fprintf (stream, "%s", LOCAL_ASM_OP);
8801   assemble_name (stream, name);
8802   fprintf (stream, "\n");
8803 #endif
8804 
8805   ASM_OUTPUT_LABEL (stream, name);
8806   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8807 }
8808 
8809 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8810    use in fmpysub instructions.  */
8811 int
8812 pa_fmpysuboperands (rtx *operands)
8813 {
8814   machine_mode mode = GET_MODE (operands[0]);
8815 
8816   /* Must be a floating point mode.  */
8817   if (mode != SFmode && mode != DFmode)
8818     return 0;
8819 
8820   /* All modes must be the same.  */
8821   if (! (mode == GET_MODE (operands[1])
8822 	 && mode == GET_MODE (operands[2])
8823 	 && mode == GET_MODE (operands[3])
8824 	 && mode == GET_MODE (operands[4])
8825 	 && mode == GET_MODE (operands[5])))
8826     return 0;
8827 
8828   /* All operands must be registers.  */
8829   if (! (GET_CODE (operands[1]) == REG
8830 	 && GET_CODE (operands[2]) == REG
8831 	 && GET_CODE (operands[3]) == REG
8832 	 && GET_CODE (operands[4]) == REG
8833 	 && GET_CODE (operands[5]) == REG))
8834     return 0;
8835 
8836   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
8837      operation, so operands[4] must be the same as operand[3].  */
8838   if (! rtx_equal_p (operands[3], operands[4]))
8839     return 0;
8840 
8841   /* multiply cannot feed into subtraction.  */
8842   if (rtx_equal_p (operands[5], operands[0]))
8843     return 0;
8844 
8845   /* Inout operand of sub cannot conflict with any operands from multiply.  */
8846   if (rtx_equal_p (operands[3], operands[0])
8847      || rtx_equal_p (operands[3], operands[1])
8848      || rtx_equal_p (operands[3], operands[2]))
8849     return 0;
8850 
8851   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8852   if (mode == SFmode
8853       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8854 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8855 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8856 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8857 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8858 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8859     return 0;
8860 
8861   /* Passed.  Operands are suitable for fmpysub.  */
8862   return 1;
8863 }
8864 
8865 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
8866    constants for a MULT embedded inside a memory address.  */
int
pa_mem_shadd_constant_p (int val)
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      /* One of the accepted scale factors.  */
      return 1;

    default:
      return 0;
    }
}
8875 
8876 /* Return 1 if the given constant is 1, 2, or 3.  These are the valid
8877    constants for shadd instructions.  */
int
pa_shadd_constant_p (int val)
{
  /* The accepted values are the contiguous range 1..3.  */
  return val >= 1 && val <= 3;
}
8886 
8887 /* Return TRUE if INSN branches forward.  */
8888 
8889 static bool
8890 forward_branch_p (rtx_insn *insn)
8891 {
8892   rtx lab = JUMP_LABEL (insn);
8893 
8894   /* The INSN must have a jump label.  */
8895   gcc_assert (lab != NULL_RTX);
8896 
8897   if (INSN_ADDRESSES_SET_P ())
8898     return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8899 
8900   while (insn)
8901     {
8902       if (insn == lab)
8903 	return true;
8904       else
8905 	insn = NEXT_INSN (insn);
8906     }
8907 
8908   return false;
8909 }
8910 
8911 /* Output an unconditional move and branch insn.  */
8912 
8913 const char *
8914 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8915 {
8916   int length = get_attr_length (insn);
8917 
8918   /* These are the cases in which we win.  */
8919   if (length == 4)
8920     return "mov%I1b,tr %1,%0,%2";
8921 
8922   /* None of the following cases win, but they don't lose either.  */
8923   if (length == 8)
8924     {
8925       if (dbr_sequence_length () == 0)
8926 	{
8927 	  /* Nothing in the delay slot, fake it by putting the combined
8928 	     insn (the copy or add) in the delay slot of a bl.  */
8929 	  if (GET_CODE (operands[1]) == CONST_INT)
8930 	    return "b %2\n\tldi %1,%0";
8931 	  else
8932 	    return "b %2\n\tcopy %1,%0";
8933 	}
8934       else
8935 	{
8936 	  /* Something in the delay slot, but we've got a long branch.  */
8937 	  if (GET_CODE (operands[1]) == CONST_INT)
8938 	    return "ldi %1,%0\n\tb %2";
8939 	  else
8940 	    return "copy %1,%0\n\tb %2";
8941 	}
8942     }
8943 
8944   if (GET_CODE (operands[1]) == CONST_INT)
8945     output_asm_insn ("ldi %1,%0", operands);
8946   else
8947     output_asm_insn ("copy %1,%0", operands);
8948   return pa_output_lbranch (operands[2], insn, 1);
8949 }
8950 
8951 /* Output an unconditional add and branch insn.  */
8952 
8953 const char *
8954 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8955 {
8956   int length = get_attr_length (insn);
8957 
8958   /* To make life easy we want operand0 to be the shared input/output
8959      operand and operand1 to be the readonly operand.  */
8960   if (operands[0] == operands[1])
8961     operands[1] = operands[2];
8962 
8963   /* These are the cases in which we win.  */
8964   if (length == 4)
8965     return "add%I1b,tr %1,%0,%3";
8966 
8967   /* None of the following cases win, but they don't lose either.  */
8968   if (length == 8)
8969     {
8970       if (dbr_sequence_length () == 0)
8971 	/* Nothing in the delay slot, fake it by putting the combined
8972 	   insn (the copy or add) in the delay slot of a bl.  */
8973 	return "b %3\n\tadd%I1 %1,%0,%0";
8974       else
8975 	/* Something in the delay slot, but we've got a long branch.  */
8976 	return "add%I1 %1,%0,%0\n\tb %3";
8977     }
8978 
8979   output_asm_insn ("add%I1 %1,%0,%0", operands);
8980   return pa_output_lbranch (operands[3], insn, 1);
8981 }
8982 
8983 /* We use this hook to perform a PA specific optimization which is difficult
8984    to do in earlier passes.  */
8985 
8986 static void
8987 pa_reorg (void)
8988 {
8989   remove_useless_addtr_insns (1);
8990 
8991   if (pa_cpu < PROCESSOR_8000)
8992     pa_combine_instructions ();
8993 }
8994 
8995 /* The PA has a number of odd instructions which can perform multiple
8996    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
8997    it may be profitable to combine two instructions into one instruction
8998    with two outputs.  It's not profitable on PA2.0 machines because the
8999    two outputs would take two slots in the reorder buffers.
9000 
9001    This routine finds instructions which can be combined and combines
9002    them.  We only support some of the potential combinations, and we
9003    only try common ways to find suitable instructions.
9004 
9005       * addb can add two registers or a register and a small integer
9006       and jump to a nearby (+-8k) location.  Normally the jump to the
9007       nearby location is conditional on the result of the add, but by
9008       using the "true" condition we can make the jump unconditional.
9009       Thus addb can perform two independent operations in one insn.
9010 
9011       * movb is similar to addb in that it can perform a reg->reg
9012       or small immediate->reg copy and jump to a nearby (+-8k location).
9013 
9014       * fmpyadd and fmpysub can perform a FP multiply and either an
9015       FP add or FP sub if the operands of the multiply and add/sub are
9016       independent (there are other minor restrictions).  Note both
9017       the fmpy and fadd/fsub can in theory move to better spots according
9018       to data dependencies, but for now we require the fmpy stay at a
9019       fixed location.
9020 
9021       * Many of the memory operations can perform pre & post updates
9022       of index registers.  GCC's pre/post increment/decrement addressing
9023       is far too simple to take advantage of all the possibilities.  This
9024       pass may not be suitable since those insns may not be independent.
9025 
9026       * comclr can compare two ints or an int and a register, nullify
9027       the following instruction and zero some other register.  This
9028       is more difficult to use as it's harder to find an insn which
9029       will generate a comclr than finding something like an unconditional
9030       branch.  (conditional moves & long branches create comclr insns).
9031 
9032       * Most arithmetic operations can conditionally skip the next
9033       instruction.  They can be viewed as "perform this operation
9034       and conditionally jump to this nearby location" (where nearby
9035       is one insn away).  These are difficult to use due to the
9036       branch length restrictions.  */
9037 
9038 static void
9039 pa_combine_instructions (void)
9040 {
9041   rtx_insn *anchor;
9042 
9043   /* This can get expensive since the basic algorithm is on the
9044      order of O(n^2) (or worse).  Only do it for -O2 or higher
9045      levels of optimization.  */
9046   if (optimize < 2)
9047     return;
9048 
9049   /* Walk down the list of insns looking for "anchor" insns which
9050      may be combined with "floating" insns.  As the name implies,
9051      "anchor" instructions don't move, while "floating" insns may
9052      move around.  */
9053   rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9054   rtx_insn *new_rtx = make_insn_raw (par);
9055 
9056   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9057     {
9058       enum attr_pa_combine_type anchor_attr;
9059       enum attr_pa_combine_type floater_attr;
9060 
9061       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9062 	 Also ignore any special USE insns.  */
9063       if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9064 	  || GET_CODE (PATTERN (anchor)) == USE
9065 	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
9066 	continue;
9067 
9068       anchor_attr = get_attr_pa_combine_type (anchor);
9069       /* See if anchor is an insn suitable for combination.  */
9070       if (anchor_attr == PA_COMBINE_TYPE_FMPY
9071 	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9072 	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9073 	      && ! forward_branch_p (anchor)))
9074 	{
9075 	  rtx_insn *floater;
9076 
9077 	  for (floater = PREV_INSN (anchor);
9078 	       floater;
9079 	       floater = PREV_INSN (floater))
9080 	    {
9081 	      if (NOTE_P (floater)
9082 		  || (NONJUMP_INSN_P (floater)
9083 		      && (GET_CODE (PATTERN (floater)) == USE
9084 			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
9085 		continue;
9086 
9087 	      /* Anything except a regular INSN will stop our search.  */
9088 	      if (! NONJUMP_INSN_P (floater))
9089 		{
9090 		  floater = NULL;
9091 		  break;
9092 		}
9093 
9094 	      /* See if FLOATER is suitable for combination with the
9095 		 anchor.  */
9096 	      floater_attr = get_attr_pa_combine_type (floater);
9097 	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9098 		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9099 		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9100 		      && floater_attr == PA_COMBINE_TYPE_FMPY))
9101 		{
9102 		  /* If ANCHOR and FLOATER can be combined, then we're
9103 		     done with this pass.  */
9104 		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9105 					SET_DEST (PATTERN (floater)),
9106 					XEXP (SET_SRC (PATTERN (floater)), 0),
9107 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9108 		    break;
9109 		}
9110 
9111 	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9112 		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9113 		{
9114 		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9115 		    {
9116 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9117 					    SET_DEST (PATTERN (floater)),
9118 					XEXP (SET_SRC (PATTERN (floater)), 0),
9119 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9120 			break;
9121 		    }
9122 		  else
9123 		    {
9124 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9125 					    SET_DEST (PATTERN (floater)),
9126 					    SET_SRC (PATTERN (floater)),
9127 					    SET_SRC (PATTERN (floater))))
9128 			break;
9129 		    }
9130 		}
9131 	    }
9132 
9133 	  /* If we didn't find anything on the backwards scan try forwards.  */
9134 	  if (!floater
9135 	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
9136 		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9137 	    {
9138 	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
9139 		{
9140 		  if (NOTE_P (floater)
9141 		      || (NONJUMP_INSN_P (floater)
9142 			  && (GET_CODE (PATTERN (floater)) == USE
9143 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
9144 
9145 		    continue;
9146 
9147 		  /* Anything except a regular INSN will stop our search.  */
9148 		  if (! NONJUMP_INSN_P (floater))
9149 		    {
9150 		      floater = NULL;
9151 		      break;
9152 		    }
9153 
9154 		  /* See if FLOATER is suitable for combination with the
9155 		     anchor.  */
9156 		  floater_attr = get_attr_pa_combine_type (floater);
9157 		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9158 		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9159 		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9160 			  && floater_attr == PA_COMBINE_TYPE_FMPY))
9161 		    {
9162 		      /* If ANCHOR and FLOATER can be combined, then we're
9163 			 done with this pass.  */
9164 		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9165 					    SET_DEST (PATTERN (floater)),
9166 					    XEXP (SET_SRC (PATTERN (floater)),
9167 						  0),
9168 					    XEXP (SET_SRC (PATTERN (floater)),
9169 						  1)))
9170 			break;
9171 		    }
9172 		}
9173 	    }
9174 
9175 	  /* FLOATER will be nonzero if we found a suitable floating
9176 	     insn for combination with ANCHOR.  */
9177 	  if (floater
9178 	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9179 		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
9180 	    {
9181 	      /* Emit the new instruction and delete the old anchor.  */
9182 	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9183 				       copy_rtx (PATTERN (floater)));
9184 	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9185 	      emit_insn_before (temp, anchor);
9186 
9187 	      SET_INSN_DELETED (anchor);
9188 
9189 	      /* Emit a special USE insn for FLOATER, then delete
9190 		 the floating insn.  */
9191 	      temp = copy_rtx (PATTERN (floater));
9192 	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9193 	      delete_insn (floater);
9194 
9195 	      continue;
9196 	    }
9197 	  else if (floater
9198 		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9199 	    {
9200 	      /* Emit the new_jump instruction and delete the old anchor.  */
9201 	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9202 				       copy_rtx (PATTERN (floater)));
9203 	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9204 	      temp = emit_jump_insn_before (temp, anchor);
9205 
9206 	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9207 	      SET_INSN_DELETED (anchor);
9208 
9209 	      /* Emit a special USE insn for FLOATER, then delete
9210 		 the floating insn.  */
9211 	      temp = copy_rtx (PATTERN (floater));
9212 	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9213 	      delete_insn (floater);
9214 	      continue;
9215 	    }
9216 	}
9217     }
9218 }
9219 
9220 static int
9221 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9222 		  int reversed, rtx dest,
9223 		  rtx src1, rtx src2)
9224 {
9225   int insn_code_number;
9226   rtx_insn *start, *end;
9227 
9228   /* Create a PARALLEL with the patterns of ANCHOR and
9229      FLOATER, try to recognize it, then test constraints
9230      for the resulting pattern.
9231 
9232      If the pattern doesn't match or the constraints
9233      aren't met keep searching for a suitable floater
9234      insn.  */
9235   XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9236   XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9237   INSN_CODE (new_rtx) = -1;
9238   insn_code_number = recog_memoized (new_rtx);
9239   basic_block bb = BLOCK_FOR_INSN (anchor);
9240   if (insn_code_number < 0
9241       || (extract_insn (new_rtx),
9242 	  !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9243     return 0;
9244 
9245   if (reversed)
9246     {
9247       start = anchor;
9248       end = floater;
9249     }
9250   else
9251     {
9252       start = floater;
9253       end = anchor;
9254     }
9255 
9256   /* There's up to three operands to consider.  One
9257      output and two inputs.
9258 
9259      The output must not be used between FLOATER & ANCHOR
9260      exclusive.  The inputs must not be set between
9261      FLOATER and ANCHOR exclusive.  */
9262 
9263   if (reg_used_between_p (dest, start, end))
9264     return 0;
9265 
9266   if (reg_set_between_p (src1, start, end))
9267     return 0;
9268 
9269   if (reg_set_between_p (src2, start, end))
9270     return 0;
9271 
9272   /* If we get here, then everything is good.  */
9273   return 1;
9274 }
9275 
9276 /* Return nonzero if references for INSN are delayed.
9277 
9278    Millicode insns are actually function calls with some special
9279    constraints on arguments and register usage.
9280 
9281    Millicode calls always expect their arguments in the integer argument
9282    registers, and always return their result in %r29 (ret1).  They
9283    are expected to clobber their arguments, %r1, %r29, and the return
9284    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9285 
9286    This function tells reorg that the references to arguments and
9287    millicode calls do not appear to happen until after the millicode call.
9288    This allows reorg to put insns which set the argument registers into the
9289    delay slot of the millicode call -- thus they act more like traditional
9290    CALL_INSNs.
9291 
9292    Note we cannot consider side effects of the insn to be delayed because
9293    the branch and link insn will clobber the return pointer.  If we happened
9294    to use the return pointer in the delay slot of the call, then we lose.
9295 
9296    get_attr_type will try to recognize the given insn, so make sure to
9297    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9298    in particular.  */
9299 int
9300 pa_insn_refs_are_delayed (rtx_insn *insn)
9301 {
9302   return ((NONJUMP_INSN_P (insn)
9303 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
9304 	   && GET_CODE (PATTERN (insn)) != USE
9305 	   && GET_CODE (PATTERN (insn)) != CLOBBER
9306 	   && get_attr_type (insn) == TYPE_MILLI));
9307 }
9308 
9309 /* Promote the return value, but not the arguments.  */
9310 
9311 static machine_mode
9312 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9313                           machine_mode mode,
9314                           int *punsignedp ATTRIBUTE_UNUSED,
9315                           const_tree fntype ATTRIBUTE_UNUSED,
9316                           int for_return)
9317 {
9318   if (for_return == 0)
9319     return mode;
9320   return promote_mode (type, mode, punsignedp);
9321 }
9322 
9323 /* On the HP-PA the value is found in register(s) 28(-29), unless
9324    the mode is SF or DF. Then the value is returned in fr4 (32).
9325 
9326    This must perform the same promotions as PROMOTE_MODE, else promoting
9327    return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9328 
9329    Small structures must be returned in a PARALLEL on PA64 in order
9330    to match the HP Compiler ABI.  */
9331 
9332 static rtx
9333 pa_function_value (const_tree valtype,
9334                    const_tree func ATTRIBUTE_UNUSED,
9335                    bool outgoing ATTRIBUTE_UNUSED)
9336 {
9337   machine_mode valmode;
9338 
9339   if (AGGREGATE_TYPE_P (valtype)
9340       || TREE_CODE (valtype) == COMPLEX_TYPE
9341       || TREE_CODE (valtype) == VECTOR_TYPE)
9342     {
9343       HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9344 
9345       /* Handle aggregates that fit exactly in a word or double word.  */
9346       if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
9347 	return gen_rtx_REG (TYPE_MODE (valtype), 28);
9348 
9349       if (TARGET_64BIT)
9350 	{
9351           /* Aggregates with a size less than or equal to 128 bits are
9352 	     returned in GR 28(-29).  They are left justified.  The pad
9353 	     bits are undefined.  Larger aggregates are returned in
9354 	     memory.  */
9355 	  rtx loc[2];
9356 	  int i, offset = 0;
9357 	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9358 
9359 	  for (i = 0; i < ub; i++)
9360 	    {
9361 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9362 					  gen_rtx_REG (DImode, 28 + i),
9363 					  GEN_INT (offset));
9364 	      offset += 8;
9365 	    }
9366 
9367 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9368 	}
9369       else if (valsize > UNITS_PER_WORD)
9370 	{
9371 	  /* Aggregates 5 to 8 bytes in size are returned in general
9372 	     registers r28-r29 in the same manner as other non
9373 	     floating-point objects.  The data is right-justified and
9374 	     zero-extended to 64 bits.  This is opposite to the normal
9375 	     justification used on big endian targets and requires
9376 	     special treatment.  */
9377 	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9378 				       gen_rtx_REG (DImode, 28), const0_rtx);
9379 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9380 	}
9381     }
9382 
9383   if ((INTEGRAL_TYPE_P (valtype)
9384        && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9385       || POINTER_TYPE_P (valtype))
9386     valmode = word_mode;
9387   else
9388     valmode = TYPE_MODE (valtype);
9389 
9390   if (TREE_CODE (valtype) == REAL_TYPE
9391       && !AGGREGATE_TYPE_P (valtype)
9392       && TYPE_MODE (valtype) != TFmode
9393       && !TARGET_SOFT_FLOAT)
9394     return gen_rtx_REG (valmode, 32);
9395 
9396   return gen_rtx_REG (valmode, 28);
9397 }
9398 
9399 /* Implement the TARGET_LIBCALL_VALUE hook.  */
9400 
9401 static rtx
9402 pa_libcall_value (machine_mode mode,
9403 		  const_rtx fun ATTRIBUTE_UNUSED)
9404 {
9405   if (! TARGET_SOFT_FLOAT
9406       && (mode == SFmode || mode == DFmode))
9407     return  gen_rtx_REG (mode, 32);
9408   else
9409     return  gen_rtx_REG (mode, 28);
9410 }
9411 
9412 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */
9413 
9414 static bool
9415 pa_function_value_regno_p (const unsigned int regno)
9416 {
9417   if (regno == 28
9418       || (! TARGET_SOFT_FLOAT &&  regno == 32))
9419     return true;
9420 
9421   return false;
9422 }
9423 
9424 /* Update the data in CUM to advance over an argument
9425    of mode MODE and data type TYPE.
9426    (TYPE is null for libcalls where that information may not be available.)  */
9427 
9428 static void
9429 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9430 			 const_tree type, bool named ATTRIBUTE_UNUSED)
9431 {
9432   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9433   int arg_size = pa_function_arg_size (mode, type);
9434 
9435   cum->nargs_prototype--;
9436   cum->words += (arg_size
9437 		 + ((cum->words & 01)
9438 		    && type != NULL_TREE
9439 		    && arg_size > 1));
9440 }
9441 
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   CUM_V describes the arguments processed so far, MODE is the mode of the
   argument and TYPE its data type (TYPE may be null; the code below
   treats a null TYPE as a libcall argument).  NULL_RTX is also returned
   when MODE is VOIDmode.

   Note that on the 64-bit target an argument that only partly fits in
   the argument registers still gets a PARALLEL describing the registers
   it does occupy; pa_arg_partial_bytes reports the split.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
static rtx
pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
		 const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  /* Number of argument words that can be passed in registers.  */
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  /* Padding words needed before a multi-word argument (64-bit only).  */
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = pa_function_arg_size (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
	return NULL_RTX;
    }
  else
    {
      /* Multi-word arguments start on an even word.  */
      if (arg_size > 1)
	alignment = cum->words & 1;
      /* Only arguments starting beyond the last register slot are
	 entirely on the stack.  */
      if (cum->words + alignment >= max_arg_words)
	return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

	 Remember, gprs grow towards smaller register numbers while
	 fprs grow to higher register numbers.  Also remember that
	 although FP regs are 32-bit addressable, we pretend that
	 the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
	 treatment.  */
      if (arg_size > 1
	  || mode == BLKmode
	  || (type && (AGGREGATE_TYPE_P (type)
		       || TREE_CODE (type) == COMPLEX_TYPE
		       || TREE_CODE (type) == VECTOR_TYPE)))
	{
	  /* Double-extended precision (80-bit), quad-precision (128-bit)
	     and aggregates including complex numbers are aligned on
	     128-bit boundaries.  The first eight 64-bit argument slots
	     are associated one-to-one, with general registers r26
	     through r19, and also with floating-point registers fr4
	     through fr11.  Arguments larger than one word are always
	     passed in general registers.

	     Using a PARALLEL with a word mode register results in left
	     justified data on a big-endian target.  */

	  rtx loc[8];
	  int i, offset = 0, ub = arg_size;

	  /* Align the base register.  */
	  gpr_reg_base -= alignment;

	  /* Describe only the words that land in registers; the rest of
	     a split argument goes on the stack.  */
	  ub = MIN (ub, max_arg_words - cum->words - alignment);
	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, gpr_reg_base),
					  GEN_INT (offset));
	      gpr_reg_base -= 1;
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
	}
     }
  else
    {
      /* If the argument is larger than a word, then we know precisely
	 which registers we must use.  */
      if (arg_size > 1)
	{
	  /* The argument goes in the second register pair when earlier
	     argument words exist, else in the first pair.  */
	  if (cum->words)
	    {
	      gpr_reg_base = 23;
	      fpr_reg_base = 38;
	    }
	  else
	    {
	      gpr_reg_base = 25;
	      fpr_reg_base = 34;
	    }

	  /* Structures 5 to 8 bytes in size are passed in the general
	     registers in the same manner as other non floating-point
	     objects.  The data is right-justified and zero-extended
	     to 64 bits.  This is opposite to the normal justification
	     used on big endian targets and requires special treatment.
	     We now define BLOCK_REG_PADDING to pad these objects.
	     Aggregates, complex and vector types are passed in the same
	     manner as structures.  */
	  if (mode == BLKmode
	      || (type && (AGGREGATE_TYPE_P (type)
			   || TREE_CODE (type) == COMPLEX_TYPE
			   || TREE_CODE (type) == VECTOR_TYPE)))
	    {
	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (DImode, gpr_reg_base),
					   const0_rtx);
	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	    }
	}
      else
        {
	   /* We have a single word (32 bits).  A simple computation
	      will get us the register #s we need.  */
	   gpr_reg_base = 26 - cum->words;
	   fpr_reg_base = 32 + 2 * cum->words;
	}
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
	  is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
	  pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
	  registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
	 calls with the 32 bit ABI and the HP assembler since there is no
	 way to specify the argument locations in static functions.  */
      || (!TARGET_64BIT
	  && !TARGET_GAS
	  && !cum->incoming
	  && cum->indirect
	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Two-element PARALLEL: the FP register first, then the general
	 register, both at offset zero.  */
      retval
	= gen_rtx_PARALLEL
	    (mode,
	     gen_rtvec (2,
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, fpr_reg_base),
					   const0_rtx),
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, gpr_reg_base),
					   const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
	  /* Indirect calls in the normal 32bit ABI require all arguments
	     to be passed in general registers.  */
	  || (!TARGET_PORTABLE_RUNTIME
	      && !TARGET_64BIT
	      && !TARGET_ELF32
	      && cum->indirect)
	  /* If the parameter is not a scalar floating-point parameter,
	     then it belongs in GPRs.  */
	  || GET_MODE_CLASS (mode) != MODE_FLOAT
	  /* Structure with single SFmode field belongs in GPR.  */
	  || (type && AGGREGATE_TYPE_P (type)))
	retval = gen_rtx_REG (mode, gpr_reg_base);
      else
	retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}
9643 
9644 /* Arguments larger than one word are double word aligned.  */
9645 
9646 static unsigned int
9647 pa_function_arg_boundary (machine_mode mode, const_tree type)
9648 {
9649   bool singleword = (type
9650 		     ? (integer_zerop (TYPE_SIZE (type))
9651 			|| !TREE_CONSTANT (TYPE_SIZE (type))
9652 			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
9653 		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9654 
9655   return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9656 }
9657 
9658 /* If this arg would be passed totally in registers or totally on the stack,
9659    then this routine should return zero.  */
9660 
9661 static int
9662 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9663 		      tree type, bool named ATTRIBUTE_UNUSED)
9664 {
9665   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9666   unsigned int max_arg_words = 8;
9667   unsigned int offset = 0;
9668 
9669   if (!TARGET_64BIT)
9670     return 0;
9671 
9672   if (pa_function_arg_size (mode, type) > 1 && (cum->words & 1))
9673     offset = 1;
9674 
9675   if (cum->words + offset + pa_function_arg_size (mode, type) <= max_arg_words)
9676     /* Arg fits fully into registers.  */
9677     return 0;
9678   else if (cum->words + offset >= max_arg_words)
9679     /* Arg fully on the stack.  */
9680     return 0;
9681   else
9682     /* Arg is split.  */
9683     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9684 }
9685 
9686 
9687 /* A get_unnamed_section callback for switching to the text section.
9688 
9689    This function is only used with SOM.  Because we don't support
9690    named subspaces, we can only create a new subspace or switch back
9691    to the default text subspace.  */
9692 
9693 static void
9694 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9695 {
9696   gcc_assert (TARGET_SOM);
9697   if (TARGET_GAS)
9698     {
9699       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9700 	{
9701 	  /* We only want to emit a .nsubspa directive once at the
9702 	     start of the function.  */
9703 	  cfun->machine->in_nsubspa = 1;
9704 
9705 	  /* Create a new subspace for the text.  This provides
9706 	     better stub placement and one-only functions.  */
9707 	  if (cfun->decl
9708 	      && DECL_ONE_ONLY (cfun->decl)
9709 	      && !DECL_WEAK (cfun->decl))
9710 	    {
9711 	      output_section_asm_op ("\t.SPACE $TEXT$\n"
9712 				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9713 				     "ACCESS=44,SORT=24,COMDAT");
9714 	      return;
9715 	    }
9716 	}
9717       else
9718 	{
9719 	  /* There isn't a current function or the body of the current
9720 	     function has been completed.  So, we are changing to the
9721 	     text section to output debugging information.  Thus, we
9722 	     need to forget that we are in the text section so that
9723 	     varasm.c will call us when text_section is selected again.  */
9724 	  gcc_assert (!cfun || !cfun->machine
9725 		      || cfun->machine->in_nsubspa == 2);
9726 	  in_section = NULL;
9727 	}
9728       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9729       return;
9730     }
9731   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9732 }
9733 
9734 /* A get_unnamed_section callback for switching to comdat data
9735    sections.  This function is only used with SOM.  */
9736 
9737 static void
9738 som_output_comdat_data_section_asm_op (const void *data)
9739 {
9740   in_section = NULL;
9741   output_section_asm_op (data);
9742 }
9743 
9744 /* Implement TARGET_ASM_INIT_SECTIONS.  */
9745 
9746 static void
9747 pa_som_asm_init_sections (void)
9748 {
9749   text_section
9750     = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9751 
9752   /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9753      is not being generated.  */
9754   som_readonly_data_section
9755     = get_unnamed_section (0, output_section_asm_op,
9756 			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9757 
9758   /* When secondary definitions are not supported, SOM makes readonly
9759      data one-only by creating a new $LIT$ subspace in $TEXT$ with
9760      the comdat flag.  */
9761   som_one_only_readonly_data_section
9762     = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9763 			   "\t.SPACE $TEXT$\n"
9764 			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9765 			   "ACCESS=0x2c,SORT=16,COMDAT");
9766 
9767 
9768   /* When secondary definitions are not supported, SOM makes data one-only
9769      by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
9770   som_one_only_data_section
9771     = get_unnamed_section (SECTION_WRITE,
9772 			   som_output_comdat_data_section_asm_op,
9773 			   "\t.SPACE $PRIVATE$\n"
9774 			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9775 			   "ACCESS=31,SORT=24,COMDAT");
9776 
9777   if (flag_tm)
9778     som_tm_clone_table_section
9779       = get_unnamed_section (0, output_section_asm_op,
9780 			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9781 
9782   /* HPUX ld generates incorrect GOT entries for "T" fixups which
9783      reference data within the $TEXT$ space (for example constant
9784      strings in the $LIT$ subspace).
9785 
9786      The assemblers (GAS and HP as) both have problems with handling
9787      the difference of two symbols.  This is the other correct way to
9788      reference constant data during PIC code generation.
9789 
9790      Thus, we can't put constant data needing relocation in the $TEXT$
9791      space during PIC generation.
9792 
9793      Previously, we placed all constant data into the $DATA$ subspace
9794      when generating PIC code.  This reduces sharing, but it works
9795      correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
9796      This puts constant data not needing relocation into the $TEXT$ space.  */
9797   readonly_data_section = som_readonly_data_section;
9798 
9799   /* We must not have a reference to an external symbol defined in a
9800      shared library in a readonly section, else the SOM linker will
9801      complain.
9802 
9803      So, we force exception information into the data section.  */
9804   exception_section = data_section;
9805 }
9806 
9807 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
9808 
9809 static section *
9810 pa_som_tm_clone_table_section (void)
9811 {
9812   return som_tm_clone_table_section;
9813 }
9814 
9815 /* On hpux10, the linker will give an error if we have a reference
9816    in the read-only data section to a symbol defined in a shared
9817    library.  Therefore, expressions that might require a reloc
9818    cannot be placed in the read-only data section.  */
9819 
9820 static section *
9821 pa_select_section (tree exp, int reloc,
9822 		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9823 {
9824   if (TREE_CODE (exp) == VAR_DECL
9825       && TREE_READONLY (exp)
9826       && !TREE_THIS_VOLATILE (exp)
9827       && DECL_INITIAL (exp)
9828       && (DECL_INITIAL (exp) == error_mark_node
9829           || TREE_CONSTANT (DECL_INITIAL (exp)))
9830       && !(reloc & pa_reloc_rw_mask ()))
9831     {
9832       if (TARGET_SOM
9833 	  && DECL_ONE_ONLY (exp)
9834 	  && !DECL_WEAK (exp))
9835 	return som_one_only_readonly_data_section;
9836       else
9837 	return readonly_data_section;
9838     }
9839   else if (CONSTANT_CLASS_P (exp)
9840 	   && !(reloc & pa_reloc_rw_mask ()))
9841     return readonly_data_section;
9842   else if (TARGET_SOM
9843 	   && TREE_CODE (exp) == VAR_DECL
9844 	   && DECL_ONE_ONLY (exp)
9845 	   && !DECL_WEAK (exp))
9846     return som_one_only_data_section;
9847   else
9848     return data_section;
9849 }
9850 
9851 /* Implement pa_elf_select_rtx_section.  If X is a function label operand
9852    and the function is in a COMDAT group, place the plabel reference in the
9853    .data.rel.ro.local section.  The linker ignores references to symbols in
9854    discarded sections from this section.  */
9855 
9856 static section *
9857 pa_elf_select_rtx_section (machine_mode mode, rtx x,
9858 			   unsigned HOST_WIDE_INT align)
9859 {
9860   if (function_label_operand (x, VOIDmode))
9861     {
9862       tree decl = SYMBOL_REF_DECL (x);
9863 
9864       if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
9865 	return get_named_section (NULL, ".data.rel.ro.local", 1);
9866     }
9867 
9868   return default_elf_select_rtx_section (mode, x, align);
9869 }
9870 
9871 /* Implement pa_reloc_rw_mask.  */
9872 
9873 static int
9874 pa_reloc_rw_mask (void)
9875 {
9876   if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
9877     return 3;
9878 
9879   /* HP linker does not support global relocs in readonly memory.  */
9880   return TARGET_SOM ? 2 : 0;
9881 }
9882 
9883 static void
9884 pa_globalize_label (FILE *stream, const char *name)
9885 {
9886   /* We only handle DATA objects here, functions are globalized in
9887      ASM_DECLARE_FUNCTION_NAME.  */
9888   if (! FUNCTION_NAME_P (name))
9889   {
9890     fputs ("\t.EXPORT ", stream);
9891     assemble_name (stream, name);
9892     fputs (",DATA\n", stream);
9893   }
9894 }
9895 
9896 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9897 
9898 static rtx
9899 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9900 		     int incoming ATTRIBUTE_UNUSED)
9901 {
9902   return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9903 }
9904 
9905 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
9906 
9907 bool
9908 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9909 {
9910   /* SOM ABI says that objects larger than 64 bits are returned in memory.
9911      PA64 ABI says that objects larger than 128 bits are returned in memory.
9912      Note, int_size_in_bytes can return -1 if the size of the object is
9913      variable or larger than the maximum value that can be expressed as
9914      a HOST_WIDE_INT.   It can also return zero for an empty type.  The
9915      simplest way to handle variable and empty types is to pass them in
9916      memory.  This avoids problems in defining the boundaries of argument
9917      slots, allocating registers, etc.  */
9918   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9919 	  || int_size_in_bytes (type) <= 0);
9920 }
9921 
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  /* Declaration of the external symbol.  */
  tree decl;
  /* Name recorded together with DECL by pa_hpux_asm_output_external.  */
  const char *name;
} extern_symbol;

/* Garbage-collected vector of extern_symbol records.  The records are
   stored by value, not as pointers.  */
static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Record DECL (name NAME) as an external reference for assembler
   output file FILE, which must be asm_out_file.  The name is saved so
   that it can be output at the end of the file if it turns out to be
   referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  extern_symbol entry;

  gcc_assert (file == asm_out_file);

  entry.decl = decl;
  entry.name = name;
  vec_safe_push (extern_symbols, entry);
}
#endif
9951 
9952 /* Output text required at the end of an assembler file.
9953    This includes deferred plabels and .import directives for
9954    all external symbols that were actually referenced.  */
9955 
9956 static void
9957 pa_file_end (void)
9958 {
9959 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9960   unsigned int i;
9961   extern_symbol *p;
9962 
9963   if (!NO_DEFERRED_PROFILE_COUNTERS)
9964     output_deferred_profile_counters ();
9965 #endif
9966 
9967   output_deferred_plabels ();
9968 
9969 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9970   for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9971     {
9972       tree decl = p->decl;
9973 
9974       if (!TREE_ASM_WRITTEN (decl)
9975 	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9976 	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9977     }
9978 
9979   vec_free (extern_symbols);
9980 #endif
9981 
9982   if (NEED_INDICATE_EXEC_STACK)
9983     file_end_indicate_exec_stack ();
9984 }
9985 
9986 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
9987 
9988 static bool
9989 pa_can_change_mode_class (machine_mode from, machine_mode to,
9990 			  reg_class_t rclass)
9991 {
9992   if (from == to)
9993     return true;
9994 
9995   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9996     return true;
9997 
9998   /* Reject changes to/from modes with zero size.  */
9999   if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10000     return false;
10001 
10002   /* Reject changes to/from complex and vector modes.  */
10003   if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10004       || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10005     return false;
10006 
10007   /* There is no way to load QImode or HImode values directly from memory
10008      to a FP register.  SImode loads to the FP registers are not zero
10009      extended.  On the 64-bit target, this conflicts with the definition
10010      of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
10011      except for DImode to SImode on the 64-bit target.  It is handled by
10012      register renaming in pa_print_operand.  */
10013   if (MAYBE_FP_REG_CLASS_P (rclass))
10014     return TARGET_64BIT && from == DImode && to == SImode;
10015 
10016   /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10017      in specific sets of registers.  Thus, we cannot allow changing
10018      to a larger mode when it's larger than a word.  */
10019   if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10020       && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10021     return false;
10022 
10023   return true;
10024 }
10025 
10026 /* Implement TARGET_MODES_TIEABLE_P.
10027 
10028    We should return FALSE for QImode and HImode because these modes
10029    are not ok in the floating-point registers.  However, this prevents
10030    tieing these modes to SImode and DImode in the general registers.
10031    So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
10032    TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10033    in the floating-point registers.  */
10034 
10035 static bool
10036 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10037 {
10038   /* Don't tie modes in different classes.  */
10039   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10040     return false;
10041 
10042   return true;
10043 }
10044 
10045 
/* Length in units of the trampoline instruction code: 24 on the 64-bit
   target, 36 on the 32-bit target with PA 2.0 and 48 otherwise.
   NOTE(review): the units are presumably bytes -- the 32-bit PA 2.0
   template emits nine instructions for a value of 36 -- confirm.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10049 
10050 
10051 /* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.
10053 
10054    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10055    and then branches to the specified routine.
10056 
10057    This code template is copied from text segment to stack location
10058    and then patched with pa_trampoline_init to contain valid values,
10059    and then entered as a subroutine.
10060 
10061    It is best to keep this as small as possible to avoid having to
10062    flush multiple lines in the cache.  */
10063 
10064 static void
10065 pa_asm_trampoline_template (FILE *f)
10066 {
10067   if (!TARGET_64BIT)
10068     {
10069       if (TARGET_PA_20)
10070 	{
10071 	  fputs ("\tmfia	%r20\n", f);
10072 	  fputs ("\tldw		48(%r20),%r22\n", f);
10073 	  fputs ("\tcopy	%r22,%r21\n", f);
10074 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10075 	  fputs ("\tdepwi	0,31,2,%r22\n", f);
10076 	  fputs ("\tldw		0(%r22),%r21\n", f);
10077 	  fputs ("\tldw		4(%r22),%r19\n", f);
10078 	  fputs ("\tbve		(%r21)\n", f);
10079 	  fputs ("\tldw		52(%r1),%r29\n", f);
10080 	  fputs ("\t.word	0\n", f);
10081 	  fputs ("\t.word	0\n", f);
10082 	  fputs ("\t.word	0\n", f);
10083 	}
10084       else
10085 	{
10086 	  if (ASSEMBLER_DIALECT == 0)
10087 	    {
10088 	      fputs ("\tbl	.+8,%r20\n", f);
10089 	      fputs ("\tdepi	0,31,2,%r20\n", f);
10090 	    }
10091 	  else
10092 	    {
10093 	      fputs ("\tb,l	.+8,%r20\n", f);
10094 	      fputs ("\tdepwi	0,31,2,%r20\n", f);
10095 	    }
10096 	  fputs ("\tldw		40(%r20),%r22\n", f);
10097 	  fputs ("\tcopy	%r22,%r21\n", f);
10098 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10099 	  if (ASSEMBLER_DIALECT == 0)
10100 	    fputs ("\tdepi	0,31,2,%r22\n", f);
10101 	  else
10102 	    fputs ("\tdepwi	0,31,2,%r22\n", f);
10103 	  fputs ("\tldw		0(%r22),%r21\n", f);
10104 	  fputs ("\tldw		4(%r22),%r19\n", f);
10105 	  fputs ("\tldsid	(%r21),%r1\n", f);
10106 	  fputs ("\tmtsp	%r1,%sr0\n", f);
10107 	  fputs ("\tbe		0(%sr0,%r21)\n", f);
10108 	  fputs ("\tldw		44(%r20),%r29\n", f);
10109 	}
10110       fputs ("\t.word	0\n", f);
10111       fputs ("\t.word	0\n", f);
10112       fputs ("\t.word	0\n", f);
10113       fputs ("\t.word	0\n", f);
10114     }
10115   else
10116     {
10117       fputs ("\t.dword 0\n", f);
10118       fputs ("\t.dword 0\n", f);
10119       fputs ("\t.dword 0\n", f);
10120       fputs ("\t.dword 0\n", f);
10121       fputs ("\tmfia	%r31\n", f);
10122       fputs ("\tldd	24(%r31),%r27\n", f);
10123       fputs ("\tldd	32(%r31),%r31\n", f);
10124       fputs ("\tldd	16(%r27),%r1\n", f);
10125       fputs ("\tbve	(%r1)\n", f);
10126       fputs ("\tldd	24(%r27),%r27\n", f);
10127       fputs ("\t.dword 0  ; fptr\n", f);
10128       fputs ("\t.dword 0  ; static link\n", f);
10129     }
10130 }
10131 
10132 /* Emit RTL insns to initialize the variable parts of a trampoline.
10133    FNADDR is an RTX for the address of the function's pure code.
10134    CXT is an RTX for the static chain value for the function.
10135 
10136    Move the function address to the trampoline template at offset 48.
10137    Move the static chain value to trampoline template at offset 52.
10138    Move the trampoline address to trampoline template at offset 56.
10139    Move r19 to trampoline template at offset 60.  The latter two
10140    words create a plabel for the indirect call to the trampoline.
10141 
10142    A similar sequence is used for the 64-bit port but the plabel is
10143    at the beginning of the trampoline.
10144 
10145    Finally, the cache entries for the trampoline code are flushed.
10146    This is necessary to ensure that the trampoline instruction sequence
10147    is written to memory prior to any attempts at prefetching the code
10148    sequence.  */
10149 
10150 static void
10151 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10152 {
10153   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10154   rtx start_addr = gen_reg_rtx (Pmode);
10155   rtx end_addr = gen_reg_rtx (Pmode);
10156   rtx line_length = gen_reg_rtx (Pmode);
10157   rtx r_tramp, tmp;
10158 
10159   emit_block_move (m_tramp, assemble_trampoline_template (),
10160 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10161   r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10162 
10163   if (!TARGET_64BIT)
10164     {
10165       tmp = adjust_address (m_tramp, Pmode, 48);
10166       emit_move_insn (tmp, fnaddr);
10167       tmp = adjust_address (m_tramp, Pmode, 52);
10168       emit_move_insn (tmp, chain_value);
10169 
10170       /* Create a fat pointer for the trampoline.  */
10171       tmp = adjust_address (m_tramp, Pmode, 56);
10172       emit_move_insn (tmp, r_tramp);
10173       tmp = adjust_address (m_tramp, Pmode, 60);
10174       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10175 
10176       /* fdc and fic only use registers for the address to flush,
10177 	 they do not accept integer displacements.  We align the
10178 	 start and end addresses to the beginning of their respective
10179 	 cache lines to minimize the number of lines flushed.  */
10180       emit_insn (gen_andsi3 (start_addr, r_tramp,
10181 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10182       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10183 					     TRAMPOLINE_CODE_SIZE-1));
10184       emit_insn (gen_andsi3 (end_addr, tmp,
10185 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10186       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10187       emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10188       emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10189 				    gen_reg_rtx (Pmode),
10190 				    gen_reg_rtx (Pmode)));
10191     }
10192   else
10193     {
10194       tmp = adjust_address (m_tramp, Pmode, 56);
10195       emit_move_insn (tmp, fnaddr);
10196       tmp = adjust_address (m_tramp, Pmode, 64);
10197       emit_move_insn (tmp, chain_value);
10198 
10199       /* Create a fat pointer for the trampoline.  */
10200       tmp = adjust_address (m_tramp, Pmode, 16);
10201       emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10202 							    r_tramp, 32)));
10203       tmp = adjust_address (m_tramp, Pmode, 24);
10204       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10205 
10206       /* fdc and fic only use registers for the address to flush,
10207 	 they do not accept integer displacements.  We align the
10208 	 start and end addresses to the beginning of their respective
10209 	 cache lines to minimize the number of lines flushed.  */
10210       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10211       emit_insn (gen_anddi3 (start_addr, tmp,
10212 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10213       tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10214 					     TRAMPOLINE_CODE_SIZE - 1));
10215       emit_insn (gen_anddi3 (end_addr, tmp,
10216 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10217       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10218       emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10219       emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10220 				    gen_reg_rtx (Pmode),
10221 				    gen_reg_rtx (Pmode)));
10222     }
10223 
10224 #ifdef HAVE_ENABLE_EXECUTE_STACK
10225   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10226 		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10227 #endif
10228 }
10229 
10230 /* Perform any machine-specific adjustment in the address of the trampoline.
10231    ADDR contains the address that was passed to pa_trampoline_init.
10232    Adjust the trampoline address to point to the plabel at offset 56.  */
10233 
10234 static rtx
10235 pa_trampoline_adjust_address (rtx addr)
10236 {
10237   if (!TARGET_64BIT)
10238     addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10239   return addr;
10240 }
10241 
10242 static rtx
10243 pa_delegitimize_address (rtx orig_x)
10244 {
10245   rtx x = delegitimize_mem_from_attrs (orig_x);
10246 
10247   if (GET_CODE (x) == LO_SUM
10248       && GET_CODE (XEXP (x, 1)) == UNSPEC
10249       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10250     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10251   return x;
10252 }
10253 
10254 static rtx
10255 pa_internal_arg_pointer (void)
10256 {
10257   /* The argument pointer and the hard frame pointer are the same in
10258      the 32-bit runtime, so we don't need a copy.  */
10259   if (TARGET_64BIT)
10260     return copy_to_reg (virtual_incoming_args_rtx);
10261   else
10262     return virtual_incoming_args_rtx;
10263 }
10264 
10265 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10266    Frame pointer elimination is automatically handled.  */
10267 
10268 static bool
10269 pa_can_eliminate (const int from, const int to)
10270 {
10271   /* The argument cannot be eliminated in the 64-bit runtime.  */
10272   if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10273     return false;
10274 
10275   return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10276           ? ! frame_pointer_needed
10277           : true);
10278 }
10279 
10280 /* Define the offset between two registers, FROM to be eliminated and its
10281    replacement TO, at the start of a routine.  */
10282 HOST_WIDE_INT
10283 pa_initial_elimination_offset (int from, int to)
10284 {
10285   HOST_WIDE_INT offset;
10286 
10287   if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10288       && to == STACK_POINTER_REGNUM)
10289     offset = -pa_compute_frame_size (get_frame_size (), 0);
10290   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10291     offset = 0;
10292   else
10293     gcc_unreachable ();
10294 
10295   return offset;
10296 }
10297 
10298 static void
10299 pa_conditional_register_usage (void)
10300 {
10301   int i;
10302 
10303   if (!TARGET_64BIT && !TARGET_PA_11)
10304     {
10305       for (i = 56; i <= FP_REG_LAST; i++)
10306 	fixed_regs[i] = call_used_regs[i] = 1;
10307       for (i = 33; i < 56; i += 2)
10308 	fixed_regs[i] = call_used_regs[i] = 1;
10309     }
10310   if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10311     {
10312       for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10313 	fixed_regs[i] = call_used_regs[i] = 1;
10314     }
10315   if (flag_pic)
10316     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10317 }
10318 
10319 /* Target hook for c_mode_for_suffix.  */
10320 
10321 static machine_mode
10322 pa_c_mode_for_suffix (char suffix)
10323 {
10324   if (HPUX_LONG_DOUBLE_LIBRARY)
10325     {
10326       if (suffix == 'q')
10327 	return TFmode;
10328     }
10329 
10330   return VOIDmode;
10331 }
10332 
10333 /* Target hook for function_section.  */
10334 
10335 static section *
10336 pa_function_section (tree decl, enum node_frequency freq,
10337 		     bool startup, bool exit)
10338 {
10339   /* Put functions in text section if target doesn't have named sections.  */
10340   if (!targetm_common.have_named_sections)
10341     return text_section;
10342 
10343   /* Force nested functions into the same section as the containing
10344      function.  */
10345   if (decl
10346       && DECL_SECTION_NAME (decl) == NULL
10347       && DECL_CONTEXT (decl) != NULL_TREE
10348       && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10349       && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10350     return function_section (DECL_CONTEXT (decl));
10351 
10352   /* Otherwise, use the default function section.  */
10353   return default_function_section (decl, freq, startup, exit);
10354 }
10355 
10356 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10357 
10358    In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
10359    that need more than three instructions to load prior to reload.  This
10360    limit is somewhat arbitrary.  It takes three instructions to load a
10361    CONST_INT from memory but two are memory accesses.  It may be better
10362    to increase the allowed range for CONST_INTS.  We may also be able
10363    to handle CONST_DOUBLES.  */
10364 
10365 static bool
10366 pa_legitimate_constant_p (machine_mode mode, rtx x)
10367 {
10368   if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10369     return false;
10370 
10371   if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10372     return false;
10373 
10374   /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10375      legitimate constants.  The other variants can't be handled by
10376      the move patterns after reload starts.  */
10377   if (tls_referenced_p (x))
10378     return false;
10379 
10380   if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10381     return false;
10382 
10383   if (TARGET_64BIT
10384       && HOST_BITS_PER_WIDE_INT > 32
10385       && GET_CODE (x) == CONST_INT
10386       && !reload_in_progress
10387       && !reload_completed
10388       && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10389       && !pa_cint_ok_for_move (UINTVAL (x)))
10390     return false;
10391 
10392   if (function_label_operand (x, mode))
10393     return false;
10394 
10395   return true;
10396 }
10397 
10398 /* Implement TARGET_SECTION_TYPE_FLAGS.  */
10399 
10400 static unsigned int
10401 pa_section_type_flags (tree decl, const char *name, int reloc)
10402 {
10403   unsigned int flags;
10404 
10405   flags = default_section_type_flags (decl, name, reloc);
10406 
10407   /* Function labels are placed in the constant pool.  This can
10408      cause a section conflict if decls are put in ".data.rel.ro"
10409      or ".data.rel.ro.local" using the __attribute__ construct.  */
10410   if (strcmp (name, ".data.rel.ro") == 0
10411       || strcmp (name, ".data.rel.ro.local") == 0)
10412     flags |= SECTION_WRITE | SECTION_RELRO;
10413 
10414   return flags;
10415 }
10416 
10417 /* pa_legitimate_address_p recognizes an RTL expression that is a
10418    valid memory address for an instruction.  The MODE argument is the
10419    machine mode for the MEM expression that wants to use this address.
10420 
10421    On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10422    REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
10423    available with floating point loads and stores, and integer loads.
10424    We get better code by allowing indexed addresses in the initial
10425    RTL generation.
10426 
10427    The acceptance of indexed addresses as legitimate implies that we
10428    must provide patterns for doing indexed integer stores, or the move
10429    expanders must force the address of an indexed store to a register.
10430    We have adopted the latter approach.
10431 
10432    Another function of pa_legitimate_address_p is to ensure that
10433    the base register is a valid pointer for indexed instructions.
10434    On targets that have non-equivalent space registers, we have to
10435    know at the time of assembler output which register in a REG+REG
10436    pair is the base register.  The REG_POINTER flag is sometimes lost
10437    in reload and the following passes, so it can't be relied on during
10438    code generation.  Thus, we either have to canonicalize the order
10439    of the registers in REG+REG indexed addresses, or treat REG+REG
10440    addresses separately and provide patterns for both permutations.
10441 
10442    The latter approach requires several hundred additional lines of
10443    code in pa.md.  The downside to canonicalizing is that a PLUS
10444    in the wrong order can't combine to form to make a scaled indexed
10445    memory operand.  As we won't need to canonicalize the operands if
10446    the REG_POINTER lossage can be fixed, it seems better canonicalize.
10447 
10448    We initially break out scaled indexed addresses in canonical order
10449    in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
10450    scaled indexed addresses during RTL generation.  However, fold_rtx
10451    has its own opinion on how the operands of a PLUS should be ordered.
10452    If one of the operands is equivalent to a constant, it will make
10453    that operand the second operand.  As the base register is likely to
10454    be equivalent to a SYMBOL_REF, we have made it the second operand.
10455 
10456    pa_legitimate_address_p accepts REG+REG as legitimate when the
10457    operands are in the order INDEX+BASE on targets with non-equivalent
10458    space registers, and in any order on targets with equivalent space
10459    registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10460 
10461    We treat a SYMBOL_REF as legitimate if it is part of the current
10462    function's constant-pool, because such addresses can actually be
10463    output as REG+SMALLINT.  */
10464 
10465 static bool
10466 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10467 {
10468   if ((REG_P (x)
10469        && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10470 		  : REG_OK_FOR_BASE_P (x)))
10471       || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10472 	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10473 	  && REG_P (XEXP (x, 0))
10474 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10475 		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10476     return true;
10477 
10478   if (GET_CODE (x) == PLUS)
10479     {
10480       rtx base, index;
10481 
10482       /* For REG+REG, the base register should be in XEXP (x, 1),
10483 	 so check it first.  */
10484       if (REG_P (XEXP (x, 1))
10485 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10486 		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10487 	base = XEXP (x, 1), index = XEXP (x, 0);
10488       else if (REG_P (XEXP (x, 0))
10489 	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10490 			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10491 	base = XEXP (x, 0), index = XEXP (x, 1);
10492       else
10493 	return false;
10494 
10495       if (GET_CODE (index) == CONST_INT)
10496 	{
10497 	  if (INT_5_BITS (index))
10498 	    return true;
10499 
10500 	  /* When INT14_OK_STRICT is false, a secondary reload is needed
10501 	     to adjust the displacement of SImode and DImode floating point
10502 	     instructions but this may fail when the register also needs
10503 	     reloading.  So, we return false when STRICT is true.  We
10504 	     also reject long displacements for float mode addresses since
10505 	     the majority of accesses will use floating point instructions
10506 	     that don't support 14-bit offsets.  */
10507 	  if (!INT14_OK_STRICT
10508 	      && (strict || !(reload_in_progress || reload_completed))
10509 	      && mode != QImode
10510 	      && mode != HImode)
10511 	    return false;
10512 
10513 	  return base14_operand (index, mode);
10514 	}
10515 
10516       if (!TARGET_DISABLE_INDEXING
10517 	  /* Only accept the "canonical" INDEX+BASE operand order
10518 	     on targets with non-equivalent space registers.  */
10519 	  && (TARGET_NO_SPACE_REGS
10520 	      ? REG_P (index)
10521 	      : (base == XEXP (x, 1) && REG_P (index)
10522 		 && (reload_completed
10523 		     || (reload_in_progress && HARD_REGISTER_P (base))
10524 		     || REG_POINTER (base))
10525 		 && (reload_completed
10526 		     || (reload_in_progress && HARD_REGISTER_P (index))
10527 		     || !REG_POINTER (index))))
10528 	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10529 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10530 		     : REG_OK_FOR_INDEX_P (index))
10531 	  && borx_reg_operand (base, Pmode)
10532 	  && borx_reg_operand (index, Pmode))
10533 	return true;
10534 
10535       if (!TARGET_DISABLE_INDEXING
10536 	  && GET_CODE (index) == MULT
10537 	  /* Only accept base operands with the REG_POINTER flag prior to
10538 	     reload on targets with non-equivalent space registers.  */
10539 	  && (TARGET_NO_SPACE_REGS
10540 	      || (base == XEXP (x, 1)
10541 		  && (reload_completed
10542 		      || (reload_in_progress && HARD_REGISTER_P (base))
10543 		      || REG_POINTER (base))))
10544 	  && REG_P (XEXP (index, 0))
10545 	  && GET_MODE (XEXP (index, 0)) == Pmode
10546 	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10547 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10548 		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10549 	  && GET_CODE (XEXP (index, 1)) == CONST_INT
10550 	  && INTVAL (XEXP (index, 1))
10551 	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10552 	  && borx_reg_operand (base, Pmode))
10553 	return true;
10554 
10555       return false;
10556     }
10557 
10558   if (GET_CODE (x) == LO_SUM)
10559     {
10560       rtx y = XEXP (x, 0);
10561 
10562       if (GET_CODE (y) == SUBREG)
10563 	y = SUBREG_REG (y);
10564 
10565       if (REG_P (y)
10566 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10567 		     : REG_OK_FOR_BASE_P (y)))
10568 	{
10569 	  /* Needed for -fPIC */
10570 	  if (mode == Pmode
10571 	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
10572 	    return true;
10573 
10574 	  if (!INT14_OK_STRICT
10575 	      && (strict || !(reload_in_progress || reload_completed))
10576 	      && mode != QImode
10577 	      && mode != HImode)
10578 	    return false;
10579 
10580 	  if (CONSTANT_P (XEXP (x, 1)))
10581 	    return true;
10582 	}
10583       return false;
10584     }
10585 
10586   if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10587     return true;
10588 
10589   return false;
10590 }
10591 
10592 /* Look for machine dependent ways to make the invalid address AD a
10593    valid address.
10594 
10595    For the PA, transform:
10596 
10597         memory(X + <large int>)
10598 
10599    into:
10600 
10601         if (<large int> & mask) >= 16
10602           Y = (<large int> & ~mask) + mask + 1  Round up.
10603         else
10604           Y = (<large int> & ~mask)             Round down.
10605         Z = X + Y
10606         memory (Z + (<large int> - Y));
10607 
10608    This makes reload inheritance and reload_cse work better since Z
10609    can be reused.
10610 
10611    There may be more opportunities to improve code with this hook.  */
10612 
10613 rtx
10614 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10615 			      int opnum, int type,
10616 			      int ind_levels ATTRIBUTE_UNUSED)
10617 {
10618   long offset, newoffset, mask;
10619   rtx new_rtx, temp = NULL_RTX;
10620 
10621   mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10622 	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10623 
10624   if (optimize && GET_CODE (ad) == PLUS)
10625     temp = simplify_binary_operation (PLUS, Pmode,
10626 				      XEXP (ad, 0), XEXP (ad, 1));
10627 
10628   new_rtx = temp ? temp : ad;
10629 
10630   if (optimize
10631       && GET_CODE (new_rtx) == PLUS
10632       && GET_CODE (XEXP (new_rtx, 0)) == REG
10633       && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10634     {
10635       offset = INTVAL (XEXP ((new_rtx), 1));
10636 
10637       /* Choose rounding direction.  Round up if we are >= halfway.  */
10638       if ((offset & mask) >= ((mask + 1) / 2))
10639 	newoffset = (offset & ~mask) + mask + 1;
10640       else
10641 	newoffset = offset & ~mask;
10642 
10643       /* Ensure that long displacements are aligned.  */
10644       if (mask == 0x3fff
10645 	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
10646 	      || (TARGET_64BIT && (mode) == DImode)))
10647 	newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10648 
10649       if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10650 	{
10651 	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10652 			       GEN_INT (newoffset));
10653 	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10654 	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10655 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10656 		       opnum, (enum reload_type) type);
10657 	  return ad;
10658 	}
10659     }
10660 
10661   return NULL_RTX;
10662 }
10663 
10664 /* Output address vector.  */
10665 
10666 void
10667 pa_output_addr_vec (rtx lab, rtx body)
10668 {
10669   int idx, vlen = XVECLEN (body, 0);
10670 
10671   if (!TARGET_SOM)
10672     fputs ("\t.align 4\n", asm_out_file);
10673   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10674   if (TARGET_GAS)
10675     fputs ("\t.begin_brtab\n", asm_out_file);
10676   for (idx = 0; idx < vlen; idx++)
10677     {
10678       ASM_OUTPUT_ADDR_VEC_ELT
10679 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10680     }
10681   if (TARGET_GAS)
10682     fputs ("\t.end_brtab\n", asm_out_file);
10683 }
10684 
10685 /* Output address difference vector.  */
10686 
10687 void
10688 pa_output_addr_diff_vec (rtx lab, rtx body)
10689 {
10690   rtx base = XEXP (XEXP (body, 0), 0);
10691   int idx, vlen = XVECLEN (body, 1);
10692 
10693   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10694   if (TARGET_GAS)
10695     fputs ("\t.begin_brtab\n", asm_out_file);
10696   for (idx = 0; idx < vlen; idx++)
10697     {
10698       ASM_OUTPUT_ADDR_DIFF_ELT
10699 	(asm_out_file,
10700 	 body,
10701 	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10702 	 CODE_LABEL_NUMBER (base));
10703     }
10704   if (TARGET_GAS)
10705     fputs ("\t.end_brtab\n", asm_out_file);
10706 }
10707 
10708 /* This is a helper function for the other atomic operations.  This function
10709    emits a loop that contains SEQ that iterates until a compare-and-swap
10710    operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
10711    a set of instructions that takes a value from OLD_REG as an input and
10712    produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
10713    set to the current contents of MEM.  After SEQ, a compare-and-swap will
10714    attempt to update MEM with NEW_REG.  The function returns true when the
10715    loop was generated successfully.  */
10716 
10717 static bool
10718 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10719 {
10720   machine_mode mode = GET_MODE (mem);
10721   rtx_code_label *label;
10722   rtx cmp_reg, success, oldval;
10723 
10724   /* The loop we want to generate looks like
10725 
10726         cmp_reg = mem;
10727       label:
10728         old_reg = cmp_reg;
10729         seq;
10730         (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10731         if (success)
10732           goto label;
10733 
10734      Note that we only do the plain load from memory once.  Subsequent
10735      iterations use the value loaded by the compare-and-swap pattern.  */
10736 
10737   label = gen_label_rtx ();
10738   cmp_reg = gen_reg_rtx (mode);
10739 
10740   emit_move_insn (cmp_reg, mem);
10741   emit_label (label);
10742   emit_move_insn (old_reg, cmp_reg);
10743   if (seq)
10744     emit_insn (seq);
10745 
10746   success = NULL_RTX;
10747   oldval = cmp_reg;
10748   if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10749                                        new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10750                                        MEMMODEL_RELAXED))
10751     return false;
10752 
10753   if (oldval != cmp_reg)
10754     emit_move_insn (cmp_reg, oldval);
10755 
10756   /* Mark this jump predicted not taken.  */
10757   emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
10758                            GET_MODE (success), 1, label,
10759 			   profile_probability::guessed_never ());
10760   return true;
10761 }
10762 
10763 /* This function tries to implement an atomic exchange operation using a
10764    compare_and_swap loop. VAL is written to *MEM.  The previous contents of
10765    *MEM are returned, using TARGET if possible.  No memory model is required
10766    since a compare_and_swap loop is seq-cst.  */
10767 
10768 rtx
10769 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10770 {
10771   machine_mode mode = GET_MODE (mem);
10772 
10773   if (can_compare_and_swap_p (mode, true))
10774     {
10775       if (!target || !register_operand (target, mode))
10776         target = gen_reg_rtx (mode);
10777       if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10778         return target;
10779     }
10780 
10781   return NULL_RTX;
10782 }
10783 
10784 /* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
10785    arguments passed by hidden reference in the 32-bit HP runtime.  Users
10786    can override this behavior for better compatibility with openmp at the
10787    risk of library incompatibilities.  Arguments are always passed by value
10788    in the 64-bit HP runtime.  */
10789 
10790 static bool
10791 pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED,
10792 		  machine_mode mode ATTRIBUTE_UNUSED,
10793 		  const_tree type ATTRIBUTE_UNUSED,
10794 		  bool named ATTRIBUTE_UNUSED)
10795 {
10796   return !TARGET_CALLER_COPIES;
10797 }
10798 
10799 /* Implement TARGET_HARD_REGNO_NREGS.  */
10800 
10801 static unsigned int
10802 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
10803 {
10804   return PA_HARD_REGNO_NREGS (regno, mode);
10805 }
10806 
10807 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
10808 
10809 static bool
10810 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10811 {
10812   return PA_HARD_REGNO_MODE_OK (regno, mode);
10813 }
10814 
10815 /* Implement TARGET_STARTING_FRAME_OFFSET.
10816 
10817    On the 32-bit ports, we reserve one slot for the previous frame
10818    pointer and one fill slot.  The fill slot is for compatibility
10819    with HP compiled programs.  On the 64-bit ports, we reserve one
10820    slot for the previous frame pointer.  */
10821 
10822 static HOST_WIDE_INT
10823 pa_starting_frame_offset (void)
10824 {
10825   return 8;
10826 }
10827 
10828 /* Figure out the size in words of the function argument.  The size
10829    returned by this function should always be greater than zero because
10830    we pass variable and zero sized objects by reference.  */
10831 
10832 HOST_WIDE_INT
10833 pa_function_arg_size (machine_mode mode, const_tree type)
10834 {
10835   HOST_WIDE_INT size;
10836 
10837   size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
10838   return CEIL (size, UNITS_PER_WORD);
10839 }
10840 
10841 #include "gt-pa.h"
10842